# What is the best place to open a moderately priced, fast casual asian dessert spot in NYC?
### Considerations
* Where are all bakeries primarily aggregated?
* Using review count as a measure of success, does their price point affect the amount of reviews they recieve?
* Does the number/type of categories these businesses put on their profile affect their review_count?
* Integrate census data, how does median income affect these parameters?
    * Business 'success' using review_count (see if I can find out information for how long they've been open, take the average rev/year)
    * How income, price, success and location relate to eachother
  

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import glob
import re
import seaborn as sns

In [2]:
# Importing dataframes
dataframes = []
for file in glob.glob('Resources/*.csv'):
    dataframes.append(pd.read_csv(file))

# Concatenate dataframes
df_main = pd.concat(dataframes, ignore_index=True)
df_main.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,epvSBt9LldIgyc08f8nIAw,la-bicyclette-bakery-brooklyn-4,La Bicyclette Bakery,https://s3-media2.fl.yelpcdn.com/bphoto/aPBPM3...,False,https://www.yelp.com/biz/la-bicyclette-bakery-...,3,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",4.5,"{'latitude': 40.68358704921419, 'longitude': -...",[],"{'address1': '305 Court St', 'address2': '', '...",,,2431.404071,
1,-k_5NsYnKCHGTdBaqegkrw,le-fournil-new-york,Le Fournil,https://s3-media2.fl.yelpcdn.com/bphoto/JxNmFc...,False,https://www.yelp.com/biz/le-fournil-new-york?a...,118,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",4.5,"{'latitude': 40.727947, 'longitude': -73.988489}",['delivery'],"{'address1': '115 2nd Ave', 'address2': '', 'a...",,,2548.470477,$$
2,0kW0112jMERVjpRzWT4F7Q,la-bicyclette-bakery-brooklyn-3,La Bicyclette Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/_qCjVd...,False,https://www.yelp.com/biz/la-bicyclette-bakery-...,84,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",5.0,"{'latitude': 40.714400045936806, 'longitude': ...",['delivery'],"{'address1': '667 Driggs Ave', 'address2': Non...",13479160000.0,(347) 916-1417,3128.240171,
3,XSwagBljEsxG2i1rq4GEAQ,almondine-bakery-brooklyn,Almondine Bakery,https://s3-media2.fl.yelpcdn.com/bphoto/n_j7ya...,False,https://www.yelp.com/biz/almondine-bakery-broo...,404,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",4.0,"{'latitude': 40.7033174, 'longitude': -73.9912...",['delivery'],"{'address1': '85 Water St', 'address2': None, ...",17187980000.0,(718) 797-5026,348.916339,$$
4,fPQ2eE9lm8tc87O5-GLjyA,mille-feuille-bakery-new-york-3,Mille-Feuille Bakery,https://s3-media1.fl.yelpcdn.com/bphoto/HHm3mm...,False,https://www.yelp.com/biz/mille-feuille-bakery-...,441,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 40.782425, 'longitude': -73.9811603}","['delivery', 'pickup']","{'address1': '2175 Broadway', 'address2': '', ...",12123630000.0,(212) 362-6261,8630.996628,$$


In [3]:
# Create sublist
master_df_sub = df_main[['name', 'review_count', 'categories', 'rating', 'coordinates', 'price']]
master_df_sub.head()

Unnamed: 0,name,review_count,categories,rating,coordinates,price
0,La Bicyclette Bakery,3,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",4.5,"{'latitude': 40.68358704921419, 'longitude': -...",
1,Le Fournil,118,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",4.5,"{'latitude': 40.727947, 'longitude': -73.988489}",$$
2,La Bicyclette Bakery,84,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",5.0,"{'latitude': 40.714400045936806, 'longitude': ...",
3,Almondine Bakery,404,"[{'alias': 'bakeries', 'title': 'Bakeries'}]",4.0,"{'latitude': 40.7033174, 'longitude': -73.9912...",$$
4,Mille-Feuille Bakery,441,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 40.782425, 'longitude': -73.9811603}",$$


In [4]:
# Cleaning function
chars_to_remove = ['[', '{', '\'', 'alias', 'title', 'latitude', 'longitude', "}", ":", "}", "]"]
col_list = ['categories', 'coordinates']
def string_cleaner(df, col_list, char_list):
    for item in char_list:
        for col in col_list:
            df.loc[:, col] = df[col].str.replace(item, "", case=False)
    return df

string_cleaner(string_cleaner(master_df_sub, col_list, chars_to_remove), col_list, chars_to_remove)

Unnamed: 0,name,review_count,categories,rating,coordinates,price
0,La Bicyclette Bakery,3,"bakeries, Bakeries",4.5,"40.68358704921419, -73.99509019999999",
1,Le Fournil,118,"bakeries, Bakeries",4.5,"40.727947, -73.988489",$$
2,La Bicyclette Bakery,84,"bakeries, Bakeries",5.0,"40.714400045936806, -73.95911000669003",
3,Almondine Bakery,404,"bakeries, Bakeries",4.0,"40.7033174, -73.9912506",$$
4,Mille-Feuille Bakery,441,"coffee, Coffee & Tea, bakeries, Bakeries",4.5,"40.782425, -73.9811603",$$
...,...,...,...,...,...,...
5093,D'Orsi's Bakery,55,"catering, Caterers, bakeries, Bakeries",4.0,"40.56669, -74.2519499",$$
5094,Loqma Cafe,20,"turkish, Turkish, cafes, Cafes, seafood, ...",3.5,"40.585861, -73.953604",$$
5095,Scarpetta,2264,"italian, Italian, bars, Bars, desserts, ...",4.0,"40.744509, -73.9856",$$$
5096,Miss American Pie,112,"bakeries, Bakeries, desserts, Desserts, c...",5.0,"40.68049, -73.97785",


In [5]:
# Cleaning duplicate aliases by split and delete
def remove_alias(df, column):
    new_df = df[column].str.split(pat=', ', expand=True)
    new_df = new_df.drop(labels=range(0, 8, 2), axis=1)
    df = df.drop(column, axis=1)
    df = pd.concat([df, new_df], axis=1)
    return df

master_df_sub = remove_alias(master_df_sub, 'categories')

In [6]:
# Split Coordinates & rename columns
master_df_sub[['coord_lat', 'coord_long']] = master_df_sub['coordinates'].str.split(pat=', ', expand=True)
master_df_sub = master_df_sub.drop('coordinates', axis=1)
master_df_sub.columns = ['name', 'review_count', 'rating', 'price', 'cat1', 'cat2', 'cat3','cat4', 'coord_lat', 'coord_long']
master_df_sub.head()

Unnamed: 0,name,review_count,rating,price,cat1,cat2,cat3,cat4,coord_lat,coord_long
0,La Bicyclette Bakery,3,4.5,,Bakeries,,,,40.68358704921419,-73.99509019999999
1,Le Fournil,118,4.5,$$,Bakeries,,,,40.727947,-73.988489
2,La Bicyclette Bakery,84,5.0,,Bakeries,,,,40.714400045936806,-73.95911000669003
3,Almondine Bakery,404,4.0,$$,Bakeries,,,,40.7033174,-73.9912506
4,Mille-Feuille Bakery,441,4.5,$$,Coffee & Tea,Bakeries,,,40.782425,-73.9811603


In [7]:
# Manipulation & Statistics
# Convert price col to int

In [8]:
# Visualization