## Preparation

In [1]:
# Data manipulation (pandas, NumPy)
import pandas as pd
import numpy as np

# Data Visualization
from plotnine import *
import seaborn as sns
import matplotlib.pyplot as plt

# Ignore warnings
# NOTE(review): the 'ignore' filter is installed without a category or module
# restriction, so every warning raised after this point is suppressed,
# including library deprecation notices.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the cleaned Yelp listings, one CSV per city; the paths are listed in
# the same order as the names they are unpacked into.
DC_Yelp, Chicago_Yelp, NY_Yelp, SF_Yelp, LA_Yelp = map(
    pd.read_csv,
    [
        "Data/Yelp/DC_Yelp_cleaned.csv",
        "Data/Yelp/Chicago_Yelp_cleaned.csv",
        "Data/Yelp/NY_Yelp_cleaned.csv",
        "Data/Yelp/SF_Yelp_cleaned.csv",
        "Data/Yelp/LA_Yelp_cleaned.csv",
    ],
)

# Load the Wikipedia tables for the same five cities, in the same order.
DC_Wiki, Chicago_Wiki, NY_Wiki, SF_Wiki, LA_Wiki = map(
    pd.read_csv,
    [
        "Data/Wikipedia/Wiki_DC.csv",
        "Data/Wikipedia/Wiki_Chicago.csv",
        "Data/Wikipedia/Wiki_NY.csv",
        "Data/Wikipedia/Wiki_SF.csv",
        "Data/Wikipedia/Wiki_LA.csv",
    ],
)

## Merge DC data

In [3]:
# Preview the DC Yelp frame (loaded from Data/Yelp/DC_Yelp_cleaned.csv) before merging.
DC_Yelp

Unnamed: 0,business_name,rating,review_count,price_range,category
0,Le Diplomate,4.5,3596,$$$,"['Brasseries', 'French', 'Breakfast & Brunch']"
1,Gypsy Kitchen,4.5,70,,"['Tapas/Small Plates', 'Mediterranean']"
2,Butter Me Up,4.5,103,,['Breakfast & Brunch']
3,The Block,5.0,12,,"['Food Court', 'Bars', 'Asian Fusion']"
4,The Alibi,4.5,441,$$,"['Pubs', 'Sandwiches', 'Barbeque']"
...,...,...,...,...,...
235,Logan Tavern,4.0,872,$$,"['American (New)', 'Sports Bars', 'American (T..."
236,Uzu,4.5,87,$$,['Ramen']
237,Akira Ramen & Izakaya DC,4.5,49,$$,['Ramen']
238,City Kabob and Curry House - Washington,4.5,138,$$,"['Pakistani', 'Indian', 'Halal']"


In [4]:
# Preview the DC Wikipedia frame (loaded from Data/Wikipedia/Wiki_DC.csv):
# one row per restaurant, with one star-label column per year.
DC_Wiki

Unnamed: 0,Name,Neighborhood/City,2017,2018,2019,2020
0,Blue Duck Tavern,West End,1 Michelin star,1 Michelin star,1 Michelin star,
1,Bresca,Logan Circle,,,1 Michelin star,1 Michelin star
2,Fiola,Penn Quarter,1 Michelin star,1 Michelin star,1 Michelin star,1 Michelin star
3,Gravitas,Ivy City,,,,1 Michelin star
4,Inn at Little Washington,"Washington, VA",2 Michelin stars,2 Michelin stars,3 Michelin stars,3 Michelin stars
5,Kinship,Mount Vernon Square,1 Michelin star,1 Michelin star,1 Michelin star,1 Michelin star
6,Komi,Dupont Circle,,1 Michelin star,1 Michelin star,1 Michelin star
7,Maydan,U Street,,,,1 Michelin star
8,Masseria,Union Market,1 Michelin star,1 Michelin star,1 Michelin star,1 Michelin star
9,Little Pearl,Eastern Market,,,,1 Michelin star


In [5]:
# Every Wikipedia column except the last one, as a plain Python list.
DC_Wiki.columns[:-1].tolist()

['Name', 'Neighborhood/City', '2017', '2018', '2019']

In [6]:
# Attach the 2020 Michelin label from the Wikipedia table to each DC Yelp row.
# Only the join key ("Name") and the "2020" column are taken from DC_Wiki, and
# they are selected by name, so the result no longer depends on the order of
# the Wikipedia columns.
# how="left" keeps every Yelp row; a restaurant without an exact name match
# gets NaN in `michelin_star`.
DC_YW = (
    DC_Yelp
    .merge(
        DC_Wiki[["Name", "2020"]],
        how="left",
        left_on="business_name",
        right_on="Name",
    )
    .drop(columns="Name")  # the join key duplicates business_name
    .rename(columns={"2020": "michelin_star"})
)

DC_YW

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star
0,Le Diplomate,4.5,3596,$$$,"['Brasseries', 'French', 'Breakfast & Brunch']",
1,Gypsy Kitchen,4.5,70,,"['Tapas/Small Plates', 'Mediterranean']",
2,Butter Me Up,4.5,103,,['Breakfast & Brunch'],
3,The Block,5.0,12,,"['Food Court', 'Bars', 'Asian Fusion']",
4,The Alibi,4.5,441,$$,"['Pubs', 'Sandwiches', 'Barbeque']",
...,...,...,...,...,...,...
235,Logan Tavern,4.0,872,$$,"['American (New)', 'Sports Bars', 'American (T...",
236,Uzu,4.5,87,$$,['Ramen'],
237,Akira Ramen & Izakaya DC,4.5,49,$$,['Ramen'],
238,City Kabob and Curry House - Washington,4.5,138,$$,"['Pakistani', 'Indian', 'Halal']",


In [7]:
# Convert the Wikipedia star labels to integer star counts and tag the city.
# Missing labels (no match in the Wikipedia table) become 0.
# The cast to int64 is explicit rather than relying on `replace` to infer an
# integer dtype; it also fails loudly if a label outside the three known ones
# is still present, instead of leaving text in a numeric column.
DC_YW = DC_YW.assign(
    michelin_star=(
        DC_YW["michelin_star"]
        .fillna(0)
        .replace({
            "1 Michelin star": 1,
            "2 Michelin stars": 2,
            "3 Michelin stars": 3,
        })
        .astype("int64")
    ),
    region="Washington DC",
)

DC_YW

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
0,Le Diplomate,4.5,3596,$$$,"['Brasseries', 'French', 'Breakfast & Brunch']",0,Washington DC
1,Gypsy Kitchen,4.5,70,,"['Tapas/Small Plates', 'Mediterranean']",0,Washington DC
2,Butter Me Up,4.5,103,,['Breakfast & Brunch'],0,Washington DC
3,The Block,5.0,12,,"['Food Court', 'Bars', 'Asian Fusion']",0,Washington DC
4,The Alibi,4.5,441,$$,"['Pubs', 'Sandwiches', 'Barbeque']",0,Washington DC
...,...,...,...,...,...,...,...
235,Logan Tavern,4.0,872,$$,"['American (New)', 'Sports Bars', 'American (T...",0,Washington DC
236,Uzu,4.5,87,$$,['Ramen'],0,Washington DC
237,Akira Ramen & Izakaya DC,4.5,49,$$,['Ramen'],0,Washington DC
238,City Kabob and Curry House - Washington,4.5,138,$$,"['Pakistani', 'Indian', 'Halal']",0,Washington DC


In [8]:
# Keep only the DC rows whose michelin_star value is non-zero.
DC_YW.loc[DC_YW["michelin_star"] != 0]

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
17,Maydan,4.5,826,$$,"['Moroccan', 'Lebanese']",1,Washington DC
175,Tail Up Goat,4.5,677,$$$,"['Cocktail Bars', 'American (New)', 'Italian']",1,Washington DC


## Merge Chicago data

In [9]:
# Attach the 2020 Michelin label from the Wikipedia table to each Chicago Yelp
# row, convert it to an integer star count, and tag the city.
# - "Name" and "2020" are selected by name, so the result does not depend on
#   the order of the Wikipedia columns.
# - how="left" keeps every Yelp row; rows without an exact name match get 0.
# - The cast to int64 is explicit rather than relying on `replace` to infer an
#   integer dtype, and it raises if an unexpected label is still present.
Chicago_YW = (
    Chicago_Yelp
    .merge(
        Chicago_Wiki[["Name", "2020"]],
        how="left",
        left_on="business_name",
        right_on="Name",
    )
    .drop(columns="Name")  # the join key duplicates business_name
    .rename(columns={"2020": "michelin_star"})
    .assign(
        michelin_star=lambda merged: (
            merged["michelin_star"]
            .fillna(0)
            .replace({
                "1 Michelin star": 1,
                "2 Michelin stars": 2,
                "3 Michelin stars": 3,
            })
            .astype("int64")
        ),
        region="Chicago",
    )
)

Chicago_YW

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
0,Cheba Hut Toasted Subs - Wicker Park,5.0,13,,"['Sandwiches', 'American (Traditional)', 'Beer...",0,Chicago
1,Au Cheval,4.5,7220,$$,"['Bars', 'American (Traditional)']",0,Chicago
2,The Purple Pig,4.0,7211,$$,"['Tapas/Small Plates', 'Mediterranean', 'Break...",0,Chicago
3,Girl & the Goat,4.5,8688,$$$,"['American (New)', 'Bakeries', 'Coffee & Tea']",0,Chicago
4,Little Goat,4.0,4590,$$,['Diners'],0,Chicago
...,...,...,...,...,...,...,...
235,AVVIO,4.5,28,,"['Italian', 'Salad', 'Soup']",0,Chicago
236,Wok N’ Bao,4.5,19,,['Chinese'],0,Chicago
237,Farm Bar Lakeview,4.0,291,$$,"['Bars', 'American (New)']",0,Chicago
238,Robert Et Fils,5.0,6,,['French'],0,Chicago


In [10]:
# Keep only the Chicago rows whose michelin_star value is non-zero.
Chicago_YW.loc[Chicago_YW["michelin_star"] != 0]

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
46,Alinea,4.5,2265,$$$$,"['American (New)', 'Modern European']",3,Chicago
53,Boka,4.5,1452,$$$,['American (New)'],1,Chicago
228,Oriole,4.5,307,$$$$,['American (New)'],2,Chicago


## Merge NY data

In [11]:
# Attach the 2020 Michelin label from the Wikipedia table to each New York Yelp
# row, convert it to an integer star count, and tag the city.
# - "Name" and "2020" are selected by name, so the result does not depend on
#   the order of the Wikipedia columns.
# - how="left" keeps every Yelp row; rows without an exact name match get 0.
# - The cast to int64 is explicit rather than relying on `replace` to infer an
#   integer dtype, and it raises if an unexpected label is still present.
NY_YW = (
    NY_Yelp
    .merge(
        NY_Wiki[["Name", "2020"]],
        how="left",
        left_on="business_name",
        right_on="Name",
    )
    .drop(columns="Name")  # the join key duplicates business_name
    .rename(columns={"2020": "michelin_star"})
    .assign(
        michelin_star=lambda merged: (
            merged["michelin_star"]
            .fillna(0)
            .replace({
                "1 Michelin star": 1,
                "2 Michelin stars": 2,
                "3 Michelin stars": 3,
            })
            .astype("int64")
        ),
        region="New York City",
    )
)

NY_YW

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
0,Birria Landia Williamsburg,5.0,13,,"['Food Trucks', 'Tacos']",0,New York City
1,Jacob’s Pickles,4.0,4276,$$,"['Comfort Food', 'Southern', 'American (Tradit...",0,New York City
2,Amélie,4.5,2693,$$,"['French', 'Wine Bars']",0,New York City
3,The Osprey,4.0,227,$$,['American (New)'],0,New York City
4,Peak,5.0,16,,"['American (New)', 'Cocktail Bars', 'Venues & ...",0,New York City
...,...,...,...,...,...,...,...
235,Gotham West Market,4.0,553,$$,['Food Court'],0,New York City
236,Chick’nCone,4.5,149,$$,"['Chicken Shop', 'Waffles']",0,New York City
237,MOKYO,4.5,58,,"['Korean', 'Tapas/Small Plates', 'Asian Fusion']",0,New York City
238,Au Za’atar,4.0,948,$$,"['Wine Bars', 'Lebanese', 'Mediterranean']",0,New York City


In [12]:
# Keep only the New York rows whose michelin_star value is non-zero.
NY_YW.loc[NY_YW["michelin_star"] != 0]

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
21,Oxomoco,4.0,314,$$$,['Mexican'],1,New York City
171,Oxalis,4.5,131,$$$,"['French', 'American (New)']",1,New York City


## Merge SF data

In [13]:
# Attach the Michelin label from the Wikipedia table to each San Francisco Yelp
# row, convert it to an integer star count, and tag the city.
# - This cell uses the "2019" column, whereas the DC, Chicago and New York
#   cells use "2020".
# - "Name" and "2019" are selected by name, so the result does not depend on
#   the order of the Wikipedia columns.
# - how="left" keeps every Yelp row; rows without an exact name match get 0.
# - The cast to int64 is explicit rather than relying on `replace` to infer an
#   integer dtype, and it raises if an unexpected label is still present.
SF_YW = (
    SF_Yelp
    .merge(
        SF_Wiki[["Name", "2019"]],
        how="left",
        left_on="business_name",
        right_on="Name",
    )
    .drop(columns="Name")  # the join key duplicates business_name
    .rename(columns={"2019": "michelin_star"})
    .assign(
        michelin_star=lambda merged: (
            merged["michelin_star"]
            .fillna(0)
            .replace({
                "1 Michelin star": 1,
                "2 Michelin stars": 2,
                "3 Michelin stars": 3,
            })
            .astype("int64")
        ),
        region="San Francisco",
    )
)

SF_YW

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
0,Wawa Thai Food,5.0,16,,['Thai'],0,San Francisco
1,World Famous Hotboys,4.5,689,$$,['Southern'],0,San Francisco
2,Daughter Thai Kitchen,4.0,1143,$$,"['Thai', 'Cocktail Bars']",0,San Francisco
3,Farmhouse Kitchen Thai Cuisine,4.0,1110,$$,"['Thai', 'Cocktail Bars', 'Desserts']",0,San Francisco
4,Pomella,4.5,177,,['Middle Eastern'],0,San Francisco
...,...,...,...,...,...,...,...
225,Bardo Lounge & Supper Club,4.5,122,$$,"['Lounges', 'American (New)', 'Cocktail Bars']",0,San Francisco
226,Trabocco Kitchen & Cocktails,4.0,1317,$$,"['Italian', 'Cocktail Bars']",0,San Francisco
227,Sidewalk Street Food,4.0,40,$$,['Vietnamese'],0,San Francisco
228,Roli Roti Gourmet Rotisserie,4.0,89,$$,"['Specialty Food', 'Chicken Shop', 'Food Trucks']",0,San Francisco


In [14]:
# Keep only the San Francisco rows whose michelin_star value is non-zero.
SF_YW.loc[SF_YW["michelin_star"] != 0]

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
172,Commis,4.5,1020,$$$$,['American (New)'],2,San Francisco


## Merge LA data

In [15]:
# Attach the Michelin label from the Wikipedia table to each Los Angeles Yelp
# row, convert it to an integer star count, and tag the city.
# - This cell uses the "2019" column, whereas the DC, Chicago and New York
#   cells use "2020".
# - "Name" and "2019" are selected by name, so the result does not depend on
#   the order of the Wikipedia columns.
# - how="left" keeps every Yelp row; rows without an exact name match get 0.
# - The cast to int64 is explicit rather than relying on `replace` to infer an
#   integer dtype, and it raises if an unexpected label is still present.
LA_YW = (
    LA_Yelp
    .merge(
        LA_Wiki[["Name", "2019"]],
        how="left",
        left_on="business_name",
        right_on="Name",
    )
    .drop(columns="Name")  # the join key duplicates business_name
    .rename(columns={"2019": "michelin_star"})
    .assign(
        michelin_star=lambda merged: (
            merged["michelin_star"]
            .fillna(0)
            .replace({
                "1 Michelin star": 1,
                "2 Michelin stars": 2,
                "3 Michelin stars": 3,
            })
            .astype("int64")
        ),
        region="Los Angeles",
    )
)

LA_YW

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
0,FishPop,5.0,36,,"['Fish & Chips', 'Pop-Up Restaurants']",0,Los Angeles
1,Running Goose,4.5,1077,$$,"['Tapas/Small Plates', 'Gastropubs', 'Latin Am...",0,Los Angeles
2,Howlin’ Ray’s,4.5,6543,$$,"['Southern', 'Chicken Shop', 'American (Tradit...",0,Los Angeles
3,Republique,4.0,5607,$$,"['French', 'Breakfast & Brunch', 'Cocktail Bars']",0,Los Angeles
4,The Rooftop at The Wayfarer Downtown LA,4.0,80,$$$,"['American (New)', 'Cocktail Bars', 'Tiki Bars']",0,Los Angeles
...,...,...,...,...,...,...,...
235,My Dung Sandwich Shop,4.5,134,$,"['Sandwiches', 'Vietnamese']",0,Los Angeles
236,Genwa Korean BBQ,4.5,2826,$$,"['Korean', 'Barbeque', 'Seafood']",0,Los Angeles
237,Hasiba,4.5,217,$$,"['Middle Eastern', 'Vegetarian', 'Kosher']",0,Los Angeles
238,Eataly Los Angeles,3.5,2141,$$,"['Specialty Food', 'Italian', 'Food Court']",0,Los Angeles


In [16]:
# Keep only the Los Angeles rows whose michelin_star value is non-zero.
LA_YW.loc[LA_YW["michelin_star"] != 0]

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
128,Osteria Mozza,4.0,2816,$$$$,"['Italian', 'Wine Bars']",1,Los Angeles
182,Providence,4.5,2680,$$$$,"['American (New)', 'Seafood']",2,Los Angeles


## Stack All Data

In [17]:
# Stack the five per-city frames row-wise; ignore_index=True discards the
# per-city row labels and numbers the combined rows from 0.
Yelp_Wiki = pd.concat(
    objs=(DC_YW, Chicago_YW, NY_YW, SF_YW, LA_YW),
    ignore_index=True,
)
Yelp_Wiki

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region
0,Le Diplomate,4.5,3596,$$$,"['Brasseries', 'French', 'Breakfast & Brunch']",0,Washington DC
1,Gypsy Kitchen,4.5,70,,"['Tapas/Small Plates', 'Mediterranean']",0,Washington DC
2,Butter Me Up,4.5,103,,['Breakfast & Brunch'],0,Washington DC
3,The Block,5.0,12,,"['Food Court', 'Bars', 'Asian Fusion']",0,Washington DC
4,The Alibi,4.5,441,$$,"['Pubs', 'Sandwiches', 'Barbeque']",0,Washington DC
...,...,...,...,...,...,...,...
1185,My Dung Sandwich Shop,4.5,134,$,"['Sandwiches', 'Vietnamese']",0,Los Angeles
1186,Genwa Korean BBQ,4.5,2826,$$,"['Korean', 'Barbeque', 'Seafood']",0,Los Angeles
1187,Hasiba,4.5,217,$$,"['Middle Eastern', 'Vegetarian', 'Kosher']",0,Los Angeles
1188,Eataly Los Angeles,3.5,2141,$$,"['Specialty Food', 'Italian', 'Food Court']",0,Los Angeles


In [18]:
# Inspect the column dtypes of the stacked frame.
Yelp_Wiki.dtypes

business_name     object
rating           float64
review_count       int64
price_range       object
category          object
michelin_star      int64
region            object
dtype: object

In [19]:
# Load the Michelin data and discard the `region` and `url` columns.
Michelin = (
    pd.read_csv("Data/Michelin/Michelin.csv")
    .drop(columns=['region', 'url'])
)
Michelin

Unnamed: 0,name,michelin_guide
0,Himitsu,1
1,BlackSalt,1
2,Tico,1
3,Ottoman Taverna,1
4,1789,1
...,...,...
1424,Baar Baar,1
1425,Enoteca Maria,1
1426,Little Park,1
1427,I Sodi,1


In [20]:
# Add the `michelin_guide` value from the Michelin data to every Yelp row.
# The lookup is reduced to one row per `name` before merging: a left merge
# emits one output row per matching right-hand row, so a name that appears
# more than once in `Michelin` would otherwise duplicate that restaurant here.
# Yelp rows without an exact name match get michelin_guide = 0.
# NOTE(review): the join is on the restaurant name alone, so a same-named
# restaurant in a different city would also match -- confirm this is acceptable.
df = (
    Yelp_Wiki
    .merge(
        Michelin.drop_duplicates(subset="name"),
        how="left",
        left_on="business_name",
        right_on="name",
    )
    .drop(columns="name")  # the join key duplicates business_name
    .assign(
        michelin_guide=lambda merged: merged["michelin_guide"].fillna(0).astype(int)
    )
)

# A left merge against unique keys must keep exactly one row per Yelp row.
assert len(df) == len(Yelp_Wiki), "Michelin merge changed the number of rows"

df

Unnamed: 0,business_name,rating,review_count,price_range,category,michelin_star,region,michelin_guide
0,Le Diplomate,4.5,3596,$$$,"['Brasseries', 'French', 'Breakfast & Brunch']",0,Washington DC,0
1,Gypsy Kitchen,4.5,70,,"['Tapas/Small Plates', 'Mediterranean']",0,Washington DC,0
2,Butter Me Up,4.5,103,,['Breakfast & Brunch'],0,Washington DC,0
3,The Block,5.0,12,,"['Food Court', 'Bars', 'Asian Fusion']",0,Washington DC,0
4,The Alibi,4.5,441,$$,"['Pubs', 'Sandwiches', 'Barbeque']",0,Washington DC,0
...,...,...,...,...,...,...,...,...
1185,My Dung Sandwich Shop,4.5,134,$,"['Sandwiches', 'Vietnamese']",0,Los Angeles,0
1186,Genwa Korean BBQ,4.5,2826,$$,"['Korean', 'Barbeque', 'Seafood']",0,Los Angeles,0
1187,Hasiba,4.5,217,$$,"['Middle Eastern', 'Vegetarian', 'Kosher']",0,Los Angeles,0
1188,Eataly Los Angeles,3.5,2141,$$,"['Specialty Food', 'Italian', 'Food Court']",0,Los Angeles,0


In [21]:
# Write the merged frame to disk as CSV; index=False leaves the row labels out of the file.
df.to_csv(path_or_buf='Data/Merged_Data.csv', index=False)