In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Yelp business dataset, reading only the columns listed below.
yelp_columns = [
    "categories",
    "city",
    "latitude",
    "longitude",
    "name",
    "review_count",
    "stars",
    "state",
]
yelp = pd.read_csv(
    'Python-Yelp-Final-Project/yelp_academic_dataset_business.csv',
    usecols=yelp_columns,
)

In [3]:
# Load the businesses whose `stars` value is empty and has to be filled in,
# then show the frame as the cell output.
no_stars_path = 'Python-Yelp-Final-Project/yelp_business_official_test_empty.csv'
no_stars = pd.read_csv(no_stars_path)
no_stars

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,1,"""Zingerman's Delicatessen""",,"""422 Detroit St""",Ann Arbor,MI,48104,42.284682,-83.745071,,1754,1,Delis;Breakfast & Brunch;Sandwiches;Restaurants
1,2,"""A & R Auto Care""",,"""1202 N Cannon Blvd""",Kannapolis,NC,28083,35.510807,-80.608472,,1,1,Automotive;Towing
2,3,"""Starbucks""",,"""1135 Washington Blvd""",Ogden,UT,84404,41.245215,-111.970461,,21,1,Food;Coffee & Tea
3,4,"""Starbucks""",,"""5210 S Cicero Ave""",Chicago,IL,60638,41.798023,-87.743579,,2,1,Food;Coffee & Tea
4,5,"""Starbucks""",,"""4200 Conroy Rd""",Orlando,FL,32839,28.485466,-81.432003,,30,1,Food;Coffee & Tea
5,6,"""The Tin Fox""",,"""2616 Monroe St""",Madison,WI,53711,43.057715,-89.42837,,7,1,American (New);Restaurants;Coffee & Tea;Food;N...
6,7,"""Working Draft Beer Company""",,"""1129 E Wilson St""",Madison,WI,53703,43.083359,-89.365438,,24,1,Food;Breweries
7,8,"""Il Covo""",,"""585 College Street""",Toronto,ON,M6G 1B2,43.655166,-79.413312,,21,1,Restaurants;Italian
8,9,"""Hawaii Nails & Spa""",,"""1642 Bloor Street W""",Toronto,ON,M6P 1A7,43.655774,-79.456633,,4,1,Beauty & Spas;Day Spas;Nail Salons
9,10,"""Radiant Acupuncture""",,"""572 Bloor Street W""",Toronto,ON,M6G 1K1,43.665242,-79.412033,,1,1,Day Spas;Beauty & Spas;Health & Medical;Acupun...


In [4]:
# Reshape `yelp` from one row per business to one row per (business, category).

# Split the comma-separated `categories` string into one column per category
# and attach those columns to the original frame.
# `axis` is passed by keyword: pandas >= 2.0 no longer accepts it positionally
# in pd.concat and raises a TypeError.
yelp2 = pd.concat(
    [yelp, yelp.categories.str.split(',', expand=True)],
    axis=1,
)
# Drop the original combined categories column
yelp2 = yelp2.drop(columns=['categories'])
# Take the wide data and make it long: every split column is stacked into a
# single `category` column, with the business attributes repeated as id columns
yelp_long = pd.melt(
    yelp2,
    id_vars=["city", "latitude", "longitude", "name", "review_count", "stars", "state"],
    value_name='category',
)
# `variable` only records which split column a value came from; it is not needed
yelp_long = yelp_long.drop(columns=['variable'])
# Strip surrounding whitespace from the category values
yelp_long['category'] = yelp_long['category'].str.strip()

In [5]:
# Discard the rows whose category is missing (padding produced by the earlier
# str.split expansion) and renumber the remaining rows 0..n-1.
yelp_long = (
    yelp_long
    .dropna(subset=['category'])
    .reset_index(drop=True)
)

In [6]:
# Mean star rating for every (city, category) pair.
# The aggregation is named by string: passing the np.mean callable to
# groupby().agg() emits a FutureWarning on recent pandas releases, while
# 'mean' produces the same result without it.
yelp_long_cities = yelp_long.groupby(['city', 'category']).agg({'stars': 'mean'})

In [9]:
# Fill in `stars` for every business in `no_stars` with the mean rating of
# `yelp_long` rows that share at least one of its categories. The average is
# taken from the most specific group that yields a value:
#   1. same city, 2. same state, 3. any location.
# Exactly one message is printed per business, naming the method that
# actually supplied the value.
for index, row in no_stars.iterrows():
    raw_categories = row['categories']
    # A missing `categories` cell is read as float NaN, which has no .split();
    # treat it as "no categories" so the row is left without an estimate.
    categories = raw_categories.split(';') if isinstance(raw_categories, str) else []

    # The category mask is shared by all three methods, so build it once per row.
    in_categories = yelp_long.category.isin(categories)

    # First Method is all categories and the same city
    same_city = yelp_long.city.isin([row['city']])
    star_avg = yelp_long.loc[same_city & in_categories, 'stars'].mean()
    method = 'City avg used'

    # Second Method is all categories and the same state, only if first is NaN
    if pd.isna(star_avg):
        same_state = yelp_long.state.isin([row['state']])
        star_avg = yelp_long.loc[same_state & in_categories, 'stars'].mean()
        method = 'State avg used'

    # Third Method is just categories, not location, if first and second are NaN
    if pd.isna(star_avg):
        star_avg = yelp_long.loc[in_categories, 'stars'].mean()
        method = 'general avg used'

    # No row of yelp_long matched any category: nothing was averaged.
    if pd.isna(star_avg):
        method = 'no avg available'

    print(method)
    print(row['business_id'], ":", star_avg)
    # round() of NaN stays NaN, so an unmatched business keeps an empty rating
    no_stars.at[index, 'stars'] = round(star_avg, 2)
    

State avg used
general avg used
1 : 3.460834754662194
City avg used
2 : 3.5
State avg used
general avg used
3 : 3.6230398778410757
State avg used
4 : 3.5539473684210527
State avg used
5 : 4.0
City avg used
6 : 3.5893801965230536
City avg used
7 : 3.7524916943521593
City avg used
8 : 3.4271915882137183
City avg used
9 : 3.607265774378585
City avg used
10 : 3.7180690072639226


In [8]:
# Final dataframe: `no_stars` after the imputation loop has written the
# rounded average into its `stars` column
no_stars

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,1,"""Zingerman's Delicatessen""",,"""422 Detroit St""",Ann Arbor,MI,48104,42.284682,-83.745071,3.46,1754,1,Delis;Breakfast & Brunch;Sandwiches;Restaurants
1,2,"""A & R Auto Care""",,"""1202 N Cannon Blvd""",Kannapolis,NC,28083,35.510807,-80.608472,3.5,1,1,Automotive;Towing
2,3,"""Starbucks""",,"""1135 Washington Blvd""",Ogden,UT,84404,41.245215,-111.970461,3.62,21,1,Food;Coffee & Tea
3,4,"""Starbucks""",,"""5210 S Cicero Ave""",Chicago,IL,60638,41.798023,-87.743579,3.55,2,1,Food;Coffee & Tea
4,5,"""Starbucks""",,"""4200 Conroy Rd""",Orlando,FL,32839,28.485466,-81.432003,4.0,30,1,Food;Coffee & Tea
5,6,"""The Tin Fox""",,"""2616 Monroe St""",Madison,WI,53711,43.057715,-89.42837,3.59,7,1,American (New);Restaurants;Coffee & Tea;Food;N...
6,7,"""Working Draft Beer Company""",,"""1129 E Wilson St""",Madison,WI,53703,43.083359,-89.365438,3.75,24,1,Food;Breweries
7,8,"""Il Covo""",,"""585 College Street""",Toronto,ON,M6G 1B2,43.655166,-79.413312,3.43,21,1,Restaurants;Italian
8,9,"""Hawaii Nails & Spa""",,"""1642 Bloor Street W""",Toronto,ON,M6P 1A7,43.655774,-79.456633,3.61,4,1,Beauty & Spas;Day Spas;Nail Salons
9,10,"""Radiant Acupuncture""",,"""572 Bloor Street W""",Toronto,ON,M6G 1K1,43.665242,-79.412033,3.72,1,1,Day Spas;Beauty & Spas;Health & Medical;Acupun...
