# Making Recommendations Based on Popularity

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Every dataset is published as a Google Drive "view" share link. The file id
# is the second-to-last path segment of that link; appending it to the
# direct-download endpoint gives a URL that pd.read_csv can fetch.

# rating_final.csv
url = 'https://drive.google.com/file/d/1ptu4AlEXO4qQ8GytxKHoeuS1y4l_zWkC/view?usp=sharing'
file_id = url.split('/')[-2]
path = 'https://drive.google.com/uc?export=download&id=' + file_id
frame = pd.read_csv(path)

# chefmozcuisine.csv
url = 'https://drive.google.com/file/d/1S0_EGSRERIkSKW4D8xHPGZMqvlhuUzp1/view?usp=sharing'
file_id = url.split('/')[-2]
path = 'https://drive.google.com/uc?export=download&id=' + file_id
cuisine = pd.read_csv(path)

# 'geoplaces2.csv'
url = 'https://drive.google.com/file/d/1ee3ib7LqGsMUksY68SD9yBItRvTFELxo/view?usp=sharing'
file_id = url.split('/')[-2]
path = 'https://drive.google.com/uc?export=download&id=' + file_id
# This file is not UTF-8; change encoding to 'mbcs' in Windows.
geodata = pd.read_csv(path, encoding='CP1252')

In [3]:
# Preview the first three rows of the ratings table (rating_final.csv).
frame.head(3)

Unnamed: 0,userID,placeID,rating,food_rating,service_rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2


In [4]:
# Check which distinct values the overall `rating` column takes.
frame['rating'].unique()

array([2, 1, 0], dtype=int64)

In [5]:
# Peek at the first two rows of the place metadata (geoplaces2.csv).
geodata.head(2)

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,...,alcohol,smoking_area,dress_code,accessibility,price,url,Rambience,franchise,area,other_services
0,134999,18.915421,-99.184871,0101000020957F000088568DE356715AC138C0A525FC46...,Kiku Cuernavaca,Revolucion,Cuernavaca,Morelos,Mexico,?,...,No_Alcohol_Served,none,informal,no_accessibility,medium,kikucuernavaca.com.mx,familiar,f,closed,none
1,132825,22.147392,-100.983092,0101000020957F00001AD016568C4858C1243261274BA5...,puesto de tacos,esquina santos degollado y leon guzman,s.l.p.,s.l.p.,mexico,?,...,No_Alcohol_Served,none,informal,completely,low,?,familiar,f,open,none


In [6]:
# Keep only the columns needed to translate a placeID into a place name.
name_columns = ['placeID', 'name']
places = geodata.loc[:, name_columns]
places.head()

Unnamed: 0,placeID,name
0,134999,Kiku Cuernavaca
1,132825,puesto de tacos
2,135106,El Rincón de San Francisco
3,132667,little pizza Emilio Portes Gil
4,132613,carnitas_mata


In [7]:
# Preview the placeID -> cuisine table (chefmozcuisine.csv).
cuisine.head(3)

Unnamed: 0,placeID,Rcuisine
0,135110,Spanish
1,135109,Italian
2,135107,Latin_American


In [8]:
# Mean overall rating per place, held as a one-column DataFrame indexed by placeID.
rating = frame.groupby('placeID')['rating'].mean().to_frame()
rating.sort_values(by="rating", ascending=False).head()

Unnamed: 0_level_0,rating
placeID,Unnamed: 1_level_1
132955,2.0
135034,2.0
134986,2.0
132922,1.833333
132755,1.8


In [9]:
# Inspect the individual ratings behind one of the places with a perfect 2.0
# mean, to see how many ratings that average is actually based on.
frame.query("placeID==132955")

Unnamed: 0,userID,placeID,rating,food_rating,service_rating
934,U1004,132955,2,2,2
960,U1061,132955,2,2,2
996,U1059,132955,2,1,2
1014,U1097,132955,2,2,1
1080,U1096,132955,2,2,2


In [10]:
# Number of ratings each place received; the column assignment aligns on the
# placeID index shared with `rating`.
ratings_per_place = frame.groupby('placeID')['rating'].count()
rating['rating_count'] = ratings_per_place
rating.sort_values(by="rating_count", ascending=False).head()

Unnamed: 0_level_0,rating,rating_count
placeID,Unnamed: 1_level_1,Unnamed: 2_level_1
135085,1.333333,36
132825,1.28125,32
135032,1.178571,28
135052,1.28,25
132834,1.0,25


In [11]:
# Places ranked by how many ratings they received, most-rated first.
# NOTE(review): this repeats the display at the end of the previous cell.
rating.sort_values('rating_count', ascending=False).head()

Unnamed: 0_level_0,rating,rating_count
placeID,Unnamed: 1_level_1,Unnamed: 2_level_1
135085,1.333333,36
132825,1.28125,32
135032,1.178571,28
135052,1.28,25
132834,1.0,25


In [12]:
# Rank places by number of ratings; the first index label of the sorted frame
# is the placeID of the most-rated ("most popular") place.
by_popularity = rating.sort_values(by='rating_count', ascending=False)
top_popular_placeID = by_popularity.index[0]

# Look up the name of that place.
places.loc[places['placeID'] == top_popular_placeID]

Unnamed: 0,placeID,name
121,135085,Tortas Locas Hipocampo


In [13]:
# Cuisine row(s) recorded for the most popular place.
is_top_place = cuisine['placeID'] == top_popular_placeID
cuisine.loc[is_top_place]

Unnamed: 0,placeID,Rcuisine
44,135085,Fast_Food


In [14]:
# Challenge

In [15]:
# Reminder of the ratings table's columns before aggregating it.
frame.head(1)

Unnamed: 0,userID,placeID,rating,food_rating,service_rating
0,U1077,135085,2,2,2


In [16]:
# Minimum-popularity cut-off: a place must have MORE than `n` ratings.
n = 5

# Per-place average rating and number of ratings.
place_stats = frame.groupby('placeID').agg(
    avg_rating=('rating', 'mean'),
    n_ratings=('userID', 'count'),
)

# Drop rarely-rated places, then keep the ten highest averages.
sufficiently_rated = place_stats.query(f'n_ratings > {n}')
new_frame = sufficiently_rated.nlargest(10, 'avg_rating')
new_frame.head(3)

Unnamed: 0_level_0,avg_rating,n_ratings
placeID,Unnamed: 1_level_1,Unnamed: 2_level_1
134986,2.0,8
132922,1.833333,6
135055,1.714286,7


In [17]:
# Reminder of the cuisine table's columns before merging on placeID.
cuisine.head(1)

Unnamed: 0,placeID,Rcuisine
0,135110,Spanish


In [18]:
# Attach cuisine first, then the place name; left joins keep every ranked place
# even when a lookup table has no entry for it.
with_cuisine = new_frame.merge(cuisine, how='left', on='placeID')
with_cuisine.merge(places, how='left', on='placeID').head(2)

Unnamed: 0,placeID,avg_rating,n_ratings,Rcuisine,name
0,134986,2.0,8,International,Restaurant Las Mananitas
1,132922,1.833333,6,Cafeteria,cafe punta del cielo


In [19]:
def n_top_restaurants(n, rest_names=places, rest_ratings=frame, rest_cuisine=cuisine, threshold=12):
  """Return the `n` best-rated places among those with enough ratings.

  Ratings are averaged per `placeID`; places with `threshold` ratings or fewer
  are discarded so that a handful of enthusiastic ratings cannot dominate the
  ranking. The survivors are ordered by average rating (highest first) and
  joined with their cuisine and name.

  Parameters
  ----------
  n : int
      Number of places to rank.
  rest_names : DataFrame
      Lookup table with `placeID` and `name` columns.
  rest_ratings : DataFrame
      One row per rating, with `placeID`, `userID` and `rating` columns.
  rest_cuisine : DataFrame
      Lookup table with `placeID` and `Rcuisine` columns.
  threshold : int, default 12
      A place is kept only if it has strictly more than this many ratings.

  Returns
  -------
  DataFrame
      Columns `name`, `Rcuisine`, `avg_rating`, best average first. Fewer than
      `n` places are returned when fewer pass the threshold. Because the
      lookups are left joins, missing cuisine/name entries show up as NaN.
      NOTE(review): a place listed more than once in `rest_cuisine` yields one
      row per listing, so the row count can exceed `n`.
  """
  place_stats = rest_ratings.groupby('placeID').agg(
      avg_rating=('rating', 'mean'),
      n_ratings=('userID', 'count'),
  )
  well_rated = place_stats[place_stats['n_ratings'] > threshold]
  # Take exactly `n` places (the previous `n - 1` dropped the last one).
  top_places = well_rated.nlargest(n, 'avg_rating')
  labelled = (
      top_places.merge(rest_cuisine, how='left', on='placeID')
                .merge(rest_names, how='left', on='placeID')
  )
  return labelled[['name', 'Rcuisine', 'avg_rating']]
  

In [20]:
# Ask for the top 15 places; fewer rows come back when fewer places clear the
# function's minimum-number-of-ratings threshold.
n_top_restaurants(15)

Unnamed: 0,name,Rcuisine,avg_rating
0,Mariscos El Pescador,Seafood,1.692308
1,El Rincon de San Francisco,Mexican,1.666667
2,La Virreina,Mexican,1.533333
3,Cabana Huasteca,Mexican,1.461538
4,Restaurante la Gran Via,,1.461538
5,Restaurante Versalles,,1.428571
6,La Posada del Virrey,International,1.388889
7,Tortas Locas Hipocampo,Fast_Food,1.333333
8,Restaurante Pueblo Bonito,,1.307692
9,puesto de tacos,Mexican,1.28125
