In [2]:
import numpy as np
import pandas as pd
from fuzzywuzzy import process
from sklearn.neighbors import NearestNeighbors



In [5]:
restaurant_id = pd.read_csv("datasets/geoplaces2.csv",usecols=["placeID"],dtype={"placeID":"int32"}).sort_values(by="placeID").reset_index(drop=True)
restaurant_id

Unnamed: 0,placeID
0,132560
1,132561
2,132564
3,132572
4,132583
...,...
125,135088
126,135104
127,135106
128,135108


In [6]:
restaurant_record = pd.read_csv("datasets/chefmozcuisine.csv",usecols=["placeID","Rcuisine"],dtype={"placeID":"int32","Rcuisine":"str"}).sort_values(by="placeID")
restaurant_record = restaurant_record[restaurant_record.placeID.isin(restaurant_id.placeID)].reset_index(drop=True)
restaurant_record


Unnamed: 0,placeID,Rcuisine
0,132560,Regional
1,132572,Cafeteria
2,132583,American
3,132584,Mexican
4,132594,Mexican
...,...,...
107,135086,Burgers
108,135088,Cafeteria
109,135104,Mexican
110,135106,Mexican


In [12]:
restaurant_features = pd.concat([restaurant_record["Rcuisine"].str.get_dummies(sep=",")]).set_index(restaurant_record["placeID"])
restaurant_features
restaurant_features_all = restaurant_features.reset_index()

In [7]:
restaurant_full_record = pd.read_csv("datasets/geoplaces2.csv",usecols=["placeID","name","address"],dtype={"placeID":"int32","name":"str","address":"str","price":"str"},encoding='utf_8').sort_values(by="placeID").reset_index(drop=True)
restaurant_full_record = restaurant_full_record.merge(restaurant_record,on="placeID")
restaurant_full_record

Unnamed: 0,placeID,name,address,Rcuisine
0,132560,puesto de gorditas,frente al tecnologico,Regional
1,132572,Cafe Chaires,?,Cafeteria
2,132583,McDonalds Centro,Rayon sn col. Centro,American
3,132584,Gorditas Dona Tota,?,Mexican
4,132594,tacos de barbacoa enfrente del Tec,?,Mexican
...,...,...,...,...
107,135086,Mcdonalds Parque Tangamanga,Lateral Salvador Nava Martinez 3145,Burgers
108,135088,Cafeteria cenidet,Interior Internado Palmira SN,Cafeteria
109,135104,vips,?,Mexican
110,135106,El Rincon de San Francisco,Universidad 169,Mexican


In [8]:
restaurant_rating_record = pd.read_csv("datasets/rating_final.csv",usecols=["placeID","rating","userID"],dtype={"placeID":"int32","rating":"int32","userID":"str"})
restaurant_rating_record = restaurant_rating_record[restaurant_rating_record.placeID.isin(restaurant_id.placeID)]
restaurant_rating_df = restaurant_rating_record.groupby(by=["placeID"]).mean().reset_index()
restaurant_rating_df = restaurant_rating_df[restaurant_rating_df.placeID.isin(restaurant_record.placeID)]
restaurant_rating_df


Unnamed: 0,placeID,rating
0,132560,0.500000
3,132572,1.000000
4,132583,1.000000
5,132584,1.333333
6,132594,0.600000
...,...,...
124,135086,0.800000
125,135088,1.000000
126,135104,0.857143
127,135106,1.200000


In [13]:
restaurant_full_record = restaurant_full_record.merge(restaurant_rating_df,on="placeID")
restaurant_full_record

Unnamed: 0,placeID,name,address,Rcuisine,rating_x,rating_y
0,132560,puesto de gorditas,frente al tecnologico,Regional,0.500000,0.500000
1,132572,Cafe Chaires,?,Cafeteria,1.000000,1.000000
2,132583,McDonalds Centro,Rayon sn col. Centro,American,1.000000,1.000000
3,132584,Gorditas Dona Tota,?,Mexican,1.333333,1.333333
4,132594,tacos de barbacoa enfrente del Tec,?,Mexican,0.600000,0.600000
...,...,...,...,...,...,...
107,135086,Mcdonalds Parque Tangamanga,Lateral Salvador Nava Martinez 3145,Burgers,0.800000,0.800000
108,135088,Cafeteria cenidet,Interior Internado Palmira SN,Cafeteria,1.000000,1.000000
109,135104,vips,?,Mexican,0.857143,0.857143
110,135106,El Rincon de San Francisco,Universidad 169,Mexican,1.200000,1.200000


In [14]:
restaurant_df = restaurant_features.merge(restaurant_rating_df,on="placeID").sort_values(by="placeID").reset_index(drop=True)
restaurant_df = restaurant_df.drop(['placeID'], axis=1)
restaurant_df

Unnamed: 0,American,Armenian,Bakery,Bar,Bar_Pub_Brewery,Breakfast-Brunch,Burgers,Cafe-Coffee_Shop,Cafeteria,Chinese,...,International,Italian,Japanese,Mediterranean,Mexican,Pizzeria,Regional,Seafood,Vietnamese,rating
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0.500000
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1.000000
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.000000
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1.333333
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0.600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0.800000
108,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1.000000
109,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0.857143
110,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1.200000


In [89]:
model_cnn =  NearestNeighbors(algorithm='auto', n_neighbors=5)
model_cnn.fit(restaurant_df)
distances, indices = model_cnn.kneighbors(restaurant_df)

In [90]:
def get_index_from_name(name):
    return restaurant_full_record[restaurant_full_record["name"]==name].index.tolist()[0]

In [91]:
res_name = "sushi"
name=process.extractOne(res_name,restaurant_full_record['name'])[0]
found_id = get_index_from_name(name)
index_arr = []
for id in indices[found_id]:
    index_arr.append(id)
index_arr

[99, 44, 60, 23, 73]

In [92]:
res_name = "mcdonalds"
name=process.extractOne(res_name,restaurant_full_record['name'])[0]
name

'McDonalds Centro'

In [93]:
restaurant_full_record.loc[index_arr]

Unnamed: 0,placeID,name,address,Rcuisine,rating_x,rating_y
99,135072,Sushi Itto,Venustiano Carranza 1809 C Polanco,Japanese,1.25,1.25
44,132875,shi ro ie,?,Japanese,1.125,1.125
60,134999,Kiku Cuernavaca,Revolucion,Japanese,1.6,1.6
23,132766,Mikasa,Himno nacional esq. Blvd. Juarez,Japanese,0.666667,0.666667
73,135034,Michiko Restaurant Japones,Cordillera de Los Alpes 160 Lomas 2 Seccion,Japanese,2.0,2.0


In [298]:
name="American"
restaurant_features[restaurant_features[name]==1]

Unnamed: 0,placeID,American,Armenian,Bakery,Bar,Bar_Pub_Brewery,Breakfast-Brunch,Burgers,Cafe-Coffee_Shop,Cafeteria,...,Game,International,Italian,Japanese,Mediterranean,Mexican,Pizzeria,Regional,Seafood,Vietnamese
2,132583,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
33,132851,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43,132872,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50,132951,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
53,132958,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [288]:
restaurant_full_record[restaurant_full_record["name"]=="Subway"]

Unnamed: 0,placeID,name,address,Rcuisine
63,135021,Subway,Rio Mayo 17 Vista Hermosa,Fast_Food


In [391]:
res_name = "dominos"
idx=process.extractOne(res_name,restaurant_full_record['name'])
idx

('Dominos Pizza', 90, 41)