In [14]:
import pandas as pd
import numpy as np
from re import sub
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the raw restaurant table from the shared data directory,
# decoding the file as latin1.
df = pd.read_csv(
    '../data/dataset.csv',
    encoding='latin1',
)

In [15]:
# Discard the first column (selected by position), then remove every row
# that still contains a missing value.
# NOTE(review): positional slicing makes this cell non-idempotent -- re-running
# it strips one more column each time.
df = df.iloc[:, 1:].copy().dropna()

In [16]:
# Normalise casing: first letter upper-case for cities, all lower-case for
# the free-text columns (keywords are also trimmed at both ends).
df['City'] = df['City'].str.capitalize()
cuisine_lower = df['Cuisine Style'].str.lower()
keyword_trimmed = df['keyword'].str.lower().str.strip()

# Turn the comma-separated cuisine string into a list of whitespace-free tokens.
df['Cuisine Style'] = cuisine_lower.apply(lambda styles: styles.replace(',', ' ').split())
# Collapse runs of spaces inside the keyword string into a single space.
df['keyword'] = keyword_trimmed.apply(lambda text: sub(' +', ' ', text))

In [17]:
# Continue with a random subset of at most 5000 rows (presumably to keep the
# pairwise similarity matrix computed later at a manageable size).
# - random_state pins the draw so the subset, the similarity matrix and every
#   recommendation shown below are reproducible after a kernel restart.
# - min() avoids the ValueError that DataFrame.sample raises when asked for
#   more rows than exist (without replacement).
df = df.sample(n=min(5000, len(df)), random_state=42)

# Preview ten rows of the subset; seeded so the displayed table is stable.
df.sample(n=min(10, len(df)), random_state=42)

Unnamed: 0,Name,City,Cuisine Style,Ranking,Rating,Price Range,keyword
89980,Slurp Ramen Joint,Copenhagen,"[japanese, asian, soups, vegetarianfriendly, v...",122.0,4.5,casual,copenhagen casual vegetarian soups asian
65634,Mucha Nie Siada,Warsaw,[european],1155.0,5.0,cheap,warsaw cheap
97251,The First Lady,Copenhagen,"[european, bar, pub, scandinavian, danish]",895.0,4.0,casual,copenhagen casual pub bar northerneuropean
56511,Le Tre Piramidi,Rome,"[pizza, mediterranean, italian]",3836.0,4.0,casual,rome casual mediterranean pizza
95953,Thai Pot - Bedfordbury,London,"[asian, thai, vegetarianfriendly, veganoptions...",1882.0,4.0,casual,london casual vegetarian asian
100837,Trattoria Angelo,Rome,"[italian, pizza, mediterranean, vegetarianfrie...",2703.0,3.5,casual,rome casual vegetarian mediterranean pizza
6165,BENI's,Lisbon,"[italian, french, american, belgian, european]",3469.0,3.0,cheap,lisbon cheap northamerican westerneuropean
17528,Caffe Amore,Dublin,"[italian, pizza, cafe, mediterranean, european...",255.0,4.5,casual,dublin casual cafe vegetarian mediterranean pizza
7701,Bar La Santa,Madrid,"[cafe, delicatessen, bar, pub, mediterranean, ...",933.0,4.5,casual,madrid casual pub cafe bar mediterranean weste...
44978,Keyser Soze,Berlin,"[german, bar, cafe, european, centraleuropean,...",1135.0,3.5,casual,berlin casual cafe bar vegetarian centraleurop...


In [5]:
# Build a bag-of-words count matrix from the keyword strings
# (English stop words are ignored by the vectorizer).
keyword_corpus = df['keyword']
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(keyword_corpus)

In [6]:
# Pairwise cosine similarity between all rows of the count matrix.
# When only one matrix is given, scikit-learn compares it against itself.
cosine_sim = cosine_similarity(count_matrix)

In [7]:
# Replace the sampled row labels with a fresh 0..n-1 index (the old labels are
# kept in an 'index' column) so labels line up with rows of the similarity
# matrix, then build a restaurant-name -> row-position lookup.
df = df.reset_index(drop=False)
indices = pd.Series(data=df.index, index=df['Name'])

In [8]:
# Function that takes a restaurant name and returns the most similar restaurants
def get_recommendations(title, cosine_sim=cosine_sim, num=10):
    """Return the names of the restaurants most similar to ``title``.

    Parameters
    ----------
    title : str
        Restaurant name, looked up in the module-level ``indices`` mapping.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix indexed by row position of ``df``.
        Defaults to the module-level matrix that exists when this function
        is defined.
    num : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Entries of ``df['Name']`` ordered from most to least similar. The
        queried row itself is never part of the result.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    if title not in indices.index:
        raise KeyError(f"Restaurant {title!r} not found in the dataset")

    # Row position of the restaurant that matches the name.
    idx = indices[title]
    # Names are not guaranteed to be unique: a duplicated name makes the lookup
    # return a Series of positions. Use the first occurrence instead of letting
    # a 2-D slice of the similarity matrix break the ranking below.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Similarity of every restaurant to the queried one.
    scores = np.asarray(cosine_sim[idx]).ravel()

    # Rank by descending similarity; the stable sort keeps ties in ascending
    # row order, matching a stable sorted(..., reverse=True).
    ranked = np.argsort(-scores, kind='stable')

    # Remove the queried row explicitly. Skipping "the first result" is wrong
    # when other restaurants have identical keywords: they tie with the query
    # and may sort ahead of it, so the query itself would be recommended.
    ranked = ranked[ranked != idx]

    # Keep at most `num` restaurant positions.
    restaurants_indices = ranked[:max(num, 0)]

    return df['Name'].iloc[restaurants_indices]

In [9]:
# Show the first fifteen rows of the re-indexed frame.
df.iloc[:15]

Unnamed: 0,index,Name,City,Cuisine Style,Ranking,Rating,Price Range,keyword
0,66689,Nass Restaurant,Barcelona,"[mediterranean, european, spanish, vegetarianf...",435.0,4.5,casual,barcelona casual vegetarian mediterranean west...
1,96832,The Coal Hole,London,"[bar, british, pub, vegetarianfriendly]",1150.0,4.0,casual,london casual pub bar vegetarian westerneuropean
2,10060,Bierhalle - CH Arkadia,Warsaw,"[bar, polish, european, centraleuropean]",950.0,3.5,casual,warsaw casual bar centraleuropean
3,56809,Le pebon d'or,Paris,[african],8404.0,4.0,casual,paris casual african
4,93649,TZOM,Berlin,"[african, italian, mediterranean, fusion]",2868.0,5.0,cheap,berlin cheap mediterranean african fusion
5,101403,Triadou Haussmann,Paris,"[french, bar, european]",6797.0,3.5,casual,paris casual bar westerneuropean
6,35154,Gefstiki Gonia,Athens,"[mediterranean, barbecue, greek, vegetarianfri...",58.0,4.5,cheap,athens cheap vegetarian mediterranean grill ea...
7,3550,Anise - Bar,London,"[indian, bar, pub]",7712.0,4.0,casual,london casual pub bar asian
8,23318,Copenhagen,Madrid,"[spanish, vegetarianfriendly, veganoptions, gl...",1221.0,4.0,casual,madrid casual vegetarian westerneuropean
9,13044,Briciole,Milan,"[italian, pizza, seafood, mediterranean, europ...",859.0,4.0,cheap,milan cheap vegetarian mediterranean seafood p...


In [10]:
# Ask for a restaurant name interactively and look up its nearest neighbours.
# NOTE(review): the result of this cell depends on what is typed at the prompt,
# so the output below is not reproducible from code alone.
restaurant_name = input('Restaurant Name:')
recommendations = get_recommendations(title=restaurant_name)
recommendations

Restaurant Name:Gaffurio


2843    Al Meazza Ristorante Pizzeria
3133      Pizzeria Trattoria S. Maria
444                  Pizzeria Del 333
731              Pizzeria Dell'Angelo
1845                  I Monelli Pizza
55                   Birreria Tortuga
226              Ristorante Da Teresa
297                         Just Cafe
309                            Gnulot
1013                      BebelMilano
Name: Name, dtype: object

In [11]:
# Display the full rows of the queried restaurant together with its
# recommendations. Matching is done by name, so any other row that shares one
# of these names is shown as well.
is_query = df['Name'].eq(restaurant_name)
is_recommended = df['Name'].isin(recommendations)
df.loc[is_query | is_recommended]

Unnamed: 0,index,Name,City,Cuisine Style,Ranking,Rating,Price Range,keyword
11,34378,Gaffurio,Milan,"[italian, pizza]",5931.0,3.0,cheap,milan cheap pizza
55,10364,Birreria Tortuga,Milan,"[italian, brewpub]",5963.0,2.5,cheap,milan cheap
226,84230,Ristorante Da Teresa,Milan,[italian],5364.0,3.0,cheap,milan cheap
297,43581,Just Cafe,Milan,[italian],2362.0,4.5,cheap,milan cheap
309,35819,Gnulot,Milan,[italian],2142.0,5.0,cheap,milan cheap
444,75413,Pizzeria Del 333,Milan,"[italian, pizza, vegetarianfriendly]",784.0,4.5,cheap,milan cheap vegetarian pizza
731,75419,Pizzeria Dell'Angelo,Milan,"[italian, pizza, vegetarianfriendly]",1249.0,4.0,cheap,milan cheap vegetarian pizza
1013,9036,BebelMilano,Milan,[italian],2402.0,5.0,cheap,milan cheap
1845,40302,I Monelli Pizza,Milan,"[italian, pizza, mediterranean]",768.0,4.5,cheap,milan cheap mediterranean pizza
2843,2062,Al Meazza Ristorante Pizzeria,Milan,"[italian, pizza]",4996.0,3.5,cheap,milan cheap pizza
