# Food Recommendation System Based on Restaurants from Mexico

# Joel Parker

# Data from: https://www.mavenanalytics.io/data-playground?page=2&pageSize=5 (Restaurant Ratings)

# Import Packages

In [28]:
pip install ipywidgets

Collecting ipywidgets
  Using cached ipywidgets-7.7.1-py2.py3-none-any.whl (123 kB)
Collecting jupyterlab-widgets>=1.0.0
  Using cached jupyterlab_widgets-1.1.1-py3-none-any.whl (245 kB)
Collecting widgetsnbextension~=3.6.0
  Using cached widgetsnbextension-3.6.1-py2.py3-none-any.whl (1.6 MB)
Collecting ipython-genutils~=0.2.0
  Using cached ipython_genutils-0.2.0-py2.py3-none-any.whl (26 kB)
Collecting notebook>=4.4.1
  Using cached notebook-6.4.12-py3-none-any.whl (9.9 MB)
Collecting nbformat
  Using cached nbformat-5.4.0-py3-none-any.whl (73 kB)
Collecting terminado>=0.8.3
  Using cached terminado-0.15.0-py3-none-any.whl (16 kB)
Collecting Send2Trash>=1.8.0
  Using cached Send2Trash-1.8.0-py3-none-any.whl (18 kB)
Collecting jinja2
  Using cached Jinja2-3.1.2-py3-none-any.whl (133 kB)
Collecting nbconvert>=5
  Using cached nbconvert-6.5.0-py3-none-any.whl (561 kB)
Collecting argon2-cffi
  Using cached argon2_cffi-21.3.0-py3-none-any.whl (14 kB)
Collecting prometheus-client
  Using ca

In [29]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
import ipywidgets as widgets
from IPython.display import display

# Get Data

In [2]:
restaurants = pd.read_csv('restaurant_data.csv')
restaurants

Unnamed: 0.1,Unnamed: 0,Restaurant_ID,Name,Cuisine
0,0,132560,Puesto de Gorditas,Regional
1,1,132572,Cafe Chaires,Cafeteria
2,2,132583,McDonalds Centro,American
3,3,132584,Gorditas Doña Tota,Mexican
4,4,132594,Tacos De Barbacoa Enfrente Del Tec,Mexican
...,...,...,...,...
107,107,135086,McDonalds Parque Tangamanga,Fast Food
108,108,135088,Cafeteria Cenidet,Cafeteria
109,109,135104,Vips,Mexican
110,110,135106,El Rincón De San Francisco,Mexican


In [3]:
restaurants.drop(['Unnamed: 0'], axis=1, inplace=True)

In [4]:
restaurants

Unnamed: 0,Restaurant_ID,Name,Cuisine
0,132560,Puesto de Gorditas,Regional
1,132572,Cafe Chaires,Cafeteria
2,132583,McDonalds Centro,American
3,132584,Gorditas Doña Tota,Mexican
4,132594,Tacos De Barbacoa Enfrente Del Tec,Mexican
...,...,...,...
107,135086,McDonalds Parque Tangamanga,Fast Food
108,135088,Cafeteria Cenidet,Cafeteria
109,135104,Vips,Mexican
110,135106,El Rincón De San Francisco,Mexican


# Cleaning Names

regex = will search through each name and look for any characters that aren't a space, digit, or a lowercase or uppercase letter and it's going to remove them 

In [11]:
def clean_cuisine(Cuisine):
   return re.sub("[^a-zA-Z0-9 ]", "", Cuisine)

In [12]:
restaurants["clean_cuisine"] = restaurants["Cuisine"].apply(clean_cuisine)

In [13]:
restaurants

Unnamed: 0,Restaurant_ID,Name,Cuisine,clean_cuisine
0,132560,Puesto de Gorditas,Regional,Regional
1,132572,Cafe Chaires,Cafeteria,Cafeteria
2,132583,McDonalds Centro,American,American
3,132584,Gorditas Doña Tota,Mexican,Mexican
4,132594,Tacos De Barbacoa Enfrente Del Tec,Mexican,Mexican
...,...,...,...,...
107,135086,McDonalds Parque Tangamanga,Fast Food,Fast Food
108,135088,Cafeteria Cenidet,Cafeteria,Cafeteria
109,135104,Vips,Mexican,Mexican
110,135106,El Rincón De San Francisco,Mexican,Mexican


# Creating a TFIDF Matrix

term frequency matrix = each column would be a unique term across your titles or names

inverse document frequency = helps the search engine find terms that are unique 
 
what you get at the end is basically a vector (set of numbers) for each movie that describes that title or name
    so when you run a search, the computer turns the title or name that we enter into the search bar into a set of numbers and compares that set of numbers to all the titles or names we already have in our data and finds which ones are the most similar

ngrams = is going to look at groups of two words that are consecutive in the title or name 

In [6]:
vectorizer = TfidfVectorizer(ngram_range=(1,2))

tfidf = vectorizer.fit_transform(restaurants["Cuisine"])

# Compute the similarities between a term that we enter and all restuarants in our list

search(cuisine) = takes the title or name we want to search for. So we want search for cuisine in this case 

query_vec = turns the search term into a set of numbers

similarity = we will find the similarities our data has with search term. It's going to compare our query term to each of the cuisines that we have and return how similar each are to the title or name 

indices = finds the titles or names that have the greatness similarity to our search term. It will find the 5 most similar title or names to our search term

results = it's going to index our data by these indices 
    [::-1] = it will reverse the results because the most similar results are last in the list

In [37]:
 def search(cuisine):
    #cuisine = "Fast Food"
    cuisine = clean_cuisine(cuisine)
    query_vec = vectorizer.transform([cuisine])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity, -5)[-5:]
    results = restaurants.iloc[indices][::-1]
    return results

In [23]:
query_vec

<1x27 sparse matrix of type '<class 'numpy.float64'>'
	with 3 stored elements in Compressed Sparse Row format>

# We will see the similarities our data has with Fast Food

In [24]:
similarity

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 1., 0., 0., 0., 0.])

In [25]:
results

Unnamed: 0,Restaurant_ID,Name,Cuisine,clean_cuisine
82,135046,Restaurante El Reyecito,Fast Food,Fast Food
6,132609,Pollo Frito Buenos Aires,Fast Food,Fast Food
105,135085,Tortas Locas Hipocampo,Fast Food,Fast Food
63,135021,Subway,Fast Food,Fast Food
79,135043,Pizza Clasica,Fast Food,Fast Food


# Building Interactive Search Box

we need to create an input widget & output widget which allows us to use the search box as needed

our input will be a dictionary

display(search) = it's going to search our set of titles or names for the names and going to display it into our output widget

observe = whenever something happens with the input, it's going to call the on_type method and when that event is called we are going to run the def on_type function

In [38]:
from tkinter.font import names


restaurants_input = widgets.Text(
    value="Fast Food",
    description="Cuisine:", 
    disabled=False
)

restaurants_list = widgets.Output()

def on_type(data):
    with restaurants_list:
        restaurants_list.clear_output()
        cuisine = data["new"]
        if len(cuisine) > 5:
            display(search(cuisine))

restaurants_input.observe(on_type, names='value')

display(restaurants_input, restaurants_list)

Text(value='Fast Food', description='Cuisine:')

Output()