In [1]:
# Dependencies
from sklearn import tree
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy import create_engine, func, literal_column

from flask import Flask, jsonify, render_template, redirect, request, flash, session

In [2]:
# MySQL Setup
#################################################

# The connection URI embeds the database user and password, so it is read from
# the environment rather than being stored in the notebook. Export it before
# starting the kernel, e.g.
#   export DB_CONN_URI="mysql://<user>:<password>@<host>:3306/<database>?charset=utf8"
# NOTE(review): any credentials that were ever committed in this cell should be rotated.
import os

DB_CONN_URI_DEFAULT = os.environ.get("DB_CONN_URI")
if not DB_CONN_URI_DEFAULT:
    raise RuntimeError(
        "Set the DB_CONN_URI environment variable to the MySQL connection URI "
        "before running this notebook."
    )

engine = create_engine(DB_CONN_URI_DEFAULT)


# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)
print(Base.metadata.tables.keys())

# Save reference to the table
Restaurant = Base.classes.restaurants
ZipRequest = Base.classes.ziprequests
Search_Information = Base.classes.search_information
Users = Base.classes.usersdb
CuisineType = Base.classes.cuisinetype

# NOTE: this rebinds `session`, shadowing the `session` name imported from flask
# in the dependencies cell; from here on `session` is the SQLAlchemy session.
session = Session(engine)

dict_keys(['Zipcodes', 'cuisinetype', 'price', 'rating', 'restaurants', 'search_information', 'usersdb', 'zipcodes', 'ziprequests'])


In [3]:
# Find the most recently dated zip request and keep its request id
newest_first = ZipRequest.lastrequestdate.desc()
ziprequests = session.query(ZipRequest).order_by(newest_first)
latest_request = ziprequests[0]  # indexing the query fetches only the first row
current_req = latest_request.requestid
current_req

11747

In [4]:
# Filter restaurants list by requested zipcode
restaurants = session.query(Restaurant).filter(Restaurant.requestid == current_req)

# Hand the ORM query's own SELECT to pandas: the request id is sent as a bound
# parameter instead of being concatenated into the SQL text, and the existing
# engine is reused rather than opening a second connection from the raw URI.
restaurants_df = pd.read_sql(restaurants.statement, engine)

In [5]:
# Narrow the restaurant frame to the descriptive columns plus the Yelp id
kept_columns = ["cuisine", "delivery", "price", "rating", "reservations", "yelpid"]
restaurants_df = restaurants_df[kept_columns]
print(restaurants_df.shape[0])  # number of restaurants for this request
restaurants_df.head()

80


Unnamed: 0,cuisine,delivery,price,rating,reservations,yelpid
0,Italian,0,$$,4.5,1,cinque-terre-ristorante-italiano-huntington-st...
1,Thai,0,$$,4.5,0,new-chili-and-curry-hicksville-2
2,Japanese,0,$$,4.5,0,arata-sushi-syosset
3,Japanese,1,$$,4.5,0,sasa-hibachi-restaurant-farmingdale-2
4,Thai,0,$$,4.5,0,thai-flavors-kitchen-bethpage


In [6]:
# Getting YelpIDs of restaurants shown
# (the 12 search_information rows with the highest searchid for user 1)
selections = (
    session.query(Search_Information)
    .filter(Search_Information.userid == 1)
    .order_by(Search_Information.searchid.desc())
    .limit(12)
    .all()
)

# Remove all of those restaurants in a single vectorised pass instead of
# re-filtering the whole frame once per selection. The original index labels
# are kept on purpose (no reset_index).
shown_yelpids = [select.yelpid for select in selections]
restaurants_df = restaurants_df[~restaurants_df.yelpid.isin(shown_yelpids)]

In [7]:
# One-hot encoding cuisine
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Step 1: label-encode the cuisine names into integer codes.
encoder = LabelEncoder()
cuisine_transformed = encoder.fit_transform(restaurants_df['cuisine'])

# Step 2: expand the integer codes into a one-hot matrix (one row per restaurant).
one_hot_cuisine = to_categorical(cuisine_transformed)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [8]:
# Lower-cased, alphabetically sorted names of the cuisines present in restaurants_df
cuisines_unique = restaurants_df['cuisine'].unique()
cuisines = sorted(name.lower() for name in cuisines_unique)
cuisines

['chinese',
 'french',
 'greek',
 'italian',
 'japanese',
 'korean',
 'mediterranean',
 'mexican',
 'thai']

In [9]:
# Reshaping one_hot_cuisine
# Turn the one-hot matrix into {cuisine name: list of 0/1 ints, one per restaurant}.
# Column `col` of the matrix is paired with cuisines[col].
# NOTE(review): this pairing assumes the encoder's class order equals the
# lower-cased, sorted `cuisines` list - confirm for mixed-case cuisine names.
n_rows = len(restaurants_df)
cuisine_dict = {
    name: [int(one_hot_cuisine[row][col]) for row in range(n_rows)]
    for col, name in enumerate(cuisines)
}

cuisine_dict

{'chinese': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  1],
 'french': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'greek': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0

In [10]:
# Encoding price
# Re-fits the shared `encoder` on the price strings (overwriting its cuisine fit)
# and converts every price to its integer label in one call.
# NOTE(review): the integer codes depend on which price values occur in this
# frame; the user-history frame is encoded by a separate fit further down -
# confirm both fits yield the same price -> code mapping.
price_transformed = encoder.fit_transform(restaurants_df['price'])

In [11]:
# DataFrame with encoded values
# Base columns: the label-encoded price plus the untouched numeric columns and
# the Yelp id. The Series inputs carry restaurants_df's index, which becomes
# the index of res_new.
base_columns = {
    'price': price_transformed,
    'rating': restaurants_df['rating'],
    'reservations': restaurants_df['reservations'],
    'delivery': restaurants_df['delivery'],
    'yelpid': restaurants_df['yelpid']
}
res_new = pd.DataFrame(base_columns)

# Append one 0/1 indicator column per cuisine (lists are assigned positionally).
for cuisine_name in cuisine_dict:
    res_new[cuisine_name] = cuisine_dict[cuisine_name]

res_new.head()

Unnamed: 0,delivery,price,rating,reservations,yelpid,chinese,french,greek,italian,japanese,korean,mediterranean,mexican,thai
0,0,1,4.5,1,cinque-terre-ristorante-italiano-huntington-st...,0,0,0,1,0,0,0,0,0
2,0,1,4.5,0,arata-sushi-syosset,0,0,0,0,1,0,0,0,0
3,1,1,4.5,0,sasa-hibachi-restaurant-farmingdale-2,0,0,0,0,1,0,0,0,0
4,0,1,4.5,0,thai-flavors-kitchen-bethpage,0,0,0,0,0,0,0,0,1
5,0,1,4.5,0,vespa-italian-kitchen-and-bar-farmingdale-4,0,0,0,1,0,0,0,0,0


In [12]:
# Storing data for specified user (user 1)
user_rows = session.query(Search_Information).filter(Search_Information.userid == 1)
results = user_rows.all()

# One plain dict per search_information row; rating is coerced to float.
data = [
    {
        'price': row.price,
        'rating': float(row.rating),
        'reservations': row.reservations,
        'delivery': row.delivery,
        'cuisine': row.cuisine,
        'like': row.like,
        'yelpid': row.yelpid
    }
    for row in results
]

df = pd.DataFrame(data)
print(df.shape[0])  # number of history rows loaded
df.head()

822


Unnamed: 0,cuisine,delivery,like,price,rating,reservations,yelpid
0,Japanese,0,1,$$$$,3.0,0,meo-hibachi-sushi-steakhouse-fresh-meadows
1,Korean,0,1,$$,4.0,0,picnic-garden-edison
2,Korean,1,1,$$,4.5,0,hanok-tofu-house-east-brunswick
3,Italian,0,1,$$$,4.0,0,novita-bistro-and-lounge-metuchen
4,Mediterranean,0,1,$$,4.0,0,the-orchid-restaurant-metuchen


In [13]:
# One-hot encoding cuisine
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# A fresh encoder turns the history's cuisine names into integer codes...
encoder = LabelEncoder()
cuisine_transformed = encoder.fit_transform(df['cuisine'])

# ...which are then expanded into a one-hot matrix (one row per history entry).
one_hot_cuisine = to_categorical(cuisine_transformed)

In [14]:
# Lower-cased, alphabetically sorted names of the cuisines present in the history frame
cuisines_unique = df['cuisine'].unique()
cuisines = sorted(label.lower() for label in cuisines_unique)
cuisines

['chinese',
 'french',
 'greek',
 'italian',
 'japanese',
 'korean',
 'mediterranean',
 'mexican',
 'thai']

In [15]:
# Reshaping one_hot_cuisine
# Build {cuisine name: list of 0/1 ints, one per history row} by reading the
# one-hot matrix column by column; column `col` is paired with cuisines[col].
# NOTE(review): relies on the encoder's class order matching the lower-cased,
# sorted `cuisines` list - confirm for mixed-case cuisine names.
history_rows = range(len(df))
cuisine_dict = {
    label: [int(one_hot_cuisine[row][col]) for row in history_rows]
    for col, label in enumerate(cuisines)
}

cuisine_dict

{'chinese': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,

In [16]:
# Encoding price
# Re-fits the shared `encoder` on the history's price strings (overwriting its
# cuisine fit) and maps every price to its integer label in one call.
# NOTE(review): the resulting codes depend on which price values appear in
# `df`; the candidate restaurants were encoded by a separate fit earlier -
# confirm both fits produce the same price -> code mapping.
price_transformed = encoder.fit_transform(df['price'])

In [17]:
# DataFrame with encoded values
# Start from the label-encoded price, the untouched numeric columns, the
# `like` label and the Yelp id.
history_columns = {
    'price': price_transformed,
    'rating': df['rating'],
    'reservations': df['reservations'],
    'delivery': df['delivery'],
    'like': df['like'],
    'yelpid': df['yelpid']
}
df_new = pd.DataFrame(history_columns)

# Then add one 0/1 indicator column per cuisine (lists are assigned positionally).
for label in cuisine_dict:
    df_new[label] = cuisine_dict[label]

df_new.head()

Unnamed: 0,delivery,like,price,rating,reservations,yelpid,chinese,french,greek,italian,japanese,korean,mediterranean,mexican,thai
0,0,1,3,3.0,0,meo-hibachi-sushi-steakhouse-fresh-meadows,0,0,0,0,1,0,0,0,0
1,0,1,1,4.0,0,picnic-garden-edison,0,0,0,0,0,1,0,0,0
2,1,1,1,4.5,0,hanok-tofu-house-east-brunswick,0,0,0,0,0,1,0,0,0
3,0,1,2,4.0,0,novita-bistro-and-lounge-metuchen,0,0,0,1,0,0,0,0,0
4,0,1,1,4.0,0,the-orchid-restaurant-metuchen,0,0,0,0,0,0,1,0,0


In [18]:
# X & y values
# Features: every column except the label ('like') and the identifier ('yelpid').
non_feature_columns = ['like', 'yelpid']
feature_mask = ~df_new.columns.isin(non_feature_columns)
X = df_new.loc[:, feature_mask]

# Target: the like flag recorded for each history row.
y = df_new['like']

In [19]:
# Splitting data into train and test
from sklearn.model_selection import train_test_split

# Fixed random_state keeps the shuffle (and therefore the split) repeatable;
# the split sizes are left at train_test_split's defaults.
split_parts = train_test_split(X, y, random_state=40)
X_train, X_test, y_train, y_test = split_parts

In [20]:
# Fitting data to Random Forest
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so the fitted trees, the test score and the downstream
# predictions are reproducible across kernel restarts; without a seed every
# run grows a different forest. 40 mirrors the seed used for the train/test split.
rf = RandomForestClassifier(n_estimators=33, random_state=40)
rf.fit(X_train, y_train)

# Mean accuracy on the held-out split
rf.score(X_test, y_test)

0.8155339805825242

In [21]:
# Predicting data
# Build the candidate feature matrix from exactly the columns, in exactly the
# order, that the forest was trained on (X.columns):
#   * 'yelpid' is not a training column, so it is left out;
#   * a 'predictions' column added by a later cell is also left out, so this
#     cell can be re-run safely;
#   * a cuisine seen in training but absent from this request is added as an
#     all-zero indicator column;
#   * a cuisine never seen in training is dropped, since the model has no
#     feature for it.
X_res = res_new.reindex(columns=X.columns, fill_value=0)
predictions = rf.predict(X_res)
predictions

array([1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       1, 0, 1])

In [22]:
# Attach the model's predicted label to each candidate restaurant
res_new = res_new.assign(predictions=predictions)
res_new.head()

Unnamed: 0,delivery,price,rating,reservations,yelpid,chinese,french,greek,italian,japanese,korean,mediterranean,mexican,thai,predictions
0,0,1,4.5,1,cinque-terre-ristorante-italiano-huntington-st...,0,0,0,1,0,0,0,0,0,1
2,0,1,4.5,0,arata-sushi-syosset,0,0,0,0,1,0,0,0,0,1
3,1,1,4.5,0,sasa-hibachi-restaurant-farmingdale-2,0,0,0,0,1,0,0,0,0,1
4,0,1,4.5,0,thai-flavors-kitchen-bethpage,0,0,0,0,0,0,0,0,1,1
5,0,1,4.5,0,vespa-italian-kitchen-and-bar-farmingdale-4,0,0,0,1,0,0,0,0,0,0


In [23]:
# Selecting random restaurant with predictions = 1
yesses = res_new[res_new['predictions'] == 1]
if yesses.empty:
    # sample(n=1) on an empty frame fails with an opaque ValueError; raise the
    # same exception type with a message that explains what happened.
    raise ValueError("No restaurant was predicted as a like; nothing to recommend.")

random_res = yesses.sample(n=1)

# Read the id straight from the sampled row. Parsing str(Series) depends on how
# pandas happens to pad and format its repr, so it is not a reliable way to
# extract the value.
yelp_rec = str(random_res['yelpid'].iloc[0])
yelp_rec

'three-brothers-pizzeria-and-restaurant-bethpage'