#### **Collaborative Filtering Recommender Model**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pyarrow
import fastparquet
from scipy import sparse
from surprise import Reader
from surprise import Dataset
from surprise import BaselineOnly
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate, train_test_split
from surprise.model_selection import KFold
from surprise import KNNBasic
from surprise import accuracy

## Data Pre-Processing

In [None]:
# Load the merged Yelp food-review sample from a local parquet file.
yelp_ratings = pd.read_parquet(
    path='/Users/hetvipatel/Downloads/yelp_merged_food_tenth.parquet'
)

In [None]:
# Preview the first five rows of the freshly loaded frame.
yelp_ratings.head(n=5)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_bus,review_count,...,attributes,categories,review_id,user_id,stars_rev,useful,funny,cool,text,date
0,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,Medford,NJ,8055,39.876084,-74.816911,3.5,24,...,"{'AcceptsInsurance': None, 'AgesAllowed': None...","Italian, Sandwiches, Pizza, Restaurants",iDQ5_1nNKi8eP_Jmg-H0nQ,eN8tvWTA0JTjHAKTDA4nJQ,5,0,0,0,Their pizza is just like you'd get on the boar...,2015-07-17 16:28:45
1,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,Medford,NJ,8055,39.876084,-74.816911,3.5,24,...,"{'AcceptsInsurance': None, 'AgesAllowed': None...","Italian, Sandwiches, Pizza, Restaurants",VzBUyylMGwAS06tHGQgMPg,8sOO-SmQjlZ6ZjIcT3na_Q,2,0,0,0,Good food but get a 1 star for the price . Pai...,2014-11-01 00:40:29
2,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,Medford,NJ,8055,39.876084,-74.816911,3.5,24,...,"{'AcceptsInsurance': None, 'AgesAllowed': None...","Italian, Sandwiches, Pizza, Restaurants",7xwJOw-MlHkokFH3q14XSA,HKqyy_Lranv7_8eUFu80xQ,3,1,1,1,"Wandered in here by chance, first pizza place ...",2014-10-15 00:50:16
3,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,Medford,NJ,8055,39.876084,-74.816911,3.5,24,...,"{'AcceptsInsurance': None, 'AgesAllowed': None...","Italian, Sandwiches, Pizza, Restaurants",_HSQfjwRb0Xo0DP-_p4IEw,QVby2V284huDCDY6GOdL9Q,5,0,0,0,Best pizza in Medford! Always consistent and ...,2018-10-02 22:31:19
4,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,Medford,NJ,8055,39.876084,-74.816911,3.5,24,...,"{'AcceptsInsurance': None, 'AgesAllowed': None...","Italian, Sandwiches, Pizza, Restaurants",jQ6nIsngp_49lzudG4UjDA,XUos3vImZ6uz2ByFbu2AWw,3,0,0,0,I just stop in for the slices. I prefer the s...,2011-03-02 17:43:55


In [None]:
# Write the full frame out as CSV; a later cell re-reads this file with only
# the columns it needs.
csv_path = '/Users/hetvipatel/Downloads/yelp_merged_food_tenth_csv.csv'
yelp_ratings.to_csv(csv_path)

In [None]:
# Rename both star columns in one in-place call:
#   stars_bus -> stars_x, stars_rev -> stars_y.
# NOTE: the read_csv cell further down rebinds `yelp_ratings`, so these renames
# only affect the parquet-loaded frame and do not carry forward.
yelp_ratings.rename(
    columns={'stars_bus': 'stars_x', 'stars_rev': 'stars_y'},
    inplace=True,
)

In [None]:
# Re-load the exported CSV, keeping only the columns listed below.
ratings_columns = ['business_id', 'user_id',
                   'address', 'name',
                   'stars_rev', 'date']
yelp_ratings = pd.read_csv(
    '/Users/hetvipatel/Downloads/yelp_merged_food_tenth_csv.csv',
    usecols=ratings_columns,
)

In [None]:
# Rename the star columns in place with a single mapping.
# 'stars_bus' is not among the columns loaded by the read_csv cell above, so in
# practice only 'stars_rev' -> 'stars_y' takes effect here.
yelp_ratings.rename(
    columns={'stars_bus': 'stars_x', 'stars_rev': 'stars_y'},
    inplace=True,
)

In [None]:
# Confirm the reduced column set and the renamed rating column.
yelp_ratings.head(n=5)

Unnamed: 0,business_id,name,address,user_id,stars_y,date
0,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,eN8tvWTA0JTjHAKTDA4nJQ,5,2015-07-17 16:28:45
1,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,8sOO-SmQjlZ6ZjIcT3na_Q,2,2014-11-01 00:40:29
2,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,HKqyy_Lranv7_8eUFu80xQ,3,2014-10-15 00:50:16
3,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,QVby2V284huDCDY6GOdL9Q,5,2018-10-02 22:31:19
4,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,XUos3vImZ6uz2ByFbu2AWw,3,2011-03-02 17:43:55


In [None]:
# Number of missing values in each column.
yelp_ratings.isna().sum(axis=0)

business_id      0
name             0
address        962
user_id          0
stars_y          0
date             0
dtype: int64

In [None]:
# Drop every row containing at least one missing value, then re-check the
# per-column missing counts (all should now be zero).
yelp_ratings = yelp_ratings.dropna(axis=0, how='any')
yelp_ratings.isna().sum()

business_id    0
name           0
address        0
user_id        0
stars_y        0
date           0
dtype: int64

In [None]:
# Inspect the column dtypes before the explicit casts in the next cell.
yelp_ratings.dtypes

business_id    object
name           object
address        object
user_id        object
stars_y         int64
date           object
dtype: object

In [None]:
# Cast the rating to an integer dtype and the id/date columns to strings,
# all in one astype call.
yelp_ratings = yelp_ratings.astype({
    'stars_y': 'int_',
    'user_id': 'str',
    'business_id': 'str',
    'date': 'str',
})

In [None]:
# Re-inspect the column dtypes after the casts above.
yelp_ratings.dtypes

business_id    object
name           object
address        object
user_id        object
stars_y         int64
date           object
dtype: object

In [None]:
# Final look at the cleaned frame before it is handed to Surprise.
yelp_ratings.head(n=5)

Unnamed: 0,business_id,name,address,user_id,stars_y,date
0,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,eN8tvWTA0JTjHAKTDA4nJQ,5,2015-07-17 16:28:45
1,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,8sOO-SmQjlZ6ZjIcT3na_Q,2,2014-11-01 00:40:29
2,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,HKqyy_Lranv7_8eUFu80xQ,3,2014-10-15 00:50:16
3,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,QVby2V284huDCDY6GOdL9Q,5,2018-10-02 22:31:19
4,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd,XUos3vImZ6uz2ByFbu2AWw,3,2011-03-02 17:43:55


In [None]:
# Build the Surprise dataset from (user, item, rating) triples.
# The Reader is constructed with the library defaults.
reader = Reader()

rating_triples = yelp_ratings[['user_id', 'business_id', 'stars_y']]
yelp_rating_data = Dataset.load_from_df(rating_triples, reader)

In [None]:
# 80-20 train/test split.
# A fixed random_state makes the shuffle deterministic, so the model fitted on
# `train_df` (and any prediction or metric derived from it) is reproducible
# after a kernel restart instead of changing on every run.
train_df, test_df = train_test_split(yelp_rating_data,
                                     test_size=.2,
                                     random_state=17)

In [None]:
# 5-fold cross-validation of an SVD model with default hyper-parameters.
# 1.2559 RMSE -- not completely accurate

svd = SVD()

eval_measures = ['RMSE', 'MAE']
cross_validate(svd, yelp_rating_data, measures=eval_measures, cv=5)

{'test_rmse': array([1.25597556, 1.25454089, 1.2553304 , 1.25607579, 1.25884932]),
 'test_mae': array([1.00658321, 1.00566603, 1.00663482, 1.00793106, 1.00866624]),
 'fit_time': (25.82894206047058,
  26.412113189697266,
  25.597191095352173,
  26.270920991897583,
  25.801610946655273),
 'test_time': (0.5965480804443359,
  0.5652680397033691,
  0.5678658485412598,
  0.5707650184631348,
  0.5689401626586914)}

In [None]:
# Fit the SVD model on the training split; the object returned by fit() is kept
# as the trained model used for predictions below.
svd_model_trained = svd.fit(train_df)

In [None]:
# Predict one (user, restaurant) pair.
#   uid  - the user id
#   iid  - the business id the user might rate
#   r_ui - the known rating for this pair (5.0 here); it is passed through to the
#          returned Prediction for comparison, while `est` is the model's estimate.
svd_model_trained.predict(
    uid='9m9mkjkyo4HGFUWgugewHA',
    iid='2O2K6SXPWv56amqxCECd4w',
    r_ui=5.0,
)

Prediction(uid='9m9mkjkyo4HGFUWgugewHA', iid='2O2K6SXPWv56amqxCECd4w', r_ui=5.0, est=3.84598666397008, details={'was_impossible': False})

## Utility Matrix/Collab Filtering Method

In [None]:
from surprise import SVD
from surprise import accuracy
from surprise import Reader
from surprise import Dataset
from surprise import BaselineOnly


In [None]:
# Review-level view: who rated which business, with what score, and when.
review_columns = ['user_id', 'business_id', 'stars_y', 'date']
df_review = yelp_ratings.loc[:, review_columns]
df_review

Unnamed: 0,user_id,business_id,stars_y,date
0,eN8tvWTA0JTjHAKTDA4nJQ,VME7Zez9J-FL7cy2H_eIdA,5,2015-07-17 16:28:45
1,8sOO-SmQjlZ6ZjIcT3na_Q,VME7Zez9J-FL7cy2H_eIdA,2,2014-11-01 00:40:29
2,HKqyy_Lranv7_8eUFu80xQ,VME7Zez9J-FL7cy2H_eIdA,3,2014-10-15 00:50:16
3,QVby2V284huDCDY6GOdL9Q,VME7Zez9J-FL7cy2H_eIdA,5,2018-10-02 22:31:19
4,XUos3vImZ6uz2ByFbu2AWw,VME7Zez9J-FL7cy2H_eIdA,3,2011-03-02 17:43:55
...,...,...,...,...
415061,iJnzniiM5pTcD0b5PneTfg,TZEfDSO2wRC6df4MIsQPsg,4,2017-07-11 00:58:19
415062,oqgc6uPpV1g7uwrWQzcV2Q,TZEfDSO2wRC6df4MIsQPsg,4,2019-02-10 17:40:21
415063,dbLz7YfH5Fy6AAB-5-vDyA,TZEfDSO2wRC6df4MIsQPsg,3,2016-03-18 14:53:46
415064,S9IMF5MkJR949hZtn88X4Q,TZEfDSO2wRC6df4MIsQPsg,4,2016-08-28 18:03:50


In [None]:
# Business-level attributes. This is sliced from the review-level frame, so the
# same business_id appears once per review (see the repeated rows in the output).
restaurant_columns = ['business_id', 'name', 'address']
restaurant = yelp_ratings.loc[:, restaurant_columns]
restaurant

Unnamed: 0,business_id,name,address
0,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd
1,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd
2,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd
3,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd
4,VME7Zez9J-FL7cy2H_eIdA,Branco's Pizza,428 Stokes Rd
...,...,...,...
415061,TZEfDSO2wRC6df4MIsQPsg,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd
415062,TZEfDSO2wRC6df4MIsQPsg,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd
415063,TZEfDSO2wRC6df4MIsQPsg,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd
415064,TZEfDSO2wRC6df4MIsQPsg,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd


In [None]:
# Attach name/address to every review.
# `restaurant` holds one row per review, so each business_id is repeated many
# times. Merging it as-is is a many-to-many join: every review of a business is
# paired with every duplicate row of that business, so the row count grows with
# the square of each business's review count and duplicates every rating.
# Collapse the lookup to one row per business first, and let pandas verify that
# the join really is many-to-one.
restaurant_unique = restaurant.drop_duplicates(subset='business_id')
combined_business_data = pd.merge(df_review, restaurant_unique,
                                  on='business_id',
                                  how='inner',
                                  validate='many_to_one')
combined_business_data

Unnamed: 0,user_id,business_id,stars_y,date,name,address
0,eN8tvWTA0JTjHAKTDA4nJQ,VME7Zez9J-FL7cy2H_eIdA,5,2015-07-17 16:28:45,Branco's Pizza,428 Stokes Rd
1,eN8tvWTA0JTjHAKTDA4nJQ,VME7Zez9J-FL7cy2H_eIdA,5,2015-07-17 16:28:45,Branco's Pizza,428 Stokes Rd
2,eN8tvWTA0JTjHAKTDA4nJQ,VME7Zez9J-FL7cy2H_eIdA,5,2015-07-17 16:28:45,Branco's Pizza,428 Stokes Rd
3,eN8tvWTA0JTjHAKTDA4nJQ,VME7Zez9J-FL7cy2H_eIdA,5,2015-07-17 16:28:45,Branco's Pizza,428 Stokes Rd
4,eN8tvWTA0JTjHAKTDA4nJQ,VME7Zez9J-FL7cy2H_eIdA,5,2015-07-17 16:28:45,Branco's Pizza,428 Stokes Rd
...,...,...,...,...,...,...
207022715,DsBghdbpxFIAl6jApweRaQ,TZEfDSO2wRC6df4MIsQPsg,3,2019-01-21 15:21:16,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd
207022716,DsBghdbpxFIAl6jApweRaQ,TZEfDSO2wRC6df4MIsQPsg,3,2019-01-21 15:21:16,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd
207022717,DsBghdbpxFIAl6jApweRaQ,TZEfDSO2wRC6df4MIsQPsg,3,2019-01-21 15:21:16,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd
207022718,DsBghdbpxFIAl6jApweRaQ,TZEfDSO2wRC6df4MIsQPsg,3,2019-01-21 15:21:16,Zaxby's Chicken Fingers & Buffalo Wings,8810 Wesleyan Rd


In [None]:
# Number of rating rows per business, largest first (top five shown).
rows_per_business = combined_business_data.groupby('business_id')['stars_y'].count()
rows_per_business.sort_values(ascending=False).head()

business_id
ytynqOUb3hjKeJfRj5Tshw    33385284
dsfRniRgfbDjC8os848B6A     9168784
RQAF6a0akMiot5lZZnMNNw     7425625
SZU9c8V2GuREDN5KgyHFJw     5973136
vN6v8m4DO45Z4pp8yxxF_w     4494400
Name: stars_y, dtype: int64

In [None]:
# Look up the name and address for one hard-coded business_id.
# NOTE(review): this id is not the top entry in the per-business counts shown
# above, so it may not be the "most popular" restaurant -- confirm it is the
# intended one.
Filter = combined_business_data['business_id'] == 'TZEfDSO2wRC6df4MIsQPsg'
print("Name: ", combined_business_data.loc[Filter, 'name'].unique())
print("Address:", combined_business_data.loc[Filter, 'address'].unique())

Name:  ["Zaxby's Chicken Fingers & Buffalo Wings"]
Address: ['8810 Wesleyan Rd']


### Utility Matrix: User-Restaurant Matrix

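The matrix has one row per user and one column per restaurant name; each cell holds the rating that user gave to that restaurant.

If a user did not rate a restaurant, the cell is filled with 0 (`fill_value=0`) rather than left blank.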

In [None]:
# User x restaurant-name utility matrix of ratings.
# Multiple rows for the same (user_id, name) pair are combined by pivot_table's
# default aggregation (mean); pairs with no rating are filled with 0.
rating_crosstab = combined_business_data.pivot_table(
    values='stars_y',
    index='user_id',
    columns='name',
    fill_value=0,
)
rating_crosstab.head()

name,1 Night Stand,10 Barrel Brewing - Boise,10 Torr Distilling and Brewing,1010 Pizza & Grill,"1860 Saloon, Game Room, & Hardshell Café",1864 Tavern,1st NE International Market,2 Alexs Spanish Cuisine,2 Fat Guys American Grill,2 Little Piggys BBQ and Catering,...,Zoup!,Zudar's Bourbon St Bar & Grill,Zushi Dozo,Zushi Sushi & Ramen,a.kitchen,eegee's,honeygrow,il Fustino,sweetFrog Premium Frozen Yogurt,éclair délicieux
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
---2PmXbF47D870stH1jqA,0,0.0,0.0,0.0,0,0.0,0,0,0,0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
---UgP94gokyCDuB5zUssA,0,0.0,0.0,0.0,0,0.0,0,0,0,0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
--0FNOzZkEQlz8WzS3WttQ,0,0.0,0.0,0.0,0,0.0,0,0,0,0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
--2bpE5vyR-2hAP7sZZ4lA,0,0.0,0.0,0.0,0,0.0,0,0,0,0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
--2tyArRmSoyKx5r-FVG0A,0,0.0,0.0,0.0,0,0.0,0,0,0,0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0


In [None]:
# Shape of the original (user x restaurant) utility matrix.
rating_crosstab.shape

# Transpose so that each row of X is one restaurant and each column one user.
X = np.transpose(rating_crosstab.values)
X.shape

(3313, 267776)

In [None]:
import sklearn
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import accuracy_score


# Compress each row of X (one restaurant's ratings across all users) down to 12
# latent components.
# The estimator is bound to `truncated_svd`, not `SVD`: binding it to `SVD`
# would overwrite the Surprise `SVD` class imported earlier in the notebook, so
# re-running any `SVD()` cell afterwards would fail.
# NOTE(review): if a later cell relied on `SVD` referring to this estimator, it
# should use `truncated_svd` instead.
truncated_svd = TruncatedSVD(n_components=12, random_state=17)
result_matrix = truncated_svd.fit_transform(X)
result_matrix.shape

(3313, 12)

In [None]:
# Pearson correlation coefficients between the rows of result_matrix
# (each row is treated as one variable), giving a square restaurant-by-restaurant
# matrix.
corr_matrix = np.corrcoef(result_matrix, rowvar=True)
corr_matrix.shape

(3313, 3313)

In [None]:
# Find the column position of the restaurant of interest in the utility matrix;
# the same position indexes its row in the correlation matrix.
restaurant_names = rating_crosstab.columns
restaurants_list = restaurant_names.tolist()

target_restaurant = "Zaxby's Chicken Fingers & Buffalo Wings"
popular_rest = restaurants_list.index(target_restaurant)
print("index of the popular restaurant: ", popular_rest)

index of the popular restaurant:  3292


In [None]:
# Correlations between the restaurant of interest and every restaurant
# (one row of the correlation matrix).
corr_popular_rest = corr_matrix[popular_rest, :]
corr_popular_rest.shape

(3313,)

In [None]:
# Restaurants whose correlation with the restaurant of interest lies strictly
# between 0.9 and 1.0.
# NOTE(review): the `< 1.0` bound presumably excludes the restaurant itself;
# floating-point rounding could let a self-correlation just under 1.0 through.
highly_correlated = (corr_popular_rest > 0.9) & (corr_popular_rest < 1.0)
restaurant_names[highly_correlated].tolist()

['ABC Pizza International',
 'Bagels Galore',
 'Banh Mi Saigon Baguette',
 "Barr's Music City Soul Food",
 "Billy's Seafood & Gyros",
 'Black Coffee & Tasty Waffles',
 'Bunga Raya Restaurant & Sushi Bar',
 'Burger King',
 'Canes Cafe and Corner Store',
 'Captain Crab-Cajun Seafood',
 'Carmel Chocolate Cafe',
 "Casey's",
 'China One',
 'Constant Smoke BBQ',
 'Countryside Country Club',
 "Crabby's Bar & Grill",
 'Crazy Burrito - Dunedin',
 'D&E Donuts',
 'Deep Blue Liquors',
 'Del Valle',
 'Dented Keg Ale Works',
 "Domino's Pizza",
 "Don Julio's Authentic Mexican Cuisine",
 'Edge City BBQ & Tap',
 "Eve's Family Restaurant",
 'Feather Sound Liquors',
 'Flavors Indian Cuisine',
 "Frank's Restaurant",
 'Hibachi Express Tarpon Springs',
 'Hibiscus',
 'Hip Hop Crab',
 'Hot Dogs on Main',
 'Hot N Crazy Crab',
 'Hurricanes Sports Bar',
 'IKEA Restaurant',
 'Ice Cream Theory',
 'Inn On The Gulf',
 "Jersey Mike's Subs",
 'Juans Mexican Grill',
 'KFC',
 "Kazu's Sushi",
 'Khan Murjan',
 'La Fiebre 

Neural Network