# Food Recommendation System

## Loading the Libraries and Dataset

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Restaurant-level metadata (one CSV) and the customer reviews (a second CSV);
# both are read from the notebook's working directory.
restaurants = pd.read_csv(filepath_or_buffer='sg_restos.csv')
reviews = pd.read_csv(filepath_or_buffer='sg_reviews.csv')

In [4]:
# Attach the restaurant metadata to every review through the shared StoreId key
# (default inner join: only StoreIds present in both frames are kept).
food = restaurants.merge(reviews, on='StoreId')

food.head()

Unnamed: 0,StoreId,CompleteStoreName,FoodType,AverageRating,Reviewers,City,Location,uuid,createdAt,updatedAt,text,isAnonymous,reviewerId,replies,likeCount,isLiked,overall,restaurant_food,rider
0,cc4y,Homtang Thai (Balestier Road),Thai,4.9,(9),Singapore,,2e36a827-863b-4280-a363-99fbe72e327b,2024-03-02T13:47:39Z,2024-03-02T13:47:39Z,2nd time ordering. The cooking skills is defin...,False,s2css6eq,[],0.0,False,5,5,
1,cc4y,Homtang Thai (Balestier Road),Thai,4.9,(9),Singapore,,4d4aed03-f2ed-49a9-ad13-469e1cdaed75,2024-03-09T11:31:41Z,2024-03-09T11:31:41Z,fresh & delicious,False,sg67j6ko,[],0.0,False,5,5,
2,cc4y,Homtang Thai (Balestier Road),Thai,4.9,(9),Singapore,,50be0977-348d-4d87-8b7b-e3289a34e19e,2024-02-22T14:04:08Z,2024-02-22T14:04:08Z,Fairly new stall as of feb 2024. The fried por...,False,s2css6eq,[],0.0,False,5,5,
3,cc4y,Homtang Thai (Balestier Road),Thai,4.9,(9),Singapore,,b3a4686b-89d5-4019-9059-c081e6ba7b4f,2024-03-11T12:50:18Z,2024-03-11T12:50:18Z,👍 hope more items available at Foodpanda,False,sg67j6ko,[],0.0,False,5,5,
4,sh3w,Xin Mei Xiang Zheng Zong Lor Mee 新美香正宗卤面 (Bale...,Noodles,4.4,(100+),Singapore,,13bf100b-df03-42d3-baf3-93471833c66d,2023-12-25T01:01:04Z,2023-12-25T01:01:04Z,It's not that dalicious now.,False,sgg9llre,[],0.0,False,3,3,


In [5]:
# List the column names of the merged restaurants + reviews frame.
print(food.columns)

Index(['StoreId', 'CompleteStoreName', 'FoodType', 'AverageRating',
       'Reviewers', 'City', 'Location', 'uuid', 'createdAt', 'updatedAt',
       'text', 'isAnonymous', 'reviewerId', 'replies', 'likeCount', 'isLiked',
       'overall', 'restaurant_food', 'rider'],
      dtype='object')


## Data Preprocessing
We will first pivot the data to create a user-item matrix for collaborative filtering.

In [6]:
# Reviewer x restaurant rating matrix for collaborative filtering.
# Rows are reviewerId, columns are StoreId, cells hold the `overall` score
# (pivot_table's default aggregation, the mean, applies when a reviewer has
# several reviews for the same store). Pairs without a review are set to 0.
user_ratings = (
    food
    .pivot_table(index='reviewerId', columns='StoreId', values='overall')
    .fillna(0)
)

user_ratings.head()

StoreId,a04x,a0bn,a0gk,a0j3,a0o3,a0pb,a0py,a0sa,a0so,a0u2,...,zx24,zxoi,zyeh,zykw,zym3,zyrf,zys3,zz32,zz4n,zzd0
reviewerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00b66b0f-75d4-4282-91bd-d6aa3c6e21cd,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01523cb8-d52b-40af-8302-863f4fff5dc2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
015efa2a-4211-4826-8e99-f53c59d2109c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01bbf538-eee8-464f-97de-7aa923d8cb5d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01ef6745-72de-40e6-8bc8-9abb41ed1267,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Item-item similarity: transposing puts one restaurant per row, so the cosine
# is taken between restaurants' rating vectors across all reviewers.
restaurant_similarity = cosine_similarity(user_ratings.T)

# Label both axes with the StoreIds so scores can be looked up by restaurant.
restaurant_similarity_df = pd.DataFrame(
    restaurant_similarity,
    index=user_ratings.columns,
    columns=user_ratings.columns,
)

# Function to get similar restaurants based on a single restaurant
def get_similar_restaurants(restaurant_id, n=5, similarity_df=None):
    """Return the labels of the ``n`` restaurants most similar to ``restaurant_id``.

    Parameters
    ----------
    restaurant_id : hashable
        Column label (StoreId) to look up in the similarity matrix.
    n : int, default 5
        Maximum number of neighbours to return; values <= 0 give an empty result.
    similarity_df : pandas.DataFrame, optional
        Similarity matrix labelled with the same ids on both axes. When omitted,
        the notebook-level ``restaurant_similarity_df`` is used.

    Returns
    -------
    pandas.Index
        Labels ordered from most to least similar. ``restaurant_id`` itself is
        never part of the result.

    Raises
    ------
    KeyError
        If ``restaurant_id`` is not a column of the similarity matrix.
    """
    if similarity_df is None:
        similarity_df = restaurant_similarity_df
    if restaurant_id not in similarity_df.columns:
        raise KeyError(f"Unknown restaurant id: {restaurant_id!r}")

    # Remove the query restaurant by label rather than assuming it sorts first:
    # other restaurants can tie with it at the top score, in which case slicing
    # off position 0 may discard a real neighbour and keep the query itself.
    neighbour_scores = similarity_df[restaurant_id].drop(labels=restaurant_id, errors="ignore")

    # A stable sort keeps tied scores in their original label order, so repeated
    # runs return the same recommendations.
    ranked = neighbour_scores.sort_values(ascending=False, kind="mergesort")
    return ranked.iloc[:max(n, 0)].index

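Next, we prepare content-based features from restaurant metadata (such as cuisine and location), so that restaurants can also be recommended by what they offer. The cell below vectorizes only the cuisine (`FoodType`) column.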

In [9]:
from sklearn.feature_extraction.text import CountVectorizer

In [10]:
# Bag-of-words encoding of the cuisine label. fit_transform returns a SciPy
# sparse matrix, which is kept sparse here rather than densified with .toarray().
# max_features=500 caps the vocabulary at the 500 most frequent terms.
cv = CountVectorizer(stop_words='english', max_features=500)
# CountVectorizer rejects NaN documents, so a missing FoodType is encoded as an
# empty string (a row of zeros) instead of aborting the cell.
sparse_matrix = cv.fit_transform(food['FoodType'].fillna(''))

In [11]:
import pickle

In [13]:
# Persist the merged frame. The context manager flushes and closes the file;
# the previous bare open() handle was never closed.
# NOTE(review): despite the file name, this stores the merged `food` frame, not
# `restaurants` — confirm that this is what the consumer of the pickle expects.
with open('sg_restos.pkl', 'wb') as pkl_file:
    pickle.dump(food, pkl_file)

In [15]:
# Inspect the store-name column as a NumPy array. `food` is the merged frame,
# so the same name appears once per review row.
food['CompleteStoreName'].values

array(['Homtang Thai (Balestier Road)', 'Homtang Thai (Balestier Road)',
       'Homtang Thai (Balestier Road)', ...,
       'Claypot Little Brother砂煲小弟 (41 Joo Koon)',
       'Claypot Little Brother砂煲小弟 (41 Joo Koon)',
       'Claypot Little Brother砂煲小弟 (41 Joo Koon)'], dtype=object)

In [16]:
# Persist the merged frame. The context manager flushes and closes the file;
# the previous bare open() handle was never closed.
# NOTE(review): despite the file name, this stores the merged `food` frame, not
# `reviews` — confirm that this is what the consumer of the pickle expects.
with open('sg_reviews.pkl', 'wb') as pkl_file:
    pickle.dump(food, pkl_file)

In [17]:
# Persist the merged frame as a plain dict ({column -> {row index -> value}},
# the DataFrame.to_dict() default). The context manager flushes and closes the
# file; the previous bare open() handle was never closed.
with open('food_list.pkl', 'wb') as pkl_file:
    pickle.dump(food.to_dict(), pkl_file)