In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import csv

# Content-based extended review

In [2]:
df = pd.read_csv('zomato_restaurants.csv')
df.head()

Unnamed: 0,rest_index,rest_name,locality,cost_for_two,rating,votes,cuisines
0,0,Punjab Grill,"Orion Mall, Malleshwaram",2000,4.9,2058,"['North Indian', 'Mughlai']"
1,1,Byg Brewski Brewing Company,Sarjapur Road,1600,4.9,17034,"['Continental', 'North Indian', 'Italian', 'So..."
2,2,Brahmin's Coffee Bar,Basavanagudi,100,4.9,2711,['South Indian']
3,3,Taaza Thindi,Jayanagar,100,4.9,4152,['South Indian']
4,4,Santé Spa Cuisine,Indiranagar,1000,4.9,279,"['Healthy Food', 'Salad', 'Mediterranean']"


In [4]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df['cuisines'])

In [10]:
# Get features
feature_names = tf.get_feature_names()
print(len(feature_names))
print(feature_names)

1165
['afghan', 'african', 'african burger', 'american', 'american arabian', 'american asian', 'american bakery', 'american bbq', 'american beverages', 'american burger', 'american cafe', 'american chinese', 'american continental', 'american desserts', 'american european', 'american fast', 'american finger', 'american goan', 'american indonesian', 'american italian', 'american mediterranean', 'american mexican', 'american modern', 'american north', 'american pizza', 'american salad', 'american sandwich', 'american seafood', 'american south', 'american steak', 'american tex', 'andhra', 'andhra asian', 'andhra bbq', 'andhra bengali', 'andhra biryani', 'andhra cafe', 'andhra chinese', 'andhra continental', 'andhra fast', 'andhra hyderabadi', 'andhra kebab', 'andhra kerala', 'andhra mangalorean', 'andhra mughlai', 'andhra north', 'andhra seafood', 'andhra south', 'arabian', 'arabian afghan', 'arabian bbq', 'arabian beverages', 'arabian biryani', 'arabian charcoal', 'arabian chinese', 'arab

In [24]:
# tf-idf score of each feature
print(len(tf.idf_))
print(tf.idf_)

1165
[7.59379145 9.29853955 9.29853955 ... 9.29853955 9.29853955 8.89307444]


In [27]:
print(tfidf_matrix.shape)
print(tfidf_matrix)

(8035, 1165)
  (0, 922)	0.23938322242107657
  (0, 592)	0.21234590644300375
  (0, 888)	0.597860802500554
  (0, 924)	0.2395649338829215
  (0, 638)	0.6948208345518041
  (1, 922)	0.10222998199024363
  (1, 592)	0.18136708138371968
  (1, 924)	0.10230758291517039
  (1, 383)	0.19448745575447085
  (1, 663)	0.23246273784929813
  (1, 1079)	0.14507176836591065
  (1, 499)	0.2872407430168685
  (1, 501)	0.14400785514007264
  (1, 416)	0.3126188884495961
  (1, 623)	0.3810649968902911
  (1, 692)	0.4574895017702353
  (1, 1081)	0.1451043157579797
  (1, 616)	0.4174114264700863
  (1, 500)	0.2872407430168685
  (2, 592)	0.40423914585582676
  (2, 1079)	0.6466850244775918
  (2, 1081)	0.6468301106737134
  (3, 592)	0.40423914585582676
  (3, 1079)	0.6466850244775918
  (3, 1081)	0.6468301106737134
  :	:
  (8028, 757)	0.7512811234993051
  (8029, 841)	1.0
  (8030, 922)	0.4046611427038459
  (8030, 592)	0.35895638917652184
  (8030, 924)	0.40496831363691654
  (8030, 316)	0.4630149475565673
  (8030, 610)	0.57360046508369

In [None]:
#You see that over 1165 different words were used to describe the 8035 restaurants in your dataset.

In [25]:
print(feature_names[888])

mughlai


In [28]:
from sklearn.metrics.pairwise import linear_kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [29]:
cosine_sim.shape

(8035, 8035)

In [30]:
print(cosine_sim)

[[1.         0.08749401 0.08583853 ... 0.         0.         0.1285273 ]
 [0.08749401 1.         0.26098925 ... 0.         0.43099043 0.14162241]
 [0.08583853 0.26098925 1.         ... 0.         0.         0.06904586]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.         0.43099043 0.         ... 0.         1.         0.        ]
 [0.1285273  0.14162241 0.06904586 ... 0.         0.         1.        ]]


In [31]:
# Build a 1-dimensional array with restaurant titles
titles = df['rest_name']
indices = pd.Series(df.index, index=df['rest_name'])
print(indices)

rest_name
Punjab Grill                           0
Byg Brewski Brewing Company            1
Brahmin's Coffee Bar                   2
Taaza Thindi                           3
Santé Spa Cuisine                      4
Belgian Waffle Factory                 5
Milano Ice Cream                       6
The Boozy Griffin                      7
O.G. Variar & Sons                     8
Asia Kitchen By Mainland China         9
CTR                                   10
The Globe Grub                        11
TBC Sky Lounge                        12
Punjab Grill                          13
The Big Barbeque                      14
AB's - Absolute Barbecues             15
Barbecue by Punjab Grill              16
Kurtoskalacs                          17
Rim Naam - The Oberoi                 18
Smoke House Deli                      19
Maziga                                20
Fenny's Lounge And Kitchen            21
Natural Ice Cream                     22
The Pancake Story                     23
Chili'

In [32]:
def restaurant_recommendations(title):
    # 1.	Get the index of the restaurant that matches the title
    idx = indices[title]
    
    # 2.	Get the list of cosine similarity scores for that particular restaurant with all restaurants. 
    # Convert it into a list of tuples where the first element is its position and the second is the similarity score.
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # 3.	Sort the restaurants based on the similarity score.
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # 4.	Get the restaurant indices based on the similarity score.
    rest_indices = [i[0] for i in sim_scores]
    
    # 5.	Get the restaurant name based on the restaurant indices
    final = titles.iloc[rest_indices].head()
    
    return final.values

# Item-based collaborative filtering

In [33]:
userR = pd.read_csv('userRatings.csv')
corrM = pd.read_csv('corrMatrix.csv')

In [35]:
user_index = userR.cust_name[userR.cust_name == 'Sunitha Pai'].index[0]
myRatings = userR.iloc[user_index].dropna()

In [44]:
simCandidates = pd.Series()
for i in range(1, len(myRatings.index)):
    #print ("adding sims for " + myRatings.index[i] + "...")
    sims = corrM[myRatings.index[i]].dropna()
#     print("1: ")
#     print(sims)
#     print(myRatings[i])
    sims = sims.map(lambda x: x * myRatings[i])
#     print("2: ")
#     print(sims)
    simCandidates = simCandidates.append(sims)
print(simCandidates)

79      4.0
3118    4.0
207     3.0
1936   -3.0
2696   -3.0
615     4.0
641     4.0
769     1.0
786     3.0
801     5.0
929     4.0
1138    3.0
1500    4.0
1234   -4.0
1299    4.0
2129    4.0
3282   -4.0
3291   -2.0
2144    5.0
2302    5.0
3045   -5.0
1445    5.0
2204    5.0
1622    4.0
2326    4.0
2957    4.0
2628    3.0
2737    5.0
237     4.0
2734    4.0
2738    4.0
969     4.0
2747    4.0
1391    4.0
2326    4.0
2957    4.0
79      4.0
3118    4.0
3215    4.0
3493    4.0
3525    3.0
dtype: float64


In [45]:
simCandidates.sort_values(inplace = True, ascending = False)
simCandidates.head()

801     5.0
2144    5.0
2302    5.0
1445    5.0
2204    5.0
dtype: float64

In [49]:
filteredSims = simCandidates.drop(myRatings.index, errors='ignore')

In [53]:
l = len(filteredSims)
indexes = simCandidates.index.values
final = []
for i in range(0, l):
    dataframe = {}
    dataframe['rest_name'] = corrM['rest_name'][indexes[i]]
    final.append(dataframe)
    print(dataframe['rest_name'])

Coffee Mechanics
Nandi Upachar
Orzuv
Hungree Belly
New Tandoor
Sea Spice by 7 Star
Adithya
Indian Coffee House & Restaurant
Capri Gastrobar
HVR Veg
Namma Kudla
Café Mor'ish
Udupi Grand
Kailash Parbat
Palmgrove
Sri Raghavendra Stores
Tasty Bites
Azure - Taj Yeshwantpur
Sea Rock
Seafood Dot Grill
Donne Biriyani House
Sendhoor Coffee
Hotel Kadamba Veg
Palmgrove
Sri Raghavendra Stores
Adithya
Tasty Bites
The Coastal Crew by Fujian on 24th
Desserted
Asaivam Restaurant
Coast
Urban Solace - Café for the Soul
Food Zone
S G Rao’s Military Hotel
Churchill's
The Konkan - Seafood Restaurant
Sanchez Taqueria & Cantina
Maravanthe
Gods Own Cafe
The Jade Kitchen
Swathi
