# Recommendation System

## Simple Recommendation System

### Importing Files

In [2]:

import numpy as np 
import pandas as pd 
import re

### Loading Data 

In [3]:

data = pd.read_csv('/home/student/Desktop/Practise_Folder/Project/Data/zomatoNCR.csv', encoding ='latin1')
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


### Extracting Delhi NCR Data

In [5]:

res_India = data
NCR = ['New Delhi','Gurgaon','Noida','Faridabad']
res_NCR = res_India[(res_India.City == NCR[0])|(res_India.City == NCR[1])|(res_India.City == NCR[2])|
                    (res_India.City == NCR[3])]

res_NCR.shape

(7922, 21)

### Regularizing Predicting Parameters --(Rating and Voting) 

In [8]:

data_new_delhi=res_NCR[['Restaurant Name','Cuisines','Locality','Aggregate rating', 'Votes']] #--Extracting Columns


C = data_new_delhi['Aggregate rating'].mean()
m = data_new_delhi['Votes'].quantile(0.90) #--Quantile

q_restaurant = data_new_delhi.copy().loc[data_new_delhi['Votes'] >= m]
q_restaurant.shape

(795, 5)

In [9]:
# Function that computes the weighted rating of each restaurant
def weighted_rating(x, m=m, C=C):
    v = x['Votes']
    R = x['Aggregate rating']
    return np.round((v/(v+m) * R) + (m/(m+v) * C))


q_restaurant['score'] = q_restaurant.apply(weighted_rating, axis=1)

In [10]:
#Sort restaurant based on score calculated above
#q_restaurant = q_restaurant.sort_values('score', ascending=False)
q_restaurant['score'].dtype

dtype('float64')

### Output

In [11]:
#Print the top 10 restaurants in Delhi NCR
q_restaurant[['Restaurant Name','Cuisines', 'Locality','Votes', 'Aggregate rating', 'score']].head(10)

Unnamed: 0,Restaurant Name,Cuisines,Locality,Votes,Aggregate rating,score
884,Berco's,"Chinese, Thai","Crown Interiorz Mall, Sector 35, Faridabad",508,3.8,3.0
891,Barbeque Nation,North Indian,"Crown Interiorz Mall, Sector 35, Faridabad",299,4.0,3.0
892,Yo! China,Chinese,"Crown Interiorz Mall, Sector 35, Faridabad",239,4.1,3.0
943,Cafe Parmesan,Italian,Sector 15,799,4.5,4.0
949,Chickenette,"Raw Meats, North Indian, Chinese, Fast Food",Sector 15,280,3.7,3.0
1163,Punjab Grill,"North Indian, Mughlai","Ambience Mall, Gurgaon",1887,4.3,4.0
1164,Zambar,"South Indian, Seafood, Kerala","Ambience Mall, Gurgaon",802,4.0,4.0
1190,Bisque Bakery,"Bakery, Fast Food","Central Arcade, DLF Phase 2, Gurgaon",415,3.9,3.0
1209,Khyen Chyen,Kashmiri,"Cross Point Mall, DLF Phase 4",364,3.7,3.0
1211,Tughlaq,"North Indian, Mughlai, Chinese, Seafood","Cross Point Mall, DLF Phase 4",351,3.5,3.0


## Content Based Algorithm

### Importing Files

In [20]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 

### Loading Data

In [21]:
data = pd.read_csv('/home/student/Desktop/Practise_Folder/Project/Data/zomatoNCR.csv', encoding ='latin1')
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


### Extracting Relevant Columns

In [22]:
data_new_delhi=data[['Restaurant Name','Cuisines','Locality','Aggregate rating', 'Votes']] 

In [23]:
data_sample=[]
data_sample = data_new_delhi.loc[data_new_delhi['Locality'] == 'Connaught Place']
data_sample.reset_index(level=0, inplace=True) 
data_sample.head()

Unnamed: 0,index,Restaurant Name,Cuisines,Locality,Aggregate rating,Votes
0,2999,Amber,"North Indian, Chinese, Mughlai",Connaught Place,2.6,152
1,3000,Attitude Kitchen & Bar,"North Indian, Continental, Italian",Connaught Place,2.9,140
2,3001,Cafe Coffee Day,Cafe,Connaught Place,3.4,277
3,3002,Castle 9,"Finger Food, Continental, North Indian, Chinese",Connaught Place,3.1,1099
4,3003,Costa Coffee,Cafe,Connaught Place,3.4,76


### Creating spearate column for Cusines 

In [24]:
# Creating New Column
data_sample['Split']="X"
data_sample

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,index,Restaurant Name,Cuisines,Locality,Aggregate rating,Votes,Split
0,2999,Amber,"North Indian, Chinese, Mughlai",Connaught Place,2.6,152,X
1,3000,Attitude Kitchen & Bar,"North Indian, Continental, Italian",Connaught Place,2.9,140,X
2,3001,Cafe Coffee Day,Cafe,Connaught Place,3.4,277,X
3,3002,Castle 9,"Finger Food, Continental, North Indian, Chinese",Connaught Place,3.1,1099,X
4,3003,Costa Coffee,Cafe,Connaught Place,3.4,76,X
5,3004,Delhi Darbar Dhaba,"North Indian, Chinese",Connaught Place,3.2,89,X
6,3005,Garam Dharam,North Indian,Connaught Place,3.4,1523,X
7,3006,Gola Sizzlers,"Chinese, North Indian, Mughlai, Continental",Connaught Place,3.0,671,X
8,3007,Indian Coffee House,Fast Food,Connaught Place,3.3,1300,X
9,3008,My Bar Lounge & Restaurant,"North Indian, Chinese, Italian, Continental",Connaught Place,2.7,2460,X


In [27]:
for i in range(0,data_sample.index[-1]):
        split_data=re.split(r'[,]', data_sample['Cuisines'][i])
        for k,l in enumerate(split_data):
            split_data[k]=(split_data[k].replace(" ", ""))
        split_data=' '.join(split_data[:])
        data_sample['Split'].iloc[i]=split_data
        
data_sample

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,index,Restaurant Name,Cuisines,Locality,Aggregate rating,Votes,Split,Similarity
0,2999,Amber,"North Indian, Chinese, Mughlai",Connaught Place,2.6,152,NorthIndian Chinese Mughlai,0.33
1,3000,Attitude Kitchen & Bar,"North Indian, Continental, Italian",Connaught Place,2.9,140,NorthIndian Continental Italian,1.00
2,3001,Cafe Coffee Day,Cafe,Connaught Place,3.4,277,Cafe,0.00
3,3002,Castle 9,"Finger Food, Continental, North Indian, Chinese",Connaught Place,3.1,1099,FingerFood Continental NorthIndian Chinese,0.58
4,3003,Costa Coffee,Cafe,Connaught Place,3.4,76,Cafe,0.00
5,3004,Delhi Darbar Dhaba,"North Indian, Chinese",Connaught Place,3.2,89,NorthIndian Chinese,0.41
6,3005,Garam Dharam,North Indian,Connaught Place,3.4,1523,NorthIndian,0.58
7,3006,Gola Sizzlers,"Chinese, North Indian, Mughlai, Continental",Connaught Place,3.0,671,Chinese NorthIndian Mughlai Continental,0.58
8,3007,Indian Coffee House,Fast Food,Connaught Place,3.3,1300,FastFood,0.00
9,3008,My Bar Lounge & Restaurant,"North Indian, Chinese, Italian, Continental",Connaught Place,2.7,2460,NorthIndian Chinese Italian Continental,0.87


### Applying Cosine Similarity

In [26]:
data_sample['Similarity'] = 0.0
X = 'NorthIndian Continental Italian'

X_list = set(word_tokenize(X))  
for i in range(0, data_sample.index[-1]):
    Y = data_sample['Split'][i]
    Y_list = set(word_tokenize(Y))
    
    l1 =[];l2 =[] 
    rvector = X_list.union(Y_list)
    
    for w in rvector: 
        if w in X_list: l1.append(1) # create a vector 
        else: l1.append(0) 
        if w in Y_list: l2.append(1) 
        else: l2.append(0) 
    c = 0
    
    for l in range(len(rvector)): 
        c+= l1[l]*l2[l] 
    cosine = c / float((sum(l1)*sum(l2))**0.5) 
    
    
    data_sample['Similarity'].iloc[i] = float(np.round(cosine, 2))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


### Top Restuarant Based on Cusisine Similarity

In [19]:
data_sample.sort_values('Similarity', ascending=False)

Unnamed: 0,index,Restaurant Name,Cuisines,Locality,Aggregate rating,Votes,Split,Similarity
0,2999,Amber,"North Indian, Chinese, Mughlai",Connaught Place,2.6,152,NorthIndian Chinese Mughlai,0.0
91,3090,Ovenstory Pizza,"Pizza, Fast Food",Connaught Place,0.0,0,Pizza FastFood,0.0
89,3088,Zizo,"Lebanese, Mediterranean, Middle Eastern, Arabian",Connaught Place,3.9,1071,Lebanese Mediterranean MiddleEastern Arabian,0.0
88,3087,Zen,"Chinese, Japanese, Italian, Seafood",Connaught Place,3.5,1027,Chinese Japanese Italian Seafood,0.0
87,3086,Zaffran,"North Indian, Mughlai",Connaught Place,3.9,908,NorthIndian Mughlai,0.0
86,3085,Warehouse Cafe,"American, Continental, Italian, North Indian, ...",Connaught Place,3.7,4914,American Continental Italian NorthIndian Asian,0.0
85,3084,Veda,"North Indian, Mughlai",Connaught Place,3.9,1087,NorthIndian Mughlai,0.0
84,3083,The Vault Cafe,"North Indian, Mediterranean, Asian, Continental",Connaught Place,3.9,3413,NorthIndian Mediterranean Asian Continental,0.0
83,3082,The Rolling Joint,Fast Food,Connaught Place,3.9,783,FastFood,0.0
82,3081,The Luggage Room Kitchen And Bar,"North Indian, Continental, Fast Food",Connaught Place,3.5,63,NorthIndian Continental FastFood,0.0
