# Alok Raj

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords 
from nltk.tokenize import WordPunctTokenizer
import string
import re

In [2]:
df = pd.read_csv('RestoInfo.csv')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2069 entries, 0 to 2068
Data columns (total 15 columns):
Unnamed: 0                     2069 non-null int64
name                           2069 non-null object
online_order                   2069 non-null object
book_table                     2069 non-null object
rate                           1770 non-null object
votes                          2069 non-null int64
location                       2069 non-null object
rest_type                      2052 non-null object
dish_liked                     962 non-null object
cuisines                       2069 non-null object
approx_cost(for two people)    2053 non-null object
reviews_list                   2069 non-null object
menu_item                      2069 non-null object
listed_in(type)                2069 non-null object
listed_in(city)                2069 non-null object
dtypes: int64(2), object(13)
memory usage: 242.6+ KB


In [7]:
#Object to numeric
df['approx_cost(for two people)'] = pd.to_numeric(df['approx_cost(for two people)'], errors='coerce')
df['approx_cost(for two people)'].fillna((df['approx_cost(for two people)'].mean()), inplace=True)

In [8]:
#Combining reviews, menu, dish liked to customise search
df['content']=df['reviews_list'].fillna('')+' '+df['menu_item'].fillna('')+' '+df['dish_liked'].fillna('')

In [36]:
new=df['rate'].str.split('/',expand=True)
df['rate_mag']=new[0]
'''
Many restaurants have empty rating. An idea could be to fill with average of all. It could also be that very few people have tried that restaurant or reviewed it. 
So, it is not that famous unless it is NEW. I am not replacing those for this study
'''

In [9]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english")
'''
Cleaning the content to remove punctuations and correct where ever required.
I am also using stemmer to not differentiate between close enough words
'''

def clean_text(text):
    text = text.translate(string.punctuation)
    ## Convert words to lower case and split them
    text = text.lower().split()
    ## Remove stop words
    stops = set(stopwords.words("english"))
    text = [w for w in text if not w in stops and len(w) >= 3]
    text = " ".join(text)
    # Clean the text
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r"[0-9]", "", text)
    text = re.sub(r"\'s", "", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", "", text)
    text = re.sub(r"\.", "", text)
    text = re.sub(r"!", "", text)
    text = re.sub(r"\/", "", text)
    text = re.sub(r"'", "", text)
    text = re.sub(r":", "", text)
    text = re.sub(r"rated", "", text)
    text = re.sub(r" n ", "", text)
    text = re.sub(r"  ", " ", text)
    #text = re.sub(r"+", " ", text)
    text = re.sub(r"--", " ", text)
    text = re.sub(r"-", " ", text)
    text = re.sub(r";", " ", text)
    text = re.sub(r"=", " ", text)
    text=[stemmer.stem(w) for w in text.split(' ')] 
    while("" in text): 
        text.remove("") 
    string_text=''
    for i in range(len(text)):
        string_text =string_text+text[i]+' '
    return string_text

In [10]:
df['content_1']=df['content'].apply(clean_text)
df['content_1']

0                                                        
1       work cheap near offic colleg food avoid best t...
2       ice cream realli tasti especi belgian chocol s...
3       beauti ambianc manag get tabl right next fount...
4       good experi restaur ragi mudda good well prepa...
                              ...                        
2064    pathet servic we visit st jan complet disappoi...
2065    place locat heart indiranagar feet road make s...
2066    servic tast ambienc nnit team outing + peopl s...
2067                                                     
2068    small place minim tast pocket friend pretti qu...
Name: content_1, Length: 2069, dtype: object

In [11]:
content_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=2000)
content_vectors = content_vectorizer.fit_transform(df['content_1'])
content_P = pd.DataFrame(content_vectors.toarray(), index=df.index, columns=content_vectorizer.get_feature_names())

In [12]:
df['location_1']=df['location'].apply(clean_text)
location_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=500)
location_vectors = location_vectorizer.fit_transform(df['location_1'])
location_P = pd.DataFrame(location_vectors.toarray(), index=df.index, columns=location_vectorizer.get_feature_names())

In [13]:
df['cuisines_1']=df['cuisines']
cuisines_vectorizer = TfidfVectorizer(tokenizer = WordPunctTokenizer().tokenize, max_features=100)
cuisines_vectors = cuisines_vectorizer.fit_transform(df['cuisines_1'])
cuisines_P = pd.DataFrame(cuisines_vectors.toarray(), index=df.index, columns=cuisines_vectorizer.get_feature_names())

In [52]:
class restaurant_recommmender:
    def __init__(self,content,location='Marathahalli whitefield',cuisines='South Indian',budget=500,
                 content_P=content_P,location_P=location_P,cuisines_P=cuisines_P,approx_cost=df['approx_cost(for two people)'],
                content_vectorizer=content_vectorizer,location_vectorizer=location_vectorizer,cuisines_vectorizer=cuisines_vectorizer,
                df=df):
        self.content=content
        self.location=location
        self.cuisines=cuisines
        self.budget=budget
        self.content_P=content_P
        self.location_P=location_P
        self.cuisines_P=cuisines_P
        self.approx_cost=approx_cost
        self.content_vectorizer=content_vectorizer
        self.location_vectorizer=location_vectorizer
        self.cuisines_vectorizer=cuisines_vectorizer
        self.df=df
        a=self.comparison(self.content,self.content_P,content_vectorizer)
        b=self.comparison(self.location,self.location_P,location_vectorizer)
        c=self.comparison(self.cuisines,self.cuisines_P,cuisines_vectorizer)
        d=self.budget_comparison(self.budget,self.approx_cost)
        self.recommendation(a,b,c,d,self.df)
    def comparison(self,content, master_content,vectorizer): #Master content-> content_P
        test_df= pd.DataFrame([content], columns=['text'])
        test_df['text'] = test_df['text'].apply(clean_text) #Need to inherit clean_text function
        test_vectors = vectorizer.transform(test_df['text'])
        test_content_df = pd.DataFrame(test_vectors.toarray(), index=test_df.index, columns=vectorizer.get_feature_names())
        return np.dot(test_content_df.loc[0],master_content.T)
    def budget_comparison (self,budget, approx_cost):
        d=1-abs(approx_cost-budget)/(approx_cost+budget)
        return d
    def recommendation(self,a,b,c,d,df):
        a=a+0.01
        b=b+0.01
        c=c+0.01
        d=d+0.01
        '''I have added a very small number to remove zero multiplication. It preserves the influence of every inputs'''
        sum_total=a*b*c*d
        #top_recommendations=pd.Series.sort_values(sum_total, ascending=False)[:3]
        '''Select top 10 restaurants that match the recommendation and then select top 3 based on raatings among them'''
        top_recommendations=pd.Series.sort_values(sum_total, ascending=False)[:10]
        d = []
        for i in list(top_recommendations.index):
            d.append({'name': df.iloc[i,:]['name'],
            'rating': df.iloc[i,:]['rate_mag']})
        df_2=pd.DataFrame(d)
        print ('Ten restaurants which matches your requirement')
        print (df_2)
        print ("")
        df_2_name=df_2.sort_values(by='rating',ascending=False)['name']
        print ("Top 3 recommended restaurants for you are:")
        print ("")
        for i in range(3):
            print ('Recommendation {}: '.format(i+1),df_2_name.iloc[i])
        #for i,num in enumerate(list(top_recommendations.index)):
        #    print ('Recommendation {}:'.format(i+1),df.iloc[num,:])#['name'])

In [53]:
sentence='biryani chicken'
location='Whitefield'
restaurant_recommmender(sentence,location,budget=1500)

Ten restaurants which matches your requirement
                            name rating
0        Amaravathi Biryani Zone    NaN
1               Taste of Coastal    NaN
2         Flavours Radha Hometel   3.5 
3         Flavours Radha Hometel   3.5 
4  Green Hyderabad Biryani House   3.8 
5                     U.P. Dhaba    2.8
6                   Nawabi Handi    3.4
7       Basmati's Briyani Nation    NEW
8           Windy- Taste Of Home   3.6 
9            Curries and Pickles    NaN

Top 3 recommended restaurants for you are:

Recommendation 1:  Basmati's Briyani Nation
Recommendation 2:  Green Hyderabad Biryani House
Recommendation 3:  Windy- Taste Of Home


<__main__.restaurant_recommmender at 0x25b939f4308>

In [54]:
print ('What are you looking for today? How about Biryani?')
content = input()
print ('Location?')
location = input()
print ('cruisines?')
cruisines = input()
print ('approx cost for two?')
budget= input()
restaurant_recommmender(content,location,cruisines,int(budget))

What are you looking for today? How about Biryani?
Good ambience, paneer tikka, veg
Location?
marathahalli, bellandur, whitefield
cruisines?
north indian
approx cost for two?
1000
Ten restaurants which matches your requirement
                  name rating
0           Food Singh    NaN
1      Meghana Biryani    2.3
2              Santo's    NaN
3          Dil Punjabi    2.6
4        Yummy Punjabi    2.9
5  The Spice Pavillion   3.7 
6        Curry Chutney   3.0 
7           U.P. Dhaba    2.8
8       Krishna Bhojan    NaN
9            The Dhaba    2.7

Top 3 recommended restaurants for you are:

Recommendation 1:  The Spice Pavillion
Recommendation 2:  Curry Chutney
Recommendation 3:  Yummy Punjabi


<__main__.restaurant_recommmender at 0x25b939f4888>