# Importing Libraries

In [49]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import r2_score
import re
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# Loading and Reading Data

In [50]:
zomato_pune=pd.read_csv("Pune Restaurants.csv")


In [51]:
zomato_pune.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4797 entries, 0 to 4796
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Restaurant_Name        4797 non-null   object 
 1   Category               4797 non-null   object 
 2   Pricing_for_2          4797 non-null   int64  
 3   Locality               4797 non-null   object 
 4   Dining_Rating          4797 non-null   float64
 5   Dining_Review_Count    4797 non-null   int64  
 6   Delivery_Rating        3226 non-null   float64
 7   Delivery_Rating_Count  4797 non-null   int64  
 8   Website                4797 non-null   object 
 9   Address                4797 non-null   object 
 10  Phone_No               4797 non-null   object 
 11  Latitude               4797 non-null   float64
 12  Longitude              4797 non-null   float64
 13  Known_for1             4155 non-null   object 
 14  Known_for2             1078 non-null   object 
dtypes: f

# Data Cleaning

In [52]:
zomato_data =zomato_pune.drop(['Website','Latitude','Longitude','Phone_No','Known_for2','Address','Delivery_Rating','Delivery_Rating_Count','Category'],axis=1)

In [53]:
zomato_pune.head() # prints the first N rows of a DataFrame

Unnamed: 0,Restaurant_Name,Category,Pricing_for_2,Locality,Dining_Rating,Dining_Review_Count,Delivery_Rating,Delivery_Rating_Count,Website,Address,Phone_No,Latitude,Longitude,Known_for1,Known_for2
0,Santè Spa Cuisine,"Continental, Healthy Food, Mediterranean",1200,"Koregaon Park, Pune",4.9,1469,3.9,588,https://www.zomato.com/pune/santè-spa-cuisine-...,"Lane 1, Near Sunderban Resorts, Koregaon Park,...",918000000000.0,18.537416,73.88889,"Bajra Risotto, Spinach Hummus, Whole Wheat Spa...","Nothing to Dislike, Healthy Menu, Healthy Food..."
1,Le Plaisir,"Cafe, Italian, Continental, Salad, Sandwich, P...",1000,"Deccan Gymkhana, Pune",4.9,4808,4.3,4959,https://www.zomato.com/pune/le-plaisir-deccan-...,"Survey 759/125, Rajkamal, Prabhat Road, Opposi...",919000000000.0,18.514254,73.838658,"Expresso Panacota, Banana Nutella Crepe, New Y...","Classy Place, Classy Ambience, Perfect Evening..."
2,Gong,"Chinese, Sushi, Asian, Momos, Beverages",1700,"Balewadi High Street, Baner, Pune",4.9,1788,4.3,1352,https://www.zomato.com/pune/gong-baner,"Shop 22/23, Cummins India Office Campus, Balew...",919000000000.0,18.570471,73.774149,"Chicken Minestrone Soup, Crab Ravioli, Ebi Tem...","Classy Place, Quality of Food, Amazing Interio..."
3,The French Window Patisserie,"Cafe, Desserts, French, Bakery, European",600,"Koregaon Park, Pune",4.9,1643,4.4,1208,https://www.zomato.com/pune/the-french-window-...,"Lane Number 5, Ganga Fortune Society, Meera Na...",920000000000.0,18.537432,73.897714,"Trois Chocolat, Chocolate Flan, Mille Feuille,...","Bookshelf, Beautiful Cafe, Pretty Ambience, Pe..."
4,Savya Rasa,"South Indian, Mangalorean, Kerala, Chettinad, ...",2100,"Koregaon Park, Pune",4.9,1283,4.3,446,https://www.zomato.com/pune/savya-rasa-koregao...,"Flat 2, Survey 479 & 480, Gera Serenity, North...",919000000000.0,18.538862,73.886621,"Bun Paratha, Pallipalayam Kozhi, Palm Sugar, V...","Sophisticated, Worth the Price, Warm Ambience,..."


In [54]:
zomato_data.duplicated().sum()


0

In [55]:
zomato_data.drop_duplicates(inplace=True)

In [56]:
#Removing the NaN values from the dataset
zomato_data.isnull().sum()
zomato_data.dropna(how='any',inplace=True)
zomato_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4155 entries, 0 to 4796
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Restaurant_Name      4155 non-null   object 
 1   Pricing_for_2        4155 non-null   int64  
 2   Locality             4155 non-null   object 
 3   Dining_Rating        4155 non-null   float64
 4   Dining_Review_Count  4155 non-null   int64  
 5   Known_for1           4155 non-null   object 
dtypes: float64(1), int64(2), object(3)
memory usage: 227.2+ KB


In [57]:
pune_restaurants = zomato_data.rename(columns={'Pricing_for_2':'Price','Dining_Rating':'Ratings','Dining_Review_Count':'Total_Reviews','Known_for1':'Best_Dishes'})

In [58]:
pune_restaurants.columns

Index(['Restaurant_Name', 'Price', 'Locality', 'Ratings', 'Total_Reviews',
       'Best_Dishes'],
      dtype='object')

In [59]:
pune_restaurants['Price'] = pune_restaurants['Price'].astype(str) 
pune_restaurants['Price'] = pune_restaurants['Price'].apply(lambda x: x.replace(',','.'))
pune_restaurants['cost'] = pune_restaurants['Price'].astype(float) 
pune_restaurants.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4155 entries, 0 to 4796
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Restaurant_Name  4155 non-null   object 
 1   Price            4155 non-null   object 
 2   Locality         4155 non-null   object 
 3   Ratings          4155 non-null   float64
 4   Total_Reviews    4155 non-null   int64  
 5   Best_Dishes      4155 non-null   object 
 6   cost             4155 non-null   float64
dtypes: float64(2), int64(1), object(4)
memory usage: 259.7+ KB


In [60]:
pune_restaurants['Ratings'].unique()

array([4.9, 4.8, 4.7, 4.6, 4.5, 4.4, 4.3, 4.2, 4.1, 4. , 3.9, 3.8, 3.7,
       3.6, 3.5, 3.4, 3.3, 3.2, 3.1, 3. ])

In [61]:
pune_restaurants.isnull().sum()

Restaurant_Name    0
Price              0
Locality           0
Ratings            0
Total_Reviews      0
Best_Dishes        0
cost               0
dtype: int64

In [62]:
## Computing Mean Rating
restaurants = list(pune_restaurants['Restaurant_Name'].unique())
pune_restaurants['Mean Rating'] = 0

for i in range(len(restaurants)):
    pune_restaurants['Mean Rating'][pune_restaurants['Restaurant_Name'] == restaurants[i]] = pune_restaurants['Ratings'][pune_restaurants['Restaurant_Name'] == restaurants[i]].mean()
    
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range = (1,5))
pune_restaurants[['Mean Rating']] = scaler.fit_transform(pune_restaurants[['Mean Rating']]).round(2)

# TDF-IDF Vectorization

In [63]:
import pandas

df_percent = pune_restaurants.sample(frac=0.5)

In [64]:
df_percent.shape

(2078, 8)

In [65]:
df_percent.set_index('Restaurant_Name', inplace=True)
indices = pd.Series(df_percent.index)

# Creating tf-idf matrix
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_percent['Best_Dishes'])

cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Recommendation _System

In [66]:
def recommend(name, cosine_similarities = cosine_similarities):
    
    # Create a list to put top restaurants
    recommend_restaurant = []
    
    # Find the index of the hotel entered
    idx = indices[indices == name].index[0]
    
    # Find the restaurants with a similar cosine-sim value and order them from biggest number
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    
    # Extract top 20 restaurant indexes with a similar cosine-sim value
    top20_indexes = list(score_series.iloc[0:21].index)
    
    # Names of the top 20 restaurants
    for each in top20_indexes:
        recommend_restaurant.append(list(df_percent.index)[each])
    
    # Creating the new data set to show similar restaurants
    df_new = pd.DataFrame(columns=['Best_Dishes', 'Mean Rating', 'Price','Locality'])
    
    # Create the top 20 similar restaurants with some of their columns
    for each in recommend_restaurant:
            df_new = df_new.append(pd.DataFrame(df_percent[['Best_Dishes','Mean Rating', 'Price','Locality']][df_percent.index == each].sample()))
    
    # Drop the same named restaurants and sort only the top 10 by the highest rating
    df_new = df_new.drop_duplicates(subset=['Best_Dishes','Mean Rating', 'Locality'], keep=False)
    df_new = df_new.sort_values(by='Mean Rating', ascending=False).head(10)
    
    print('TOP %s RESTAURANTS LIKE %s WITH SIMILAR REVIEWS ARE: ' % (str(len(df_new)), name))
    
    return df_new

In [67]:
df_percent[df_percent.index == 'Gong'].head()

Unnamed: 0_level_0,Price,Locality,Ratings,Total_Reviews,Best_Dishes,cost,Mean Rating
Restaurant_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Gong,1700,"Balewadi High Street, Baner, Pune",4.9,1788,"Chicken Minestrone Soup, Crab Ravioli, Ebi Tem...",1700.0,5.0


In [68]:
recommend('Gong')

TOP 10 RESTAURANTS LIKE Gong WITH SIMILAR REVIEWS ARE: 


Unnamed: 0,Best_Dishes,Mean Rating,Price,Locality
Gong,"Chicken Minestrone Soup, Crab Ravioli, Ebi Tem...",5.0,1700,"Balewadi High Street, Baner, Pune"
Pune Baking Company - JW Marriott Pune,"Grand Cru Marquise, Chocolate Feuillant, Cinna...",4.58,700,"JW Marriott Pune, Senapati Bapat Road, Pune"
Onesta,"Herbed Potato Wedges, Schezwan Pizza, Mushroom...",4.37,700,"Mariplex Mall, Kalyani Nagar, Pune"
The Daily All Day,"Drunken Prawns, Whiskey Cocktail, Mushroom Cro...",4.16,2000,"Koregaon Park, Pune"
Mr Rabbit's Bar & Burrow,"Murgh Ghee Roast, Spinach Ravioli, Rabbit, Per...",4.16,1900,"Baner, Pune"
Moshi Moshi,"Veggie Tempura, Spicy Vegetable, Yellow Curry ...",3.95,1500,"Koregaon Park, Pune"
Cake N Counter,"Chocolate Dreamcake, Chocolate Walnut, Chocola...",3.74,300,"East Street, Pune"
Itss Yummyy,"Crunchy Roll, Chicken Crisp Burger, Barbecue C...",3.74,500,"Katraj, Pune"
Nawab Asia,"Lalla Mussa Dal, Paneer Kali Mirch, Crab Meat ...",3.74,1300,"Balewadi High Street, Baner, Pune"
Spice Culture,"Cigar Roll, Chicken Lazeez, Phad Thai, Caramel...",3.74,700,"Salunkhe Vihar Road, Pune"


In [69]:
class Restaurant:
    def __init__(self, name, category, rating, location):
        self.name = name
        self.category = category
        self.rating = rating
        self.location = location

def load_restaurants(dataset_path):
    restaurants = []
    dataset = pd.read_csv(dataset_path)
    for index, row in dataset.iterrows():
        restaurant = Restaurant(row['Restaurant_Name'], row['Category'], row['Dining_Rating'], row['Locality'])
        restaurants.append(restaurant)
    return restaurants

def recommend_restaurants(restaurants, category):
    recommended_restaurants = []
    category_restaurants = [restaurant for restaurant in restaurants if category.lower() in restaurant.category.lower()]
    
    if not category_restaurants:
        return recommended_restaurants
    
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform([restaurant.category for restaurant in category_restaurants])
    similarity_matrix = cosine_similarity(tfidf_matrix)

    for idx, restaurant in enumerate(category_restaurants):
        similar_indices = similarity_matrix[idx].argsort()[::-1][1:] 
        similar_restaurants = [category_restaurants[i] for i in similar_indices]
        recommended_restaurants.extend(similar_restaurants)

    recommended_restaurants = list(set(recommended_restaurants))
    recommended_restaurants.sort(key=lambda x: x.rating, reverse=True)
    return recommended_restaurants

def main():
    dataset_path = "Pune Restaurants.csv"
    restaurants = load_restaurants(dataset_path)

    category = input("Enter the type of category you are looking for: ")

    recommended_restaurants = recommend_restaurants(restaurants, category)

    if recommended_restaurants:
        print("Recommended Restaurants:")
        for restaurant in recommended_restaurants:
            print(f"Restaurant: {restaurant.name} | Category: {restaurant.category} | Rating: {restaurant.rating} | Location: {restaurant.location}")
    else:
        print("No restaurants found for the specified category.")

if __name__ == "__main__":
    main()


Enter the type of category you are looking for: Italian
Recommended Restaurants:
Restaurant: Le Plaisir | Category: Cafe, Italian, Continental, Salad, Sandwich, Pizza, Beverages | Rating: 4.9 | Location:  Deccan Gymkhana, Pune
Restaurant: Autobahn - The Smart Restaurant | Category: North Indian, Goan, Chettinad, Beverages, Desserts, Italian | Rating: 4.7 | Location:  Phoenix Market City, Viman Nagar, Pune
Restaurant: Alto Vino - JW Marriott Pune | Category: Italian, Salad, Beverages, Continental | Rating: 4.6 | Location:  JW Marriott Pune, Senapati Bapat Road, Pune
Restaurant: Toit | Category: Modern Indian, American, Italian, Pizza | Rating: 4.6 | Location:  Kalyani Nagar, Pune
Restaurant: Laa Unico | Category: Italian, Mexican, Continental, Modern Indian, Beverages | Rating: 4.6 | Location:  Swargate, Pune
Restaurant: Darshan | Category: Italian, Continental, Chinese, Mexican, Fast Food, Street Food, Desserts, Sandwich | Rating: 4.6 | Location:  Baner, Pune
Restaurant: Arthur's Theme