In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Step 2: Load Dataset
# Assuming the dataset is in a CSV file
data = pd.read_csv('.csv file path')

# checking the data
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [3]:
# informations about the data
data.describe()

Unnamed: 0,Restaurant ID,Country Code,Longitude,Latitude,Average Cost for two,Price range,Aggregate rating,Votes
count,9551.0,9551.0,9551.0,9551.0,9551.0,9551.0,9551.0,9551.0
mean,9051128.0,18.365616,64.126574,25.854381,1199.210763,1.804837,2.66637,156.909748
std,8791521.0,56.750546,41.467058,11.007935,16121.183073,0.905609,1.516378,430.169145
min,53.0,1.0,-157.948486,-41.330428,0.0,1.0,0.0,0.0
25%,301962.5,1.0,77.081343,28.478713,250.0,1.0,2.5,5.0
50%,6004089.0,1.0,77.191964,28.570469,400.0,2.0,3.2,31.0
75%,18352290.0,1.0,77.282006,28.642758,700.0,2.0,3.7,131.0
max,18500650.0,216.0,174.832089,55.97698,800000.0,4.0,4.9,10934.0


In [4]:
# Analyze missing values
missing_values = data.isnull().sum()
missing_percentage = (missing_values / len(data)) * 100

print("Missing Values Count:")
print(missing_values)
print("\nMissing Values Percentage:")
print(missing_percentage)

Missing Values Count:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

Missing Values Percentage:
Restaurant ID           0.000000
Restaurant Name         0.000000
Country Code            0.000000
City                    0.000000
Address                 0.000000
Locality                0.000000
Locality Verbose        0.000000
Longitude               0.000000
Latitude                0.000000
Cuisines                0.094231
Average Cost for two    0.000000
Currency                0.00

In [5]:
# Fill missing cuisines with 'Unknown'
data['Cuisines'].fillna('Unknown', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Cuisines'].fillna('Unknown', inplace=True)


In [6]:
# Step 2: Encode categorical variables
# Encode 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu' using LabelEncoder
label_encoder = LabelEncoder()

categorical_columns = ['Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']
for col in categorical_columns:
    data[col] = label_encoder.fit_transform(data[col])


In [7]:
# Step 3: Use TF-IDF for the 'Cuisines' column to convert text data into numerical data
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
cuisines_matrix = tfidf_vectorizer.fit_transform(data['Cuisines'])

In [8]:
# Normalize 'Average Cost for two' and 'Price range'
scaler = StandardScaler()
data[['Average Cost for two', 'Price range']] = scaler.fit_transform(data[['Average Cost for two', 'Price range']])

In [9]:
# Update the recommendation function to use 'City' instead of latitude and longitude for location filtering
def recommend_restaurants(user_cuisine=None, user_price_range=None, user_cost=None, user_city=None, num_recommendations=5):
    # Default similarity weights
    weight_cuisine = 1
    weight_price = 1
    weight_cost = 1
    weight_city = 1
    
    # Transform user input into a TF-IDF vector if cuisine is provided
    if user_cuisine:
        user_input = tfidf_vectorizer.transform([user_cuisine])
        cuisine_similarity = cosine_similarity(user_input, cuisines_matrix).flatten()
    else:
        cuisine_similarity = np.ones(len(data))  # Default similarity if no cuisine provided
    
    # Adjust similarity score based on user price range preference if provided
    if user_price_range is not None:
        price_similarity = 1 - abs(data['Price range'] - user_price_range)
    else:
        price_similarity = np.ones(len(data))  # Default similarity if no price range provided
    
    # Adjust similarity score based on user average cost preference if provided
    if user_cost is not None:
        cost_similarity = 1 - abs(data['Average Cost for two'] - user_cost)
    else:
        cost_similarity = np.ones(len(data))  # Default similarity if no cost provided
    
    # Adjust similarity score based on user city if provided
    if user_city:
        city_similarity = data['City'].apply(lambda x: 1 if x.lower() == user_city.lower() else 0)
    else:
        city_similarity = np.ones(len(data))  # Default similarity if no city provided
    
    # Combine all similarities into a final score
    final_similarity = (cuisine_similarity * price_similarity * cost_similarity * city_similarity)
    
    # Normalize final similarity to handle cases with all defaults
    if np.all(final_similarity == 0):
        final_similarity = np.ones(len(data))
    
    # Get the indices of the top similar restaurants
    similar_indices = final_similarity.argsort()[-num_recommendations:][::-1]
    
    # Return the top recommendations
    return data.iloc[similar_indices][['Restaurant Name', 'Cuisines', 'City', 'Price range', 'Average Cost for two', 'Aggregate rating']]



In [10]:
# Test the updated recommendation system with sample user preferences
sample_recommendations = recommend_restaurants(user_cuisine='Japanese', user_price_range=3, user_cost=3000, user_city='Makati City')

# Print recommendations
print(sample_recommendations)

        Restaurant Name                    Cuisines         City  Price range  \
1      Izakaya Kikufuji                    Japanese  Makati City     1.319803   
0      Le Petit Souffle  French, Japanese, Desserts  Makati City     1.319803   
3179  Taj Chicken Point       North Indian, Mughlai    New Delhi    -0.888771   
3186            Barista                        Cafe    New Delhi     0.215516   
3185  Anil Mishtan Wala         Mithai, Street Food    New Delhi    -0.888771   

      Average Cost for two  Aggregate rating  
1                 0.000049               4.5  
0                -0.006154               4.8  
3179             -0.049578               0.0  
3186             -0.034069               3.3  
3185             -0.068188               3.1  
