In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [2]:
data = pd.read_csv('Dataset.csv')

In [3]:
print(data.head())

   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

In [4]:
data = data.dropna()

In [5]:
label_encoders = {}
categorical_columns = ['Restaurant Name', 'City', 'Locality', 'Locality Verbose', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Rating color', 'Rating text']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

In [6]:
data['Cuisines'] = data['Cuisines'].apply(lambda x: x.split(', '))

In [7]:
cuisines = data['Cuisines'].apply(pd.Series).stack().reset_index(level=1, drop=True).to_frame('Cuisine')
cuisines_dummies = pd.get_dummies(cuisines['Cuisine']).groupby(level=0).sum()

In [13]:
data = data.drop(['Cuisines'], axis=1).join(cuisines_dummies)

In [14]:
# Select relevant features for recommendation
features = ['Price range', 'City', 'Average Cost for two'] + list(cuisines_dummies.columns)
feature_data = data[features]

In [15]:
# Define a function to recommend restaurants based on user preferences
def recommend_restaurants(user_preferences, feature_data, top_n=5):
    user_preferences_df = pd.DataFrame([user_preferences])
    user_preferences_encoded = pd.get_dummies(user_preferences_df)

    # Ensure the same columns in the user preferences and feature data
    user_preferences_encoded = user_preferences_encoded.reindex(columns=feature_data.columns, fill_value=0)

    # Calculate cosine similarity
    similarities = cosine_similarity(user_preferences_encoded, feature_data)
    similarities = similarities.flatten()

    # Get the top N recommendations
    recommendations = np.argsort(similarities)[-top_n:][::-1]
    recommended_restaurants = data.iloc[recommendations]

    return recommended_restaurants

In [16]:
# Define sample user preferences
user_preferences = {
    'Price range': 3,
    'City': label_encoders['City'].transform(['Makati City'])[0],
    'Average Cost for two': 1000
}

In [17]:
# Convert the cuisines to the one-hot encoding format expected
preferred_cuisines = ['Japanese']
user_preferences = {**user_preferences, **{cuisine: 1 for cuisine in preferred_cuisines}}

In [18]:
# Get recommendations
recommended_restaurants = recommend_restaurants(user_preferences, feature_data, top_n=5)

In [19]:
# Decode the City and Restaurant Name columns for display
recommended_restaurants['City'] = label_encoders['City'].inverse_transform(recommended_restaurants['City'])
recommended_restaurants['Restaurant Name'] = label_encoders['Restaurant Name'].inverse_transform(recommended_restaurants['Restaurant Name'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_restaurants['City'] = label_encoders['City'].inverse_transform(recommended_restaurants['City'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_restaurants['Restaurant Name'] = label_encoders['Restaurant Name'].inverse_transform(recommended_restaurants['Restaurant Name'])


In [20]:
# Display recommendations
print("Recommended Restaurants:")
print(recommended_restaurants[['Restaurant Name', 'Price range', 'City', 'Average Cost for two', 'Aggregate rating']])

Recommended Restaurants:
              Restaurant Name  Price range       City  Average Cost for two  \
8102  Aim Cafe And Restaurant            3      Noida                  1200   
3184                 Aka Saka            3  New Delhi                  1200   
6139                  Ichiban            3  New Delhi                  1200   
3238            Moets Oh! Bao            3  New Delhi                  1200   
9083         Ristorante Prego            3      Noida                  1200   

      Aggregate rating  
8102               3.3  
3184               3.3  
6139               3.9  
3238               3.6  
9083               3.1  
