# A restaurant recommendation system based on user preferences.

## Step 1: Preprocess the Dataset

### 1.1 Load the Dataset

In [1]:
# Load the necessary libraries
import pandas as pd

# Load the dataset
file_path = 'dataset.csv'
df = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
df.head()


Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


### 1.2 Check for Missing Values

In [2]:
# Display basic info to check for missing values
print(df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

## 1.3 Handle Missing Values

In [3]:
# Drop rows with missing values
df = df.dropna()

# Verify if all missing values are handled
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 9542 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9542 non-null   int64  
 1   Restaurant Name       9542 non-null   object 
 2   Country Code          9542 non-null   int64  
 3   City                  9542 non-null   object 
 4   Address               9542 non-null   object 
 5   Locality              9542 non-null   object 
 6   Locality Verbose      9542 non-null   object 
 7   Longitude             9542 non-null   float64
 8   Latitude              9542 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9542 non-null   int64  
 11  Currency              9542 non-null   object 
 12  Has Table booking     9542 non-null   object 
 13  Has Online delivery   9542 non-null   object 
 14  Is delivering now     9542 non-null   object 
 15  Switch to order menu  9542

### 1.4 Encode Categorical Variables

In [4]:
# Import LabelEncoder
from sklearn.preprocessing import LabelEncoder

# Encode categorical variables
label_encoders = {}
categorical_columns = ['City', 'Cuisines', 'Rating color', 'Rating text', 'Currency']

for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Display the first few rows of the processed dataset
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,73,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,920,...,0,Yes,No,No,No,3,4.8,0,1,314
1,6304287,Izakaya Kikufuji,162,73,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,1111,...,0,Yes,No,No,No,3,4.5,0,1,591
2,6300002,Heat - Edsa Shangri-La,162,75,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,1671,...,0,Yes,No,No,No,4,4.4,1,5,270
3,6318506,Ooma,162,75,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,1126,...,0,No,No,No,No,4,4.9,0,1,365
4,6314302,Sambo Kojin,162,75,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,1122,...,0,Yes,No,No,No,4,4.8,0,1,229


## Step 2: Determine the Criteria for Restaurant Recommendations

### 2.1 Define User Preferences

In [5]:
# Define user preferences
user_preferences = {
    'City': 'Makati City',
    'Cuisines': 'Japanese',
    'Price range': 3
}


### 2.2 Encode User Preferences

In [6]:
# Encode user preference using the same lable encoders
for key in user_preferences.keys():
    if key in label_encoders:
        user_preferences[key] = label_encoders[key].transform([user_preferences[key]])[0]

print(user_preferences)

{'City': 73, 'Cuisines': 1111, 'Price range': 3}


## Step 3: Implement Content-Based Filtering

### 3.1 Select features for Recommendation 

In [7]:
# Select features for recommendation
features = ['City', 'Cuisines', 'Price range']


### 3.2 Create Feature Matrix

In [8]:
# Create a matrix of the selected features
feature_matrix = df[features].values

### 3.3 Calculate Cosine Similarity

In [10]:
# Import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity

# Create a user vector from preferences
user_vector = [user_preferences['City'], user_preferences['Cuisines'], user_preferences['Price range']]

# Calculate cosine similarity between the user preferences and the feature matrix
similarities = cosine_similarity([user_vector], feature_matrix)

### 3.4 Get Top Recommendations

In [11]:
# Get the top 5 restaurant indices
top_indices = similarities[0].argsort()[-5:][::-1]

# Recommend top 5 restaurants
recommended_restaurants = df.iloc[top_indices]
recommended_restaurants[['Restaurant Name', 'City', 'Cuisines', 'Price range', 'Aggregate rating']]


Unnamed: 0,Restaurant Name,City,Cuisines,Price range,Aggregate rating
1,Izakaya Kikufuji,73,1111,3,4.5
2241,A Piece of Paris,50,760,2,3.9
9392,Roti Chai,67,1020,3,4.5
42,Pesqueiro Eco Gourmet,110,1673,4,4.0
8144,Airborne,88,1343,4,0.0


## Step 4: Test the Recommendation System

### 4.1 Display Recommendations

In [12]:
# Print the recommended restaurants
recommended_restaurants = df.iloc[top_indices]
print(recommended_restaurants[['Restaurant Name', 'City', 'Cuisines', 'Price range', 'Aggregate rating']])


            Restaurant Name  City  Cuisines  Price range  Aggregate rating
1          Izakaya Kikufuji    73      1111            3               4.5
2241       A Piece of Paris    50       760            2               3.9
9392              Roti Chai    67      1020            3               4.5
42    Pesqueiro Eco Gourmet   110      1673            4               4.0
8144               Airborne    88      1343            4               0.0


## Step 5: Save the Trained Model

### 5.1 Save Label Encoders

In [13]:
import pickle

# Save label encoders
with open('label_encoders.pkl', 'wb') as file:
    pickle.dump(label_encoders, file)


### 5.2 Save Similarity Model
Since the similarity model we have used is a matrix and not a traditional ML model, we save the feature matrix and user vector instead.

In [14]:
# Save feature matrix and user vector
with open('feature_matrix.pkl', 'wb') as file:
    pickle.dump(feature_matrix, file)

with open('user_vector.pkl', 'wb') as file:
    pickle.dump(user_vector, file)


## Step 6: Load the Trained Model

### 6.1 Load Label Encoders


In [15]:
# Load label encoders
with open('label_encoders.pkl', 'rb') as file:
    label_encoders = pickle.load(file)

### 6.2 Load Similarity Model

In [16]:
# Load feature matrix and user vector
with open('feature_matrix.pkl', 'rb') as file:
    feature_matrix = pickle.load(file)

with open('user_vector.pkl', 'rb') as file:
    user_vector = pickle.load(file)


## Step 7: Use Trained Model with New Data

### 7.1 Preprocess New Data

In [17]:
# Define new user preferences
new_user_preferences = {
    'City': 'Makati City',
    'Cuisines': 'Japanese',
    'Price range': 4
}

# Encode new user preferences using the same label encoders
for key in new_user_preferences.keys():
    if key in label_encoders:
        new_user_preferences[key] = label_encoders[key].transform([new_user_preferences[key]])[0]

print(new_user_preferences)


{'City': 73, 'Cuisines': 1111, 'Price range': 4}


### 7.2 Calculate Similarity for New User Preferences

In [18]:
# Create a user vector from new preferences
new_user_vector = [new_user_preferences['City'], new_user_preferences['Cuisines'], new_user_preferences['Price range']]

# Calculate cosine similarity between the new user preferences and the feature matrix
new_similarities = cosine_similarity([new_user_vector], feature_matrix)

# Get the top 5 restaurant indices for the new user
new_top_indices = new_similarities[0].argsort()[-5:][::-1]

# Recommend top 5 restaurants for the new user
new_recommended_restaurants = df.iloc[new_top_indices]
new_recommended_restaurants[['Restaurant Name', 'City', 'Cuisines', 'Price range', 'Aggregate rating']]


Unnamed: 0,Restaurant Name,City,Cuisines,Price range,Aggregate rating
2301,AB's - Absolute Barbecues,54,818,3,4.9
2307,AB's - Absolute Barbecues,54,818,3,4.9
9377,Gymkhana,67,1015,4,4.7
2556,Soma at Sula,86,1306,4,3.3
2554,Barbeque Ville,86,1306,4,3.8
