In [8]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv('Preprocessed_Dataset.csv')

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Price range']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
data[['Aggregate rating']] = scaler.fit_transform(data[['Aggregate rating']])

# Display the processed dataset
print(data.head())

# Define user preferences
user_preferences = {
    'Cuisines': 'Italian',
    'Price range': 3,
    'Locality': 'Albany',
    'Aggregate rating': 4
}

# Encode user preferences
user_preferences_encoded = {
    'Cuisines': label_encoders['Cuisines'].transform([user_preferences['Cuisines']])[0],
    'Price range': label_encoders['Price range'].transform([user_preferences['Price range']])[0],
    'Locality': label_encoders['Locality'].transform([user_preferences['Locality']])[0],
    'Aggregate rating': scaler.transform([[user_preferences['Aggregate rating']]])[0][0]
}

# Convert user preferences to DataFrame
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Fit k-NN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(data[['Cuisines', 'Price range', 'Locality', 'Aggregate rating']])

# Find nearest neighbors
distances, indices = knn.kneighbors(user_preferences_df)

# Get recommendations
recommendations = data.iloc[indices[0]]

# Display recommendations
print(recommendations)


   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  Locality  \
0  Third Floor, Century City Mall, Kalayaan Avenu...       171   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...       593   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...       308   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...       862   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...       862   

                                    Locality Verbose   Longitude   Latitude  \
0  Century City Mall, Poblacion, Makati City, Mak...  121.027535  1



In [9]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv("Preprocessed_Dataset.csv")

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Price range']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
data[['Aggregate rating']] = scaler.fit_transform(data[['Aggregate rating']])

# Display the processed dataset
print(data.head())

# Define user preferences
user_preferences = {
    'Cuisines': 'Italian',
    'Price range': 3,
    'Locality': 'Albany',
    'Aggregate rating': 4
}

# Ensure the user preference values exist in the dataset
if user_preferences['Cuisines'] not in label_encoders['Cuisines'].classes_:
    raise ValueError(f"Cuisine '{user_preferences['Cuisines']}' not found in dataset.")
if user_preferences['Price range'] not in label_encoders['Price range'].classes_:
    raise ValueError(f"Price range '{user_preferences['Price range']}' not found in dataset.")
if user_preferences['Locality'] not in label_encoders['Locality'].classes_:
    raise ValueError(f"Locality '{user_preferences['Locality']}' not found in dataset.")

# Encode user preferences
user_preferences_encoded = {
    'Cuisines': label_encoders['Cuisines'].transform([user_preferences['Cuisines']])[0],
    'Price range': label_encoders['Price range'].transform([user_preferences['Price range']])[0],
    'Locality': label_encoders['Locality'].transform([user_preferences['Locality']])[0],
    'Aggregate rating': scaler.transform(pd.DataFrame([[user_preferences['Aggregate rating']]], columns=['Aggregate rating']))[0][0]
}

# Convert user preferences to DataFrame
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Fit k-NN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(data[['Cuisines', 'Price range', 'Locality', 'Aggregate rating']])

# Find nearest neighbors
distances, indices = knn.kneighbors(user_preferences_df)

# Get recommendations
recommendations = data.iloc[indices[0]]

# Display recommendations
print(recommendations)


   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  Locality  \
0  Third Floor, Century City Mall, Kalayaan Avenu...       171   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...       593   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...       308   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...       862   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...       862   

                                    Locality Verbose   Longitude   Latitude  \
0  Century City Mall, Poblacion, Makati City, Mak...  121.027535  1

In [10]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv('Preprocessed_Dataset.csv')

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Price range']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
data[['Aggregate rating']] = scaler.fit_transform(data[['Aggregate rating']])

# Display the processed dataset
print(data.head())

# Define user preferences
user_preferences = {
    'Cuisines': 'Italian',
    'Price range': 3,
    'Locality': 'Albany',
    'Aggregate rating': 4
}

# Encode user preferences
user_preferences_encoded = {
    'Cuisines': label_encoders['Cuisines'].transform([user_preferences['Cuisines']])[0],
    'Price range': label_encoders['Price range'].transform([user_preferences['Price range']])[0],
    'Locality': label_encoders['Locality'].transform([user_preferences['Locality']])[0],
    'Aggregate rating': scaler.transform([[user_preferences['Aggregate rating']]])[0][0]
}

# Convert user preferences to DataFrame
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Fit k-NN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(data[['Cuisines', 'Price range', 'Locality', 'Aggregate rating']])

# Find nearest neighbors
distances, indices = knn.kneighbors(user_preferences_df)

# Get recommendations
recommendations = data.iloc[indices[0]]

# Display recommendations with additional columns
additional_columns = ['Country Code', 'City', 'Cuisines', 'Average Cost for two', 'Currency', 
                      'Price range', 'Aggregate rating', 'Votes', 'Rating color (numerical)', 
                      'Rating text (numerical)', 'Has Table booking (numerical)', 
                      'Is delivering now (numerical)', 'Switch to order menu (numerical)', 
                      'Votes_binned', 'Cost_binned', 'Rating_binned']

recommendations = data.iloc[indices[0]][additional_columns]

# Display recommendations
print(recommendations)


   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  Locality  \
0  Third Floor, Century City Mall, Kalayaan Avenu...       171   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...       593   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...       308   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...       862   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...       862   

                                    Locality Verbose   Longitude   Latitude  \
0  Century City Mall, Poblacion, Makati City, Mak...  121.027535  1



In [11]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv('Preprocessed_Dataset.csv')

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Price range']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
data[['Aggregate rating']] = scaler.fit_transform(data[['Aggregate rating']])



# Define user preferences
user_preferences = {
    'Cuisines': 'Italian',
    'Price range': 3,
    'Locality': 'Albany',
    'Aggregate rating': 4
}

# Ensure the user preference values exist in the dataset
if user_preferences['Cuisines'] not in label_encoders['Cuisines'].classes_:
    raise ValueError(f"Cuisine '{user_preferences['Cuisines']}' not found in dataset.")
if user_preferences['Price range'] not in label_encoders['Price range'].classes_:
    raise ValueError(f"Price range '{user_preferences['Price range']}' not found in dataset.")
if user_preferences['Locality'] not in label_encoders['Locality'].classes_:
    raise ValueError(f"Locality '{user_preferences['Locality']}' not found in dataset.")

# Encode user preferences
user_preferences_encoded = {
    'Cuisines': label_encoders['Cuisines'].transform([user_preferences['Cuisines']])[0],
    'Price range': label_encoders['Price range'].transform([user_preferences['Price range']])[0],
    'Locality': label_encoders['Locality'].transform([user_preferences['Locality']])[0],
    'Aggregate rating': scaler.transform(pd.DataFrame([[user_preferences['Aggregate rating']]], columns=['Aggregate rating']))[0][0]
}

# Convert user preferences to DataFrame
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Fit k-NN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(data[['Cuisines', 'Price range', 'Locality', 'Aggregate rating']])

# Find nearest neighbors
distances, indices = knn.kneighbors(user_preferences_df)

# Get recommendations
additional_columns = ['Country Code', 'City', 'Cuisines', 'Average Cost for two', 'Currency', 
                      'Price range', 'Aggregate rating', 'Votes', 'Rating color (numerical)', 
                      'Rating text (numerical)', 'Has Table booking (numerical)', 
                      'Is delivering now (numerical)', 'Switch to order menu (numerical)', 
                      'Votes_binned', 'Cost_binned', 'Rating_binned']

recommendations = data.iloc[indices[0]][additional_columns]

# Display recommendations
print(recommendations)


      Country Code       City  Cuisines  Average Cost for two  \
9295            94  Tangerang      1021                300000   
2542             1     Nashik      1306                   600   
9223             1   Vadodara      1086                  1500   
2280             1   Guwahati      1424                  1400   
568            214  Abu Dhabi       837                   100   

                    Currency  Price range  Aggregate rating  Votes  \
9295  Indonesian Rupiah(IDR)            2          0.681680    155   
2542      Indian Rupees(Rs.)            2          0.615730    125   
9223      Indian Rupees(Rs.)            3          1.143331    276   
2280      Indian Rupees(Rs.)            2          0.813580    360   
568       Emirati Diram(AED)            2          1.011430    228   

      Rating color (numerical)  Rating text (numerical)  \
9295                         3                        3   
2542                         3                        3   
9223       

In [12]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv(file_path)

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Currency', 'Has Table booking',
                       'Has Online delivery', 'Is delivering now', 'Switch to order menu', ]

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
numerical_columns = ['Price range', 'Aggregate rating', 'Country Code', 'Average Cost for two',
                     'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Ensure all numerical columns are numeric and replace any non-numeric values with NaN
for column in numerical_columns:
    data[column] = pd.to_numeric(data[column], errors='coerce')

# Drop rows with NaN values
data.dropna(subset=numerical_columns, inplace=True)

# Fit StandardScaler on numerical columns
scaler.fit(data[numerical_columns])

# Define user preferences
user_preferences = {
    'Cuisines': 'Desserts, Italian, Pizza',
    'Price range': 3,
    'Locality': 'Cedar Rapids',
    'Aggregate rating': 4,
    'Country Code': 216,
    'Average Cost for two': 10,
    'Currency': 'Dollar($)',
    'Has Table booking': 'Yes',
    'Has Online delivery': 'No',
    'Is delivering now': 'No',
    'Switch to order menu': 'No',
    'Rating color (numerical)': 4,
    'Rating text (numerical)': 3,
    'Votes': 100,
}

# Encode user preferences
user_preferences_encoded = {}

for key, value in user_preferences.items():
    if key in label_encoders:
        user_preferences_encoded[key] = label_encoders[key].transform([value])[0]
    elif key in numerical_columns:
        # Ensure to provide the value in the same shape as expected by scaler.transform
        user_preferences_encoded[key] = value  # Directly use the value for non-scaled numerical columns
    elif key in ['Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']:
        # Assuming these are binary or categorical variables, encode them accordingly
        if value == 'Yes':
            user_preferences_encoded[key] = 1
        elif value == 'No':
            user_preferences_encoded[key] = 0
        else:
            raise ValueError(f"Unexpected value '{value}' for attribute '{key}'")

# Convert user preferences to DataFrame
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Fit k-NN model
columns_of_interest = ['Cuisines', 'Price range', 'Locality', 'Aggregate rating', 'Average Cost for two',
                       'Country Code', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now',
                       'Switch to order menu', 'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Transform user preferences for scaling
user_preferences_scaled = scaler.transform(user_preferences_df[numerical_columns])

# Merge scaled numerical preferences with categorical preferences
user_preferences_final = user_preferences_df.copy()
user_preferences_final[numerical_columns] = user_preferences_scaled

# Fit k-NN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(data[columns_of_interest])

# Find nearest neighbors
distances, indices = knn.kneighbors(user_preferences_final[columns_of_interest])

# Get recommendations
recommendations = data.iloc[indices[0]]

# Display recommendations
print(recommendations)


      Restaurant ID     Restaurant Name  Country Code       City  \
292        16613507     Flaxton Gardens            14    Flaxton   
3898       18464621  Om Ji Bhature Wale             1  New Delhi   
3325       18426904    Preet Fast Foods             1  New Delhi   
3854       18463985   RAM-G Samose Wale             1  New Delhi   
3912         308599        Chaat Corner             1  New Delhi   

                                                Address  Locality  \
292                     313 Flaxton Drive, Flaxton, QLD       328   
3898  G-6, Vardhman Royal Complex, Part 1, Gujranwal...       386   
3325  Shop 56, Edward Lane, Kingsway Camp, Main Mark...       278   
3854            C-16, Hudson Lane, GTB Nagar, New Delhi       343   
3912     Shop 6, Aurobindo Market, Hauz Khas, New Delhi       402   

                           Locality Verbose   Longitude   Latitude  Cuisines  \
292                        Flaxton, Flaxton  152.877147 -26.652133      1793   
3898            

content based filtering code--|

In [13]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv(file_path)

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Currency', 'Has Table booking',
                       'Has Online delivery', 'Is delivering now', 'Switch to order menu']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
numerical_columns = ['Price range', 'Aggregate rating', 'Country Code', 'Average Cost for two',
                     'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Ensure all numerical columns are numeric and replace any non-numeric values with NaN
for column in numerical_columns:
    data[column] = pd.to_numeric(data[column], errors='coerce')

# Drop rows with NaN values
data.dropna(subset=numerical_columns, inplace=True)

# Fit StandardScaler on numerical columns
scaler.fit(data[numerical_columns])  # Ensure scaler is fitted on the dataset

# Define user preferences
user_preferences = {
    'Cuisines': 'Desserts',
    'Price range': 3,
    'Locality': 'Cedar Rapids',
    'Aggregate rating': 4,
    'Country Code': 208 ,
    'Average Cost for two': 20,
    'Currency': 'Dollar($)',
    'Has Table booking': 'Yes',
    'Has Online delivery': 'No',
    'Is delivering now': 'No',
    'Switch to order menu': 'No',
    'Rating color (numerical)': 3,
    'Rating text (numerical)': 2,
    'Votes': 250,
}

# Encode user preferences
user_preferences_encoded = {}

for key, value in user_preferences.items():
    if key in label_encoders:
        user_preferences_encoded[key] = label_encoders[key].transform([value])[0]
    elif key in numerical_columns:
        # Ensure to provide the value in the same shape as expected by scaler.transform
        user_preferences_encoded[key] = value  # Directly use the value for non-scaled numerical columns
    elif key in ['Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']:
        # Assuming these are binary or categorical variables, encode them accordingly
        if value == 'Yes':
            user_preferences_encoded[key] = 1
        elif value == 'No':
            user_preferences_encoded[key] = 0
        else:
            raise ValueError(f"Unexpected value '{value}' for attribute '{key}'")

# Convert user preferences to DataFrame
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Fit k-NN model
columns_of_interest = ['Cuisines', 'Price range', 'Locality', 'Aggregate rating', 'Average Cost for two',
                       'Country Code',  'Has Table booking', 'Has Online delivery', 'Is delivering now',
                       'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Transform user preferences for scaling
user_preferences_scaled = scaler.transform(user_preferences_df[numerical_columns])

# Merge scaled numerical preferences with categorical preferences
user_preferences_final = user_preferences_df.copy()
user_preferences_final[numerical_columns] = user_preferences_scaled

# Fit k-NN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(data[columns_of_interest])

# Find nearest neighbors
distances, indices = knn.kneighbors(user_preferences_final[columns_of_interest])

# Get recommendations
recommendations = data.iloc[indices[0]]

# Display recommendations
print(recommendations)


      Restaurant ID     Restaurant Name  Country Code       City  \
292        16613507     Flaxton Gardens            14    Flaxton   
3898       18464621  Om Ji Bhature Wale             1  New Delhi   
3912         308599        Chaat Corner             1  New Delhi   
3854       18463985   RAM-G Samose Wale             1  New Delhi   
3325       18426904    Preet Fast Foods             1  New Delhi   

                                                Address  Locality  \
292                     313 Flaxton Drive, Flaxton, QLD       328   
3898  G-6, Vardhman Royal Complex, Part 1, Gujranwal...       386   
3912     Shop 6, Aurobindo Market, Hauz Khas, New Delhi       402   
3854            C-16, Hudson Lane, GTB Nagar, New Delhi       343   
3325  Shop 56, Edward Lane, Kingsway Camp, Main Mark...       278   

                           Locality Verbose   Longitude   Latitude  Cuisines  \
292                        Flaxton, Flaxton  152.877147 -26.652133      1793   
3898            

In [None]:
content based filtering--|

In [14]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv(file_path)

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Currency', 'Has Table booking',
                       'Has Online delivery', 'Is delivering now', 'Switch to order menu']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
numerical_columns = ['Price range', 'Aggregate rating', 'Country Code', 'Average Cost for two',
                     'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

for column in numerical_columns:
    data[[column]] = scaler.fit_transform(data[[column]])

# Define user preferences
user_preferences = {
    'Cuisines': 'Desserts, Italian, Pizza',
    'Price range': 3,
    'Locality': 'Cedar Rapids',
    'Aggregate rating': 4,
    'Country Code': 216,
    'Average Cost for two': 10,
    'Currency': 'Dollar($)',
    'Has Table booking': 'Yes',
    'Has Online delivery': 'No',
    'Is delivering now': 'No',
    'Switch to order menu': 'No',
    'Rating color (numerical)': 4,
    'Rating text (numerical)': 3,
    'Votes': 100,
}

# Encode user preferences
user_preferences_encoded = {}

for key, value in user_preferences.items():
    if key in label_encoders:
        user_preferences_encoded[key] = label_encoders[key].transform([value])[0]
    elif key in numerical_columns:
        user_preferences_encoded[key] = value  # Keep numerical values as they are for comparison

# Prepare user preferences as a DataFrame for similarity calculation
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Extract features of interest for similarity calculation
columns_of_interest = ['Cuisines', 'Price range', 'Locality', 'Aggregate rating',
                       'Country Code', 'Average Cost for two', 'Currency',
                       'Has Table booking', 'Has Online delivery', 'Is delivering now',
                       'Switch to order menu', 'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Calculate cosine similarity between user preferences and each restaurant
data_features = data[columns_of_interest]
user_features = user_preferences_df[columns_of_interest]

similarities = cosine_similarity(user_features, data_features)

# Find indices of top similar restaurants
indices = similarities.argsort()[0][::-1]  # Sort indices in descending order of similarity

# Get top recommendations (top 5 in this case)
top_indices = indices[:5]
recommendations = data.iloc[top_indices]

# Display recommendations
print(recommendations)


     Restaurant ID                Restaurant Name  Country Code        City  \
249       17258496                   Hickory Park      3.482693  Des Moines   
82        17284404    Austin's BBQ and Oyster Bar      3.482693      Albany   
614        5600960              Al Mukhtar Bakery      3.447449     Sharjah   
615        5601340                 Aroos Damascus      3.447449     Sharjah   
89        17284409  Guang Zhou Chinese Restaurant      3.482693      Albany   

                                               Address  Locality  \
249                    1404 S Duff Ave, Ames, IA 50010        46   
82                  2820 Meredyth Dr, Albany, GA 31707        39   
614                Near Safeer Mall, Al Nahda, Sharjah        32   
615  Opposite Emirates NBD, Near First Gulf Bank, K...        34   
89              1214 N Westover Blvd, Albany, GA 31707        39   

      Locality Verbose  Longitude   Latitude  Cuisines  ...     Votes  \
249   Ames, Des Moines -93.610084  42.01025

In [16]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv(file_path)

# Handle missing values using forward fill
data.ffill(inplace=True)

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Currency', 'Has Table booking',
                       'Has Online delivery', 'Is delivering now', 'Switch to order menu']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
numerical_columns = ['Price range', 'Aggregate rating', 'Country Code', 'Average Cost for two',
                     'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

for column in numerical_columns:
    data[[column]] = scaler.fit_transform(data[[column]])

# Function to get user input
def get_user_preference(prompt, options=None):
    if options:
        print(f"{prompt} ({'/'.join(options)})")
    else:
        print(prompt)
    return input()

# Collect user preferences interactively
user_preferences = {
    'Cuisines': get_user_preference('Enter preferred cuisines', options=['Desserts', 'Italian', 'Pizza']),
    'Price range': int(get_user_preference('Enter price range (1-4)')),
    'Locality': get_user_preference('Enter locality', options=['Cedar Rapids', 'Other options...']),
    'Aggregate rating': float(get_user_preference('Enter minimum aggregate rating (1.0-5.0)')),
    'Country Code': int(get_user_preference('Enter country code')),
    'Average Cost for two': float(get_user_preference('Enter average cost for two')),
    'Currency': get_user_preference('Enter currency', options=['Dollar($)', 'Other options...']),
    'Has Table booking': get_user_preference('Has table booking?', options=['Yes', 'No']),
    'Has Online delivery': get_user_preference('Has online delivery?', options=['Yes', 'No']),
    'Is delivering now': get_user_preference('Is delivering now?', options=['Yes', 'No']),
    'Switch to order menu': get_user_preference('Switch to order menu?', options=['Yes', 'No']),
    'Rating color (numerical)': float(get_user_preference('Enter rating color (numerical)')),
    'Rating text (numerical)': float(get_user_preference('Enter rating text (numerical)')),
    'Votes': int(get_user_preference('Enter number of votes')),
}

# Encode user preferences
user_preferences_encoded = {}

for key, value in user_preferences.items():
    if key in label_encoders:
        user_preferences_encoded[key] = label_encoders[key].transform([value])[0]
    elif key in numerical_columns:
        user_preferences_encoded[key] = value  # Keep numerical values as they are for comparison

# Prepare user preferences as a DataFrame for similarity calculation
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Extract features of interest for similarity calculation
columns_of_interest = ['Cuisines', 'Price range', 'Locality', 'Aggregate rating',
                       'Country Code', 'Average Cost for two', 'Currency',
                       'Has Table booking', 'Has Online delivery', 'Is delivering now',
                       'Switch to order menu', 'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Calculate cosine similarity between user preferences and each restaurant
data_features = data[columns_of_interest]
user_features = user_preferences_df[columns_of_interest]

similarities = cosine_similarity(user_features, data_features)

# Find indices of top similar restaurants
indices = similarities.argsort()[0][::-1]  # Sort indices in descending order of similarity

# Get top recommendations (top 5 in this case)
top_indices = indices[:5]
recommendations = data.iloc[top_indices]

# Display recommendations
print(recommendations)


Enter preferred cuisines (Desserts/Italian/Pizza)


 French, Japanese, Desserts


Enter price range (1-4)


 3


Enter locality (Cedar Rapids/Other options...)


 Cedar Rapids


Enter minimum aggregate rating (1.0-5.0)


 4


Enter country code


 216


Enter average cost for two


 10


Enter currency (Dollar($)/Other options...)


 Dollar($)


Has table booking? (Yes/No)


 Yes


Has online delivery? (Yes/No)


 No


Is delivering now? (Yes/No)


 No


Switch to order menu? (Yes/No)


 No


Enter rating color (numerical)


 4


Enter rating text (numerical)


 3


Enter number of votes


 100


      Restaurant ID              Restaurant Name  Country Code         City  \
614         5600960            Al Mukhtar Bakery      3.447449      Sharjah   
82         17284404  Austin's BBQ and Oyster Bar      3.482693       Albany   
9517        6001980                  Timboo Cafe      3.341718       Ankara   
249        17258496                 Hickory Park      3.482693   Des Moines   
18         18255654   Hobing Korean Dessert Cafe      2.531111  Taguig City   

                                                Address  Locality  \
614                 Near Safeer Mall, Al Nahda, Sharjah        32   
82                   2820 Meredyth Dr, Albany, GA 31707        39   
9517  Armada AVM, Kat -1, Eskiehir Yolu, No 6, Yenim...        64   
249                     1404 S Duff Ave, Ames, IA 50010        46   
18    Third Floor, BGC Stopover Pavillion, Rizal Dri...        84   

                                       Locality Verbose   Longitude  \
614                                   A

In [17]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv(file_path)

# Handle missing values using forward fill
data.ffill(inplace=True)

# Convert all categorical data to lowercase for case insensitivity
data['Cuisines'] = data['Cuisines'].str.lower()
data['Locality'] = data['Locality'].str.lower()
data['Currency'] = data['Currency'].str.lower()
data['Has Table booking'] = data['Has Table booking'].str.lower()
data['Has Online delivery'] = data['Has Online delivery'].str.lower()
data['Is delivering now'] = data['Is delivering now'].str.lower()
data['Switch to order menu'] = data['Switch to order menu'].str.lower()

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'Currency', 'Has Table booking',
                       'Has Online delivery', 'Is delivering now', 'Switch to order menu']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
numerical_columns = ['Price range', 'Aggregate rating', 'Country Code', 'Average Cost for two',
                     'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

for column in numerical_columns:
    data[[column]] = scaler.fit_transform(data[[column]])

# Function to get user input
def get_user_input():
    user_preferences = {}
    
    user_preferences['Cuisines'] = input("Enter preferred cuisines (e.g., Desserts, Italian, Pizza): ").lower()
    user_preferences['Price range'] = int(input("Enter price range (e.g., 1, 2, 3, 4): "))
    user_preferences['Locality'] = input("Enter preferred locality: ").lower()
    user_preferences['Aggregate rating'] = float(input("Enter minimum aggregate rating (e.g., 4.0): "))
    user_preferences['Country Code'] = int(input("Enter country code (e.g., 216): "))
    user_preferences['Average Cost for two'] = float(input("Enter average cost for two (e.g., 50): "))
    user_preferences['Currency'] = input("Enter currency (e.g., Dollar($)): ").lower()
    user_preferences['Has Table booking'] = input("Should it have table booking (Yes/No): ").lower()
    user_preferences['Has Online delivery'] = input("Should it have online delivery (Yes/No): ").lower()
    user_preferences['Is delivering now'] = input("Should it be delivering now (Yes/No): ").lower()
    user_preferences['Switch to order menu'] = input("Should it have switch to order menu (Yes/No): ").lower()
    user_preferences['Rating color (numerical)'] = int(input("Enter rating color (numerical): "))
    user_preferences['Rating text (numerical)'] = int(input("Enter rating text (numerical): "))
    user_preferences['Votes'] = int(input("Enter minimum votes: "))
    
    return user_preferences

# Get user preferences
user_preferences = get_user_input()

# Encode user preferences
user_preferences_encoded = {}

for key, value in user_preferences.items():
    if key in label_encoders:
        user_preferences_encoded[key] = label_encoders[key].transform([value])[0]
    elif key in numerical_columns:
        user_preferences_encoded[key] = value  # Keep numerical values as they are for comparison

# Prepare user preferences as a DataFrame for similarity calculation
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Extract features of interest for similarity calculation
columns_of_interest = ['Cuisines', 'Price range', 'Locality', 'Aggregate rating',
                       'Country Code', 'Average Cost for two', 'Currency',
                       'Has Table booking', 'Has Online delivery', 'Is delivering now',
                       'Switch to order menu', 'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Calculate cosine similarity between user preferences and each restaurant
data_features = data[columns_of_interest]
user_features = user_preferences_df[columns_of_interest]

similarities = cosine_similarity(user_features, data_features)

# Find indices of top similar restaurants
indices = similarities.argsort()[0][::-1]  # Sort indices in descending order of similarity

# Get top recommendations (top 5 in this case)
top_indices = indices[:5]
recommendations = data.iloc[top_indices]

# Display recommendations
print(recommendations)


Enter preferred cuisines (e.g., Desserts, Italian, Pizza):  French, Japanese, Desserts
Enter price range (e.g., 1, 2, 3, 4):  3
Enter preferred locality:  Century City Mall, Poblacion, Makati City
Enter minimum aggregate rating (e.g., 4.0):  3
Enter country code (e.g., 216):  216
Enter average cost for two (e.g., 50):  50
Enter currency (e.g., Dollar($)):  Dollar($)
Should it have table booking (Yes/No):  Yes
Should it have online delivery (Yes/No):  No
Should it be delivering now (Yes/No):  No
Should it have switch to order menu (Yes/No):  No
Enter rating color (numerical):  2
Enter rating text (numerical):  3
Enter minimum votes:  314


      Restaurant ID              Restaurant Name  Country Code        City  \
249        17258496                 Hickory Park      3.482693  Des Moines   
614         5600960            Al Mukhtar Bakery      3.447449     Sharjah   
82         17284404  Austin's BBQ and Oyster Bar      3.482693      Albany   
1252         308022                   Farzi Cafe     -0.306015     Gurgaon   
615         5601340               Aroos Damascus      3.447449     Sharjah   

                                                Address  Locality  \
249                     1404 S Duff Ave, Ames, IA 50010        45   
614                 Near Safeer Mall, Al Nahda, Sharjah        31   
82                   2820 Meredyth Dr, Albany, GA 31707        38   
1252  7-8, Ground Floor, Cyber Hub, DLF Cyber City, ...       240   
615   Opposite Emirates NBD, Near First Gulf Bank, K...        33   

                        Locality Verbose  Longitude   Latitude  Cuisines  ...  \
249                     Ames, Des M

In [19]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
file_path = 'Preprocessed_Dataset.csv'
data = pd.read_csv(file_path)

# Handle missing values using forward fill
data.ffill(inplace=True)

# Convert all categorical data to lowercase for case insensitivity
data['Cuisines'] = data['Cuisines'].str.lower()
data['Locality'] = data['Locality'].str.lower()
data['Currency'] = data['Currency'].str.lower()
data['Has Table booking'] = data['Has Table booking'].str.lower()
data['Has Online delivery'] = data['Has Online delivery'].str.lower()
data['Is delivering now'] = data['Is delivering now'].str.lower()
data['Switch to order menu'] = data['Switch to order menu'].str.lower()
data['City'] = data['City'].str.lower()

# Encode categorical variables
label_encoders = {}
categorical_columns = ['Cuisines', 'Locality', 'City', 'Currency', 'Has Table booking',
                       'Has Online delivery', 'Is delivering now', 'Switch to order menu']
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Standardize numerical columns
scaler = StandardScaler()
numerical_columns = ['Price range', 'Aggregate rating', 'Country Code', 'Average Cost for two',
                     'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

for column in numerical_columns:
    data[[column]] = scaler.fit_transform(data[[column]])

# Function to get user input
def get_user_input():
    user_preferences = {}
    
    user_preferences['Cuisines'] = input("Enter preferred cuisines (e.g., Desserts, Italian, Pizza): ").lower()
    user_preferences['Price range'] = int(input("\nEnter price range (e.g., 1, 2, 3, 4): "))
    user_preferences['City'] = input("\nEnter preferred city ('makati city', 'mandaluyong city', 'pasay city', 'pasig city', 'quezon city',"    
        "'san juan city', 'santa rosa', 'tagaytay city', 'taguig city', 'brasília', 'rio de janeiro', 'são paulo',"
        "'albany', 'armidale', 'athens', 'augusta', 'balingup', 'beechworth', 'boise', 'cedar rapids/iowa city',"
        "'chatham-kent', 'clatskanie', 'cochrane', 'columbus', 'consort', 'dalton', 'davenport', 'des moines',"
        "'dicky beach', 'dubuque', 'east ballina', 'fernley', 'flaxton', 'forrest', 'gainesville', 'hepburn springs',"
        "'huskisson', 'inverloch', 'lakes entrance', 'lakeview', 'lincoln', 'lorn', 'macedon', 'macon', 'mayfield',"
        "'mc millan', 'middleton beach', 'miller', 'monroe', 'montville', 'ojo caliente', 'orlando', 'palm cove',"
        "'paynesville', 'penola', 'pensacola', 'phillip island', 'pocatello', 'potrero', 'princeton', 'rest of hawaii',"
        "'savannah', 'singapore', 'sioux city', 'tampa bay', 'tanunda', 'trentham east', 'valdosta', 'vernonia',"
        "'victor harbor', 'vineland station', 'waterloo', 'weirton', 'winchester bay', 'yorkton', 'abu dhabi', 'dubai',"
        "'sharjah', 'agra', 'ahmedabad', 'allahabad', 'amritsar', 'aurangabad', 'bangalore', 'bhopal', 'bhubaneshwar',"
        "'chandigarh', 'chennai', 'coimbatore', 'dehradun', 'faridabad', 'ghaziabad', 'goa', 'gurgaon', 'guwahati',"
        "'hyderabad', 'indore', 'jaipur', 'kanpur', 'kochi', 'kolkata', 'lucknow', 'ludhiana', 'mangalore', 'mohali',"
        "'mumbai', 'mysore', 'nagpur', 'nashik', 'new delhi', 'noida', 'panchkula', 'patna', 'puducherry', 'pune',"
        "'ranchi', 'secunderabad', 'surat', 'vadodara', 'varanasi', 'vizag', 'bandung', 'bogor', 'jakarta', 'tangerang',"
        "'auckland', 'wellington city', 'birmingham', 'edinburgh', 'london', 'manchester', 'doha', 'cape town',"
        "'inner city', 'johannesburg', 'pretoria', 'randburg', 'sandton', 'colombo', 'ankara', 'istanbul'): " ).lower()
    user_preferences['Locality'] = input("\nEnter preferred locality: ").lower()
    user_preferences['Aggregate rating'] = float(input("\nEnter minimum aggregate rating (0-5.0): "))
    user_preferences['Country Code'] = int(input("\nEnter country code (162, 30, 216, 14, 37, 184, 214, 1, 94, 148, 215, 166, 189, 191, 208): "))
    user_preferences['Average Cost for two'] = float(input("\nEnter average cost for two (0-800000): "))
    user_preferences['Currency'] = input("Enter currency ('botswana pula(p)', 'brazilian real(r$)', 'dollar($)', 'emirati dirham(aed)',"
        "'indian rupees(rs.)', 'indonesian rupiah(idr)', 'newzealand($)', 'pounds(£)', 'qatari rial(qr)',"
        "'rand(r)', 'sri lankan rupee(lkr)', 'turkish lira(tl)')" ).lower()
    user_preferences['Has Table booking'] = input("\nShould it have table booking (Yes/No): ").lower()
    user_preferences['Has Online delivery'] = input("\nShould it have online delivery (Yes/No): ").lower()
    user_preferences['Is delivering now'] = input("\nShould it be delivering now (Yes/No): ").lower()
    user_preferences['Switch to order menu'] = input("\nShould it have switch to order menu (Yes/No): ").lower()
    user_preferences['Rating color (numerical)'] = int(input("\nEnter rating color (0-5): "))
    user_preferences['Rating text (numerical)'] = int(input("\nEnter rating text (0-5): "))
    user_preferences['Votes'] = int(input("\nEnter minimum votes: "))
    
    return user_preferences

# Get user preferences
user_preferences = get_user_input()

# Encode user preferences
user_preferences_encoded = {}

for key, value in user_preferences.items():
    if key in label_encoders:
        user_preferences_encoded[key] = label_encoders[key].transform([value])[0]
    elif key in numerical_columns:
        user_preferences_encoded[key] = value  # Keep numerical values as they are for comparison

# Prepare user preferences as a DataFrame for similarity calculation
user_preferences_df = pd.DataFrame([user_preferences_encoded])

# Extract features of interest for similarity calculation
columns_of_interest = ['Cuisines', 'Price range', 'Locality', 'Aggregate rating',
                       'Country Code', 'Average Cost for two', 'Currency',
                       'Has Table booking', 'Has Online delivery', 'Is delivering now',
                       'Switch to order menu', 'Rating color (numerical)', 'Rating text (numerical)', 'Votes']

# Calculate cosine similarity between user preferences and each restaurant
data_features = data[columns_of_interest]
user_features = user_preferences_df[columns_of_interest]

similarities = cosine_similarity(user_features, data_features)

# Find indices of top similar restaurants
indices = similarities.argsort()[0][::-1]  # Sort indices in descending order of similarity

# Get top recommendations (top 5 in this case)
top_indices = indices[:5]
recommendations = data.iloc[top_indices]

# Display recommendations
print(recommendations)


Enter preferred cuisines (e.g., Desserts, Italian, Pizza):  Desserts, Italian, Pizza

Enter price range (e.g., 1, 2, 3, 4):  3

Enter preferred city ('makati city', 'mandaluyong city', 'pasay city', 'pasig city', 'quezon city','san juan city', 'santa rosa', 'tagaytay city', 'taguig city', 'brasília', 'rio de janeiro', 'são paulo','albany', 'armidale', 'athens', 'augusta', 'balingup', 'beechworth', 'boise', 'cedar rapids/iowa city','chatham-kent', 'clatskanie', 'cochrane', 'columbus', 'consort', 'dalton', 'davenport', 'des moines','dicky beach', 'dubuque', 'east ballina', 'fernley', 'flaxton', 'forrest', 'gainesville', 'hepburn springs','huskisson', 'inverloch', 'lakes entrance', 'lakeview', 'lincoln', 'lorn', 'macedon', 'macon', 'mayfield','mc millan', 'middleton beach', 'miller', 'monroe', 'montville', 'ojo caliente', 'orlando', 'palm cove','paynesville', 'penola', 'pensacola', 'phillip island', 'pocatello', 'potrero', 'princeton', 'rest of hawaii','savannah', 'singapore', 'sioux city

      Restaurant ID    Restaurant Name  Country Code  City  \
249        17258496       Hickory Park      3.482693    36   
9283        7402935               Skye      1.332821    59   
615         5601340     Aroos Damascus      3.447449   117   
614         5600960  Al Mukhtar Bakery      3.447449   117   
121        17293873       Trappeze Pub      3.482693     8   

                                                Address  Locality  \
249                     1404 S Duff Ave, Ames, IA 50010        45   
9283  Menara BCA, Lantai 56, Jl. MH. Thamrin, Thamri...       375   
615   Opposite Emirates NBD, Near First Gulf Bank, K...        33   
614                 Near Safeer Mall, Al Nahda, Sharjah        31   
121                     269 N Hull St, Athens, GA 30601        77   

                            Locality Verbose   Longitude   Latitude  Cuisines  \
249                         Ames, Des Moines  -93.610084  42.010254       216   
9283  Grand Indonesia Mall, Thamrin, Jakarta  106.