### TASK 2

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
data = pd.read_csv('dataset.csv')

In [3]:

# Fill missing values in 'Cuisines' with 'Unknown'
data['Cuisines'] = data['Cuisines'].fillna('Unknown')

# Fill missing values in 'Price range' with the most common value (mode)
data['Price range'] = data['Price range'].fillna(data['Price range'].mode()[0])

In [4]:
# Encode 'Cuisines' and 'Price range'
encoder = OneHotEncoder(sparse_output=False)
encoded_features = encoder.fit_transform(data[['Cuisines', 'Price range']])


In [5]:
# Create a DataFrame with the encoded features
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out(['Cuisines', 'Price range']))

# Combine encoded features with the original DataFrame
encoded_data = pd.concat([data, encoded_df], axis=1)

# Display the first few rows of the processed DataFrame
print(encoded_data.head())

   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

In [6]:
# Check for any remaining NaNs in the dataset
print(encoded_data.isna().sum())

# Optionally, fill remaining NaNs in numerical columns with the column mean or median
for col in encoded_data.select_dtypes(include=['float64', 'int64']).columns:
    encoded_data[col] = encoded_data[col].fillna(encoded_data[col].mean())


Restaurant ID                               0
Restaurant Name                             0
Country Code                                0
City                                        0
Address                                     0
                                           ..
Cuisines_World Cuisine, Patisserie, Cafe    0
Price range_1                               0
Price range_2                               0
Price range_3                               0
Price range_4                               0
Length: 1851, dtype: int64


In [10]:
def get_recommendations(user_preferences, encoded_data, num_recommendations=5):
    # Convert user preferences to a DataFrame and encode
    user_df = pd.DataFrame([user_preferences])
    user_vector = encoder.transform(user_df)
    
    # Compute cosine similarity between user preferences and all restaurants
    similarities = cosine_similarity(user_vector, encoded_data[encoder.get_feature_names_out(['Cuisines', 'Price range'])])
    
    # Get the indices of the top recommendations
    indices = similarities[0].argsort()[-num_recommendations:][::-1]
    
    # Return the top recommended restaurants
    return encoded_data.iloc[indices][['Restaurant Name', 'Cuisines', 'Average Cost for two', 'Aggregate rating']]


In [13]:
# Sample user preferences
sample_user_preferences = {'Cuisines': 'Italian', 'Price range': 2}

# Get recommendations for the sample user
recommendations = get_recommendations(sample_user_preferences, encoded_data)
print("Recommendations for the sample user:")
print(recommendations)


[[0.  0.  0.  ... 0.  0.  0.5]]
Recommendations for the sample user:
                     Restaurant Name Cuisines  Average Cost for two  \
3705                       Sinyora's  Italian                   500   
2070                       56 Fresca  Italian                   750   
1482                  Chilli Indiana  Italian                   500   
1106                   Da Pizza Zone  Italian                   500   
532   Mom & Dad's Italian Restaurant  Italian                    25   

      Aggregate rating  
3705               4.0  
2070               3.7  
1482               3.8  
1106               0.0  
532                3.7  
