In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import KFold


In [2]:
df=pd.read_csv('clean_swiggy_data.csv',encoding='latin1')

In [3]:
df.head()

Unnamed: 0,restaurant_name,ratings,cuisines,pricing,address,distance,city,pricing_impute
0,kfc,4.4,"burgers, fast food, rolls & wraps",80.0,vallabh vidyanagar,38.0,anand,80
1,pizza hut,4.3,pizzas,189.0,hadgood,42.0,anand,189
2,subway,4.4,"healthy food, salads, fast food",,mahadev nagar,32.0,anand,100
3,mcdonald's,4.4,"burgers, beverages, cafe, desserts",179.0,vallabh vidyanagar,32.0,anand,179
4,the belgian waffle co.,4.6,"waffle, desserts, ice cream, beverages",,triveni arcade,38.0,anand,49


In [4]:
df=df.drop(columns='pricing',axis=1)

In [5]:
df['ratings']=df['ratings'].round(1)
df['distance']=df['distance'].round(2)

In [6]:
df

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
0,kfc,4.4,"burgers, fast food, rolls & wraps",vallabh vidyanagar,38.0,anand,80
1,pizza hut,4.3,pizzas,hadgood,42.0,anand,189
2,subway,4.4,"healthy food, salads, fast food",mahadev nagar,32.0,anand,100
3,mcdonald's,4.4,"burgers, beverages, cafe, desserts",vallabh vidyanagar,32.0,anand,179
4,the belgian waffle co.,4.6,"waffle, desserts, ice cream, beverages",triveni arcade,38.0,anand,49
...,...,...,...,...,...,...,...
4145,ganga gayatri samosa,4.3,"fast food, street food, snacks",bus stand,38.0,nadiyad,40
4146,bombay chopati bhelpuri center,4.4,"chaat, snacks",nadiyad,38.0,nadiyad,40
4147,janta paubhaji pulav ice cream center,4.0,indian,nadiyad,47.0,nadiyad,80
4148,bao box,4.2,"chinese, indian, asian",nadiyad,45.6,nadiyad,100


In [7]:
df[df.duplicated()]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
21,vadilal hangout,4.7,ice cream,karamsad,28.0,anand,80
25,grameen kulfi,4.6,"ice cream, desserts",vallabh vidyanagar,28.0,anand,120
27,jassi de parathe,4.6,"north indian, snacks, chaat",vallabh vidyanagar,32.0,anand,40
28,taco bell,4.3,"mexican, fast food, snacks",sojitra road,38.0,anand,80
29,starbucks coffee,4.4,"beverages, cafe, snacks, desserts, bakery, ice...",anand sojitra road,38.0,anand,100
30,cafe coffee day,4.5,"beverages, cafe, snacks, desserts, burgers, ic...",vallabh vidyanagar,32.0,anand,100
31,la pino'z pizza,4.3,"pizzas, pastas, italian, desserts, beverages",maruti solaris annex,42.0,anand,120
32,gourmet ice cream cakes by baskin robbins,4.8,"ice cream cakes, desserts, ice cream, bakery",vallabh vidyanagar,38.0,anand,80
34,barista coffee,5.0,"beverages, fast food, snacks, biryani, pastas,...",shah heights,38.0,anand,129
232,domino's pizza,4.2,"pizzas, italian, pastas, desserts",fatehgunj,22.0,vadodara,100


In [8]:
df[(df['restaurant_name']=='grameen kulfi') & (df['city']=='anand')]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
12,grameen kulfi,4.6,"ice cream, desserts",vallabh vidyanagar,28.0,anand,120
25,grameen kulfi,4.6,"ice cream, desserts",vallabh vidyanagar,28.0,anand,120


In [9]:
df=df.drop_duplicates()

In [10]:
df.shape

(4110, 7)

In [11]:
df.duplicated().sum()

0

In [12]:
df[(df['restaurant_name']=="al - shahi") & (df['city']=='mehsana')]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
2434,al - shahi,4.0,"mughlai, biryani",mehsana,22.0,mehsana,50
2454,al - shahi,4.0,"mughlai, biryani",mehsana,22.0,mehsana,100


In [13]:
df = df.drop_duplicates(subset=['restaurant_name', 'ratings', 'cuisines', 'address', 'distance', 'city'],keep='last')


In [14]:
df[(df['restaurant_name']=="al - shahi") & (df['city']=='mehsana')]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
2454,al - shahi,4.0,"mughlai, biryani",mehsana,22.0,mehsana,100


In [15]:
dfc=df.copy()

In [16]:
dfc.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
4145    False
4146    False
4147    False
4148    False
4149    False
Length: 4092, dtype: bool

In [17]:
dfc['cuisines']

0            burgers, fast food, rolls & wraps
1                                       pizzas
2              healthy food, salads, fast food
3           burgers, beverages, cafe, desserts
4       waffle, desserts, ice cream, beverages
                         ...                  
4145            fast food, street food, snacks
4146                             chaat, snacks
4147                                    indian
4148                    chinese, indian, asian
4149                      beverages, fast food
Name: cuisines, Length: 4092, dtype: object

In [18]:
import re
# remove the punctuation except commas
dfc['cuisines'] = dfc['cuisines'].apply(lambda x: re.sub(r'[^\w\s,]', '', x) if isinstance(x, str) else x)

In [19]:
dfc['cuisines']

0             burgers, fast food, rolls  wraps
1                                       pizzas
2              healthy food, salads, fast food
3           burgers, beverages, cafe, desserts
4       waffle, desserts, ice cream, beverages
                         ...                  
4145            fast food, street food, snacks
4146                             chaat, snacks
4147                                    indian
4148                    chinese, indian, asian
4149                      beverages, fast food
Name: cuisines, Length: 4092, dtype: object

In [20]:
def remove_space(text):
    # Splitting by comma to separate different cuisines
    cuisines = re.split(r'\s*,\s*', text)
    # Removing extra spaces and special characters, then joining words within each cuisine
    cleaned_cuisines = []
    for cuisine in cuisines:
        cleaned_cuisine = re.sub(r'\s+', '', cuisine) if len(cuisine.split()) > 1 else cuisine
        # this split the each word fast food as 'fast', 'food' then it check if word is > 1 means (fast,food) 2 words so >1 
        # then it will remove the space otherwise returns the as it is 
        cleaned_cuisines.append(cleaned_cuisine)
    # Joining the cleaned cuisines back together
    return ', '.join(cleaned_cuisines)

In [21]:
dfc['cuisines'] = dfc['cuisines'].apply(remove_space)

In [22]:
dfc['cuisines']

0               burgers, fastfood, rollswraps
1                                      pizzas
2               healthyfood, salads, fastfood
3          burgers, beverages, cafe, desserts
4       waffle, desserts, icecream, beverages
                        ...                  
4145             fastfood, streetfood, snacks
4146                            chaat, snacks
4147                                   indian
4148                   chinese, indian, asian
4149                      beverages, fastfood
Name: cuisines, Length: 4092, dtype: object

In [23]:
dfc['city'].value_counts()

city
gandhinagar    1554
ahmedabad      1543
vadodara        642
anand           200
nadiyad          80
mehsana          73
Name: count, dtype: int64

In [24]:
def recommedations(city,cuisines_weight=0.5,ratings_weight=0.3,distance_weight=0.2):
    df_city=dfc[dfc['city']==city]
    
    scaler = MinMaxScaler()
    #scaler=StandardScaler()
    df_city.loc[:,['ratings','distance']]=scaler.fit_transform(df_city[['ratings','distance']])
    
    tfidf=TfidfVectorizer()
    cuisine_metric=tfidf.fit_transform(df_city['cuisines'])
    
    rating_similarity=cosine_similarity(df_city[['ratings']],df_city[['ratings']])
    #print('before: ',df['ratings'].var())
    #print('after : ',df_city['ratings'].var())
    # var() is low so closer values that means ones metric
    distance_similarity=cosine_similarity(df_city[['distance']],df_city[['distance']])
     # Adjust distance similarity for self-comparisons
    np.fill_diagonal(distance_similarity, 1.0)
    cuisine_similarity=cosine_similarity(cuisine_metric,cuisine_metric)
    
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                                ratings_weight * rating_similarity +
                                distance_weight * distance_similarity)
    
    #print("Rating Similarity:\n", rating_similarity)
    #print("Distance Similarity:\n", distance_similarity)
    #print("Cuisine Similarity:\n", cuisine_similarity)
    #print('weighted_similarity:\n',weighted_similarity)

    
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    return similarity_df
    
    
  
    #print(city_combined_similarity)
    

In [25]:
sm=recommedations(city='mehsana')

In [26]:
sm.head(10)

restaurant_name,navjivan restaurant,real paprika,neelkanth restaurant,hotel tulsi,krishna restaurant,london yard pizza,sankalp restaurant,sam's pizza,mayur restaurant,shakti - the sandwich shop,...,jay ambe restaurant,navdurga bhajipav,sejan chinese & punjabi,pariwar restaurant,health point fruit & juice bar,bansi kathiyawadi,rudra fastfood & punjabi,vicky fry centre,bhavika restaurant,the chai theka specialist of chai
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
navjivan restaurant,1.0,0.453117,0.8,0.49675,0.649461,0.3,0.378448,0.3,0.686401,0.381747,...,0.752967,0.3,0.432446,0.367174,0.371145,0.3,0.428682,0.3,0.468947,0.355267
real paprika,0.453117,1.0,0.653117,0.67502,0.719076,0.832678,0.697819,0.730736,0.700012,0.60975,...,0.606984,0.5,0.637989,0.542111,0.5446,0.5,0.634068,0.5,0.605912,0.534647
neelkanth restaurant,0.8,0.653117,1.0,0.69675,0.849461,0.5,0.578448,0.5,0.886401,0.581747,...,0.952967,0.5,0.632446,0.567174,0.571145,0.5,0.628682,0.5,0.668947,0.555267
hotel tulsi,0.49675,0.67502,0.69675,1.0,0.781505,0.57767,0.841386,0.611986,0.652049,0.641025,...,0.637471,0.585165,0.677311,0.63359,0.55731,0.5,0.672273,0.605511,0.709458,0.54452
krishna restaurant,0.649461,0.719076,0.849461,0.781505,1.0,0.5,0.612242,0.5,0.770065,0.616961,...,0.744172,0.5,0.6895,0.596111,0.601792,0.5,0.684115,0.5,0.741725,0.579075
london yard pizza,0.3,0.832678,0.5,0.57767,0.5,1.0,0.7234,0.846787,0.622766,0.587928,...,0.5,0.5,0.582601,0.5,0.5,0.5,0.580254,0.5,0.5,0.5
sankalp restaurant,0.378448,0.697819,0.578448,0.841386,0.612242,0.7234,1.0,0.8221,0.674651,0.653207,...,0.5,0.5,0.576721,0.558786,0.56226,0.5,0.574541,0.5,0.554263,0.548366
sam's pizza,0.3,0.730736,0.5,0.611986,0.5,0.846787,0.8221,1.0,0.677004,0.626775,...,0.5,0.5,0.619095,0.5,0.5,0.5,0.615711,0.5,0.5,0.5
mayur restaurant,0.686401,0.700012,0.886401,0.652049,0.770065,0.622766,0.674651,0.677004,1.0,0.681995,...,0.850054,0.5,0.713977,0.551912,0.658391,0.5,0.599446,0.5,0.630563,0.623043
shakti - the sandwich shop,0.381747,0.60975,0.581747,0.641025,0.616961,0.587928,0.653207,0.626775,0.681995,1.0,...,0.5,0.5,0.711663,0.561257,0.686904,0.5,0.577675,0.5,0.556545,0.645193


In [27]:
sm['navjivan restaurant']

restaurant_name
navjivan restaurant                  1.000000
real paprika                         0.453117
neelkanth restaurant                 0.800000
hotel tulsi                          0.496750
krishna restaurant                   0.649461
                                       ...   
bansi kathiyawadi                    0.300000
rudra fastfood & punjabi             0.428682
vicky fry centre                     0.300000
bhavika restaurant                   0.468947
the chai theka specialist of chai    0.355267
Name: navjivan restaurant, Length: 73, dtype: float64

In [28]:
dfc[(dfc['restaurant_name']=='navjivan restaurant') & (dfc['city']=='mehsana')]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
2418,navjivan restaurant,4.4,"chinese, biryani, beverages",radhakrishna township,18.0,mehsana,80


In [29]:
dfc[(dfc['restaurant_name']=='tasty meals restaurant & banquet') & (dfc['city']=='ahmedabad')]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
2415,tasty meals restaurant & banquet,4.2,"indian, chinese, thai, mexican, continental, p...",naroda,31.6,ahmedabad,120


In [30]:
dfc[(dfc['restaurant_name']=="sam's pizza") & (dfc['city']=='mehsana')]

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute
2425,sam's pizza,4.6,"pizzas, desserts",mehsana,28.0,mehsana,40
2440,sam's pizza,4.6,pizzas,mehsana,28.0,mehsana,115


In [31]:
dfc[(dfc['restaurant_name']=="gj 25 china town restaurant") & (dfc['city']=='ahmedabad')].index[0]

2416

In [32]:
dfc = dfc.groupby(['restaurant_name', 'ratings', 'address', 'distance', 'city'], as_index=False).agg({
    'cuisines': lambda x: ', '.join(sorted(set(x))),
    'pricing_impute': 'mean'  # or any other aggregation you want for numeric columns
     })


In [33]:
def r(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2):
    
    
    df_city = dfc[dfc['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:,['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
     # If user_cuisines are provided, ensure they exist in the dataset
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # Check if any of the requested cuisines exist in the dataset
        if not any(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower):
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    
    return top_recommendations_overall


# r(city='mehsana', user_cuisines=['chinese'])


In [34]:
r(city='mehsana',user_cuisines=['Pizzas'])

['jay ambe fast food',
 'hungritos',
 '7 heart pizzeria',
 'london wraps',
 'zorko brand of food lovers']

In [35]:
recommendations = r(city='mehsana', user_cuisines=['chinese', 'italian'])
unique_cuisines = dfc[dfc['restaurant_name'].isin(recommendations)]['cuisines'].unique()
print("Unique cuisines in recommendations:", unique_cuisines)

# Ensure diversity of cuisines is respected
assert len(unique_cuisines) > 1, "Recommendations are not diverse!"


Unique cuisines in recommendations: ['chinese, punjabi, beverages, snacks, pizzas, fastfood'
 'southindian, chinese, fastfood, pizzas, beverages' 'chinese, beverages'
 'biryani, chinese, snacks, desserts, beverages'
 'chinese, biryani, beverages']


In [36]:
# Test with an unknown cuisine
#recommendations = r(city='mehsana', user_cuisines=['exotic'])
#print("Recommendations for unknown cuisine:", recommendations)

# Test with a city that has few or no restaurants
recommendations = r(city='mehsana', user_cuisines=['chaat'])
print("Recommendations for an unknown city:", recommendations)


Recommendations for an unknown city: ['brothers cafe', "mcdonald's", "mcdonald's gourmet burger collection", "bhukhkhad's cafe", 'food costa']


In [37]:
dfc[(dfc['restaurant_name']=="mcdonald's gourmet burger collection") & (dfc['city']=='mehsana')][['restaurant_name','ratings','address','distance','city','cuisines','pricing_impute']]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
2133,mcdonald's gourmet burger collection,4.5,mehsana,22.0,mehsana,"burgers, beverages, cafe, desserts",80.0


In [38]:
dfc.shape

(4091, 7)

In [39]:
dfc.duplicated().sum()


0

In [40]:
#dfc.to_excel("C:\\Users\\Dell\\Downloads\\final_web_data_swiggy.xlsx",index=False)

In [41]:
dfc['city'].value_counts()

city
gandhinagar    1554
ahmedabad      1543
vadodara        642
anand           200
nadiyad          80
mehsana          72
Name: count, dtype: int64

In [42]:
def r2(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.4, distance_weight=0.0):
    df_city = dfc[dfc['city'] == city]
    
    df_city['ratings_origin']=df_city['ratings']
    df_city['distance_origin']=df_city['distance']
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:,['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
     # Return a DataFrame with the top recommendations
    top_recommendations_df = df_city[df_city['restaurant_name'].isin(top_recommendations_overall)]

    return top_recommendations_df[['restaurant_name','ratings_origin','address','cuisines','distance_origin']]

    
    #return top_recommendations_overall


# r(city='mehsana', user_cuisines=['chinese'])


In [43]:
r2(city='mehsana',user_cuisines=['pizzas'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city['ratings_origin']=df_city['ratings']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city['distance_origin']=df_city['distance']


Unnamed: 0,restaurant_name,ratings_origin,address,cuisines,distance_origin
22,7 heart pizzeria,4.2,mehsana,"pizzas, fastfood",22.0
1467,hungritos,3.7,mehsana,"snacks, fastfood, beverages, pizzas",32.0
1562,jay ambe fast food,4.3,mehsana,"fastfood, pizzas, beverages",22.0
1949,london wraps,4.2,radhanpur road,"fastfood, pizzas",28.0
2232,moment mocha,4.1,mehsana,"beverages, fastfood, pizzas, snacks",18.0


In [44]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
import numpy as np

def recommend_knn(city, user_cuisines, num_neighbors=5, cuisines_weight=7, ratings_weight=2, distance_weight=1):
    # Filter the DataFrame based on city
    df_city = dfc[dfc['city'] == city]
    
    
    df_city['ratings_origin']=df_city['ratings']
    df_city['distance_origin']=df_city['distance']
    
    # Scale the features (ratings and distance)
    scaler = MinMaxScaler()
    df_city[['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Vectorize cuisines using TF-IDF
    tfidf = TfidfVectorizer()
    cuisine_vectors = tfidf.fit_transform(df_city['cuisines']).toarray()
    
    # Apply weights to each feature
    cuisine_vectors_weighted = cuisine_vectors * cuisines_weight
    ratings_weighted = df_city[['ratings']].values * ratings_weight
    distance_weighted = df_city[['distance']].values * distance_weight
    
    # Combine features (cuisines, ratings, distance) for KNN
    features = np.hstack([cuisine_vectors_weighted, ratings_weighted, distance_weighted])
    
    # Fit the KNN model
    knn = NearestNeighbors(n_neighbors=num_neighbors, metric='cosine')
    knn.fit(features)
    
    # Transform user input cuisines into the same TF-IDF space
    user_cuisine_vector = tfidf.transform([', '.join(user_cuisines)]).toarray()
    
    # Apply weights to the user vector for cuisines and use placeholder for ratings and distance
    user_cuisine_vector_weighted = user_cuisine_vector * cuisines_weight
    user_vector = np.hstack([user_cuisine_vector_weighted, np.array([[0.5 * ratings_weight, 0.5 * distance_weight]])])  # Placeholder for ratings and distance
    
    # Find the nearest neighbors (recommended restaurants)
    distances, indices = knn.kneighbors(user_vector)
    
    # Get the top recommended restaurants
    recommended_restaurants = df_city.iloc[indices[0]].copy()

    return recommended_restaurants[['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'distance_origin']]


In [45]:
recommend_knn(city='mehsana',user_cuisines=['chinese'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city['ratings_origin']=df_city['ratings']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city['distance_origin']=df_city['distance']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city[['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])


Unnamed: 0,restaurant_name,ratings_origin,address,cuisines,distance_origin
1841,krishna restaurant,4.1,suramya park society,"chinese, beverages",28.0
1582,jay bhavani vadapav,4.2,mehsana,"fastfood, chinese",22.0
1566,jay ambe restaurant,3.9,mehsana,"chinese, biryani",28.0
2376,neelkanth restaurant,4.5,kunal,"chinese, biryani, beverages",22.0
2364,navjivan restaurant,4.4,radhakrishna township,"chinese, biryani, beverages",18.0


In [46]:
dfc['cuisines'].unique()

array(['arabian, continental, american, turkish', 'snacks, chinese',
       'northindian, pizzas', ...,
       'beverages, pizzas, momos, sandwich, snacks, fastfood, rollswraps, grill, pastas, mexican',
       'beverages, pizzas, fastfood, burgers',
       'burgers, pastas, snacks, beverages'], dtype=object)

In [47]:
# Assuming cuisines are already in a list format per row
unique_cuisines = dfc['cuisines'].unique()

# Print unique cuisines
print("Unique Cuisines:")
for cuisine in unique_cuisines:
    print(cuisine)


Unique Cuisines:
arabian, continental, american, turkish
snacks, chinese
northindian, pizzas
indian, chinese, continental, desserts, beverages
biryani, continental, oriental, indian
fastfood
healthyfood, snacks, northindian, salads, sandwich
mughlai, chinese, biryani, fastfood, desserts
northindian, biryani, snacks
southindian
northindian, chaat, snacks, beverages
pizzas, italianamerican, pastas, fastfood, cafe
indian, northindian
salads, mexican
indian
pizzas
icecream, pizzas, fastfood, burgers, snacks, beverages
chinese, biryani, beverages, continental
beverages, desserts
indian, italian, continental, northindian, chinese, beverages, snacks, fastfood
pizzas, fastfood
cafe, beverages, fastfood, pizzas, snacks
bakery, waffle, icecreamcakes
indian, burgers, jain
pizzas, fastfood, burgers, cafe
cafe, juices, beverages, desserts, fastfood, burgers, snacks
juices, beverages, desserts, healthyfood
bakery, desserts
burgers, fastfood, snacks
biryani
biryani, tandoor
chinese
indian, tandoor, s

In [48]:
def rp(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2):
    df_city = dfc[dfc['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
    # Check if user-provided cuisines exist in the dataset
    message = ""
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # If none of the user-provided cuisines exist in the dataset
        if not any(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower):
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        
        # If the cuisines are present but less popular, display the message
        popular_cuisines_threshold = 3  # Set a threshold for considering cuisines "less popular"
        matched_cuisines = [c for c in user_cuisines_lower if any(c in cuisine for cuisine in available_cuisines)]
        
        if len(matched_cuisines) < popular_cuisines_threshold:
            message = "Dear user, your requested cuisine(s) are less popular, so the recommendations are based on overall higher ratings, higher cuisines similarity score , or distance."
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    # Return the recommendations along with the message if cuisines were less popular
    if message:
        print(message)
    
    return top_recommendations_overall





In [49]:
# Example usage:
rp(city='mehsana', user_cuisines=['chinese'])



Dear user, your requested cuisine(s) are less popular, so the recommendations are based on overall higher ratings, higher cuisines similarity score , or distance.


['krishna restaurant',
 'hotel highlaned',
 'jay bhavani vadapav',
 'neelkanth restaurant',
 'hotel tulsi']

In [50]:
def rq(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2):
    df_city = dfc[dfc['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
    # Define the popularity threshold
    popular_cuisines_threshold = 2

    message = None  # To store message if needed
    
    # If user_cuisines are provided, ensure they exist in the dataset
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # Count the number of matching cuisines
        matching_cuisines_count = sum(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower)
        
        if matching_cuisines_count == 0:
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        elif matching_cuisines_count < popular_cuisines_threshold:
            message = ("Dear user, your requested cuisine(s) are less popular, so the recommendations are "
                       "based on overall higher ratings, similar cuisines, or distance.")
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    # Return the message along with the recommendations
    if message:
        return message, top_recommendations_overall
    else:
        return top_recommendations_overall


In [51]:
dd=dfc['cuisines'].value_counts()

In [52]:
dd[dd<3].sort_values(ascending=False).head(60)

cuisines
indian, chinese, northindian                                                                                           2
burgers, american, fastfood, snacks                                                                                    2
beverages, pizzas, fastfood, burgers, cafe, snacks, momos, mexican, waffle, desserts                                   2
american, cafe                                                                                                         2
fastfood, snacks, northindian, chinese                                                                                 2
burgers, coffee, fastfood, snacks, beverages                                                                           2
sandwich, snacks, burgers, pizzas, beverages                                                                           2
waffle, beverages, icecream, desserts                                                                                  2
bengali, biryani, tando

In [53]:
df[df['restaurant_name']=='lapinoz']

Unnamed: 0,restaurant_name,ratings,cuisines,address,distance,city,pricing_impute


In [54]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pandas as pd
from scipy.stats import spearmanr

def k(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2, expert_ranking=None):
    df_city = dfc[dfc['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
    # If user_cuisines are provided, ensure they exist in the dataset
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # Check if any of the requested cuisines exist in the dataset
        if not any(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower):
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    # If an expert ranking is provided, calculate Spearman's Rank Correlation
    if expert_ranking is not None:
        # Ensure the expert ranking and system ranking have the same length
        expert_ranking = [r for r in expert_ranking if r in top_recommendations_overall]  # Filter expert_ranking to contain only restaurants in the system recommendations
        
        # Get the ranks for both lists
        system_ranking = top_recommendations_overall[:len(expert_ranking)]
        
        # Convert the rankings into a numerical format
        system_ranks = [system_ranking.index(r) for r in system_ranking]
        expert_ranks = [expert_ranking.index(r) for r in expert_ranking]
        
        # Calculate Spearman's correlation coefficient
        spearman_corr, _ = spearmanr(system_ranks, expert_ranks)
        
        return top_recommendations_overall, spearman_corr
    
    return top_recommendations_overall


In [55]:
# Example expert-provided ranking for the city 'mehsana' with Chinese cuisine preference
expert_ranking = ['krishna restaurant', 'jay ambe fastfood','jay bhavani vadapav','neelkanth restaurant','hotel tulsi']



# Call the function with the expert ranking to calculate Spearman’s correlation
recommendations, spearman_corr = k(city='mehsana', user_cuisines=['chinese'], expert_ranking=expert_ranking)

print(f"Top Recommendations: {recommendations}")
print(f"Spearman Correlation with Expert Ranking: {spearman_corr}")

Top Recommendations: ['krishna restaurant', 'hotel highlaned', 'jay bhavani vadapav', 'neelkanth restaurant', 'hotel tulsi']
Spearman Correlation with Expert Ranking: 1.0


In [56]:
dfc1=dfc.copy()

In [57]:
dfc1.shape

(4091, 7)

In [58]:
new=pd.DataFrame([{'restaurant_name':"gordhan thal",
    'ratings':4.4,
    'cuisines':'gujarati',
    'address':'sarkhej road',
    'distance':28.0,
    'city':'ahmedabad',
    'pricing_impute':200}])



In [59]:
dfc1=pd.concat([dfc1,new],ignore_index=True)

In [60]:
dfc1[(dfc1['city']=='ahmedabad') & (dfc1['restaurant_name']=="gordhan thal")]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
4091,gordhan thal,4.4,sarkhej road,28.0,ahmedabad,gujarati,200.0


In [61]:
def rl(city, user_cuisines=None,num_recommendations=5,cuisines_weight=0.5,ratings_weight=0.3,distance_weight=0.2):
    df_city = dfc1[dfc1['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:,['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
     # If user_cuisines are provided, ensure they exist in the dataset
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # Check if any of the requested cuisines exist in the dataset
        if not any(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower):
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    
    return top_recommendations_overall


# r(city='mehsana', user_cuisines=['chinese'])


In [62]:
rl(city='ahmedabad',user_cuisines=['gujarati'])

['annkut',
 'hari om centre',
 'maharaja restaurant',
 'shiv shakti fast food',
 'gordhan thal']

In [63]:
dfc1[(dfc1['city']=='ahmedabad') & (dfc1['restaurant_name']=="annkut")]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
180,annkut,4.5,prahlad nagar,38.0,ahmedabad,gujarati,209.0


In [64]:
dfc1[(dfc1['city']=='mehsana') & (dfc1['restaurant_name']=="hungritos")]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
1467,hungritos,3.7,mehsana,32.0,mehsana,"snacks, fastfood, beverages, pizzas",250.0


In [65]:
def w(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2):
    df_city = dfc[dfc['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city.loc[:,['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
     # If user_cuisines are provided, ensure they exist in the dataset
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # Check if any of the requested cuisines exist in the dataset
        if not any(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower):
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity +
                           distance_weight * distance_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    
    return top_recommendations_overall


# r(city='mehsana', user_cuisines=['chinese'])


In [66]:
w(city='mehsana', user_cuisines=['snacks'])

['hungritos',
 'murli food home-radhanpur road',
 'hotel highlaned',
 'shakti - the sandwich shop',
 'sejan chinese & punjabi']

In [67]:
dfc1[(dfc1['city']=='mehsana') & (dfc1['restaurant_name']=="zorko brand of food lovers")]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
4087,zorko brand of food lovers,5.0,mehsana,28.0,mehsana,"beverages, pizzas, fastfood, burgers",40.0


In [68]:
dfc1['cuisines']

0                 arabian, continental, american, turkish
1                 arabian, continental, american, turkish
2                                         snacks, chinese
3                                     northindian, pizzas
4       indian, chinese, continental, desserts, beverages
                              ...                        
4087                 beverages, pizzas, fastfood, burgers
4088                                               pastas
4089                   burgers, pastas, snacks, beverages
4090                                      thalis, biryani
4091                                             gujarati
Name: cuisines, Length: 4092, dtype: object

In [69]:
dfc

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
0,#shawarma,4.4,navrangpura,32.0,ahmedabad,"arabian, continental, american, turkish",100.0
1,#shawarma,4.4,navrangpura,102.0,gandhinagar,"arabian, continental, american, turkish",129.0
2,159 diner,3.3,gandhinagar,28.0,gandhinagar,"snacks, chinese",80.0
3,1944 - the hocco kitchen,4.5,sargasan,28.0,gandhinagar,"northindian, pizzas",150.0
4,1944 - the hocco kitchen,4.5,sayajiganj,28.0,vadodara,"indian, chinese, continental, desserts, beverages",75.0
...,...,...,...,...,...,...,...
4086,zorko brand of food lovers,4.1,mahadev nagar,38.0,anand,"beverages, pizzas, momos, sandwich, snacks, fa...",50.0
4087,zorko brand of food lovers,5.0,mehsana,28.0,mehsana,"beverages, pizzas, fastfood, burgers",40.0
4088,zorko-brand of food lovers,4.3,naroda,82.0,gandhinagar,pastas,99.0
4089,zoy,4.1,ghatlodia,72.4,gandhinagar,"burgers, pastas, snacks, beverages",100.0


In [70]:
dfc.shape

(4091, 7)

In [71]:
df.shape

(4092, 7)

In [72]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Function to provide recommendations based on city, cuisine, and a specific restaurant
def get_recommendations(df):
    # Get user input for city, cuisine, and restaurant name
    selected_city = input("Enter the city: ")
    selected_cuisine = input("Enter the cuisine: ")
    restaurant_name = input("Enter the restaurant name:")

    # Filter data for the specific city and cuisine
    filtered_df = df[(df['city'] == selected_city) & (df['cuisines'].str.contains(selected_cuisine, case=False))].reset_index(drop=True)
    
    # Check if there are enough restaurants after filtering
    if len(filtered_df) < 2:
        print(f"Not enough restaurants in {selected_city} for the cuisine '{selected_cuisine}' to provide recommendations.")
        return
    
    # Apply TF-IDF on 'cuisines' for the filtered data
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(filtered_df['cuisines'])
    
    # Check if the restaurant exists in the filtered dataset
    if restaurant_name not in filtered_df['restaurant_name'].values:
        print(f"The restaurant '{restaurant_name}' was not found in {selected_city} for the cuisine '{selected_cuisine}'.")
        return
    
    # Find the index of the selected restaurant
    restaurant_idx = filtered_df[filtered_df['restaurant_name'] == restaurant_name].index[0]
    
    # Calculate cosine similarity between the selected restaurant and all others
    cosine_similarities = cosine_similarity(tfidf_matrix[restaurant_idx], tfidf_matrix)
    
    # Get indices of the top 3 most similar restaurants
    similar_indices = cosine_similarities[0].argsort()[-6:][::-1]  # Get top 3 most similar excluding the restaurant itself
    
    # Exclude the first index (which will be the restaurant itself)
    similar_indices = similar_indices[1:]
    
    # Get the names of the recommended restaurants
    recommended_restaurants = filtered_df.iloc[similar_indices]['restaurant_name']
    
    # Print the recommended restaurants
    print(f"\nRecommendations for '{restaurant_name}' in {selected_city}:")
    print(recommended_restaurants.values)

# Sample DataFrame (modify this with your actual data)


# Get recommendations based on user inputt
get_recommendations(df)

Enter the city: 
Enter the cuisine: 
Enter the restaurant name:
Not enough restaurants in  for the cuisine '' to provide recommendations.


In [73]:
dfc1[(dfc1['city']=='mehsana') & (dfc1['restaurant_name']=="hotel tulsi")]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
1447,hotel tulsi,4.3,mehsana,32.0,mehsana,"southindian, chinese, fastfood, pizzas, beverages",80.0


In [74]:
def get_recommendations(df, num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.3):
    # Get user input for city, cuisine, and optionally restaurant name
    selected_city = input("Enter the city: ")
    selected_cuisine = input("Enter the cuisine: ")
    restaurant_name = input("Enter the restaurant name (or press Enter to skip): ")

    # Filter data for the specific city and cuisine
    df_city = df[(df['city'] == selected_city) & (df['cuisines'].str.contains(selected_cuisine, case=False))].reset_index(drop=True)
    
    # Check if there are enough restaurants after filtering
    if len(df_city) < 2:
        print(f"Not enough restaurants in {selected_city} for the cuisine '{selected_cuisine}' to provide recommendations.")
        return
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city[['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])

    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_matrix = tfidf.fit_transform(df_city['cuisines'])

    # Compute similarity matrices for cuisines, ratings, and distance
    cuisine_similarity = cosine_similarity(cuisine_matrix)
    rating_similarity = cosine_similarity(df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']])

    # Check if a specific restaurant name is provided
    if restaurant_name:
        # Ensure the restaurant exists in the filtered data
        if restaurant_name not in df_city['restaurant_name'].values:
            print(f"The restaurant '{restaurant_name}' was not found in {selected_city} for the cuisine '{selected_cuisine}'.")
            return

        # Find the index of the selected restaurant
        restaurant_idx = df_city[df_city['restaurant_name'] == restaurant_name].index[0]

        # Calculate weighted similarity using specified weights
        weighted_similarity = (
            cuisines_weight * cuisine_similarity[restaurant_idx] +
            ratings_weight * rating_similarity[restaurant_idx] +
            distance_weight * distance_similarity[restaurant_idx]
        )

        # Get indices of the top recommendations
        recommendations_idx = np.argsort(weighted_similarity)[-num_recommendations-1:][::-1]

        # Remove the restaurant itself from recommendations
        recommendations_idx = [idx for idx in recommendations_idx if idx != restaurant_idx][:num_recommendations]

        # Get the recommended restaurants
        recommended_restaurants = df_city.iloc[recommendations_idx]
        
        print(f"\nRecommendations similar to '{restaurant_name}' in {selected_city}:")
    
    else:
        # Aggregate similarity scores for all restaurants when no specific restaurant is provided
        cuisine_scores = cuisine_similarity.sum(axis=0)
        rating_scores = rating_similarity.sum(axis=0)
        distance_scores = distance_similarity.sum(axis=0)

        # Calculate weighted similarity for all restaurants
        weighted_similarity = (
            cuisines_weight * cuisine_scores +
            ratings_weight * rating_scores +
            distance_weight * distance_scores
        )

        # Get indices of the top recommendations
        recommendations_idx = np.argsort(weighted_similarity)[-num_recommendations:][::-1]

        # Get the recommended restaurants
        recommended_restaurants = df_city.iloc[recommendations_idx]
        
        print(f"\nTop {num_recommendations} recommendations in {selected_city} for cuisine '{selected_cuisine}':")
    
    # Display the recommendations
    print(recommended_restaurants[['restaurant_name', 'ratings', 'address', 'cuisines', 'distance']])

# Sample usage with a DataFrame (make sure to replace df with your actual data)
get_recommendations(df)

Enter the city: 
Enter the cuisine: 
Enter the restaurant name (or press Enter to skip): 
Not enough restaurants in  for the cuisine '' to provide recommendations.


In [75]:
def get(selected_city,selected_cuisine,restaurant_name=None,num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.3):
    # Get user input for city, cuisine, and optionally restaurant name


    # Filter data for the specific city and cuisine
    df_city = dfc[(dfc['city'] == selected_city) & (dfc['cuisines'].str.contains(selected_cuisine, case=False))].reset_index(drop=True)
    
    # Check if there are enough restaurants after filtering
    if len(df_city) < 2:
        print(f"Not enough restaurants in {selected_city} for the cuisine '{selected_cuisine}' to provide recommendations.")
        return
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    df_city[['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    
    df_city['ratings_origin']=df_city['ratings']
    df_city['distance_origin']=df_city['distance']

    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_matrix = tfidf.fit_transform(df_city['cuisines'])

    # Compute similarity matrices for cuisines, ratings, and distance
    cuisine_similarity = cosine_similarity(cuisine_matrix)
    rating_similarity = cosine_similarity(df_city[['ratings']])
    distance_similarity = cosine_similarity(df_city[['distance']])

    # Check if a specific restaurant name is provided
    if restaurant_name:
        # Ensure the restaurant exists in the filtered data
        if restaurant_name not in df_city['restaurant_name'].values:
            print(f"The restaurant '{restaurant_name}' was not found in {selected_city} for the cuisine '{selected_cuisine}'.")
            return

        # Find the index of the selected restaurant
        restaurant_idx = df_city[df_city['restaurant_name'] == restaurant_name].index[0]

        # Calculate weighted similarity using specified weights
        weighted_similarity = (
            cuisines_weight * cuisine_similarity[restaurant_idx] +
            ratings_weight * rating_similarity[restaurant_idx] +
            distance_weight * distance_similarity[restaurant_idx]
        )

        # Get indices of the top recommendations
        recommendations_idx = np.argsort(weighted_similarity)[-num_recommendations-1:][::-1]

        # Remove the restaurant itself from recommendations
        recommendations_idx = [idx for idx in recommendations_idx if idx != restaurant_idx][:num_recommendations]

        # Get the recommended restaurants
        recommended_restaurants = df_city.iloc[recommendations_idx]
        
        print(f"\nRecommendations similar to '{restaurant_name}' in {selected_city}:")
    
    else:
        # Aggregate similarity scores for all restaurants when no specific restaurant is provided
        cuisine_scores = cuisine_similarity.sum(axis=0)
        rating_scores = rating_similarity.sum(axis=0)
        distance_scores = distance_similarity.sum(axis=0)

        # Calculate weighted similarity for all restaurants
        weighted_similarity = (
            cuisines_weight * cuisine_scores +
            ratings_weight * rating_scores +
            distance_weight * distance_scores
        )

        # Get indices of the top recommendations
        recommendations_idx = np.argsort(weighted_similarity)[-num_recommendations:][::-1]

        # Get the recommended restaurants
        recommended_restaurants = df_city.iloc[recommendations_idx]
        
        print(f"\nTop {num_recommendations} recommendations in {selected_city} for cuisine '{selected_cuisine}':")
    
    # Display the recommendations
    print(recommended_restaurants[['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'distance_origin']])

# Sample usage with a DataFrame (make sure to replace df with your actual data)

In [76]:
get(selected_city='mehsana',selected_cuisine='pizzas')


Top 5 recommendations in mehsana for cuisine 'pizzas':
               restaurant_name  ratings_origin  address  \
29  zorko brand of food lovers        1.000000  mehsana   
11          jay ambe fast food        0.461538  mehsana   
18                  patel cafe        0.538462  mehsana   
8              hotel highlaned        0.538462  mehsana   
3             bhukhkhad's cafe        0.307692  mehsana   

                                             cuisines  distance_origin  
29               beverages, pizzas, fastfood, burgers           0.5000  
11                        fastfood, pizzas, beverages           0.2000  
18  chinese, desserts, beverages, icecream, snacks...           0.3000  
8   chinese, punjabi, beverages, snacks, pizzas, f...           0.5400  
3   desserts, beverages, salads, snacks, pizzas, p...           0.1335  


In [77]:
def rp_or_get_recommendations(city, user_cuisines='fastfood', num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2, restaurant_name=None):
    """
    Recommends restaurants based on either:
    - City and cuisine preferences (first logic), or
    - A specific restaurant's name (second logic).
    """
    
    # If restaurant_name is provided, use the second logic for recommendations
    if restaurant_name:
        # Filter data for the specific city and cuisine
        filtered_df = dfc[(dfc['city'] == city) & (dfc['cuisines'].str.contains(user_cuisines, case=False))].reset_index(drop=True)
        
        # Check if there are enough restaurants after filtering
        if len(filtered_df) < 2:
            print(f"Not enough restaurants in {city} for the cuisine '{user_cuisines}' to provide recommendations.")
            return
        
        # Apply TF-IDF on 'cuisines' for the filtered data
        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(filtered_df['cuisines'])
        
        # Check if the restaurant exists in the filtered dataset
        if restaurant_name not in filtered_df['restaurant_name'].values:
            print(f"The restaurant '{restaurant_name}' was not found in {city} for the cuisine '{user_cuisines}'.")
            return
        
        # Find the index of the selected restaurant
        restaurant_idx = filtered_df[filtered_df['restaurant_name'] == restaurant_name].index[0]
        
        # Calculate cosine similarity between the selected restaurant and all others
        cosine_similarities = cosine_similarity(tfidf_matrix[restaurant_idx], tfidf_matrix)
        
        # Get indices of the top 5 most similar restaurants
        similar_indices = cosine_similarities[0].argsort()[-(num_recommendations + 1):][::-1]  # Get top 5 most similar excluding the restaurant itself
        
        # Exclude the first index (which will be the restaurant itself)
        similar_indices = similar_indices[1:]
        
        # Get the names of the recommended restaurants
        recommended_restaurants = filtered_df.iloc[similar_indices]['restaurant_name']
        
        # Print the recommended restaurants
        print(f"\nRecommendations for '{restaurant_name}' in {city}:")
        print(recommended_restaurants.values)
        return

    # If no restaurant_name is provided, use the first logic for recommendations
    else:
        df_city = dfc[dfc['city'] == city]
        df_city['ratings_origin'] = df_city['ratings']
        

        # Scale ratings and distance 
        scaler = MinMaxScaler()
        df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])

        # Compute TF-IDF for cuisines
        tfidf = TfidfVectorizer()
        cuisine_metric = tfidf.fit_transform(df_city['cuisines'])

        message = ""
        if user_cuisines:
            available_cuisines = df_city['cuisines'].str.lower().unique()
            user_cuisines_lower = [c.lower() for c in user_cuisines]

            # Check if user-provided cuisines exist in the dataset
            matched_cuisines = [c for c in user_cuisines_lower if any(c in cuisine for cuisine in available_cuisines)]

            if not matched_cuisines:  # No matching cuisines found
                return pd.DataFrame(columns=['restaurant_name', 'ratings_origin', 'address', 'cuisines',
                                             'distance_origin']), "No restaurants offering the requested cuisines: {}".format(', '.join(user_cuisines))

            # If the cuisines are present but less popular
            if len(matched_cuisines) < 2:
                message = "Dear user, your requested cuisine(s) are less popular, so the recommendations are based on overall higher ratings, higher cuisines similarity score, or distance."

        # Compute similarity matrices
        rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
        distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
        np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
        cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)

        # Apply user preferences to cuisine similarity
        if user_cuisines:
            user_cuisines_str = ', '.join(user_cuisines)
            user_cuisine_vector = tfidf.transform([user_cuisines_str])
            user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()

            # Boost cuisine similarity for matching restaurants
            cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]

        # Calculate weighted similarity
        weighted_similarity = (cuisines_weight * cuisine_similarity +
                               ratings_weight * rating_similarity +
                               distance_weight * distance_similarity)

        # Create similarity DataFrame
        similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'],
                                     index=df_city['restaurant_name'])

        # Aggregate similarity scores for all restaurants
        aggregated_scores = similarity_df.sum(axis=0)

        # Get the top N recommendations based on aggregated scores
        top_recommendations = aggregated_scores.nlargest(num_recommendations).index.tolist()

        # Create a DataFrame for the top recommendations with specific columns
        top_recommendations_df = df_city[df_city['restaurant_name'].isin(top_recommendations)].copy()

        # Select only the specified columns
        top_recommendations_df = top_recommendations_df[
            ['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']]

        # Add a message column to the recommendations DataFrame
        top_recommendations_df['message'] = message

        return top_recommendations_df, message


In [78]:
rp_or_get_recommendations(city='mehsana',user_cuisines='pizzas')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city['ratings_origin'] = df_city['ratings']


(          restaurant_name  ratings_origin  address  \
 22       7 heart pizzeria             4.2  mehsana   
 93             al - shahi             4.0  mehsana   
 108  all by biryani chefs             3.2  mehsana   
 148      amrutsari kulcha             4.4  mehsana   
 227   ashapura restaurant             4.4  mehsana   
 
                          cuisines  pricing_impute message  
 22               pizzas, fastfood            80.0          
 93               mughlai, biryani           100.0          
 108   biryani, mughlai, beverages            50.0          
 148            punjabi, beverages            89.0          
 227  pizzas, northindian, chinese            40.0          ,
 '')

In [79]:
def rp_or_get_recommendations2(city, user_cuisines='fastfood', num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2, restaurant_name=None):
    """
    Recommends restaurants based on either:
    - City and cuisine preferences (first logic), or
    - A specific restaurant's name (second logic).
    """
    
    # Ensure user_cuisines is always a list
    if isinstance(user_cuisines, str):
        user_cuisines = [user_cuisines]
    
    # If restaurant_name is provided, use the second logic for recommendations
    if restaurant_name:
        # Filter data for the specific city and cuisine
        filtered_df = dfc[(dfc['city'] == city) & (dfc['cuisines'].str.contains('|'.join(user_cuisines), case=False))].reset_index(drop=True)
        
        # Check if there are enough restaurants after filtering
        if len(filtered_df) < 2:
            print(f"Not enough restaurants in {city} for the cuisine(s) '{', '.join(user_cuisines)}' to provide recommendations.")
            return
        
        # Apply TF-IDF on 'cuisines' for the filtered data
        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(filtered_df['cuisines'])
        
        # Check if the restaurant exists in the filtered dataset
        if restaurant_name not in filtered_df['restaurant_name'].values:
            print(f"The restaurant '{restaurant_name}' was not found in {city} for the cuisine(s) '{', '.join(user_cuisines)}'.")
            return
        
        # Find the index of the selected restaurant
        restaurant_idx = filtered_df[filtered_df['restaurant_name'] == restaurant_name].index[0]
        
        # Calculate cosine similarity between the selected restaurant and all others
        cosine_similarities = cosine_similarity(tfidf_matrix[restaurant_idx], tfidf_matrix)
        
        # Get indices of the top 5 most similar restaurants
        similar_indices = cosine_similarities[0].argsort()[-(num_recommendations + 1):][::-1]  # Get top 5 most similar excluding the restaurant itself
        
        # Exclude the first index (which will be the restaurant itself)
        similar_indices = similar_indices[1:]
        
        # Get the names of the recommended restaurants
        recommended_restaurants = filtered_df.iloc[similar_indices]['restaurant_name']
        
        # Print the recommended restaurants
        print(f"\nRecommendations for '{restaurant_name}' in {city}:")
        print(recommended_restaurants.values)
        return

    # If no restaurant_name is provided, use the first logic for recommendations
    else:
        df_city = dfc[dfc['city'] == city]
        df_city['ratings_origin'] = df_city['ratings']
        
        # Scale ratings and distance 
        scaler = MinMaxScaler()
        df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])

        # Compute TF-IDF for cuisines
        tfidf = TfidfVectorizer()
        cuisine_metric = tfidf.fit_transform(df_city['cuisines'])

        message = ""
        if user_cuisines:
            available_cuisines = df_city['cuisines'].str.lower().unique()
            user_cuisines_lower = [c.lower() for c in user_cuisines]

            # Check if user-provided cuisines exist in the dataset
            matched_cuisines = [c for c in user_cuisines_lower if any(c in cuisine for cuisine in available_cuisines)]

            if not matched_cuisines:  # No matching cuisines found
                return pd.DataFrame(columns=['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'distance_origin']), "No restaurants offering the requested cuisines: {}".format(', '.join(user_cuisines))

            # If the cuisines are present but less popular
            if len(matched_cuisines) < 2:
                message = "Dear user, your requested cuisine(s) are less popular, so the recommendations are based on overall higher ratings, higher cuisines similarity score, or distance."

        # Compute similarity matrices
        rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
        distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
        np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
        cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)

        # Apply user preferences to cuisine similarity
        if user_cuisines:
            user_cuisines_str = ', '.join(user_cuisines)
            user_cuisine_vector = tfidf.transform([user_cuisines_str])
            user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()

            # Boost cuisine similarity for matching restaurants
            cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]

        # Calculate weighted similarity
        weighted_similarity = (cuisines_weight * cuisine_similarity +
                               ratings_weight * rating_similarity +
                               distance_weight * distance_similarity)

        # Create similarity DataFrame
        similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'],
                                     index=df_city['restaurant_name'])

        # Aggregate similarity scores for all restaurants
        aggregated_scores = similarity_df.sum(axis=0)

        # Get the top N recommendations based on aggregated scores
        top_recommendations = aggregated_scores.nlargest(num_recommendations).index.tolist()

        # Create a DataFrame for the top recommendations with specific columns
        top_recommendations_df = df_city[df_city['restaurant_name'].isin(top_recommendations)].copy()

        # Select only the specified columns
        top_recommendations_df = top_recommendations_df[
            ['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']]

        # Add a message column to the recommendations DataFrame
        top_recommendations_df['message'] = message

        return top_recommendations_df, message


In [80]:
rp_or_get_recommendations2(city='mehsana',user_cuisines=['pizzas'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_city['ratings_origin'] = df_city['ratings']


(                 restaurant_name  ratings_origin         address  \
 22              7 heart pizzeria             4.2         mehsana   
 1467                   hungritos             3.7         mehsana   
 1562          jay ambe fast food             4.3         mehsana   
 1949                london wraps             4.2  radhanpur road   
 4087  zorko brand of food lovers             5.0         mehsana   
 
                                   cuisines  pricing_impute  \
 22                        pizzas, fastfood            80.0   
 1467   snacks, fastfood, beverages, pizzas           250.0   
 1562           fastfood, pizzas, beverages            40.0   
 1949                      fastfood, pizzas            80.0   
 4087  beverages, pizzas, fastfood, burgers            40.0   
 
                                                 message  
 22    Dear user, your requested cuisine(s) are less ...  
 1467  Dear user, your requested cuisine(s) are less ...  
 1562  Dear user, your requ

In [81]:
def rp_or_get_recommendations3(city, user_cuisines='fastfood', num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2, restaurant_name=None):
    """
    Recommends restaurants based on either:
    - City and cuisine preferences (first logic), or
    - A specific restaurant's name (second logic).
    """
    
    # Ensure user_cuisines is always a list
    if isinstance(user_cuisines, str):
        user_cuisines = [user_cuisines]
        
      
    
    # If restaurant_name is provided, use the second logic for recommendations
    if restaurant_name:
        # Filter data for the specific city and cuisine
        filtered_df = dfc[(dfc['city'] == city) & (dfc['cuisines'].str.contains('|'.join(user_cuisines), case=False))].reset_index(drop=True)
        
         
        
        # Check if there are enough restaurants after filtering
        if len(filtered_df) < 2:
            print(f"Not enough restaurants in {city} for the cuisine(s) '{', '.join(user_cuisines)}' to provide recommendations.")
            return
        
        # Apply TF-IDF on 'cuisines' for the filtered data
        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(filtered_df['cuisines'])
        
        # Check if the restaurant exists in the filtered dataset
        if restaurant_name not in filtered_df['restaurant_name'].values:
            print(f"The restaurant '{restaurant_name}' was not found in {city} for the cuisine(s) '{', '.join(user_cuisines)}'.")
            return
        
        # Find the index of the selected restaurant
        restaurant_idx = filtered_df[filtered_df['restaurant_name'] == restaurant_name].index[0]
        
        # Calculate cosine similarity between the selected restaurant and all others
        cosine_similarities = cosine_similarity(tfidf_matrix[restaurant_idx], tfidf_matrix)
        
        # Get indices of the top 5 most similar restaurants
        similar_indices = cosine_similarities[0].argsort()[-(num_recommendations + 1):][::-1]  # Get top 5 most similar excluding the restaurant itself
        
        # Exclude the first index (which will be the restaurant itself)
        similar_indices = similar_indices[1:]
        
        # Get the recommended restaurants based on similarity
        recommended_restaurants = filtered_df.iloc[similar_indices]

        # Selecting the specific columns you want
        recommended_restaurants = recommended_restaurants[
            ['restaurant_name', 'ratings', 'address', 'cuisines', 'pricing_impute']]

        # Rename the 'ratings' column to 'ratings_origin' to match your desired output format
        recommended_restaurants.rename(columns={'ratings': 'ratings_origin'}, inplace=True)

        # Print the recommended restaurants
        print(f"\nRecommendations for '{restaurant_name}' in {city}:")
        print(recommended_restaurants)
        return recommended_restaurants

    # If no restaurant_name is provided, use the first logic for recommendations
    else:
        df_city = dfc[dfc['city'] == city]
        df_city['ratings_origin'] = df_city['ratings']
        
        # Scale ratings and distance 
        scaler = MinMaxScaler()
        df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])

        # Compute TF-IDF for cuisines
        tfidf = TfidfVectorizer()
        cuisine_metric = tfidf.fit_transform(df_city['cuisines'])

        message = ""
        if user_cuisines:
            available_cuisines = df_city['cuisines'].str.lower().unique()
            user_cuisines_lower = [c.lower() for c in user_cuisines]

            # Check if user-provided cuisines exist in the dataset
            matched_cuisines = [c for c in user_cuisines_lower if any(c in cuisine for cuisine in available_cuisines)]

            if not matched_cuisines:  # No matching cuisines found
                return pd.DataFrame(columns=['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']), "No restaurants offering the requested cuisines: {}".format(', '.join(user_cuisines))

            # If the cuisines are present but less popular
            if len(matched_cuisines) < 2:
                message = "Dear user, your requested cuisine(s) are less popular, so the recommendations are based on overall higher ratings, higher cuisines similarity score, or distance."

        # Compute similarity matrices
        rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
        distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
        np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
        cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)

        # Apply user preferences to cuisine similarity
        if user_cuisines:
            user_cuisines_str = ', '.join(user_cuisines)
            user_cuisine_vector = tfidf.transform([user_cuisines_str])
            user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()

            # Boost cuisine similarity for matching restaurants
            cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]

        # Calculate weighted similarity
        weighted_similarity = (cuisines_weight * cuisine_similarity +
                               ratings_weight * rating_similarity +
                               distance_weight * distance_similarity)

        # Create similarity DataFrame
        similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'],
                                     index=df_city['restaurant_name'])

        # Aggregate similarity scores for all restaurants
        aggregated_scores = similarity_df.sum(axis=0)

        # Get the top N recommendations based on aggregated scores
        top_recommendations = aggregated_scores.nlargest(num_recommendations).index.tolist()

        # Create a DataFrame for the top recommendations with specific columns
        top_recommendations_df = df_city[df_city['restaurant_name'].isin(top_recommendations)].copy()

        # Select only the specified columns
        top_recommendations_df = top_recommendations_df[
            ['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']]

        # Add a message column to the recommendations DataFrame
        top_recommendations_df['message'] = message

        return top_recommendations_df, message


In [82]:
rp_or_get_recommendations3(city='mehsana',user_cuisines=['pizzas'],restaurant_name='jay ambe fast food')


Recommendations for 'jay ambe fast food' in mehsana:
               restaurant_name  ratings_origin         address  \
17                moment mocha             4.1         mehsana   
10                   hungritos             3.7         mehsana   
29  zorko brand of food lovers             5.0         mehsana   
14                london wraps             4.2  radhanpur road   
0             7 heart pizzeria             4.2         mehsana   

                                cuisines  pricing_impute  
17   beverages, fastfood, pizzas, snacks           100.0  
10   snacks, fastfood, beverages, pizzas           250.0  
29  beverages, pizzas, fastfood, burgers            40.0  
14                      fastfood, pizzas            80.0  
0                       pizzas, fastfood            80.0  


Unnamed: 0,restaurant_name,ratings_origin,address,cuisines,pricing_impute
17,moment mocha,4.1,mehsana,"beverages, fastfood, pizzas, snacks",100.0
10,hungritos,3.7,mehsana,"snacks, fastfood, beverages, pizzas",250.0
29,zorko brand of food lovers,5.0,mehsana,"beverages, pizzas, fastfood, burgers",40.0
14,london wraps,4.2,radhanpur road,"fastfood, pizzas",80.0
0,7 heart pizzeria,4.2,mehsana,"pizzas, fastfood",80.0


In [83]:
def rp_or_get_recommendations55(city, user_cuisines='fastfood', num_recommendations=5, cuisines_weight=0.5, ratings_weight=0.3, distance_weight=0.2, restaurant_name=None):
    """
    Recommends restaurants based on either:
    - City and cuisine preferences (first logic), or
    - A specific restaurant's name (second logic).
    """
    
    # Ensure user_cuisines is always a list
    if isinstance(user_cuisines, str):
        user_cuisines = [user_cuisines]
    
    # If restaurant_name is provided, use the second logic for recommendations
    if restaurant_name:
        # Filter data for the specific city and cuisine
        filtered_df = dfc[(dfc['city'] == city) & (dfc['cuisines'].str.contains('|'.join(user_cuisines), case=False))].reset_index(drop=True)
        filtered_df['ratings_origin'] = filtered_df['ratings']
        
        # Check if there are enough restaurants after filtering
        if len(filtered_df) < 2:
            return pd.DataFrame(columns=['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']), \
                f"Not enough restaurants in {city} for the cuisine(s) '{', '.join(user_cuisines)}' to provide recommendations."
        
        # Apply TF-IDF on 'cuisines' for the filtered data
        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(filtered_df['cuisines'])
        
        # Check if the restaurant exists in the filtered dataset
        if restaurant_name not in filtered_df['restaurant_name'].values:
            return pd.DataFrame(columns=['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']), \
                f"The restaurant '{restaurant_name}' was not found in {city} for the cuisine(s) '{', '.join(user_cuisines)}'."
        
        # Find the index of the selected restaurant
        restaurant_idx = filtered_df[filtered_df['restaurant_name'] == restaurant_name].index[0]
        
        # Calculate cosine similarity between the selected restaurant and all others
        cosine_similarities = cosine_similarity(tfidf_matrix[restaurant_idx], tfidf_matrix)
        
        # Get indices of the top 5 most similar restaurants
        similar_indices = cosine_similarities[0].argsort()[-(num_recommendations + 1):][::-1]  # Get top 5 most similar excluding the restaurant itself
        
        # Exclude the first index (which will be the restaurant itself)
        similar_indices = similar_indices[1:]
        
        # Get the recommended restaurants
        recommended_restaurants = filtered_df.iloc[similar_indices][['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']]
        
        # Return the DataFrame with recommendations
        return recommended_restaurants, None  # No message needed as it is included in the result
    
    # If no restaurant_name is provided, use the first logic for recommendations
    else:
        # Original logic for recommendations when no restaurant_name is provided
        df_city = dfc[dfc['city'] == city].copy()
        df_city.loc[:, 'ratings_origin'] = df_city['ratings']
        # Scale ratings and distance 
        scaler = MinMaxScaler()
        df_city.loc[:, ['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])

        # Compute TF-IDF for cuisines
        tfidf = TfidfVectorizer()
        cuisine_metric = tfidf.fit_transform(df_city['cuisines'])

        message = ""
        if user_cuisines:
            available_cuisines = df_city['cuisines'].str.lower().unique()
            user_cuisines_lower = [c.lower() for c in user_cuisines]

            # Check if user-provided cuisines exist in the dataset
            matched_cuisines = [c for c in user_cuisines_lower if any(c in cuisine for cuisine in available_cuisines)]

            if not matched_cuisines:  # No matching cuisines found
                return pd.DataFrame(columns=['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'distance_origin']), "No restaurants offering the requested cuisines: {}".format(', '.join(user_cuisines))

            # If the cuisines are present but less popular
            if len(matched_cuisines) < 1:
                message = "Dear user, your requested cuisine(s) are less popular, so the recommendations are based on overall higher ratings, higher cuisines similarity score, or distance."

        # Compute similarity matrices
        rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
        distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
        np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
        cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)

        # Apply user preferences to cuisine similarity
        if user_cuisines:
            user_cuisines_str = ', '.join(user_cuisines)
            user_cuisine_vector = tfidf.transform([user_cuisines_str])
            user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()

            # Boost cuisine similarity for matching restaurants
            cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]

        # Calculate weighted similarity
        weighted_similarity = (cuisines_weight * cuisine_similarity +
                               ratings_weight * rating_similarity +
                               distance_weight * distance_similarity)

        # Create similarity DataFrame
        similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'],
                                     index=df_city['restaurant_name'])

        # Aggregate similarity scores for all restaurants
        aggregated_scores = similarity_df.sum(axis=0)

        # Get the top N recommendations based on aggregated scores
        top_recommendations = aggregated_scores.nlargest(num_recommendations).index.tolist()

        # Create a DataFrame for the top recommendations with specific columns
        top_recommendations_df = df_city[df_city['restaurant_name'].isin(top_recommendations)].copy()

        # Select only the specified columns
        top_recommendations_df = top_recommendations_df[
            ['restaurant_name', 'ratings_origin', 'address', 'cuisines', 'pricing_impute']]

        # Add a message column to the recommendations DataFrame
        top_recommendations_df['message'] = message

        return top_recommendations_df, message


In [84]:
rp_or_get_recommendations55(city='mehsana',user_cuisines=['chinese'],restaurant_name='food adda')

(          restaurant_name  ratings_origin                      address  \
 7     jay bhavani vadapav             4.2                      mehsana   
 5             hotel tulsi             4.3                      mehsana   
 4         hotel highlaned             4.4                      mehsana   
 18   shree sai restaurant             4.4  tirupati shahibaug township   
 19  the foodies fast food             3.8            geb visnagar road   
 
                                              cuisines  pricing_impute  
 7                                   fastfood, chinese            40.0  
 5   southindian, chinese, fastfood, pizzas, beverages            80.0  
 4   chinese, punjabi, beverages, snacks, pizzas, f...            40.0  
 18                         chinese, punjabi, fastfood           150.0  
 19                 chinese, snacks, beverages, pizzas            80.0  ,
 None)

In [90]:
def ex(city, user_cuisines=None, num_recommendations=5, cuisines_weight=0.6, ratings_weight=0.4):
    
    
    df_city = dfc[dfc['city'] == city]
    
    # Scale ratings and distance
    scaler = MinMaxScaler()
    #df_city.loc[:,['ratings', 'distance']] = scaler.fit_transform(df_city[['ratings', 'distance']])
    df_city.loc[:,['ratings']] = scaler.fit_transform(df_city[['ratings']])
    
    
    # Compute TF-IDF for cuisines
    tfidf = TfidfVectorizer()
    cuisine_metric = tfidf.fit_transform(df_city['cuisines'])
    
     # If user_cuisines are provided, ensure they exist in the dataset
    if user_cuisines:
        available_cuisines = df_city['cuisines'].str.lower().unique()
        user_cuisines_lower = [c.lower() for c in user_cuisines]
        
        # Check if any of the requested cuisines exist in the dataset
        if not any(cuisine in ', '.join(available_cuisines) for cuisine in user_cuisines_lower):
            return f"No restaurants offering the requested cuisines: {', '.join(user_cuisines)}."
        
    
    # Compute similarity matrices
    rating_similarity = cosine_similarity(df_city[['ratings']], df_city[['ratings']])
    #distance_similarity = cosine_similarity(df_city[['distance']], df_city[['distance']])
    #np.fill_diagonal(distance_similarity, 1.0)  # Adjust distance similarity for self-comparisons
    cuisine_similarity = cosine_similarity(cuisine_metric, cuisine_metric)
    
    # Apply user preferences to cuisine similarity
    if user_cuisines:
        user_cuisines_str = ', '.join(user_cuisines)
        user_cuisine_vector = tfidf.transform([user_cuisines_str])
        user_cuisine_similarity = cosine_similarity(cuisine_metric, user_cuisine_vector).flatten()
        
        # Boost cuisine similarity for matching restaurants
        cuisine_similarity *= user_cuisine_similarity[:, np.newaxis]
    
    # Calculate weighted similarity
    weighted_similarity = (cuisines_weight * cuisine_similarity +
                           ratings_weight * rating_similarity)
    
    # Create similarity DataFrame
    similarity_df = pd.DataFrame(weighted_similarity, columns=df_city['restaurant_name'], index=df_city['restaurant_name'])
    
    # Aggregate similarity scores for all restaurants
    aggregated_scores = similarity_df.sum(axis=0)
    
    # Get the top N recommendations based on aggregated scores
    top_recommendations_overall = aggregated_scores.nlargest(num_recommendations).index.tolist()
    
    
    return top_recommendations_overall


# r(city='mehsana', user_cuisines=['chinese'])

In [91]:
ex(city='mehsana',user_cuisines=['snacks'])

['hungritos',
 'moment mocha',
 'the foodies fast food',
 'murli food home-radhanpur road',
 'hotel highlaned']

In [92]:
dfc[(dfc['city']=='mehsana') & (dfc['restaurant_name']=="hotel highlaned")]

Unnamed: 0,restaurant_name,ratings,address,distance,city,cuisines,pricing_impute
1428,hotel highlaned,4.4,mehsana,28.8,mehsana,"chinese, punjabi, beverages, snacks, pizzas, f...",40.0
