In [1]:
# Step 1:
# Import libraries
import pandas as pd
import pickle
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import plotly.express as px

In [2]:
# Step 2:
# loading the dataset
df = pd.read_csv('dataset/swiggy.csv')

In [3]:
# Step 3:
# Row with name : NA are dropped because important feature like rating, rating_count, cost, cuisine, lic_no is also NA
df = df.dropna(subset=['name'])

In [4]:
# Step 4:
# Converting cost to float
df["cost"] = df["cost"].replace('₹ ', "", regex=True).astype(float)

In [7]:
df.columns

Index(['id', 'name', 'city', 'rating', 'rating_count', 'cost', 'cuisine',
       'lic_no', 'link', 'address', 'menu'],
      dtype='object')

In [5]:
# Step 5:
# Filling missing values in cost and cuisine columns
for col in ["cost", "cuisine"]:
     default_value = 0 if col == "cost" else "Unknown"
     df[col] = (
          df.groupby('name')[col]
          .transform(lambda x: x.fillna(method='ffill').fillna(method='bfill'))
          .fillna(default_value)
          )

  .transform(lambda x: x.fillna(method='ffill').fillna(method='bfill'))
  .transform(lambda x: x.fillna(method='ffill').fillna(method='bfill'))
  .transform(lambda x: x.fillna(method='ffill').fillna(method='bfill'))


In [6]:
# Step 6:
# Converting rating to float
df["rating"]=df["rating"].replace('--',0).astype(float)

In [7]:
df_backup = df.copy()

In [None]:
df=df_backup.copy()

In [7]:
# Step 7:
# Splitting city into area and main city
df[["area", "city_main"]] = df["city"].str.rsplit(',', n=1, expand=True)

# df["city_main"] = df["city"].astype(str).str.split(',').str[1].str.strip()

# cleaning area and city_main columns
df["area"] = df["area"].str.strip()

df["city_main"] = df["city_main"].str.strip()
# Filling missing values in city_main column
df['city_main'].fillna("Other", inplace=True)



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['city_main'].fillna("Other", inplace=True)


In [8]:
location = {}
for index, row in df.iterrows():
    city = row['city_main'].lower()
    area = row['area'].lower()
    if city not in location:
        location[city] = []
    if area not in location[city]:
        location[city].append(area)

location
pickle.dump(location, open('pickles/city_area.pkl', 'wb'))

In [9]:
# step 8:
df.drop(columns=['city'], inplace=True)

In [10]:
# Step 9:
# splitting cuisine into cuisine_1 and cuisine_2
# df[["cuisine_1", "cuisine_2"]] = df["cuisine"].str.split(',', n=1, expand=True)
df[["cuisine_1", "cuisine_2"]] = df["cuisine"].str.split(',', n=1, expand=True).apply(lambda c: c.str.strip())


In [None]:
# cleaning cuisine_1 and cuisine_2 columns
# cuisine_types = df["cuisine_1"].unique().tolist()+ df["cuisine_2"].unique().tolist()
cuisine_types = list(set(df["cuisine_1"].dropna()).union(df["cuisine_2"].dropna()))
# pd.DataFrame(cuisine_types, columns=["cuisine_type"]).to_csv('dataset/cuisine_types.csv', index=False)



In [11]:
remove_cuisine = ["8:15 To 11:30 Pm","Attractive Combos Available","Code valid on bill over Rs.99","Combo","Default","Discount offer from Garden Cafe Express Kankurgachi","Free Delivery ! Limited Stocks!","Grocery products","MAX 2 Combos per Order!","Meat","Popular Brand Store","Special Discount from (Hotel Swagath)","SVANidhi Street Food Vendor","Use Code JUMBO30 to avail", "Use code XPRESS121 to avail.","Unknown"]
for cuisine in ["cuisine_1", "cuisine_2"]:
     df[cuisine] = df[cuisine].replace(remove_cuisine, "Other")
     df[cuisine] = df[cuisine].replace("Bakery products", "Bakery")
     df[cuisine] = df[cuisine].replace("BEVERAGE", "Beverages")
     df[cuisine] = df[cuisine].replace("Biryani - Shivaji Military Hotel", "Biryani")

In [12]:
pickle.dump(df, open('pickles/processed_df.pkl', 'wb'))

In [13]:
cuisine_types = list(set(df["cuisine_1"].dropna()).union(df["cuisine_2"].dropna()))
pickle.dump(cuisine_types, open('pickles/cuisines.pkl', 'wb'))
# len(cuisine_types)

In [14]:
# Step 10:
# Enconding cuisine_1 and cuisine_2 columns using OneHotEncoder and saving it as pickle file
oneHot_encoder_cuisine = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encode_oneHot_cuisine = oneHot_encoder_cuisine.fit_transform(df[["cuisine_1", "cuisine_2"]]) 
pickle.dump(oneHot_encoder_cuisine, open('pickles/oneHot_cuisine.pkl', 'wb'))
encoded_df_oneHot_cuisine = pd.DataFrame(encode_oneHot_cuisine, columns=oneHot_encoder_cuisine.get_feature_names_out(['cuisine_1', 'cuisine_2']))
df = pd.concat([df.reset_index(drop=True), encoded_df_oneHot_cuisine.reset_index(drop=True)], axis=1)
df.drop(columns=['cuisine_1', 'cuisine_2', 'cuisine'], inplace=True)

In [15]:
# Step 11:
# Enconding city_main column using OneHotEncoder and saving it as pickle file
oneHot_encoder_city = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encode_oneHot_city = oneHot_encoder_city.fit_transform(df[['city_main']])
pickle.dump(oneHot_encoder_city, open('pickles/oneHot_city.pkl', 'wb'))
encoded_df_oneHot_city = pd.DataFrame(encode_oneHot_city, columns=oneHot_encoder_city.get_feature_names_out(['city_main']))
df = pd.concat([df.reset_index(drop=True), encoded_df_oneHot_city.reset_index(drop=True)], axis=1)
df.drop(columns=['city_main'], inplace=True)

In [16]:
# Step 12:
area_to_oneHot = False  # Set to False to use LabelEncoder instead of OneHotEncoder
if area_to_oneHot:
    # Encoding area column using OneHotEncoder and saving it as pickle file
    oneHot_encoder_area = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    encode_oneHot_area = oneHot_encoder_area.fit_transform(df[['area']])
    pickle.dump(oneHot_encoder_area, open('pickles/oneHot_area.pkl', 'wb'))
    encoded_df_oneHot_area = pd.DataFrame(encode_oneHot_area, columns=oneHot_encoder_area.get_feature_names_out(['area']))
    df = pd.concat([df.reset_index(drop=True), encoded_df_oneHot_area.reset_index(drop=True)], axis=1)
    df.drop(columns=['area'], inplace=True)
else:
    print(df["area"])
    # Encoding area column using LabelEncoder and saving it as pickle file
    label_encoder_area = LabelEncoder()
    df["area"]= label_encoder_area.fit_transform(df['area'])
    pickle.dump(label_encoder_area, open('pickles/label_area.pkl', 'wb'))
    

0           Abohar
1           Abohar
2           Abohar
3           Abohar
4           Abohar
            ...   
148450    Yavatmal
148451    Yavatmal
148452    Yavatmal
148453    Yavatmal
148454    Yavatmal
Name: area, Length: 148455, dtype: object


In [17]:
# Step 13:
# Encoding rating_count column using LabelEncoder and saving it as pickle file
pickle.dump(df["rating_count"], open('pickles/rating_counts.pkl', 'wb'))
label_encoder_rating_count = LabelEncoder()
df["rating_count"]=label_encoder_rating_count.fit_transform(df["rating_count"])
pickle.dump(label_encoder_rating_count, open('pickles/encoder_rating_count.pkl', 'wb'))

In [18]:
# Step 14:
# Drop lic_no, address, menu and link columns as they are not impactfull for analysis
df.drop(columns=['name','lic_no', 'address', 'menu', 'link'], inplace=True)

In [19]:
# Step 15:
# index id
df.set_index('id', inplace=True)

In [20]:
# Step 16:
# clustering with optimal number of clusters
from sklearn.cluster import KMeans
best_k = 6  # From elbow graph
model = KMeans(n_clusters=best_k).fit(df)
df['cluster'] = model.predict(df)

# pickle.dump(df, open('pickles/encoded_df.pkl', 'wb'))
pickle.dump(model, open('pickles/kmeans_model.pkl', 'wb'))

In [None]:
df.reset_index(inplace=True)
df.columns

(0         567335
 1         531342
 2         158203
 3         187912
 4         543530
            ...  
 148450    553122
 148451    562647
 148452    559435
 148453    418989
 148454    447770
 Name: id, Length: 148455, dtype: int64,
 0         2
 1         2
 2         2
 3         2
 4         2
          ..
 148450    5
 148451    5
 148452    5
 148453    5
 148454    5
 Name: cluster, Length: 148455, dtype: int32)

In [None]:
####### Recommendation System Starts#######

In [111]:
user_input = {
    "rating": 3.0,
    "rating_count": "50+ ratings",
    "cost": 250,
    "area": "Adyar",
    "cuisine_1": "Unknown",
    "cuisine_2": "Unknown",
    "city_main": "Chennai"
}
user_df = pd.DataFrame([user_input])

In [112]:
import numpy as np
# -------------------------
# 2. Encode numeric + label encoded values
# -------------------------
enc_rating = np.array([[user_df['rating'].iloc[0]]])
enc_cost = np.array([[user_df['cost'].iloc[0]]])
enc_rating_count = label_encoder_rating_count.transform(
    user_df['rating_count']
).reshape(1, -1)
enc_area = label_encoder_area.transform(
    user_df['area']
).reshape(1, -1)

In [113]:
# -------------------------
# 3. OneHot encodings
# -------------------------
enc_cuisine = oneHot_encoder_cuisine.transform(
    user_df[['cuisine_1', 'cuisine_2']]
)
enc_city = oneHot_encoder_city.transform(
    user_df[['city_main']]
)

In [114]:
# -------------------------
# 4. Build FINAL VECTOR (Exact feature order used in training)
# -------------------------
final_vector = np.hstack([
    enc_rating,         # feature_rating
    enc_rating_count,   # feature_rating_count
    enc_cost,           # feature_cost
    enc_area,           # feature_area
    enc_cuisine,        # feature_cuisine_*
    enc_city            # feature_city_*
])

In [115]:
# -------------------------
# 5. Predict Cluster
# -------------------------
cluster = model.predict(final_vector)[0]
cluster_df = df[df['cluster'] == cluster].copy()



In [116]:
cluster_vector = cluster_df.drop(columns=['cluster']).values

In [117]:
from sklearn.metrics.pairwise import cosine_similarity
sim_scores = cosine_similarity(final_vector, cluster_vector)[0]
cluster_df["similarity_score"] = sim_scores

In [118]:
cluster_df.sort_values(by='similarity_score', ascending=False ).head(10)

Unnamed: 0_level_0,rating,rating_count,cost,area,cuisine_1_8:15 To 11:30 Pm,cuisine_1_Afghani,cuisine_1_African,cuisine_1_American,cuisine_1_Andhra,cuisine_1_Arabian,...,"city_main_New BEL Road,Bangalore",city_main_Noida,city_main_Other,city_main_Pune,city_main_Surat,city_main_Vadodara,city_main_Vijayawada,city_main_Vizag,cluster,similarity_score
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10540,4.1,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.999971
279404,3.8,4,250.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.999971
234933,4.2,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.99997
30150,4.3,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.99997
231590,4.3,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.99997
56318,4.3,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.99997
146253,3.8,4,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.999969
480768,4.0,4,250.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.999968
503857,4.5,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.999968
128420,4.5,5,300.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0.999968


In [110]:
cluster_df.sort_values(by='similarity_score', ascending=False ).to_csv('dataset/swiggy_recommendation_28112025.csv', index=True)

In [119]:
# loading the dataset
df_test = pd.read_csv('dataset/swiggy.csv')

In [120]:
recommendations_encoded = cluster_df.sort_values(by='similarity_score', ascending=False )

In [121]:
recommendations=df_test[df_test['id'].isin(recommendations_encoded.index)].copy()

In [122]:
recommendations["similarity_score"] = recommendations["id"].map(recommendations_encoded["similarity_score"])

In [123]:
# Final multi-column sort
recommendations = recommendations.sort_values(
    by=['similarity_score', "city"],
    ascending=[False, True]
)
recommendations.head(10)

Unnamed: 0,id,name,city,rating,rating_count,cost,cuisine,lic_no,link,address,menu,similarity_score
43224,10540,Murugan Idli Shop,"Adyar,Chennai",4.1,500+ ratings,₹ 300,South Indian,12417000000000,https://www.swiggy.com/restaurants/murugan-idl...,"Murugan Idli Shop, E 149 6th Avenue, Near vala...",Menu/10540.json,0.999971
43437,279404,Noodle Doodle,"Adyar,Chennai",3.8,50+ ratings,₹ 250,"Chinese,Tibetan",12414002003495,https://www.swiggy.com/restaurants/noodle-dood...,"Noodle Doodle, No.7/3M, 4th Main Road, Basent ...",Menu/279404.json,0.999971
43345,234933,Wow! Momo,"Adyar,Chennai",4.2,500+ ratings,₹ 300,"Tibetan,Healthy Food",12420002001411,https://www.swiggy.com/restaurants/wow-momo-ad...,"Wow! Momo, no 13/2, nehru nagar, 2nd main road...",Menu/234933.json,0.99997
43241,56318,Munveedu,"Adyar,Chennai",4.3,500+ ratings,₹ 300,"South Indian,Chettinad",12421002001648,https://www.swiggy.com/restaurants/munveedu-ad...,"Munveedu, 34/72A, 4th Main Road, Gandhi Nagar,...",Menu/56318.json,0.99997
43388,231590,Chinese Wox,"Adyar,Chennai",4.3,500+ ratings,₹ 300,Chinese,license,https://www.swiggy.com/restaurants/chinese-wox...,"Chinese Wox, 2/10, Gandhi Mandapam Road, Kottu...",Menu/231590.json,0.99997
43403,30150,Adyar Sree Bhavan,"Adyar,Chennai",4.3,500+ ratings,₹ 300,"South Indian,North Indian",22419538000526,https://www.swiggy.com/restaurants/adyar-sree-...,"Adyar Sree Bhavan, 138, Lattice Bridge Road, A...",Menu/30150.json,0.99997
43253,146253,Saldo,"Adyar,Chennai",3.8,50+ ratings,₹ 300,"Continental,Tandoor",license,https://www.swiggy.com/restaurants/saldo-kastu...,"Saldo, 7/12 , 8th Main Road , Ground Floor,kas...",Menu/146253.json,0.999969
43251,480768,Rolls & Bowls Company,"Adyar,Chennai",4.0,50+ ratings,₹ 250,"Chinese,North Indian",12422002000188,https://www.swiggy.com/restaurants/rolls-and-b...,"Rolls & Bowls Company, Ground Floor, Door No- ...",Menu/480768.json,0.999968
43231,5719,Prem's Graama Bhojanam,"Adyar,Chennai",4.5,500+ ratings,₹ 300,South Indian,12419002004124,https://www.swiggy.com/restaurants/prems-graam...,"Prem's Graama Bhojanam, 19/90, Sardar Patel Ro...",Menu/5719.json,0.999968
43371,128420,Hotel Sri Lakshmi Sagar,"Adyar,Chennai",4.5,500+ ratings,₹ 300,"North Indian,South Indian",12417002002943,https://www.swiggy.com/restaurants/hotel-sri-l...,"Hotel Sri Lakshmi Sagar, 177, LB Road, Kamaraj...",Menu/128420.json,0.999968


In [125]:
pickle.dump(df_test, open('pickles/encoded_df.pkl', 'wb'))

In [81]:
recommendations

Unnamed: 0,id,name,city,rating,rating_count,cost,cuisine,lic_no,link,address,menu,similarity_score
0,297700,Kumbakarna Biryani House,Adoni,3.3,50+ ratings,₹ 250,"Biryani,South Indian",20121022000134,https://www.swiggy.com/restaurants/kumbakarna-...,"Kumbakarna Biryani House, Below Dwaraka comple...",Menu/297700.json,0.999975
1,128420,Hotel Sri Lakshmi Sagar,"Adyar,Chennai",4.5,500+ ratings,₹ 300,"North Indian,South Indian",12417002002943,https://www.swiggy.com/restaurants/hotel-sri-l...,"Hotel Sri Lakshmi Sagar, 177, LB Road, Kamaraj...",Menu/128420.json,0.999973
2,191366,The Biryani Life,"Adyar,Chennai",4.0,20+ ratings,₹ 250,"Biryani,Hyderabadi",12419002000307,https://www.swiggy.com/restaurants/the-biryani...,"The Biryani Life, 26/10, Kasthuribai Nagar, 3r...",Menu/191366.json,0.999968
3,350553,Agra Biryani Hotspot,Agra,3.9,50+ ratings,₹ 300,"Biryani,Kebabs",22720590000200,https://www.swiggy.com/restaurants/agra-biryan...,"Agra Biryani Hotspot, S S272 SHAHEED NAGAR WAR...",Menu/350553.json,0.999965
4,485620,Hotel VIP International Agartala,Agartala,2.7,50+ ratings,₹ 300,"Biryani,Kebabs",12521010000287,https://www.swiggy.com/restaurants/hotel-vip-i...,"Hotel VIP International Agartala, Ujan Abhoyna...",Menu/485620.json,0.999964
...,...,...,...,...,...,...,...,...,...,...,...,...
148450,222071,M/S K/K Restaurant,Tezpur,--,Too Few Ratings,,"Indian,Chinese",20319113000233,https://www.swiggy.com/restaurants/m-s-k-k-res...,"M/S K/K Restaurant, Near Mahabhairab Mandir",Menu/222071.json,0.024137
148451,304862,AAKASH FOODS,Tiruvannamalai,--,Too Few Ratings,,Indian,12418024000892,https://www.swiggy.com/restaurants/aakash-food...,"AAKASH FOODS, NO 7 , SRIRAM NAGAR,POLUR ROAD,T...",Menu/304862.json,0.024133
148452,384579,SHRI MAHAVIR GRUH UDHYOG,"Vijay Nagar,Indore",--,Too Few Ratings,,Snacks,21421850003316,https://www.swiggy.com/restaurants/shri-mahavi...,"SHRI MAHAVIR GRUH UDHYOG, 102/584 NIRMAL REGEN...",Menu/384579.json,0.024128
148453,209433,The Nirvana Fast Food Centre,Jalpaiguri,4.0,100+ ratings,,"Indian,Chinese",22819009000721,https://www.swiggy.com/restaurants/the-nirvana...,"The Nirvana Fast Food Centre, Merchant Rd, Jal...",Menu/209433.json,0.024128


In [88]:
# Final recommendation

# ---------------------
# Load Pickled Files
# ---------------------

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


with open("pickles/city_area.pkl", "rb") as f:
    city_area_dict = pickle.load(f)

with open("pickles/cuisines.pkl", "rb") as f:
    cuisines_list = pickle.load(f)

with open("pickles/rating_counts.pkl", "rb") as f:
    rating_list = pickle.load(f)

with open("pickles/oneHot_cuisine.pkl", "rb") as f:
    encoder_cuisine = pickle.load(f)

with open("pickles/oneHot_city.pkl", "rb") as f:
    encoder_city = pickle.load(f)

with open("pickles/label_area.pkl", "rb") as f:
    label_area = pickle.load(f)

with open("pickles/encoder_rating_count.pkl", "rb") as f:
    encoder_rating_count = pickle.load(f)

with open("pickles/kmeans_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("pickles/processed_df.pkl", "rb") as f:
    processed_df = pickle.load(f)

with open("pickles/encoded_df.pkl", "rb") as f:
    encoded_df = pickle.load(f)


def get_recommendations(city, area, cuisine, min_rating):
    # df = processed_df.copy()
    user_df = pd.DataFrame([{
        "rating": 3,
        "rating_count": min_rating,
        "cost": processed_df['cost'].median(),
        "area": area.lower(),
        "cuisine_1": cuisine,
        "cuisine_2": cuisine,
        "city_main": city
    }])
    # -------------------------
    # 2. Encode numeric + label encoded values
    # -------------------------
    enc_rating = np.array([[user_df['rating'].iloc[0]]])
    enc_cost = np.array([[user_df['cost'].iloc[0]]])
    enc_rating_count = encoder_rating_count.transform(
        user_df['rating_count']
    ).reshape(1, -1)
    # enc_area = label_area.transform(
    #     user_df['area']
    # ).reshape(1, -1)
    # ---- FIX: Normalize area before encoding ----
    user_area = user_df['area'].iloc[0].lower().strip()

    # Make label_encoder classes lowercase
    label_area.classes_ = np.array([c.lower() for c in label_area.classes_])

    # Now encode safely
    enc_area = label_area.transform([user_area]).reshape(1, -1)

    # -------------------------
    # 3. OneHot encodings
    # -------------------------
    enc_cuisine = encoder_cuisine.transform(
        user_df[['cuisine_1', 'cuisine_2']]
    )
    enc_city = encoder_city.transform(
        user_df[['city_main']]
    )

    # -------------------------
    # 4. Build FINAL VECTOR (Exact feature order used in training)
    # -------------------------
    final_vector = np.hstack([
        enc_rating,         # feature_rating
        enc_rating_count,   # feature_rating_count
        enc_cost,           # feature_cost
        enc_area,           # feature_area
        enc_cuisine,        # feature_cuisine_*
        enc_city            # feature_city_*
    ])

    # -------------------------
    # 5. Predict Cluster
    # -------------------------
    # cluster = model.predict(final_vector)[0]
    # cluster_df = encoded_df[encoded_df['cluster'] == cluster].copy()
    # cluster_vector = cluster_df.drop(columns=['cluster']).values
    # sim_scores = cosine_similarity(final_vector, cluster_vector)[0]
    # cluster_df["similarity_score"] = sim_scores

    cluster_df = encoded_df.copy()
    cluster_vector = encoded_df.drop(columns=['cluster']).values
    # print(encoded_df.drop(columns=['cluster']).columns)
    sim_scores = cosine_similarity(final_vector, cluster_vector)[0]
    cluster_df["similarity_score"] = sim_scores
    

    recommendations_encoded = cluster_df.sort_values(by='similarity_score', ascending=False )
    recommendations=processed_df[processed_df['id'].isin(recommendations_encoded.index)].copy()
    recommendations["similarity_score"] = recommendations["id"].map(recommendations_encoded["similarity_score"])
    recommendations = recommendations.sort_values(
        by=['similarity_score', "city_main"],
        ascending=[False, True]
    )
    
    # print(recommendations.head(10))
    return recommendations
    # return df



In [89]:
results = get_recommendations("Chennai", "adyar", "Biryani", "100+ ratings")

In [91]:
# results[results["city_main"] == "Chennai"]
results.head(10)

Unnamed: 0,id,name,rating,rating_count,cost,cuisine,lic_no,link,address,menu,area,city_main,cuisine_1,cuisine_2,similarity_score
43451,303976,Dindigul Velu Biryani Kadai,4.3,100+ ratings,300.0,"Biryani,Indian",12419002004733,https://www.swiggy.com/restaurants/dindigul-ve...,"Dindigul Velu Biryani Kadai, NO.163, SMS Build...",Menu/303976.json,Adyar,Chennai,Biryani,Indian,0.999975
579,387274,THE SERIAL GRILLER,3.5,100+ ratings,300.0,"Biryani,Chinese",22521050000391,https://www.swiggy.com/restaurants/the-serial-...,"THE SERIAL GRILLER, 3, Officers Quarter Ln, Kr...",Menu/387274.json,Agartala,Other,Biryani,Chinese,0.999972
43303,487355,Big Bowl Company,3.5,100+ ratings,250.0,"Chinese,North Indian",12422002000188,https://www.swiggy.com/restaurants/big-bowl-co...,"Big Bowl Company, Ground Floor, Door No- 24/11...",Menu/487355.json,Adyar,Chennai,Chinese,North Indian,0.999966
111075,135332,Anna Snacks,4.4,100+ ratings,400.0,South Indian,21521073000272,https://www.swiggy.com/restaurants/anna-snacks...,"Anna Snacks, Anna Snacks, 1, Rainbow Apartment...",Menu/135332.json,Airoli,Mumbai,South Indian,,0.999966
111121,49651,Domino's Pizza,4.4,100+ ratings,400.0,Pizzas,11513014003415,https://www.swiggy.com/restaurants/dominos-piz...,"Domino's Pizza, Surme Plaza, Mumbra, Thane Sho...",Menu/49651.json,Airoli,Mumbai,Pizzas,,0.999966
111063,207933,Chetak Sweet Point,4.3,100+ ratings,400.0,Indian,11519015000256,https://www.swiggy.com/restaurants/chetak-swee...,"Chetak Sweet Point, Vraj vihar CHS Ltd Shop no...",Menu/207933.json,Airoli,Mumbai,Indian,,0.999965
111308,328874,Charcoal Eats - Biryani & Beyond,4.4,100+ ratings,499.0,"Biryani,North Indian",11520015000289,https://www.swiggy.com/restaurants/charcoal-ea...,"Charcoal Eats - Biryani & Beyond, SHOP NO 2, D...",Menu/328874.json,Airoli,Mumbai,Biryani,North Indian,0.999965
111169,303833,Priyanka pure veg restaurant,4.2,100+ ratings,400.0,Indian,11518015000347,https://www.swiggy.com/restaurants/priyanka-pu...,"Priyanka pure veg restaurant, Priyanka Pure Ve...",Menu/303833.json,Airoli,Mumbai,Indian,,0.999965
111127,413980,KFC,4.1,100+ ratings,400.0,"American,Snacks",11521015000210,https://www.swiggy.com/restaurants/kfc-madhuri...,"KFC, Shop Nos. A10, A11, A13, A14 and A15, Gro...",Menu/413980.json,Airoli,Mumbai,American,Snacks,0.999965
111206,29445,Sai Nidhi,4.1,100+ ratings,400.0,"North Indian,South Indian",11518015000055,https://www.swiggy.com/restaurants/sai-nidhi-a...,"Sai Nidhi, 105 Panchsheel Vishal Complex 1 S...",Menu/29445.json,Airoli,Mumbai,North Indian,South Indian,0.999965


In [None]:
# -----------------------
# TIERED PRIORITY SYSTEM
# -----------------------

def compute_priority(row):
    city = "chennai" 
    area = "adyar" 
    c1 = "Biryani"
    
    score = 0

    # Perfect match: area + cuisine + city
    if (row["area"] == area and 
        (row["cuisine_1"] == c1 or row["cuisine_2"] == c1) and 
        row["city_main"] == city):
        score += 4

    # City + Cuisine match
    elif ((row["cuisine_1"] == c1 or row["cuisine_2"] == c1) and 
           row["city_main"] == city):
        score += 3

    # Area + City match
    elif (row["area"] == area and row["city_main"] == city):
        score += 2

    # City-only match
    elif row["city_main"] == city:
        score += 1

    return score

results["priority"] = results.apply(compute_priority, axis=1)

# FINAL SORTING:
# results = results.sort_values(
#     by=["priority", "similarity_score"],
#     ascending=[False, False]
# )
results = results.sort_values(
    by=["priority"],
    ascending=[True]
)



579       0
111075    0
111121    0
111063    0
111308    0
         ..
24864     0
24867     0
24886     0
126375    0
88256     0
Name: priority, Length: 40561, dtype: int64


In [53]:
results.head(10)
    

Unnamed: 0,id,name,rating,rating_count,cost,cuisine,lic_no,link,address,menu,area,city_main,cuisine_1,cuisine_2,similarity_score,priority
579,387274,THE SERIAL GRILLER,3.5,100+ ratings,300.0,"Biryani,Chinese",22521050000391,https://www.swiggy.com/restaurants/the-serial-...,"THE SERIAL GRILLER, 3, Officers Quarter Ln, Kr...",Menu/387274.json,Agartala,Other,Biryani,Chinese,0.99998,0
111075,135332,Anna Snacks,4.4,100+ ratings,400.0,South Indian,21521073000272,https://www.swiggy.com/restaurants/anna-snacks...,"Anna Snacks, Anna Snacks, 1, Rainbow Apartment...",Menu/135332.json,Airoli,Mumbai,South Indian,,0.999974,0
111121,49651,Domino's Pizza,4.4,100+ ratings,400.0,Pizzas,11513014003415,https://www.swiggy.com/restaurants/dominos-piz...,"Domino's Pizza, Surme Plaza, Mumbra, Thane Sho...",Menu/49651.json,Airoli,Mumbai,Pizzas,,0.999974,0
111063,207933,Chetak Sweet Point,4.3,100+ ratings,400.0,Indian,11519015000256,https://www.swiggy.com/restaurants/chetak-swee...,"Chetak Sweet Point, Vraj vihar CHS Ltd Shop no...",Menu/207933.json,Airoli,Mumbai,Indian,,0.999973,0
111308,328874,Charcoal Eats - Biryani & Beyond,4.4,100+ ratings,499.0,"Biryani,North Indian",11520015000289,https://www.swiggy.com/restaurants/charcoal-ea...,"Charcoal Eats - Biryani & Beyond, SHOP NO 2, D...",Menu/328874.json,Airoli,Mumbai,Biryani,North Indian,0.999973,0
111169,303833,Priyanka pure veg restaurant,4.2,100+ ratings,400.0,Indian,11518015000347,https://www.swiggy.com/restaurants/priyanka-pu...,"Priyanka pure veg restaurant, Priyanka Pure Ve...",Menu/303833.json,Airoli,Mumbai,Indian,,0.999973,0
111127,413980,KFC,4.1,100+ ratings,400.0,"American,Snacks",11521015000210,https://www.swiggy.com/restaurants/kfc-madhuri...,"KFC, Shop Nos. A10, A11, A13, A14 and A15, Gro...",Menu/413980.json,Airoli,Mumbai,American,Snacks,0.999973,0
111206,29445,Sai Nidhi,4.1,100+ ratings,400.0,"North Indian,South Indian",11518015000055,https://www.swiggy.com/restaurants/sai-nidhi-a...,"Sai Nidhi, 105 Panchsheel Vishal Complex 1 S...",Menu/29445.json,Airoli,Mumbai,North Indian,South Indian,0.999973,0
111183,212537,Oven Story Pizza,4.0,100+ ratings,400.0,Pizzas,11519015000412,https://www.swiggy.com/restaurants/oven-story-...,"Oven Story Pizza, Shop No 15 and 16, Bhoskar B...",Menu/212537.json,Airoli,Mumbai,Pizzas,,0.999972,0
111341,361270,Wendy's,4.0,100+ ratings,400.0,"American,Burgers",11519015000412,https://www.swiggy.com/restaurants/wendys-sect...,"Wendy's, Shop No 15 and 16, Bhoskar Bhavan, Be...",Menu/361270.json,Airoli,Mumbai,American,Burgers,0.999972,0
