<h1>Content based filtering</h1>

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# User data
users = {
    'user_id': [1, 2, 3, 4],
    'age': [25, 30, 22, 29],
    'gender': ['Male', 'Female', 'Male', 'Other'],
    'dietary_preference': ['Vegetarian', 'Vegan', 'Non-Vegetarian', 'Vegetarian'],
    'allergies': ['Nuts', 'Gluten', 'None', 'Dairy'],
    'preferred_cuisine': ['Indian', 'Chinese', 'Continental', 'Indian'],
    'health_goals': ['Low-carb', 'High-protein', 'Balanced', 'Low-fat']
}

# Dish data
dishes = {
    'dish_id': [1, 2, 3, 4],
    'name': ["Paneer Tikka Wrap", "Chicken Burger", "Veggie Pizza", "Tofu Stir Fry"],
    'cuisine': ["Indian", "American", "Italian", "Chinese"],
    'category': ["Main Course", "Main Course", "Main Course", "Main Course"],
    'ingredients': ["Paneer, spices", "Chicken, lettuce", "Vegetables, dairy", "Tofu, vegetables"],
    'dietary_tags': ["Vegetarian", "Non-Veg", "Vegetarian", "Vegan"],
    'calories': [250, 350, 300, 200],
    'spiciness': [3, 1, 2, 4],
    'price': [120, 150, 200, 180]
}
order_df = pd.DataFrame({
    'order_id': [1, 2, 3, 4],
    'user_id': [1, 2, 3, 4],
    'dish_id': [1, 3, 4, 2],
    'order_date': ['2023-11-01', '2023-11-02', '2023-11-03', '2023-11-04'],
    'rating': [5, 4, 5, 3],
    'feedback': ['Delicious and filling', 'Tasty but slightly spicy', 'Loved the flavors', 'Could be better'],
    'quantity': [1, 2, 1, 1],
    'total_price': [120, 400, 180, 150]
})

# Convert dictionaries to DataFrames
user_df = pd.DataFrame(users)
dish_df = pd.DataFrame(dishes)

In [3]:
user_id=4
user_location = user_df[user_df['user_id']==user_id].iloc[0]

In [4]:
user_location

user_id                        4
age                           29
gender                     Other
dietary_preference    Vegetarian
allergies                  Dairy
preferred_cuisine         Indian
health_goals             Low-fat
Name: 3, dtype: object

In [5]:
user_preferences = f"{user_location['dietary_preference']} {user_location['allergies']} {user_location['preferred_cuisine']} {user_location['health_goals']}"

In [6]:
user_preferences

'Vegetarian Dairy Indian Low-fat'

In [7]:
dish_df['combined_dish']=dish_df.apply(lambda x: f"{x['cuisine']} {x['dietary_tags']} {x['calories']}",axis=1)

In [8]:
dish_df.head()

Unnamed: 0,dish_id,name,cuisine,category,ingredients,dietary_tags,calories,spiciness,price,combined_dish
0,1,Paneer Tikka Wrap,Indian,Main Course,"Paneer, spices",Vegetarian,250,3,120,Indian Vegetarian 250
1,2,Chicken Burger,American,Main Course,"Chicken, lettuce",Non-Veg,350,1,150,American Non-Veg 350
2,3,Veggie Pizza,Italian,Main Course,"Vegetables, dairy",Vegetarian,300,2,200,Italian Vegetarian 300
3,4,Tofu Stir Fry,Chinese,Main Course,"Tofu, vegetables",Vegan,200,4,180,Chinese Vegan 200


In [9]:
tfidf_vectorizer = TfidfVectorizer()

In [10]:
tfidf_matrix = tfidf_vectorizer.fit_transform(dish_df['combined_dish'].tolist()+[user_preferences])

In [11]:
tfidf_matrix.toarray()

array([[0.        , 0.69015927, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.55681615, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.4622077 ],
       [0.        , 0.        , 0.        , 0.5       , 0.5       ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.5       , 0.5       , 0.        , 0.        ],
       [0.        , 0.        , 0.63907044, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.63907044,
        0.        , 0.        , 0.        , 0.        , 0.42799292],
       [0.57735027, 0.        , 0.        , 0.        , 0.        ,
        0.57735027, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.57735027, 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.49389914, 0.49389914, 0.39847472, 0.        ,
        0.49389914, 0.        , 0.        , 

In [12]:
cosine_similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()

In [13]:
dish_df['similarity'] = cosine_similarities

In [14]:
dish_df.head()

Unnamed: 0,dish_id,name,cuisine,category,ingredients,dietary_tags,calories,spiciness,price,combined_dish,similarity
0,1,Paneer Tikka Wrap,Indian,Main Course,"Paneer, spices",Vegetarian,250,3,120,Indian Vegetarian 250,0.374762
1,2,Chicken Burger,American,Main Course,"Chicken, lettuce",Non-Veg,350,1,150,American Non-Veg 350,0.0
2,3,Veggie Pizza,Italian,Main Course,"Vegetables, dairy",Vegetarian,300,2,200,Italian Vegetarian 300,0.141567
3,4,Tofu Stir Fry,Chinese,Main Course,"Tofu, vegetables",Vegan,200,4,180,Chinese Vegan 200,0.0


In [15]:
filtered_dish = dish_df[~dish_df['ingredients'].str.contains(user_location['allergies'],case=False)]

In [16]:
filtered_dish

Unnamed: 0,dish_id,name,cuisine,category,ingredients,dietary_tags,calories,spiciness,price,combined_dish,similarity
0,1,Paneer Tikka Wrap,Indian,Main Course,"Paneer, spices",Vegetarian,250,3,120,Indian Vegetarian 250,0.374762
1,2,Chicken Burger,American,Main Course,"Chicken, lettuce",Non-Veg,350,1,150,American Non-Veg 350,0.0
3,4,Tofu Stir Fry,Chinese,Main Course,"Tofu, vegetables",Vegan,200,4,180,Chinese Vegan 200,0.0


In [17]:
top_dishes = filtered_dish.sort_values(by='similarity', ascending=False)

# Display recommended dishes
print(top_dishes[['name', 'cuisine', 'dietary_tags', 'calories', 'similarity']])

                name   cuisine dietary_tags  calories  similarity
0  Paneer Tikka Wrap    Indian   Vegetarian       250    0.374762
1     Chicken Burger  American      Non-Veg       350    0.000000
3      Tofu Stir Fry   Chinese        Vegan       200    0.000000


In [18]:
order_df.head()

Unnamed: 0,order_id,user_id,dish_id,order_date,rating,feedback,quantity,total_price
0,1,1,1,2023-11-01,5,Delicious and filling,1,120
1,2,2,3,2023-11-02,4,Tasty but slightly spicy,2,400
2,3,3,4,2023-11-03,5,Loved the flavors,1,180
3,4,4,2,2023-11-04,3,Could be better,1,150


In [19]:
highly_rated_dishes = order_df[order_df['rating']>3]

<h1>Collaborative filtering</h1>



In [21]:
user_item_matrix = order_df.pivot(index='user_id',columns='dish_id',values='rating').fillna(0)

In [22]:
user_item_matrix

dish_id,1,2,3,4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0
3,0.0,0.0,0.0,5.0
4,0.0,3.0,0.0,0.0


In [23]:
from sklearn.neighbors import NearestNeighbors

In [24]:
knn_model = NearestNeighbors(metric='cosine',n_neighbors=2)
knn_model.fit(user_item_matrix)

In [25]:
user_item_matrix

dish_id,1,2,3,4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0
3,0.0,0.0,0.0,5.0
4,0.0,3.0,0.0,0.0


In [26]:
user_item_matrix[user_id]

user_id
1    0.0
2    0.0
3    5.0
4    0.0
Name: 4, dtype: float64

In [27]:
user_item_matrix.iloc[1]

dish_id
1    0.0
2    0.0
3    4.0
4    0.0
Name: 2, dtype: float64

In [28]:
item_user_matrix = user_item_matrix.T
item_user_matrix

user_id,1,2,3,4
dish_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,0.0,0.0,0.0
2,0.0,0.0,0.0,3.0
3,0.0,4.0,0.0,0.0
4,0.0,0.0,5.0,0.0


In [29]:
recommended_dishes_list=[]
def recommended_dishes(user_id,user_item_matrix,knn_model,top=2):
    user_index=user_id-1
    print(user_index)
    distances,indices = knn_model.kneighbors(user_item_matrix.iloc[user_index].values.reshape(1,-1),n_neighbors=top+1)
    neighbors_indices = indices[0][1:]
    print(neighbors_indices)
    for neighbour in neighbors_indices:
        neighbour_location = user_item_matrix.iloc[neighbour] 
        #print(neighbour_location)
        print(type(neighbour_location))
        for dish_id,rating in neighbour_location.items():  
            print(f"{dish_id} {rating}")
            if rating>3 and user_item_matrix.iloc[user_index,dish_id-1]==0:
                print(dish_id)
                recommended_dishes_list.append(dish_id)
                print(f"Recommended dishes {recommended_dishes_list}")
            
        
       


In [30]:
recommended_dishes(1,user_item_matrix,knn_model)

0
[1 2]
<class 'pandas.core.series.Series'>
1 0.0
2 0.0
3 4.0
3
Recommended dishes [3]
4 0.0
<class 'pandas.core.series.Series'>
1 0.0
2 0.0
3 0.0
4 5.0
4
Recommended dishes [3, 4]


In [31]:
dish_df.head()

Unnamed: 0,dish_id,name,cuisine,category,ingredients,dietary_tags,calories,spiciness,price,combined_dish,similarity
0,1,Paneer Tikka Wrap,Indian,Main Course,"Paneer, spices",Vegetarian,250,3,120,Indian Vegetarian 250,0.374762
1,2,Chicken Burger,American,Main Course,"Chicken, lettuce",Non-Veg,350,1,150,American Non-Veg 350,0.0
2,3,Veggie Pizza,Italian,Main Course,"Vegetables, dairy",Vegetarian,300,2,200,Italian Vegetarian 300,0.141567
3,4,Tofu Stir Fry,Chinese,Main Course,"Tofu, vegetables",Vegan,200,4,180,Chinese Vegan 200,0.0


In [32]:
dish_name=[]
for i in recommended_dishes_list:
     name = dish_df.loc[dish_df['dish_id']==i,'name'].values[0]
     dish_name.append(name)    
dish_name

['Veggie Pizza', 'Tofu Stir Fry']

In [73]:
# Convert collaborative recommendations to DataFrame
collab_dishes = pd.DataFrame(recommended_dishes_list, columns=['dish_id'])
collab_dishes['collab_score'] = 1  # Simple score for collaborative filtering
collab_dishes





Unnamed: 0,dish_id,collab_score
0,3,1
1,4,1


In [79]:
top_dishes

Unnamed: 0,dish_id,name,cuisine,category,ingredients,dietary_tags,calories,spiciness,price,combined_dish,similarity
0,1,Paneer Tikka Wrap,Indian,Main Course,"Paneer, spices",Vegetarian,250,3,120,Indian Vegetarian 250,0.374762
1,2,Chicken Burger,American,Main Course,"Chicken, lettuce",Non-Veg,350,1,150,American Non-Veg 350,0.0
3,4,Tofu Stir Fry,Chinese,Main Course,"Tofu, vegetables",Vegan,200,4,180,Chinese Vegan 200,0.0


In [81]:
# Merge content and collaborative recommendations
hybrid_dishes = pd.merge(top_dishes, collab_dishes, on='dish_id', how='outer')
hybrid_dishes['collab_score'] = hybrid_dishes['collab_score'].fillna(0)
hybrid_dishes['total_score'] = hybrid_dishes['similarity'] + hybrid_dishes['collab_score']
hybrid_dishes_filtered=hybrid_dishes.dropna()
hybrid_dishes_filtered


Unnamed: 0,dish_id,name,cuisine,category,ingredients,dietary_tags,calories,spiciness,price,combined_dish,similarity,collab_score,total_score
0,1,Paneer Tikka Wrap,Indian,Main Course,"Paneer, spices",Vegetarian,250.0,3.0,120.0,Indian Vegetarian 250,0.374762,0.0,0.374762
1,2,Chicken Burger,American,Main Course,"Chicken, lettuce",Non-Veg,350.0,1.0,150.0,American Non-Veg 350,0.0,0.0,0.0
3,4,Tofu Stir Fry,Chinese,Main Course,"Tofu, vegetables",Vegan,200.0,4.0,180.0,Chinese Vegan 200,0.0,1.0,1.0


In [85]:
# Sort by total score for final recommendations
final_recommendations = hybrid_dishes_filtered.sort_values(by='total_score', ascending=False)
print(final_recommendations[['dish_id', 'name', 'similarity', 'collab_score', 'total_score']])

   dish_id               name  similarity  collab_score  total_score
3        4      Tofu Stir Fry    0.000000           1.0     1.000000
0        1  Paneer Tikka Wrap    0.374762           0.0     0.374762
1        2     Chicken Burger    0.000000           0.0     0.000000


In [34]:
# from sklearn.neighbors import NearestNeighbors

# def recommended_dishes_item_based(user_id, user_item_matrix, top=2):
#     # Transpose the matrix for item-based collaborative filtering
#     item_user_matrix = user_item_matrix.T
    
#     # Initialize KNN model
#     knn_model = NearestNeighbors(metric='cosine', n_neighbors=top + 1)
#     knn_model.fit(item_user_matrix)
    
#     # List to store recommended dishes
#     recommended_dishes_list = []
    
#     # Find items (dishes) that the user has rated highly (e.g., rating > 3)
#     liked_dishes = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] > 3].index.tolist()
    
#     for liked_dish in liked_dishes:
#         # Find similar items for each liked dish
#         dish_index = liked_dish - 1  # Convert dish_id to index
#         distances, indices = knn_model.kneighbors(item_user_matrix.iloc[dish_index].values.reshape(1, -1), n_neighbors=top + 1)
        
#         # Skip the first index because it is the liked dish itself
#         similar_dishes = indices[0][1:]
        
#         for similar_dish_index in similar_dishes:
#             # Get the dish ID from the index
#             similar_dish_id = item_user_matrix.index[similar_dish_index]
            
#             # Recommend this dish if the user hasn't rated it yet
#             if user_item_matrix.loc[user_id, similar_dish_id] == 0:
#                 recommended_dishes_list.append(similar_dish_id)
#                 print(f"Recommended dish: {similar_dish_id}")
    
#     print(f"Final recommended dishes for user {user_id}: {recommended_dishes_list}")
#     return recommended_dishes_list

# # Example usage:
# # Get recommendations for a specific user
# recommended_dishes_item_based(user_id=1, user_item_matrix=user_item_matrix, top=2)
