In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

In [20]:
#Using Instacart Market Basket Analysis dataset from Kaggle

aisles = pd.read_csv('aisles.csv')
departments = pd.read_csv('departments.csv')
order_products_prior = pd.read_csv('order_products__prior.csv')
order_products_train = pd.read_csv('order_products__train.csv')
orders = pd.read_csv('orders.csv')
products = pd.read_csv('products.csv')

In [3]:
order_products = pd.concat([order_products_prior, order_products_train])
order_products = pd.merge(order_products, products, on='product_id')
order_products = pd.merge(order_products, orders, on='order_id')

In [4]:
order_products

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2,33120,1,1,Organic Egg Whites,86,16,202279,prior,3,5,9,8.0
1,2,28985,2,1,Michigan Organic Kale,83,4,202279,prior,3,5,9,8.0
2,2,9327,3,0,Garlic Powder,104,13,202279,prior,3,5,9,8.0
3,2,45918,4,1,Coconut Butter,19,13,202279,prior,3,5,9,8.0
4,2,30035,5,0,Natural Sweetener,17,13,202279,prior,3,5,9,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
33819101,3421063,14233,3,1,Natural Artesian Water,115,7,169679,train,30,0,10,4.0
33819102,3421063,35548,4,1,Twice Baked Potatoes,13,20,169679,train,30,0,10,4.0
33819103,3421070,35951,1,1,Organic Unsweetened Almond Milk,91,16,139822,train,15,6,10,8.0
33819104,3421070,16953,2,1,Creamy Peanut Butter,88,13,139822,train,15,6,10,8.0


## Collaborative Filtering

In [9]:
unique_users = order_products['user_id'].astype('category').cat.codes.values
unique_products = order_products['product_id'].astype('category').cat.codes.values

In [10]:
user_item_sparse = coo_matrix(
    (order_products['reordered'].values, (unique_users, unique_products))
).tocsr()

In [11]:
user_knn = NearestNeighbors(metric='cosine', algorithm='brute')
user_knn.fit(user_item_sparse)

In [12]:
user_codes = order_products[['user_id']].drop_duplicates().reset_index(drop=True)
user_idx_map = {u: i for i, u in enumerate(user_codes['user_id'])}

In [13]:
def recommend_knn(user_id, num_recommendations=5):
    if user_id not in user_idx_map:
        return "User not found!"
    user_idx = user_idx_map[user_id]
    distances, indices = user_knn.kneighbors(user_item_sparse[user_idx], n_neighbors=6)
    similar_users = indices.flatten()[1:]
    recommended_products = order_products[order_products['user_id'].isin(user_codes.iloc[similar_users]['user_id'])]
    
    user_purchased = set(order_products[order_products['user_id'] == user_id]['product_id'])
    top_products = [p for p in recommended_products['product_id'].value_counts().index if p not in user_purchased][:num_recommendations]
    return products[products['product_id'].isin(top_products)][['product_id', 'product_name']]

recommend_knn(202279)

Unnamed: 0,product_id,product_name
906,907,Premium Sliced Bacon
26347,26348,Mixed Fruit Fruit Snacks
35382,35383,Classic White Bread
35560,35561,Organic Tortilla Chips
38767,38768,Sweet Kale Salad Mix


## Content-based Filtering

In [15]:
product_features = pd.get_dummies(products[['aisle_id', 'department_id']])

In [16]:
cosine_sim = cosine_similarity(product_features)
product_idx = {product: i for i, product in enumerate(products['product_id'])}

In [28]:
def recommend_content_based(product_id, num_recommendations=5):
    if product_id not in product_idx:
        return "Product not found!"
    
    idx = product_idx[product_id]
    similar_indices = np.argsort(cosine_sim[idx])[::-1][1:num_recommendations+1]
    
    recommended_products = products.iloc[similar_indices][['product_id', 'product_name']]
    return recommended_products

recommend_content_based(26348)

Unnamed: 0,product_id,product_name
38675,38676,"Brussels Bytes, Chili Pumpkin Seed Crunch"
114,115,Scooby-Doo! Fruit Flavored Snacks
38577,38578,Organic Fuji Red Crunchy Apple Chips
38354,38355,Mighty Sticks Fruit and Veggie Snacks
38312,38313,Organic Apple Banana GoGo Squeez


In [18]:
def get_real_time_recommendations(user_id=None, product_id=None, method="hybrid", num_recommendations=5):
    if method == "collaborative":
        return recommend_knn(user_id, num_recommendations)
    elif method == "content":
        return recommend_content_based(product_id, num_recommendations)
    elif method == "hybrid":
        collab_recommendations = recommend_knn(user_id, num_recommendations)
        content_recommendations = recommend_content_based(product_id, num_recommendations)
        return pd.concat([collab_recommendations, content_recommendations]).drop_duplicates().reset_index(drop=True)
    else:
        return "Invalid method!"


In [26]:
print(get_real_time_recommendations(user_id=202279, method="collaborative").to_string(index=False))

 product_id             product_name
        907     Premium Sliced Bacon
      26348 Mixed Fruit Fruit Snacks
      35383      Classic White Bread
      35561   Organic Tortilla Chips
      38768     Sweet Kale Salad Mix


In [29]:
print(get_real_time_recommendations(product_id=100, method="content").to_string(index=False))

 product_id                                                      product_name
      49667                          Enchilada with Spanish Rice & Beans Meal
          4 Smart Ones Classic Favorites Mini Rigatoni With Vodka Cream Sauce
         30                        Three Cheese Ziti, Marinara with Meatballs
      49681                           Spaghetti with Meatballs and Sauce Meal
         81                                    Blakes Chicken Parmesan Dinner


In [31]:
print(get_real_time_recommendations(user_id=139822, product_id=45918, method="hybrid").to_string(index=False))

 product_id                                  product_name
       1194                         Natural Artisan Water
      16220                                    Cream Soda
      24852                                        Banana
      30744 Classic Tender Liver & Chicken Feast Cat Food
      36923                   Trop50 No Pulp Orange Juice
      28721                       Classico Pure Olive Oil
      28919          Seasoned Roasted Garlic Rice Vinegar
      28933          Extra Virgin Olive Oil Cooking Spray
      42516                        Organic Grass Fed Ghee
      24623              Chef Size Extra Virgin Olive Oil
