Through ALS, we take a matrix of user/item interactions and work out the latent features that relate users and items to each other. This matrix factorisation method reduces the dimensions/features (while keeping the relevant information) into a smaller matrix of user features and a smaller matrix of item features.

The matrix factorisation results in:
1. One smaller matrix with dimensions: num of users x num of latent features --> contains the latent user feature vector for each user
2. And another matrix with dimensions: num of items x num of latent features --> contains the latent item feature vector for each item

Multiplying these two feature matrices together approximates the original matrix, but now we have two dense matrices that hold a number of latent features for each of our users and items.

In [31]:
import pandas as pd
import numpy as np
import random
import scipy.sparse as sparse
from tqdm import tqdm_notebook as tqdm
import implicit
from sklearn.preprocessing import MinMaxScaler

In [32]:
# Load the profile features CSV into a DataFrame (path is relative to the notebook's working directory)
df = pd.read_csv('../inputs/profile_features_clean.csv')

In [33]:
# Show five randomly drawn rows as a quick sanity check of the loaded frame
df.sample(n=5)

Unnamed: 0,account_balance,card_brand_users,has_coupon,birthday_year,birthday_month,age,user_id,address,zone_id,salesperson_id,...,delivery_order_id,item_id,item_type,quantity,unit_price,name,macros,ingredients,temperature,category
47865,0,Visa,0,,,,33994,51 Kee Sun Avenue,61.0,0,...,98538,472.0,main,1.0,1395.0,Sous‑vide Turkey on Chilled Capellini,"Calories: 510, Protein: 50g, Fat: 14g, Carb: ...","capellini, sous-vide turkey, compressed shoyu ...",chilled,regular
10356,0,Visa,0,,,,1548,One Raffles Quay taxi stand,74.0,0,...,9204,184.0,main,2.0,995.0,Spicy Quinoa and Oregano Sweet Potato Salad,"Calories: 408, Fat: 9g, Carb: 73g, Protein: 11g","sweet potato, quinoa, celery, sweet peas, rais...",chilled,main
51687,0,Visa,1,,,,22629,20 Pasir Panjang Road,61.0,0,...,95643,459.0,main,5.0,950.0,Roasted Pumpkin and Black Rice Salad,"Calories: 575, Protein: 27.7g, Fat: 8.5g, Carb...","pumpkin, black rice, brown rice, sweet corn, c...",chilled,regular
12996,0,Visa,0,,,,29110,407 Upper Changi Road North,61.0,0,...,76928,376.0,main,25.0,995.0,Grain Chicken Rice,"Calories: 517, Fat: 7g, Carb: 76g, Protein: 36g","chicken, brown rice, white rice, kailan, chick...",warm,regular
26509,0,Visa,0,,,,28460,77 Ayer Rajah Crescent,61.0,0,...,105970,377.0,main,1.0,895.0,Dragon Well Basil Rice,"Calories: 495, Fat: 15g, Carb: 75g, Protein: 16g","brown rice, black rice, egg, white cabbage, ce...",warm,regular


In [34]:
# List the dtype of every column in df
df.dtypes

account_balance               int64
card_brand_users             object
has_coupon                    int64
birthday_year               float64
birthday_month              float64
age                         float64
user_id                       int64
address                      object
zone_id                     float64
salesperson_id                int64
discount                    float64
due_dates_only               object
card_details                 object
card_brand_delivery_info     object
source                       object
delivery_fee                float64
meal_wave                    object
surcharge_amount            float64
promo_code_used               int64
gave_feedback                 int64
district                    float64
delivery_order_id             int64
item_id                     float64
item_type                    object
quantity                    float64
unit_price                  float64
name                         object
macros                      

In [35]:
# Keep only the rows that actually carry an item_id
df = df.loc[df['item_id'].notna()]

In [36]:
# Convert item_id to integer type.
# `assign` returns a new frame instead of writing the column in place: when df
# is the result of a row filter, an in-place column write is the pattern that
# triggers pandas' SettingWithCopyWarning.
df = df.assign(item_id=df['item_id'].astype(int))

In [37]:
# Load the meals CSV into a DataFrame (path is relative to the notebook's working directory);
# `recommend` later reads its `id` and `name` columns
menu = pd.read_csv('../raw/meals.csv')

In [38]:
# (rows, columns) of df at this point in the notebook
df.shape

(57494, 31)

In [39]:
# Total quantity per (user_id, item_id) pair, returned as a flat three-column frame
data = df.groupby(['user_id', 'item_id'], as_index=False)['quantity'].sum()

In [40]:
# Show five randomly drawn rows of the aggregated interactions
data.sample(n=5)

Unnamed: 0,user_id,item_id,quantity
3492,1828,229,1.0
8393,6067,377,1.0
29575,35007,186,1.0
13547,13433,470,1.0
9901,8351,443,1.0


In [41]:
# Build the interaction matrix: row index = user_id, column index = item_id,
# stored value = summed quantity. No explicit shape is given, so scipy sizes the
# matrix from the largest id on each axis.
sparse_mat = sparse.coo_matrix(
    (data['quantity'], (data['user_id'], data['item_id']))
).tocsr()

In [42]:
# Total number of cells in the matrix (rows x columns)
matrix_size = int(np.prod(sparse_mat.shape, dtype=np.int64))

# Number of stored entries, i.e. user/item pairs that have an interaction
count_interactions = sparse_mat.nnz

# Percentage of cells that hold no interaction
sparsity = 100 * (1 - count_interactions / matrix_size)

print(sparsity)

99.89438704841044


In [43]:
# Hyperparameters for the ALS run
confidence_coef = 30   # multiplier applied to the summed quantities before fitting
factors = 60           # forwarded to the model as `factors`
regularization = 0.1
iterations = 100

# Build the (still unfitted) ALS model from the hyperparameters above
model = implicit.als.AlternatingLeastSquares(
    factors=factors,
    regularization=regularization,
    iterations=iterations,
)

# Train on the transposed interaction matrix, scaled by confidence_coef and cast to float64.
# NOTE(review): whether fit() expects an item-user or a user-item matrix depends on the
# installed `implicit` version; confirm the transpose is still correct after an upgrade.
model.fit((confidence_coef * sparse_mat.T).astype(np.float64))

# Pull the learned factor matrices out of the fitted model
user_vecs, item_vecs = model.user_factors, model.item_factors

100%|██████████| 100.0/100 [00:05<00:00, 18.91it/s]


In [44]:
def recommend(user_id, sparse_mat, user_vecs, item_vecs, num_items=15, menu_df=None):
    """Return the top-scoring items a user has not interacted with yet.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``sparse_mat`` and ``user_vecs``.
    sparse_mat : scipy sparse matrix
        Interaction matrix indexed as ``[user_id, item_id]``.
    user_vecs, item_vecs : array-like
        Factor matrices; row ``i`` of ``item_vecs`` is scored as item id ``i``.
    num_items : int, default 15
        Maximum number of recommendations to return.
    menu_df : pandas.DataFrame, optional
        Item catalogue with ``id`` and ``name`` columns. Defaults to the
        notebook-level ``menu`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` and ``score``, best recommendation first. It can hold
        fewer than ``num_items`` rows when there are not enough unseen items
        that exist in the catalogue.
    """
    catalogue = menu if menu_df is None else menu_df

    # Raw preference score of this user for every item column
    rec_vector = user_vecs[user_id, :].dot(item_vecs.T)

    # Scale the scores to [0, 1] over the whole vector. A constant vector has a
    # zero range; divide by 1 in that case so every score becomes 0
    # (the same convention MinMaxScaler uses).
    lowest = rec_vector.min()
    score_range = rec_vector.max() - lowest
    if score_range == 0:
        score_range = 1
    rec_vector_scaled = (rec_vector - lowest) / score_range

    # Items the user already interacted with get a score of zero
    already_bought = sparse_mat[user_id, :].toarray().reshape(-1) > 0
    recommend_vector = np.where(already_bought, 0, rec_vector_scaled)

    # id -> name lookup; on duplicate ids the first catalogue row wins
    unique_menu = catalogue.drop_duplicates(subset='id')
    name_by_id = dict(zip(unique_menu['id'], unique_menu['name']))

    # Walk the items from best to worst score and collect names and scores.
    # Column indices come from raw ids, so some indices have no catalogue row:
    # those are skipped (looking them up used to raise IndexError), as are
    # items the user already bought (zeroing alone still let them be picked).
    items = []
    scores = []
    for idx in np.argsort(recommend_vector)[::-1]:
        if len(items) >= num_items:
            break
        idx = int(idx)
        if already_bought[idx] or idx not in name_by_id:
            continue
        items.append(name_by_id[idx])
        scores.append(recommend_vector[idx])

    recommendations = pd.DataFrame({'name': items, 'score': scores})

    return recommendations

In [45]:
# Cast the menu ids to integers
menu['id'] = menu['id'].astype(int)
# Retain only the menu rows whose id occurs among the aggregated item_ids
menu = menu.loc[menu['id'].isin(data['item_id'])]

In [46]:
# Two-column id -> name lookup taken from the menu
item_name = pd.DataFrame(menu.loc[:, ['id', 'name']])
# Attach the item name to every (user, item) row; the left join keeps all rows of `data`
data_with_name = pd.merge(
    data,
    item_name,
    how='left',
    left_on='item_id',
    right_on='id',
)

In [47]:
# For five randomly drawn user_ids, print the user's order history next to the
# recommendations produced for them
for uid in data['user_id'].sample(5):
    recommendations = recommend(uid, sparse_mat, user_vecs, item_vecs)

    print(f'\nTRANSACTION HISTORY FOR USER : {uid}\n')
    print(data_with_name.loc[data_with_name['user_id'] == uid, ['name', 'item_id', 'quantity']])
    print('\nRECOMMEND FOLLOWING ITEMS \n')
    print(recommendations)
    


TRANSACTION HISTORY FOR USER : 26046

                             name  item_id  quantity
21678  Grilled Farm Fresh Chicken      336       1.0
21679          Thai Chicken Salad      368       1.0
21680      Dragon Well Basil Rice      377       1.0

RECOMMEND FOLLOWING ITEMS 

                                                name     score
0                                     Wholemeal Pita  0.603194
1                        Cantonese Black Pepper Beef  0.507568
2                                    Thai Basil Beef  0.500173
3                                   Cinnamon Pumpkin  0.493481
4                           Dark Chocolate Lava Cake  0.431613
5                                       Chirashi-don  0.417793
6                  Slow Braised Beef and Duck Confit  0.415420
7                         Chilli Chimichurri Chicken  0.410333
8              Szechuan Pepper Sous‑Vide Duck Breast  0.406063
9                     Ginger Puree and Shiitake Dory  0.386212
10         Braised Oyster S