In [26]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares

# load the user-item matrix from a CSV file
df= pd.read_csv('UserBehavior.csv', dtype={'user_id':int, 'item_id':int, 'category_id':int, 'behavior':str, 'timestamp':np.int64})
# convert the DataFrame to a sparse CSR matrix
# Convert behavior types to scores
behavior_scores = {'pv': 1, 'fav': 2, 'cart': 3, 'buy': 9}
df['score'] = df['behavior'].map(behavior_scores)

# Sum up all scores of a user
user_scores = df.groupby(['user_id', 'item_id'])['score'].sum()

# Create a user-item matrix
user_item_matrix = user_scores.unstack()
user_item_matrix.head()

item_id,324,330,422,812,1110,1197,1211,1260,1369,1503,...,5161681,5161688,5161884,5161924,5162153,5162320,5162564,5162737,5162944,5162993
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
100,,,,,,,,,,,...,,,,,,,,,,
1000,,,,,,,,,,,...,,,,,,,,,,
10001,,,,,,,,,,,...,,,,,,,,,,
10008,,,,,,,,,,,...,,,,,,,,,,


In [32]:
from sklearn.model_selection import train_test_split
from scipy import sparse
train, test = train_test_split(user_item_matrix, test_size=0.2,shuffle=False)
train = sparse.csr_matrix(train.fillna(0).values)
test = sparse.csr_matrix(test.fillna(0).values)

# Initialize and fit the ALS model on the sparse matrix
model = AlternatingLeastSquares(factors=50, regularization=0.01, iterations=50)
model.fit(train)

  0%|          | 0/50 [00:00<?, ?it/s]

In [34]:
user_embeddings = model.user_factors
item_embeddings = model.item_factors
predicted_scores = user_embeddings.dot(item_embeddings.T)
predicted_scores

array([[-4.2510126e-04,  0.0000000e+00, -9.4193419e-05, ...,
         0.0000000e+00,  9.4622716e-05,  2.7303335e-03],
       [-2.8602313e-02,  0.0000000e+00,  1.1777886e-02, ...,
         0.0000000e+00,  8.4421048e-03,  4.0478542e-02],
       [ 1.7503260e-03,  0.0000000e+00,  1.7577731e-05, ...,
         0.0000000e+00,  2.8822874e-04, -2.1649167e-04],
       ...,
       [-6.3631684e-04,  0.0000000e+00,  6.3084797e-03, ...,
         0.0000000e+00,  5.3915467e-02, -4.0981784e-03],
       [-7.8634225e-04,  0.0000000e+00,  1.3275833e-03, ...,
         0.0000000e+00, -9.9034617e-03,  2.1448579e-02],
       [-1.3145996e-02,  0.0000000e+00,  3.7989677e-03, ...,
         0.0000000e+00, -3.3845268e-02, -9.8722207e-04]], dtype=float32)

In [46]:
print(test[1])

  (0, 113)	1.0
  (0, 380)	1.0
  (0, 593)	1.0
  (0, 702)	1.0
  (0, 825)	2.0
  (0, 846)	1.0
  (0, 914)	1.0
  (0, 976)	2.0
  (0, 1120)	3.0
  (0, 1248)	1.0
  (0, 1286)	1.0
  (0, 1720)	2.0
  (0, 1732)	1.0
  (0, 2005)	2.0
  (0, 2185)	1.0
  (0, 2324)	1.0
  (0, 2360)	1.0
  (0, 2465)	1.0
  (0, 2660)	1.0
  (0, 2708)	1.0
  (0, 2715)	1.0
  (0, 2834)	1.0
  (0, 2862)	1.0
  (0, 3184)	1.0
  (0, 3367)	1.0
  :	:
  (0, 30513)	2.0
  (0, 30678)	2.0
  (0, 30826)	1.0
  (0, 30919)	1.0
  (0, 30951)	1.0
  (0, 31174)	1.0
  (0, 31303)	1.0
  (0, 31476)	2.0
  (0, 31679)	4.0
  (0, 31720)	2.0
  (0, 31906)	1.0
  (0, 32436)	1.0
  (0, 32657)	1.0
  (0, 32814)	2.0
  (0, 32881)	2.0
  (0, 33121)	1.0
  (0, 33143)	1.0
  (0, 33238)	1.0
  (0, 33289)	1.0
  (0, 33308)	1.0
  (0, 33603)	1.0
  (0, 33787)	1.0
  (0, 33818)	2.0
  (0, 33972)	1.0
  (0, 34001)	1.0


In [43]:
userid = 1
ids, scores = model.recommend(userid, test[userid], N=10, filter_already_liked_items=False)
#return 10 items with highest scores
print(ids, scores)

[19719  1674  6901 32156 10706 27141  3926 10690 21572  3398] [0.72527075 0.7102726  0.70699286 0.6935417  0.6935417  0.69354165
 0.6747566  0.62928516 0.60140985 0.545172  ]


In [50]:
recommended_items = set(ids)
test_set = set(test[userid].nonzero()[1])
true_positives = recommended_items.intersection(test_set)
false_positives = recommended_items - true_positives
print(true_positives)
print(false_positives)

set()
{10690, 21572, 27141, 3398, 19719, 1674, 10706, 6901, 3926, 32156}
