In [75]:
# Data manipulation
import numpy as np
import pandas as pd
pd.options.display.max_rows = 100

# Modeling
from matrix_factorization import BaselineModel, KernelMF, train_update_test_split
from seenTable import *
from save_load_data import *
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Other
import os
import random
import sys

# Reload imported code 
%load_ext autoreload
%autoreload 2

# Print all output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
    
rand_seed = 2
np.random.seed(rand_seed)
random.seed(rand_seed)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load data

In [29]:
# read data 
review_data = pd.read_csv('User2reviwe.csv')

# Drop first column
review_data.drop(columns=review_data.columns[0], 
        axis=1, 
        inplace=True)

X = review_data[['user_id', 'item_id']]
y = review_data['rating']

# Prepare data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Prepare data for online learning
X_train_initial, y_train_initial, X_train_update, y_train_update, X_test_update, y_test_update = train_update_test_split(review_data, frac_new_users=0.2)

review_data.head(10)

Unnamed: 0,user_id,item_id,rating
0,u1,i1,0.3
1,u1,i2,0.9
2,u1,i3,1.0
3,u1,i4,0.0
4,u1,i5,0.0
5,u1,i6,0.0
6,u1,i7,0.85
7,u1,i8,0.0
8,u1,i9,0.9
9,u1,i10,0.0


# Matrix Factorization

In [30]:
%%time 
matrix_fact = KernelMF(n_epochs = 20, n_factors = 100, verbose = 1, lr = 0.001, reg = 0.005)
matrix_fact.fit(X_train, y_train)

pred = matrix_fact.predict(X_test)
rmse = mean_squared_error(y_test, pred, squared = False)

print(f'\nTest RMSE: {rmse:.4f}')

Epoch  1 / 20  -  train_rmse: 0.33964107191601084
Epoch  2 / 20  -  train_rmse: 0.3382751100742315
Epoch  3 / 20  -  train_rmse: 0.33700842383982177
Epoch  4 / 20  -  train_rmse: 0.33583110390134174
Epoch  5 / 20  -  train_rmse: 0.33472745934466175
Epoch  6 / 20  -  train_rmse: 0.3336866114545438
Epoch  7 / 20  -  train_rmse: 0.33270117724050047
Epoch  8 / 20  -  train_rmse: 0.3317622785579609
Epoch  9 / 20  -  train_rmse: 0.33086440771361925
Epoch  10 / 20  -  train_rmse: 0.3300010652675208
Epoch  11 / 20  -  train_rmse: 0.32917103620864907
Epoch  12 / 20  -  train_rmse: 0.3283670532903583
Epoch  13 / 20  -  train_rmse: 0.3275872824523815
Epoch  14 / 20  -  train_rmse: 0.32682779823017477
Epoch  15 / 20  -  train_rmse: 0.3260868311903864
Epoch  16 / 20  -  train_rmse: 0.32536209194313853
Epoch  17 / 20  -  train_rmse: 0.3246526876865066
Epoch  18 / 20  -  train_rmse: 0.32395516283292386
Epoch  19 / 20  -  train_rmse: 0.3232694561433092
Epoch  20 / 20  -  train_rmse: 0.322594106591195


## Getting list of recommendations for a user


In [35]:
user = 'u2'
items_known = X_train.query('user_id == @user')['item_id']
res=matrix_fact.recommend(user=user, items_known=items_known)
res

Unnamed: 0,user_id,item_id,rating_pred
5,u2,i25,0.503786
24,u2,i4,0.479098
14,u2,i10,0.456883
15,u2,i73,0.44664
8,u2,i28,0.412866
18,u2,i83,0.407404
7,u2,i89,0.365732
17,u2,i82,0.336883
22,u2,i81,0.325934
4,u2,i46,0.283529


In [68]:
#load seenTable 
seenTableDataFrame=loadDatFarame('../seenTable.pkl')

In [70]:
# add recommendations to seenTable
addToSeenTable(df=seenTableDataFrame,userId='u1',itemIds=res['item_id'])
seenTableDataFrame=loadDatFarame('../seenTable.pkl')
seenTableDataFrame

Unnamed: 0,user_id,item_id,date
0,u1,i25,706075200.0
1,u1,i4,706075200.0
2,u1,i10,706075200.0
3,u1,i73,706075200.0
4,u1,i28,706075200.0
5,u1,i83,706075200.0
6,u1,i89,706075200.0
7,u1,i82,706075200.0
8,u1,i81,706075200.0
9,u1,i46,706075200.0


In [89]:
#revovme item from seenTable after certine time like for example 5 days
#amount here is in seconds
removeExpiredDateFromSeenTable(tablePath='../seenTable.pkl',amount=150)
seenTableDataFrame=loadDatFarame('../seenTable.pkl')
seenTableDataFrame

Unnamed: 0,user_id,item_id,date
10,u1,i25,706075200.0
11,u1,i4,706075200.0
12,u1,i10,706075200.0
13,u1,i73,706075200.0
14,u1,i28,706075200.0
15,u1,i83,706075200.0
16,u1,i89,706075200.0
17,u1,i82,706075200.0
18,u1,i81,706075200.0
19,u1,i46,706075200.0
