In [2]:
# Data manipulation
import numpy as np
import pandas as pd
from pickle import dump, load
pd.options.display.max_rows = 100

# Modeling
from matrix_factorization import BaselineModel, KernelMF, train_update_test_split
from seenTable import *
from save_load_data import *
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Other
import os
import random
import sys

# Reload imported code 
%load_ext autoreload
%autoreload 2

# Print all output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
    
rand_seed = 2
np.random.seed(rand_seed)
random.seed(rand_seed)

# Load data

In [3]:
# Read the pickled review table.
# The context manager closes the file handle as soon as loading finishes.
# NOTE(security): unpickling can execute arbitrary code; only load
# 'user2review.pkl' when it comes from a trusted source.
with open('user2review.pkl', 'rb') as review_file:
    review_data = load(review_file)

# Disabled step, kept for reference as real comments. The previous
# triple-quoted string was an expression statement, which this notebook's
# "all" interactivity setting echoed as cell output.
# # Drop first column
# review_data.drop(columns=review_data.columns[0],
#         axis=1,
#         inplace=True)

# Features are the (user, item) pairs; the target is the rating
X = review_data[['user_id', 'item_id']]
y = review_data['rating']

# Prepare data: hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Prepare data for online learning
(
    X_train_initial,
    y_train_initial,
    X_train_update,
    y_train_update,
    X_test_update,
    y_test_update,
) = train_update_test_split(review_data, frac_new_users=0.2)

# Preview the first rows of the review table
review_data.head(100)

' # Drop first column\nreview_data.drop(columns=review_data.columns[0], \n        axis=1, \n        inplace=True) '

Unnamed: 0,user_id,item_id,rating
0,626b28707fe7587a42e3dfeb,1627406a68cc1cefd58624016,0.6
1,626b28707fe7587a42e3dff0,1627406a18cc1cefd58623f9e,0.6
2,626b28717fe7587a42e3dff5,1627406a18cc1cefd58623fa8,0.6
3,626b28717fe7587a42e3dffa,1627406a28cc1cefd58623fb2,0.6
4,626b28717fe7587a42e3dfff,1627406a28cc1cefd58623fbc,0.6
5,626b28727fe7587a42e3e004,1627406a38cc1cefd58623fc6,0.6
6,626b28727fe7587a42e3e009,1627406a38cc1cefd58623fd0,0.6
7,626b28727fe7587a42e3e00e,1627406a48cc1cefd58623fda,0.6
8,626b28727fe7587a42e3e013,1627406a48cc1cefd58623fe4,0.6
9,626b28737fe7587a42e3e018,1627406a48cc1cefd58623fee,0.6


# Matrix Factorization

In [4]:
%%time 
matrix_fact = KernelMF(n_epochs = 20, n_factors = 100, verbose = 1, lr = 0.001, reg = 0.005)
matrix_fact.fit(X_train, y_train)

pred = matrix_fact.predict(X_test)
rmse = mean_squared_error(y_test, pred, squared = False)

print(f'\nTest RMSE: {rmse:.4f}')

Epoch  1 / 20  -  train_rmse: 0.18538663328915972
Epoch  2 / 20  -  train_rmse: 0.18474679076196573
Epoch  3 / 20  -  train_rmse: 0.18410961724791453
Epoch  4 / 20  -  train_rmse: 0.18347511366725247
Epoch  5 / 20  -  train_rmse: 0.1828432253431285
Epoch  6 / 20  -  train_rmse: 0.18221397398942657
Epoch  7 / 20  -  train_rmse: 0.18158734011739588
Epoch  8 / 20  -  train_rmse: 0.18096328939637427
Epoch  9 / 20  -  train_rmse: 0.1803418546048381
Epoch  10 / 20  -  train_rmse: 0.17972293707471873
Epoch  11 / 20  -  train_rmse: 0.17910656101427946
Epoch  12 / 20  -  train_rmse: 0.17849273097307758
Epoch  13 / 20  -  train_rmse: 0.17788141989279257
Epoch  14 / 20  -  train_rmse: 0.17727261812473463
Epoch  15 / 20  -  train_rmse: 0.17666632033525723
Epoch  16 / 20  -  train_rmse: 0.17606252646324413
Epoch  17 / 20  -  train_rmse: 0.17546115241409296
Epoch  18 / 20  -  train_rmse: 0.17486224776114978
Epoch  19 / 20  -  train_rmse: 0.17426583065529946
Epoch  20 / 20  -  train_rmse: 0.173671831

## Getting a list of recommendations for a user


In [44]:
user = '626b28707fe7587a42e3dfeb'

# Item ids that appear for this user in the training split
items_known = X_train.loc[X_train['user_id'] == user, 'item_id']

# recommend() is unpacked into eight values: an id list and a rate list for
# each of product reviews, product questions, company reviews and company questions
(
    productReviewId,
    productReviewRate,
    productQuestionId,
    productQuestionRate,
    companyreviewId,
    companyreviewRate,
    companyQuestionId,
    companyQuestionRate,
) = matrix_fact.recommend(user=user, items_known=items_known)

# Show the recommended product-question ids
productQuestionId

['1627406988cc1cefd58623ecc',
 '1627406998cc1cefd58623ee0',
 '16274069d8cc1cefd58623f3a',
 '16274069a8cc1cefd58623f08',
 '1627406878cc1cefd58623d50',
 '16274069c8cc1cefd58623f30']

In [68]:
# Load the seen-table DataFrame from its pickle file
seenTableDataFrame = loadDatFarame('../seenTable.pkl')

In [14]:
# add recommendations to seenTable
# NOTE(review): this cell depends on hidden kernel state and fails on a fresh run:
#   - `seenTableDataFrame` must already be loaded; the recorded NameError comes
#     from running this cell before it was defined.
#   - `res` is not assigned in any cell visible in this notebook; presumably it
#     held an earlier recommendation result with an 'item_id' column. TODO confirm
#     and replace with the current recommendation output.
addToSeenTable(df=seenTableDataFrame,userId='u1',itemIds=res['item_id'])
# Reload from disk and display the table
# (presumably addToSeenTable persists its changes to this file; verify)
seenTableDataFrame=loadDatFarame('../seenTable.pkl')
seenTableDataFrame

NameError: name 'seenTableDataFrame' is not defined

In [89]:
# Remove items from the seen table after a certain time has passed (for example, 5 days).
# `amount` is given in seconds; note that the 150 used here is 2.5 minutes, not 5 days.
removeExpiredDateFromSeenTable(tablePath='../seenTable.pkl', amount=150)

# Reload the table from disk and display it
seenTableDataFrame = loadDatFarame('../seenTable.pkl')
seenTableDataFrame

Unnamed: 0,user_id,item_id,date
10,u1,i25,706075200.0
11,u1,i4,706075200.0
12,u1,i10,706075200.0
13,u1,i73,706075200.0
14,u1,i28,706075200.0
15,u1,i83,706075200.0
16,u1,i89,706075200.0
17,u1,i82,706075200.0
18,u1,i81,706075200.0
19,u1,i46,706075200.0
