In [18]:
# Data manipulation
import numpy as np
import pandas as pd
pd.options.display.max_rows = 100

# Modeling
from matrix_factorization import BaselineModel, KernelMF, train_update_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Other
import os
import random
import sys

# Reload imported code 
%load_ext autoreload
%autoreload 2

# Print all output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
    
rand_seed = 2
np.random.seed(rand_seed)
random.seed(rand_seed)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load data

In [22]:
# Read the raw review data.
review_data = pd.read_csv('User2reviwe.csv')

# Drop the first column (presumably a row index written out with the CSV -- confirm).
# `columns=` already selects the axis, so no `axis` argument is needed, and
# reassigning instead of `inplace=True` keeps the step explicit.
review_data = review_data.drop(columns=review_data.columns[0])

# Features are the (user, item) pairs; the target is the rating.
X = review_data[['user_id', 'item_id']]
y = review_data['rating']

# Hold out 20% of the rows for testing. The seed is passed explicitly so the
# split is identical every time this cell runs, regardless of how much of the
# global NumPy random state earlier cells have consumed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=rand_seed
)

# Prepare data for online learning
(
    X_train_initial,
    y_train_initial,
    X_train_update,
    y_train_update,
    X_test_update,
    y_test_update,
) = train_update_test_split(review_data, frac_new_users=0.2)

review_data.head(10)

Unnamed: 0,user_id,item_id,rating
0,u1,i1,0.3
1,u1,i2,0.9
2,u1,i3,1.0
3,u1,i4,0.0
4,u1,i5,0.0
5,u1,i6,0.0
6,u1,i7,0.85
7,u1,i8,0.0
8,u1,i9,0.9
9,u1,i10,0.0


# Matrix Factorization

In [23]:
%%time 
matrix_fact = KernelMF(n_epochs = 20, n_factors = 100, verbose = 1, lr = 0.001, reg = 0.005)
matrix_fact.fit(X_train, y_train)

pred = matrix_fact.predict(X_test)
rmse = mean_squared_error(y_test, pred, squared = False)

print(f'\nTest RMSE: {rmse:.4f}')

Epoch  1 / 20  -  train_rmse: 0.3414598538474251
Epoch  2 / 20  -  train_rmse: 0.34012042871832043
Epoch  3 / 20  -  train_rmse: 0.33887511570612694
Epoch  4 / 20  -  train_rmse: 0.3377104978873496
Epoch  5 / 20  -  train_rmse: 0.33661460754544825
Epoch  6 / 20  -  train_rmse: 0.3355794440018485
Epoch  7 / 20  -  train_rmse: 0.33459487375699243
Epoch  8 / 20  -  train_rmse: 0.3336574013984142
Epoch  9 / 20  -  train_rmse: 0.33275782234005286
Epoch  10 / 20  -  train_rmse: 0.3318920142434759
Epoch  11 / 20  -  train_rmse: 0.3310566642902394
Epoch  12 / 20  -  train_rmse: 0.33024591639094475
Epoch  13 / 20  -  train_rmse: 0.32945757055851616
Epoch  14 / 20  -  train_rmse: 0.32868938684994004
Epoch  15 / 20  -  train_rmse: 0.327938884026495
Epoch  16 / 20  -  train_rmse: 0.3272038062834892
Epoch  17 / 20  -  train_rmse: 0.32648334308780536
Epoch  18 / 20  -  train_rmse: 0.3257751648551539
Epoch  19 / 20  -  train_rmse: 0.32507856848869027
Epoch  20 / 20  -  train_rmse: 0.32439178767238847

## Getting a list of recommendations for a user


In [40]:
# User to produce recommendations for.
user = 'u1'
# Items this user appears with in the training split, passed to the model as
# the already-known items.
items_known = X_train.loc[X_train['user_id'] == user, 'item_id']
matrix_fact.recommend(user=user, items_known=items_known)

Unnamed: 0,user_id,item_id,rating_pred
9,u1,i71,0.418047
6,u1,i79,0.377523
14,u1,i76,0.375771
4,u1,i95,0.363637
8,u1,i80,0.359852
11,u1,i14,0.339951
3,u1,i96,0.282685
17,u1,i25,0.265827
15,u1,i45,0.255752
2,u1,i15,0.220069
