# AutoRec cs3639 Recommendation Systems course IDC

### here will be general explanations

In [1]:
import numpy as np
import pandas as pd
import sklearn
import torch
from torch import nn

In [2]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


## datasets

In this project, we will use 2 datasets:
* **movielens**, which can be downloaded using `utils.datasets_download.py` or straight from [here](http://files.grouplens.org/datasets/movielens/).
* **netflixprize**, which can be downloaded from this [semi-parsed version from kaggle](https://www.kaggle.com/netflix-inc/netflix-prize-data) or from this [raw version](https://archive.org/download/nf_prize_dataset.tar)

**NOTE**: for the notebook to run properly, save each dataset in its own subfolder of the `data` folder: `movielens` for the movielens dataset and `netflix` for the netflixprize dataset,
i.e. the `data/movielens` and `data/netflix` folders respectively.

In [3]:
from src.data_prep import movielens_load
# Load the ratings as a (train, test) pair. The argument 1 presumably selects
# which predefined split to load — TODO confirm against src.data_prep.
train, test = movielens_load(1)
# Quick size check of the training table: (rows, columns).
print(train.shape)
# Bare last expression so the notebook renders the frame as rich output.
train

(80000, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712
...,...,...,...,...
79995,943,1067,2,875501756
79996,943,1074,4,888640250
79997,943,1188,3,888640250
79998,943,1228,3,888640275


In [4]:
from src.matrixfactorization import MatrixFactorization

# Size the model from the largest user / item id found in the training ratings.
# NOTE(review): this treats the max id as the entity count, which only holds if
# ids are contiguous; whether the model expects 0- or 1-based ids is not visible
# here — confirm against src.matrixfactorization.
num_users = train['user_id'].max()
num_items = train['item_id'].max()
print(num_users, num_items)

# Build the model and move its parameters to the selected device.
model = MatrixFactorization(num_users, num_items).to(device)


943 1682


In [6]:
from src.matrixfactorization import mf_train

# Timing is currently disabled: `%%time` is a cell magic and only takes effect
# when it is the very first line of the cell.
# %%time
# Train the matrix-factorization model on the training ratings for 50 epochs.
# `lr` and `reg` are presumably the learning rate and the regularization
# strength — TODO confirm against mf_train.
# NOTE(review): the logged "Train RMSE" values fluctuate between roughly 3.8 and
# 5.4 with no clear downward trend, so the optimisation settings (or the
# training loop itself) deserve a second look.
mf_train(train, model, epochs=50, lr=0.001, reg=0.001, device=device)

Train RMSE: 4.983299255371094
Train RMSE: 4.673593997955322
Train RMSE: 4.547260284423828
Train RMSE: 5.233304500579834
Train RMSE: 5.417215347290039
Train RMSE: 4.63135290145874
Train RMSE: 4.188024520874023
Train RMSE: 5.046353816986084
Train RMSE: 4.576107025146484
Train RMSE: 4.368079662322998
Train RMSE: 4.723515510559082
Train RMSE: 4.608023643493652
Train RMSE: 4.782299995422363
Train RMSE: 4.926836967468262
Train RMSE: 3.7756874561309814
Train RMSE: 4.398947238922119
Train RMSE: 4.828915119171143
Train RMSE: 4.2370991706848145
Train RMSE: 4.414370059967041
Train RMSE: 4.423279285430908
Train RMSE: 4.208510398864746
Train RMSE: 3.782543420791626
Train RMSE: 4.290326118469238
Train RMSE: 3.865056276321411
Train RMSE: 4.2685675621032715
Train RMSE: 4.576760292053223
Train RMSE: 4.474152565002441
Train RMSE: 4.142910957336426
Train RMSE: 4.61855936050415
Train RMSE: 4.335089683532715
Train RMSE: 4.174910068511963
Train RMSE: 4.336365699768066
Train RMSE: 4.534503936767578
Train RMS

In [22]:
# helpful links:
# https://d2l.ai/chapter_recommender-systems/autorec.html
# https://github.com/gtshs2/Autorec
# https://github.com/ImKeTT/Recommend_algorithms_Librec2Python/blob/master/AutoRec_torch/src/model.py

In [3]:
from src.data_prep import movielens_prep
# Rebinds `train` / `test`: judging by the rendered output, `train` is now a
# user-by-item rating matrix (rows indexed by user_id, one column per item_id,
# 0.0 where no rating is shown) rather than the earlier long-format ratings table.
# NOTE(review): cells that read `train.user_id` / `train.item_id` will presumably
# no longer work after this cell has run — confirm and consider distinct names.
# The argument 1 presumably selects the same predefined split — TODO confirm.
train, test = movielens_prep(1)
# Bare last expression so the notebook renders the matrix as rich output.
train

item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,0.0,4.0,1.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
940,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941,5.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
942,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
from src.autorec import AutoRec

# The input width of the model is the number of columns of the training frame.
num_items = train.shape[1]

# Rebinds `model` (it replaces any model created by an earlier cell) and moves
# the new AutoRec instance to the selected device.
model = AutoRec(num_features=num_items, num_hidden=500).to(device)

In [12]:
# Display the current `train` frame again; this cell has no other effect.
train

item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,0.0,4.0,1.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
940,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941,5.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
942,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
from src.autorec import autorec_train
# Train the AutoRec model on the training matrix for 100 epochs and keep the
# return value in `p` (presumably the model's predictions as a torch tensor —
# TODO confirm against autorec_train).
# NOTE(review): the logged train RMSE stays within about 0.6645-0.6691 over the
# visible epochs, i.e. it barely improves; worth checking the loss / optimiser.
p = autorec_train(train, model, epochs=100, device=device)

epoch: 1 train RMSE: 0.6659520268440247
epoch: 2 train RMSE: 0.666005551815033
epoch: 3 train RMSE: 0.6658753156661987
epoch: 4 train RMSE: 0.6657629609107971
epoch: 5 train RMSE: 0.6655679941177368
epoch: 6 train RMSE: 0.6654093265533447
epoch: 7 train RMSE: 0.665400505065918
epoch: 8 train RMSE: 0.6654183268547058
epoch: 9 train RMSE: 0.665755033493042
epoch: 10 train RMSE: 0.6654524803161621
epoch: 11 train RMSE: 0.6656726002693176
epoch: 12 train RMSE: 0.6680296659469604
epoch: 13 train RMSE: 0.6691038608551025
epoch: 14 train RMSE: 0.6665021777153015
epoch: 15 train RMSE: 0.666279137134552
epoch: 16 train RMSE: 0.6661916375160217
epoch: 17 train RMSE: 0.6649579405784607
epoch: 18 train RMSE: 0.6648148894309998
epoch: 19 train RMSE: 0.6646931767463684
epoch: 20 train RMSE: 0.6646556258201599
epoch: 21 train RMSE: 0.6646238565444946
epoch: 22 train RMSE: 0.6646037697792053
epoch: 23 train RMSE: 0.664584755897522
epoch: 24 train RMSE: 0.6645642518997192
epoch: 25 train RMSE: 0.664530

In [30]:
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler((0, 5))
# scaler.fit_transform(torch.Tensor.cpu(p).detach().numpy())