# AutoRec cs3639 Recommendation Systems course IDC

### General explanations (TODO: to be added here)

In [1]:
import numpy as np
import pandas as pd
import sklearn
import torch
from torch import nn

In [2]:
# Select the compute device: the GPU when torch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


## Datasets

In this project, we will use 2 datasets:
* **movielens**, which can be downloaded using `utils.datasets_download.py` or straight from [here](http://files.grouplens.org/datasets/movielens/).
* **netflixprize**, which can be downloaded from this [semi-parsed version from kaggle](https://www.kaggle.com/netflix-inc/netflix-prize-data) or from this [raw version](https://archive.org/download/nf_prize_dataset.tar)

**NOTE**: for the notebook to run properly, save each dataset in its own subfolder of the `data` folder: `movielens` for the movielens dataset and `netflix` for the netflixprize dataset,
i.e. `data/movielens` and `data/netflix` respectively.

In [8]:
from src.data_prep import movielens_load
# Load the (train, test) pair returned by movielens_load(1). The argument is presumably
# the index of the MovieLens split to load — TODO confirm against src.data_prep.
train, test = movielens_load(1)
print(train.shape)
# Last expression of the cell: renders `train` as the cell output (the recorded output
# shows user_id, item_id, rating and timestamp columns).
train

(80000, 4)


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712
...,...,...,...,...
79995,943,1067,2,875501756
79996,943,1074,4,888640250
79997,943,1188,3,888640250
79998,943,1228,3,888640275


In [9]:
from src.matrixfactorization import MatrixFactorization
# The largest user and item ids found in the training frame are used as the model sizes.
# NOTE(review): this assumes ids are 1-based and that `test` contains no id larger than
# the training maxima — TODO confirm.
num_users = train.user_id.max()
num_items = train.item_id.max()
print(num_users, num_items)
# Build the model from the two sizes and move it to the device selected earlier.
model = MatrixFactorization(num_users, num_items).to(device)


943 1682


In [10]:
from src.matrixfactorization import mf_train

# %%time  (disabled timing magic; a cell magic must be the first line of a cell to take effect)
# Run mf_train on the training frame with epochs=50. `lr` and `reg` are presumably the
# learning rate and the regularization strength — TODO confirm in mf_train.
# NOTE(review): the recorded output shows "Train RMSE" fluctuating between roughly 3.8
# and 5.3 with no downward trend — worth checking what mf_train reports and whether
# these hyperparameters let the model converge.
mf_train(train, model, epochs=50, lr=0.001, reg=0.001, device=device)

Train RMSE: 4.052670478820801
Train RMSE: 4.261032581329346
Train RMSE: 4.839971542358398
Train RMSE: 4.608992576599121
Train RMSE: 4.391449451446533
Train RMSE: 5.3044562339782715
Train RMSE: 4.7662739753723145
Train RMSE: 4.5720906257629395
Train RMSE: 5.228926658630371
Train RMSE: 5.1238861083984375
Train RMSE: 5.314643383026123
Train RMSE: 4.721149921417236
Train RMSE: 4.686456680297852
Train RMSE: 5.0384840965271
Train RMSE: 4.379261016845703
Train RMSE: 3.9843738079071045
Train RMSE: 4.977007865905762
Train RMSE: 5.163356781005859
Train RMSE: 5.105048656463623
Train RMSE: 4.804464340209961
Train RMSE: 3.7557411193847656
Train RMSE: 4.2988786697387695
Train RMSE: 4.456363201141357
Train RMSE: 4.942333698272705
Train RMSE: 4.586345672607422
Train RMSE: 4.976132869720459
Train RMSE: 4.540055274963379
Train RMSE: 4.780568599700928
Train RMSE: 4.013672828674316
Train RMSE: 4.594879627227783
Train RMSE: 4.7758588790893555
Train RMSE: 5.328638553619385
Train RMSE: 4.657693386077881
Trai

In [22]:
# helpful links:
# https://d2l.ai/chapter_recommender-systems/autorec.html
# https://github.com/gtshs2/Autorec
# https://github.com/ImKeTT/Recommend_algorithms_Librec2Python/blob/master/AutoRec_torch/src/model.py

In [1]:
from src.data_prep import movielens_create_ratings
# NOTE(review): this rebinds `train` and `test`, which an earlier cell bound to the
# output of movielens_load. A later cell unpacks `train.shape` into exactly two values,
# so `train` is 2-D here; presumably it is a user-by-item ratings matrix — TODO confirm.
train, test = movielens_create_ratings(1)

In [2]:
from src.autorec.dataset import I_AutoRecDataSet
from src.autorec.model import AutoRec
from src.autorec.training import AutoRecTrainer

In [3]:
# Unpack the two dimensions of `train`. Presumably rows are users and columns are items,
# as the names suggest — TODO confirm against movielens_create_ratings.
# `num_items` is not used by any cell visible in this notebook.
num_users, num_items = train.shape
# The input width is set to the number of users, which presumably matches the item-based
# dataset (I_AutoRecDataSet) imported above — TODO confirm.
# NOTE(review): unlike the MatrixFactorization cell, the model is not moved to a device
# here — confirm whether AutoRecTrainer handles device placement.
model = AutoRec(num_hidden=512, num_features=num_users)

In [4]:
# Build the trainer from the train/test ratings and the AutoRec model, then run training.
# The recorded output reports average train and val losses for epochs 1-10; the val loss
# is presumably computed on `test` — TODO confirm in AutoRecTrainer.
autorec_trainer = AutoRecTrainer(train, test, model)
autorec_trainer.train_model()

EPOCH 1: Avg losses: train: 1.939, val: 1.250
EPOCH 2: Avg losses: train: 1.096, val: 1.056
EPOCH 3: Avg losses: train: 1.015, val: 0.988
EPOCH 4: Avg losses: train: 0.976, val: 0.966
EPOCH 5: Avg losses: train: 0.954, val: 0.922
EPOCH 6: Avg losses: train: 0.915, val: 0.943
EPOCH 7: Avg losses: train: 0.915, val: 0.917
EPOCH 8: Avg losses: train: 0.915, val: 0.900
EPOCH 9: Avg losses: train: 0.887, val: 0.905
EPOCH 10: Avg losses: train: 0.882, val: 0.897
