In [18]:
import pandas as pd
import numpy as np
from score import score
from rec_utils import split_the_data
import warnings
warnings.filterwarnings('ignore')
from surprise import SVDpp, BaselineOnly
from surprise.model_selection import train_test_split as sur_tts
from surprise import Dataset, Reader, accuracy

import torch
from torch import nn
from torch import optim

In [19]:
user_data = pd.read_csv("../Course_Scraper/assets/augumented_data/augmented_user_rating.csv")

In [20]:
user_mapping = {
    k: x for x, k in enumerate(user_data['user_index'].unique())
}

# so we get a consistent titles
title_mapping = {
    k: x for x, k in enumerate(user_data['title_index'].unique())
}

In [21]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(user_data, reader)
train, test = sur_tts(data, test_size=0.2, shuffle=True)

bsl_options = {
    "n_epochs": 100,
    "method": "sgd",
    "learning_rate": 0.001,
}
algo = BaselineOnly(bsl_options=bsl_options)
algo.fit(train)
predictions = algo.test(test)
accuracy.rmse(predictions, verbose=True)

Estimating biases using sgd...
RMSE: 1.6152


1.615152274194685

In [22]:
user_data['user_index'] = user_data['user_index'].map(user_mapping)
user_data['title_index'] = user_data['title_index'].map(title_mapping)

data, test = split_the_data(user_data)
train, val = split_the_data(data)

X_train = train.drop('rating', axis=1)
X_val = val.drop('rating', axis=1)

y_train = train['rating']
y_val = val['rating']

In [23]:
total_users = len(user_mapping)
total_titles = len(title_mapping)

In [39]:
class ResidualBlock(nn.Module):
    def __init__(self, features):
        super(ResidualBlock, self).__init__()
        self.fc1 = nn.Linear(features, features)
        self.bn1 = nn.BatchNorm1d(features)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(features, features)
        self.bn2 = nn.BatchNorm1d(features)

    def forward(self, x):
        identity = x
        out = self.relu(self.bn1(self.fc1(x)))
        out = self.bn2(self.fc2(out))
        out += identity 
        out = self.relu(out)
        return out

In [65]:
class RecSys(nn.Module):
    def __init__(self, total_users, total_tiltes, user_emb_dim: int = 64, title_emb_dim: int = 64):
        super(RecSys, self).__init__()
        self.usr_emb = nn.Embedding(total_users, user_emb_dim)
        self.title_emb = nn.Embedding(total_titles, title_emb_dim)
        self.fc1 = nn.Linear((user_emb_dim + title_emb_dim), 128)
        self.relu = nn.ReLU()
        self.res_block1 = ResidualBlock(128)
        
        self.fc2 = nn.Linear(128, 64)
        self.res_block2 = ResidualBlock(64)
        self.fc3 = nn.Linear(64, 1)
    
    def forward(self, x):
        user = x[:, 1].long()
        title = x[:, 0].long()

        u = self.usr_emb(user)
        t = self.title_emb(title)

        x = torch.concat([u, t], dim=1)
        x = self.fc1(x)
        x = self.res_block1(x)
        x = self.fc2(x)
        x = self.res_block2(x)
        x = self.fc3(x)
        # x = self.softmax(x)
        return x

In [70]:
model = RecSys(total_users=total_users, total_tiltes=total_titles).to('cuda')
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-2, momentum=0.8)

In [71]:
for epoch in range(1500):
    # convert the data to tensors
    tensor_Xtr = torch.tensor(X_train.to_numpy(), dtype=torch.float).to('cuda')
    tensor_Xva = torch.tensor(X_val.to_numpy(), dtype=torch.float).to('cuda')
    tensor_ytr = torch.tensor(y_train.to_numpy(), dtype=torch.float).to('cuda')
    tensor_yva = torch.tensor(y_val.to_numpy(), dtype=torch.float).to('cuda')

    model.train()
    outputs = model(tensor_Xtr)
    loss = torch.sqrt(criterion(outputs, tensor_ytr)).to('cuda')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    model.eval()

    with torch.no_grad():
        val_output = model(tensor_Xva).to('cuda')
        val_loss = torch.sqrt(criterion(val_output, tensor_yva)).to('cuda')

    if (epoch + 1) % 20 == 0:
        print(f'Epoch {epoch+1} - Train Loss: {loss.item()} - Val Loss {val_loss.item()}')

Epoch 20 - Train Loss: 1.687637209892273 - Val Loss 1.8616093397140503
Epoch 40 - Train Loss: 1.6714032888412476 - Val Loss 1.7135950326919556
Epoch 60 - Train Loss: 1.66849684715271 - Val Loss 1.7195463180541992
Epoch 80 - Train Loss: 1.667236328125 - Val Loss 1.7242830991744995
Epoch 100 - Train Loss: 1.6664929389953613 - Val Loss 1.7258421182632446
Epoch 120 - Train Loss: 1.6659996509552002 - Val Loss 1.7265441417694092
Epoch 140 - Train Loss: 1.6656428575515747 - Val Loss 1.726840853691101
Epoch 160 - Train Loss: 1.6653741598129272 - Val Loss 1.7276394367218018
Epoch 180 - Train Loss: 1.665173053741455 - Val Loss 1.7279800176620483
Epoch 200 - Train Loss: 1.665021538734436 - Val Loss 1.7287442684173584
Epoch 220 - Train Loss: 1.6649082899093628 - Val Loss 1.729594349861145
Epoch 240 - Train Loss: 1.664820909500122 - Val Loss 1.7296056747436523
Epoch 260 - Train Loss: 1.664750099182129 - Val Loss 1.729596495628357
Epoch 280 - Train Loss: 1.6646968126296997 - Val Loss 1.7293902635574