In [1]:
import torch
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# CONVERT THIS NB TO PY

In [38]:
# setting device on GPU if available, else CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cpu


In [2]:
# read MovieLens 1M dataset
ratings_df = pd.read_csv('../data/ratings.dat',sep="::",header=None)

# rename columns
ratings_df = ratings_df[[0,1,2]].rename(columns={0:'user_id',1:'movie_id',2:'rating'})

# split into train, valid and test sets
test_size, valid_size = 0.1, 0.1
test_split_random_state, valid_split_random_state= 42, 0 
train_valid_df, test_df = train_test_split(ratings_df, test_size=test_size, random_state=test_split_random_state)
train_df, valid_df = train_test_split(train_valid_df, test_size=valid_size, random_state=valid_split_random_state)

  ratings_df = pd.read_csv('../data/ratings.dat',sep="::",header=None)


In [41]:
# performance metric root mean squared error
rmse = lambda y_true,y_pred: np.sqrt(mean_squared_error(y_true, y_pred))

Baseline 1: predict all ratings '3'

In [43]:
y_pred_dummy = np.ones((len(test_df.rating.values))) * 3
y_true = test_df.rating.values
rmse(y_true,y_pred_dummy)

1.2593597352393096

Baseline 2: predict all ratings train_df's mean rating

In [44]:
y_pred_dummy = np.ones((len(test_df.rating.values))) * np.mean(train_valid_df.rating)
y_true = test_df.rating.values
rmse(y_true,y_pred_dummy)

1.1164773790818092

Baseline 3: predict user mean rating

Baseline 4: predict item mean rating

In [22]:
class CF_Dataset(torch.utils.data.Dataset):
    def __init__(self, df):
        # pd -> np
        self.df = df.values
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self,idx):

        item = self.df[idx]
        u_id, m_id, r = item[0],item[1],item[2]

        return (u_id,m_id),r


In [68]:
class RegularizedSVD(torch.nn.Module):
  def __init__(self, num_users, num_items, global_mean, embedding_dim):
    super().__init__()
    self.gm = global_mean

    # cfg should handle these num_users, num_movies, embedding_dim
    self.P = torch.nn.Embedding(num_users, embedding_dim)
    self.Q = torch.nn.Embedding(num_items, embedding_dim)
    self.B_U = torch.nn.Embedding(num_users, 1)
    self.B_I = torch.nn.Embedding(num_items, 1)

  def forward(self, x):
    
    # user and item indices start with 1 in dataset, embedding index starts with 0 
    # so embedding of user 1 is stored at self.p(0)
    user_id, item_id = x[0]-1, x[1]-1

    p_u = self.P(user_id)
    q_i = self.Q(item_id)
    b_u = self.B_U(user_id)
    b_i = self.B_I(item_id)

    pred_r_ui = torch.sum(p_u * q_i, axis=1) + torch.squeeze(b_u) + torch.squeeze(b_i) + self.gm

    return pred_r_ui


In [71]:
train_dataset = CF_Dataset(train_df)
valid_dataset = CF_Dataset(valid_df)
test_dataset = CF_Dataset(test_df)

train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=32,shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(dataset=valid_dataset,batch_size=32)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=32)


global_mean = np.mean(train_df.rating.values)
model = RegularizedSVD(num_users=6040, num_items=3952, global_mean=global_mean, embedding_dim=100)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.MSELoss()

lowest_val_loss = float('inf')

# zero the parameters' gradients
optimizer.zero_grad()

epochs = 20
beta = 3e-5  # weight decay

for epoch in range(epochs):  # loop over dataset

    print(f'epoch: {epoch+1} / {epochs}')

    # training
    model.train()
    
    batch_train_loss_array=[]
    batch_train_rmse_array=[]
    batch_train_reg_loss_array=[] # temporary

    for batch_idx, batch_data in enumerate(train_dataloader): # loop over train batches
        
        #batch_data = batch_data.to(device)
        x, y_true = batch_data[0], batch_data[1]
        y_true = y_true.to(torch.float32)

        optimizer.zero_grad()

        # forward pass
        y_pred = model(x)

        # compute loss
        mse_loss = loss_fn(y_true,y_pred)

        reg_loss = 0
        for param in model.parameters():
            reg_loss += torch.norm(param,'fro')**2
        
        loss = mse_loss + beta * reg_loss

        # backpropagation
        loss.backward()

        # gradient descent with optimizer
        optimizer.step()
            
        # save batch metrics
        batch_train_loss_array.append(mse_loss.detach().cpu().item())
        batch_train_rmse_array.append(rmse(y_true, y_pred.detach().cpu()))
        batch_train_reg_loss_array.append(reg_loss.detach().cpu().item()) # temporary

    # validation
    model.eval()
    with torch.no_grad():

        batch_valid_rmse_array=[]
        
        for _, valid_batch_data in enumerate(valid_dataloader): # loop over valid batches
            
            #valid_batch_data = valid_batch_data.to(device)
            valid_x, valid_y_true = valid_batch_data[0], valid_batch_data[1]
            valid_y_true = valid_y_true.to(torch.float32)

            # forward pass
            valid_y_pred = model(valid_x)

            # save batch metrics
            batch_valid_rmse_array.append(rmse(valid_y_true, valid_y_pred.detach().cpu()))


    # display metrics at end of epoch
    epoch_train_loss, epoch_train_rmse = np.mean(batch_train_loss_array), np.mean(batch_train_rmse_array)
    epoch_val_rmse = np.mean(batch_valid_rmse_array)

    print(f'epoch: {epoch+1} / {epochs}, train_loss: {epoch_train_loss:.4f}, train_rmse: {epoch_train_rmse:.4f}, val_rmse: {epoch_val_rmse:.4f}\n')



epoch: 1 / 20


100%|██████████| 25318/25318 [03:29<00:00, 120.57it/s]


epoch: 1 / 20, train_loss: 63.9363, train_rmse: 7.8331, val_rmse: 5.8226

epoch: 2 / 20


100%|██████████| 25318/25318 [03:13<00:00, 130.99it/s]


epoch: 2 / 20, train_loss: 15.6667, train_rmse: 3.8696, val_rmse: 3.6601

epoch: 3 / 20


100%|██████████| 25318/25318 [02:23<00:00, 175.94it/s]


epoch: 3 / 20, train_loss: 4.6128, train_rmse: 2.0988, val_rmse: 2.7163

epoch: 4 / 20


100%|██████████| 25318/25318 [02:12<00:00, 191.40it/s]


epoch: 4 / 20, train_loss: 1.9999, train_rmse: 1.3846, val_rmse: 2.2975

epoch: 5 / 20


100%|██████████| 25318/25318 [02:12<00:00, 191.74it/s]


epoch: 5 / 20, train_loss: 1.2404, train_rmse: 1.0962, val_rmse: 2.0870

epoch: 6 / 20


100%|██████████| 25318/25318 [02:33<00:00, 164.60it/s]


epoch: 6 / 20, train_loss: 0.9778, train_rmse: 0.9767, val_rmse: 1.9670

epoch: 7 / 20


100%|██████████| 25318/25318 [02:25<00:00, 174.22it/s]


epoch: 7 / 20, train_loss: 0.8560, train_rmse: 0.9147, val_rmse: 1.8852

epoch: 8 / 20


100%|██████████| 25318/25318 [02:33<00:00, 164.71it/s]


epoch: 8 / 20, train_loss: 0.7827, train_rmse: 0.8747, val_rmse: 1.8272

epoch: 9 / 20


100%|██████████| 25318/25318 [02:13<00:00, 189.06it/s]


epoch: 9 / 20, train_loss: 0.7260, train_rmse: 0.8422, val_rmse: 1.7819

epoch: 10 / 20


100%|██████████| 25318/25318 [02:07<00:00, 198.57it/s]


epoch: 10 / 20, train_loss: 0.6812, train_rmse: 0.8158, val_rmse: 1.7405

epoch: 11 / 20


100%|██████████| 25318/25318 [02:07<00:00, 199.23it/s]


epoch: 11 / 20, train_loss: 0.6413, train_rmse: 0.7915, val_rmse: 1.7045

epoch: 12 / 20


100%|██████████| 25318/25318 [02:12<00:00, 191.14it/s]


epoch: 12 / 20, train_loss: 0.6066, train_rmse: 0.7695, val_rmse: 1.6694

epoch: 13 / 20


100%|██████████| 25318/25318 [02:53<00:00, 145.84it/s]


epoch: 13 / 20, train_loss: 0.5785, train_rmse: 0.7511, val_rmse: 1.6470

epoch: 14 / 20


100%|██████████| 25318/25318 [02:52<00:00, 146.72it/s]


epoch: 14 / 20, train_loss: 0.5490, train_rmse: 0.7318, val_rmse: 1.6245

epoch: 15 / 20


100%|██████████| 25318/25318 [02:32<00:00, 166.49it/s]


epoch: 15 / 20, train_loss: 0.5264, train_rmse: 0.7164, val_rmse: 1.5981

epoch: 16 / 20


100%|██████████| 25318/25318 [03:04<00:00, 137.08it/s]


epoch: 16 / 20, train_loss: 0.5047, train_rmse: 0.7013, val_rmse: 1.5790

epoch: 17 / 20


 56%|█████▌    | 14235/25318 [02:04<01:36, 114.33it/s]


KeyboardInterrupt: 