In [1]:
import pandas as pd
import urllib
import tempfile
import shutil
import zipfile
import os

os.chdir('/Users/joesh/neural_colab_filtering/')
archive_url = f'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
wd = os.getcwd()

with urllib.request.urlopen(archive_url) as url:
    with tempfile.NamedTemporaryFile(delete=True) as f:
        shutil.copyfileobj(url, f)
        with zipfile.ZipFile(f.name) as archive:
            archive.extractall(f'{wd}/datasets')


In [2]:
import torch
device = torch.device('cpu') if not torch.backends.mps.is_available() else torch.device('mps')
print(device)

mps


In [3]:
ratings = pd.read_csv('datasets/ml-1m/ratings.dat', sep='::', engine = 'python', 
                        header=None,
                        names=['user_id', 'movie_id', 'rating', 'timestamp'])

ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [4]:
import source.model as model
import source.data as data

import torch.nn as nn

# Create a dataset object

dataset = data.filmDataset( ratings['user_id'].values, ratings['movie_id'].values, 
                            ratings['rating'].values)

mod = model.NCF(ratings.user_id.nunique(), ratings.movie_id.nunique(), dropout_rate=0.2)

In [5]:
# Create a dataloader object

from torch.utils.data import DataLoader
train_set_size = int(0.9 * len(dataset))
test_set_size = len(dataset) - train_set_size

trainset, testset = torch.utils.data.random_split(dataset, [train_set_size, test_set_size])

train_set_size = int(0.8 * len(trainset))
valid_set_size = len(trainset) - train_set_size

train_set, valid_set = torch.utils.data.random_split(trainset, 
                       [train_set_size, valid_set_size])

train_loader = DataLoader(train_set, batch_size=1024, shuffle=True, drop_last= False)
valid_loader = DataLoader(valid_set, batch_size=1024, shuffle=False, drop_last=False)
test_loader = DataLoader(testset, batch_size=1024, shuffle=False, drop_last=False)

# Create a loss function and an optimizer
#loss_module = nn.MSELoss()
#optimizer = torch.optim.Adam(mod.parameters(), lr=0.01)

In [6]:
import json

def _get_config_file(model_path, model_name):
    #Name of the file for storing hyperparameter details
    return os.path.join(model_path, model_name+ ".config")

def _get_model_file(model_path, model_name):
    #Name of the file for storing network parameters
    return os.path.join(model_path, model_name+".tar")

def save_model(model, model_path, model_name):
    config_dict = model.config
    os.makedirs(model_path, exist_ok = True)
    config_file, model_file = _get_config_file(model_path, model_name), _get_model_file(model_path, model_name)
    with open(config_file, 'w') as f:
        json.dump(config_dict, f)
    torch.save(model.state_dict(), model_file)



checkpoint_path = os.path.join(os.getcwd(), "checkpoints")
mod = mod.to(device)
num_params = sum(p.numel() for p in mod.parameters())
print(f'Our model architecture: \n\n {mod} \n')


Our model architecture: 

 NCF(
  (embedding_user): Embedding(6040, 32)
  (embedding_item): Embedding(3706, 32)
  (MLP): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=32, out_features=16, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=16, out_features=8, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=8, out_features=1, bias=True)
    (10): Sigmoid()
  )
  (dropout): Dropout(p=0.2, inplace=False)
) 



In [9]:
from tqdm.notebook import tqdm

def train_model(net, model_name, max_epoch=30, patience= 5, overwrite=False):
  file_exists  = os.path.isfile(_get_model_file(checkpoint_path, model_name))
  if file_exists and not overwrite:
    print("Model already exists. Skipping training")
  else:
    if file_exists:
      print("Overwriting existing model")
    
    optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
    loss_module = nn.MSELoss()
    
    val_score = []
    best_val_epoch = -1

    for epoch in range(max_epoch):
      net.train()
      running_loss = 0.0
      count = 0
      for users, film, ratings in tqdm(train_loader, desc = f"Epoch {epoch+1}", leave=False):
        count += 1
        users , film, ratings = users.to(device), film.to(device), ratings.to(device)
        ratings = ratings.unsqueeze(-1).type(torch.float32)
        optimizer.zero_grad()
        preds = net(users, film)
        loss = loss_module(preds, ratings)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()
      
      running_loss = running_loss/count
      val_loss = test_model(net, valid_loader)
      val_score.append(val_loss)
      print(f'Epoch {epoch+1:2d}: Training loss: {running_loss}, Validation loss {val_loss}')

      if len(val_score) == 1 or val_loss < val_score[best_val_epoch]:
        print("New best. Saving model")
        #save_model(net, checkpoint_path, model_name)
        best_val_epoch = epoch
      elif best_val_epoch <= epoch - patience:
        print(f"Early stopping since model is not improving over last {patience} epochs")
        break
      
def test_model(net, data_loader):
  loss_module = nn.MSELoss()
  net.eval()
  running_loss = 0.0
  count = 0
  for users, films, ratings in data_loader:
    count += 1
    users , films, ratings = users.to(device), films.to(device), ratings.to(device)
    with torch.no_grad():
      ratings = ratings.unsqueeze(-1).type(torch.float32)
      preds = net(users, films)
      loss = loss_module(preds, ratings)
      running_loss += loss.item()
  return running_loss/count




      
    
  



In [10]:
mod_name = "NCF_Recommender"
torch.seed()
train_model(mod, mod_name)


Epoch 1:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  1: Training loss: 0.9076196818866513, Validation loss 0.8596027581529184
New best. Saving model


Epoch 2:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  2: Training loss: 0.9005187434059653, Validation loss 0.8587483167648315
New best. Saving model


Epoch 3:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  3: Training loss: 0.8962043886157599, Validation loss 0.861221738836982


Epoch 4:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  4: Training loss: 0.8924306207759813, Validation loss 0.8566829768771475
New best. Saving model


Epoch 5:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  5: Training loss: 0.8904508234594356, Validation loss 0.8509871712462469
New best. Saving model


Epoch 6:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  6: Training loss: 0.8872685069218278, Validation loss 0.850959128615531
New best. Saving model


Epoch 7:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  7: Training loss: 0.8847615871239792, Validation loss 0.8537093052132563


Epoch 8:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  8: Training loss: 0.882377269085158, Validation loss 0.855521827258847


Epoch 9:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch  9: Training loss: 0.8774598817425695, Validation loss 0.8455433591522954
New best. Saving model


Epoch 10:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 10: Training loss: 0.8754263664172455, Validation loss 0.8417831229215319
New best. Saving model


Epoch 11:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 11: Training loss: 0.8722276351499286, Validation loss 0.8392148461531509
New best. Saving model


Epoch 12:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 12: Training loss: 0.8700794039971449, Validation loss 0.8397403572770682


Epoch 13:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 13: Training loss: 0.8662011109461839, Validation loss 0.8401969495144758


Epoch 14:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 14: Training loss: 0.8653643279421058, Validation loss 0.8386140428483486
New best. Saving model


Epoch 15:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 15: Training loss: 0.8630463647740808, Validation loss 0.852774208242243


Epoch 16:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 16: Training loss: 0.8606322149492123, Validation loss 0.8362143991345709
New best. Saving model


Epoch 17:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 17: Training loss: 0.8581413856114853, Validation loss 0.8440723114392974


Epoch 18:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 18: Training loss: 0.8588676247745752, Validation loss 0.839617901227691


Epoch 19:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 19: Training loss: 0.8582830353724685, Validation loss 0.8356746303086932
New best. Saving model


Epoch 20:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 20: Training loss: 0.857596620155329, Validation loss 0.839794649657878


Epoch 21:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 21: Training loss: 0.8570499113676223, Validation loss 0.8343702764673666
New best. Saving model


Epoch 22:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 22: Training loss: 0.8532618004828691, Validation loss 0.8327173726125197
New best. Saving model


Epoch 23:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 23: Training loss: 0.8512981518764388, Validation loss 0.8331840498880907


Epoch 24:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 24: Training loss: 0.8515854731879451, Validation loss 0.8390824357894334


Epoch 25:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 25: Training loss: 0.8503620515323498, Validation loss 0.8389693501998078


Epoch 26:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 26: Training loss: 0.8504925237291239, Validation loss 0.8336940678683195


Epoch 27:   0%|          | 0/704 [00:00<?, ?it/s]

Epoch 27: Training loss: 0.8490051506087184, Validation loss 0.836385424841534
Early stopping since model is not improving over last 5 epochs
