In [6]:
import pandas as pd
import urllib
import tempfile
import shutil
import zipfile
import os

os.chdir('/Users/joesh/neural_colab_filtering/')
archive_url = f'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
wd = os.getcwd()

with urllib.request.urlopen(archive_url) as url:
    with tempfile.NamedTemporaryFile(delete=True) as f:
        shutil.copyfileobj(url, f)
        with zipfile.ZipFile(f.name) as archive:
            archive.extractall(f'{wd}/datasets')


In [7]:
import torch
device = torch.device('cpu') if not torch.backends.mps.is_available() else torch.device('mps')
print(device)

mps


In [8]:
ratings = pd.read_csv('datasets/ml-1m/ratings.dat', sep='::', engine = 'python', 
                        header=None,
                        names=['user_id', 'movie_id', 'rating', 'timestamp'])

ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [9]:
import source.model as model
import source.data as data

import torch.nn as nn

# Create a dataset object

dataset = data.filmDataset( ratings['user_id'].values, ratings['movie_id'].values, 
                            ratings['rating'].values)

mod = model.NCF(ratings.user_id.nunique(), ratings.movie_id.nunique())

In [10]:
# Create a dataloader object

from torch.utils.data import DataLoader
train_set_size = int(0.9 * len(dataset))
test_set_size = len(dataset) - train_set_size

trainset, testset = torch.utils.data.random_split(dataset, [train_set_size, test_set_size])

train_set_size = int(0.8 * len(trainset))
valid_set_size = len(trainset) - train_set_size

train_set, valid_set = torch.utils.data.random_split(trainset, 
                       [train_set_size, valid_set_size])

train_loader = DataLoader(train_set, batch_size=256, shuffle=True, drop_last= False)
valid_loader = DataLoader(valid_set, batch_size=256, shuffle=False, drop_last=False)
test_loader = DataLoader(testset, batch_size=256, shuffle=False, drop_last=False)

# Create a loss function and an optimizer
#loss_module = nn.MSELoss()
#optimizer = torch.optim.Adam(mod.parameters(), lr=0.01)

In [11]:
import json

def _get_config_file(model_path, model_name):
    #Name of the file for storing hyperparameter details
    return os.path.join(model_path, model_name+ ".config")

def _get_model_file(model_path, model_name):
    #Name of the file for storing network parameters
    return os.path.join(model_path, model_name+".tar")

def save_model(model, model_path, model_name):
    config_dict = model.config
    os.makedirs(model_path, exist_ok = True)
    config_file, model_file = _get_config_file(model_path, model_name), _get_model_file(model_path, model_name)
    with open(config_file, 'w') as f:
        json.dump(config_dict, f)
    torch.save(model.state_dict(), model_file)



checkpoint_path = os.path.join(os.getcwd(), "checkpoints")
mod = mod.to(device)
num_params = sum(p.numel() for p in mod.parameters())
print(f'Our model architecture: \n\n {mod} \n')


Our model architecture: 

 NCF(
  (embedding_user): Embedding(6040, 32)
  (embedding_item): Embedding(3706, 32)
  (MLP): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=8, bias=True)
    (5): ReLU()
    (6): Linear(in_features=8, out_features=1, bias=True)
    (7): Sigmoid()
  )
) 



In [12]:
from tqdm.notebook import tqdm

def train_model(net, model_name, max_epoch=30, patience= 5, overwrite=False):
  file_exists  = os.path.isfile(_get_model_file(checkpoint_path, model_name))
  if file_exists and not overwrite:
    print("Model already exists. Skipping training")
  else:
    if file_exists:
      print("Overwriting existing model")
    
    optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
    loss_module = nn.MSELoss(reduction='sum')
    
    val_score = []
    best_val_epoch = -1

    for epoch in range(max_epoch):
      net.train()
      running_loss = 0.0
      for users, film, ratings in tqdm(train_set, desc = f"Epoch {epoch+1}", leave=False):
        users , film, ratings = users.to(device), film.to(device), ratings.to(device)
        optimizer.zero_grad()
        preds = net(users, film)
        loss = loss_module(preds, ratings)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()
    
      val_loss = test_model(net, valid_loader)
      val_score.append(val_loss)
      print(f'Epoch {epoch+1:2d}: Training loss: {loss}, Validation loss {val_loss}')

      if len(val_score) == 1 or val_loss < val_score[best_val_epoch]:
        print("New best. Saving model")
        save_model(net, checkpoint_path, model_name)
        best_val_epoch = epoch
      elif best_val_epoch <= epoch - patience:
        print(f"Early stopping since model is not improving over last {patience} epochs")
        break
      




def test_model(net, data_loader):
  loss_module = nn.MSELoss(reduction='sum')
  net.eval()
  running_loss = 0.0
  for users, films, ratings in data_loader:
    users , film, ratings = users.to(device), film.to(device), ratings.to(device)
    with torch.no_grad():
      preds = net(users, film)
      loss = loss_module(preds, ratings)
      running_loss += loss.item()
  return running_loss




      
    
  



In [None]:
mod_name = "NCF_Recommender"
torch.seed()
train_model(mod, mod_name)


Epoch 1:   0%|          | 0/720150 [00:00<?, ?it/s]