In [25]:
# Library imports
import pyforest
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from tqdm import tqdm

from turtle import forward
import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from torch.utils.data import DataLoader, TensorDataset

# from cf import CollaborativeFiltering
from cf_bce import CollaborativeFiltering

#### **Hyperparams and loading data**

In [2]:
train_edges = np.load('train_edges.npy')
train_edges.shape
users = torch.LongTensor(train_edges[:, 0])
items = torch.LongTensor(train_edges[:, 1])
ratings = torch.FloatTensor(train_edges[:, 2])

# some hyperparams
epochs = 100
lr = 0.01
n_users = 943 
n_items = 1682

#### **MSE Collaborative filtering**

In [16]:
model = CollaborativeFiltering(n_users, n_items, n_factors = 32)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

model.train()
losses = []

for _ in tqdm(range(epochs)):
    y_hat = model(users, items)
    loss = F.mse_loss(y_hat, ratings)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

100%|██████████| 100/100 [00:08<00:00, 11.57it/s]


In [None]:
losses

In [None]:
# Predicting value using trained model
user = 229
item = 818

user = torch.LongTensor([user])
item = torch.LongTensor([item])

model.eval()
y_hat = model(user, item)
print(y_hat)

#### **Understanding how F.nll_loss and nn.BCELoss work**


In [33]:
one = torch.tensor([1])
onef = torch.tensor([1.])
zero = torch.tensor([0])
# zerof = torch.tensor([0.01])
zerof = torch.tensor([0.])
# print(F.nll_loss(zerof, zero))
# print(F.nll_loss(zerof, one))
# print(F.nll_loss(onef, zero))
# print(F.nll_loss(onef, one))
loss = nn.BCELoss()
print(loss(zerof, zerof))
print(loss(zerof, onef))
print(loss(onef, zerof))
print(loss(onef, onef))
# note: Binary cross entropy BCELoss is  better suited for this problem
# loss(y_hat, y) -> position of arguments y_hat and y
# gradients are clamped to [-100, 100] to get finite losses
# BCELoss supports only float y_hat's and y's

tensor(0.)
tensor(100.)
tensor(100.)
tensor(0.)


In [24]:
# if reduction='sum' is not used, the loss will be averaged
loss_fn = nn.BCELoss(reduction = 'sum')
x1 = torch.tensor([1., 1, 0, 1, 1], requires_grad=True)
x2 = torch.tensor([1., 1, 1, 0, 1])
# loss(x1, x2).backward()
loss = loss_fn(x1, x2)
loss.backward()
loss.item()

200.0

#### **Sigmoid NCF Model**

In [7]:
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

losses = []
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')
epochs = 200
n_samples = len(ratings)

for _ in tqdm(range(epochs)):
    y_hat = model(users, items)
    # loss = loss_fn(y_hat, ratings.reshape((-1, 1)))
    loss = loss_fn(y_hat, ratings)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

100%|██████████| 200/200 [00:21<00:00,  9.34it/s]


In [4]:
def get_accuracy(y_hat, y):
    y = y.clone().int()
    y_hat = (y_hat.clone() > 0.5).int()
    accuracy = (y == y_hat).sum() / len(y)
    return accuracy.item()

In [8]:
y_hat = model(users, items)
get_accuracy(y_hat, ratings)

0.9840800166130066

In [9]:
losses
# I can't get the BCE loss less than approx 0.463
# thinking to try different n_samples and weight_initialisations
# Seems it's not a good value for BCE loss 
# reference: https://medium.com/swlh/cross-entropy-loss-in-pytorch-c010faf97bab
# now getting loss = 0.088 (~98.5% acc) using cf_bce.CollaborativeFiltering 

[4.054061412811279,
 3.94698166847229,
 3.840118169784546,
 3.742799758911133,
 3.638658046722412,
 3.540802478790283,
 3.448302745819092,
 3.351581573486328,
 3.263144016265869,
 3.1771671772003174,
 3.094047784805298,
 3.0049450397491455,
 2.9228687286376953,
 2.8485872745513916,
 2.7708237171173096,
 2.6924474239349365,
 2.6185224056243896,
 2.545653820037842,
 2.471665382385254,
 2.4020462036132812,
 2.3409500122070312,
 2.27826189994812,
 2.2160863876342773,
 2.15813946723938,
 2.0973310470581055,
 2.0411341190338135,
 1.9861838817596436,
 1.9358195066452026,
 1.885769009590149,
 1.834368109703064,
 1.7865874767303467,
 1.7361736297607422,
 1.689346194267273,
 1.6427218914031982,
 1.598846197128296,
 1.5547336339950562,
 1.5128867626190186,
 1.469881534576416,
 1.4300044775009155,
 1.3910809755325317,
 1.3510043621063232,
 1.3140119314193726,
 1.2771203517913818,
 1.2412101030349731,
 1.2049938440322876,
 1.1701903343200684,
 1.135968565940857,
 1.1019337177276611,
 1.070609211921

In [28]:
# Predicting value using trained model
user = 229
item = 818

user = torch.LongTensor([user])
item = torch.LongTensor([item])

model.eval()
y_hat = model(user, item)
print(y_hat)

tensor([0.0130], grad_fn=<SigmoidBackward0>)


In [74]:
for layer in model.children():
    print(layer)

Embedding(943, 64)
Embedding(1682, 64)
Linear(in_features=128, out_features=1, bias=True)


#### **Use mini-batch gradient descent (different ways to do that)**

In [20]:
train_data = torch.tensor(train_edges)
train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
minibatch = next(iter(train_loader))
print(minibatch)
print(minibatch[:, :2])
print(minibatch[:, 2].)

tensor([[ 710, 1346,    0],
        [ 654, 1606,    1],
        [ 428,   41,    1],
        [ 270,  222,    0],
        [ 502, 1017,    0],
        [ 379,    6,    1],
        [ 458,  171,    1],
        [ 822, 1395,    0]], dtype=torch.int32)
tensor([[ 710, 1346],
        [ 654, 1606],
        [ 428,   41],
        [ 270,  222],
        [ 502, 1017],
        [ 379,    6],
        [ 458,  171],
        [ 822, 1395]], dtype=torch.int32)
tensor([0, 1, 1, 0, 0, 1, 1, 0], dtype=torch.int32)


In [32]:
X_tensor = torch.tensor(train_edges[:, :2])
y_tensor = torch.tensor(train_edges[:, 2])
train_dataset = TensorDataset(X_tensor, y_tensor)
train_loader = DataLoader(dataset = train_dataset, batch_size = 8)
X_batch, y_batch = next(iter(train_loader))
print(X_batch)
print(y_batch)

tensor([[541, 731],
        [845, 174],
        [229, 818],
        [822, 502],
        [166, 654],
        [313, 716],
        [278, 860],
        [404, 925]], dtype=torch.int32)
tensor([1, 1, 0, 1, 1, 1, 0, 0], dtype=torch.int32)


In [36]:
train_dataset = TensorDataset(users, items, ratings)
train_loader = DataLoader(dataset = train_dataset, batch_size = 8)
user_batch, item_batch, rating_batch = next(iter(train_loader))
print(user_batch)
print(item_batch)
print(rating_batch)

tensor([541, 845, 229, 822, 166, 313, 278, 404])
tensor([731, 174, 818, 502, 654, 716, 860, 925])
tensor([1., 1., 0., 1., 1., 1., 0., 0.])


#### **Collaborative filtering with minibatch training**

In [40]:
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

train_dataset = TensorDataset(users, items, ratings)
train_loader = DataLoader(dataset = train_dataset, batch_size = 2048, shuffle = True)

losses = []
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')
epochs = 20
n_samples = len(ratings)

for _ in tqdm(range(epochs)):
    for user_batch, item_batch, rating_batch in train_loader:
        y_hat = model(user_batch, item_batch)
        loss = loss_fn(y_hat, rating_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    losses.append(loss.item())

100%|██████████| 20/20 [01:28<00:00,  4.42s/it]


References for minibatch code:
- https://towardsdatascience.com/understanding-pytorch-with-an-example-a-step-by-step-tutorial-81fc5f8c4e8e
- https://pytorch.org/docs/stable/data.html

Useful PyTorch tutorial links:
- https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html

In [41]:
losses

[3.184617519378662,
 1.9821864366531372,
 1.3081116676330566,
 0.8630186915397644,
 0.5753305554389954,
 0.4868320822715759,
 0.37049880623817444,
 0.3189575970172882,
 0.28166326880455017,
 0.25819700956344604,
 0.2314123809337616,
 0.208090141415596,
 0.197652205824852,
 0.24252861738204956,
 0.13589927554130554,
 0.14347687363624573,
 0.11966618150472641,
 0.11535950750112534,
 0.10997869074344635,
 0.09858120232820511]