In [1]:
# Library imports
import pyforest
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from tqdm import tqdm
from pprint import pprint
from time import sleep
import time
import seaborn as sns
from sklearn.metrics import roc_auc_score

from turtle import forward
import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from torch.utils.data import DataLoader, TensorDataset

#### **Load dataset**

In [2]:
dataset = 'movielens'

train_edges = np.load('data/' + dataset + '/train_edges.npy')
test_edges = np.load('data/' + dataset + '/test_edges.npy')

user_list_train = train_edges[:, 0]
user_list_test = test_edges[:, 0]
item_list_train = train_edges[:, 1]
item_list_test = test_edges[:, 1]
rating_list_train = train_edges[:, 2].astype('float32')
rating_list_test = test_edges[:, 2].astype('float32')

n_users = max(user_list_train.max(), user_list_test.max()) + 1 
n_items = max(item_list_train.max(), item_list_test.max()) + 1
n_samples_train = len(rating_list_train)
n_samples_test = len(rating_list_test)

n_users, n_items, n_samples_train, n_samples_test

(943, 1682, 159619, 40381)

#### **Defining collaborative filtering**

In [3]:
class CollaborativeFiltering(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(CollaborativeFiltering, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)

    def forward(self, user, item):
        drop_u = nn.Dropout(p = 0.3)
        drop_i = nn.Dropout(p = 0.3)
        u = self.user_emb(user)
        i = self.item_emb(item)
        u = drop_u(u)
        i = drop_i(i)
        dot = (u * i).sum(1)
        return torch.sigmoid(dot)

class CF1(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(CF1, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)
        self.fc = nn.Linear(n_factors * 3, 1)

    def forward(self, user, item):
        drop_u = nn.Dropout(p = 0.3)
        drop_i = nn.Dropout(p = 0.3)
        u = self.user_emb(user)
        i = self.item_emb(item)
        u = drop_u(u)
        i = drop_i(i)
        dot = (u * i)
        x = torch.concat([u, i, dot], dim = 1)
        # x = torch.sigmoid(x)
        x = self.fc(x)
        return torch.sigmoid(x)

class CF2(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(CF2, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)

    def forward(self, user, item):
        drop_u = nn.Dropout(p = 0.3)
        drop_i = nn.Dropout(p = 0.3)
        u = self.user_emb(user)
        i = self.item_emb(item)
        u = drop_u(u)
        i = drop_i(i)
        dot = (u * i)
        sum = u + i + dot
        return torch.sigmoid(sum.sum(dim = 1))

class CF4(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(CF4, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)
        self.user_bias = nn.Embedding(n_users, 1)
        self.item_bias = nn.Embedding(n_items, 1)

    def forward(self, user, item):
        drop_u = nn.Dropout(p = 0.2)
        drop_i = nn.Dropout(p = 0.2)
        u = self.user_emb(user)
        i = self.item_emb(item)
        u = drop_u(u)
        i = drop_i(i)
        dot = (u * i)
        bias_u = self.user_bias(user).squeeze()
        bias_i = self.item_bias(item).squeeze()
        total = dot.sum(dim = 1) + bias_u + bias_i
        # total = dot.sum(dim = 1)
        return torch.sigmoid(total)

class NCF(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(NCF, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)
        self.fc1 = nn.Linear(n_factors * 2, n_factors)
        self.fc2 = nn.Linear(n_factors, 1)

    def forward(self, user, item):
        tanh = nn.Tanh()
        sigmoid = nn.Sigmoid()
        swish = nn.SiLU()

        u = self.user_emb(user)
        i = self.item_emb(item)
        x = torch.concat([u, i], dim = 1)
        x = swish(x)
        x = self.fc1(x)
        x = swish(x)
        x = self.fc2(x)
        x = sigmoid(x)
        return x

def get_accuracy(y_hat, y):
    y = y.clone().int()
    y_hat = (y_hat.clone() > 0.5).int()
    accuracy = (y == y_hat).sum() / len(y)
    return accuracy.item()

#### **Test collaborative filtering on unseen data**

In [6]:
use_gpu = 6
if use_gpu == -1:
    device = 'cpu'
else:
    device = torch.device('cuda:{}'.format(str(use_gpu)) if torch.cuda.is_available() else 'cpu')

users = torch.tensor(user_list_train, device = device)
items = torch.tensor(item_list_train, device = device)
ratings = torch.tensor(rating_list_train, device = device, requires_grad = True)

users_test = torch.tensor(user_list_test, device = device)
items_test = torch.tensor(item_list_test, device = device)
ratings_test = torch.tensor(rating_list_test, device = device)

# ratings = ratings.reshape((n_samples_train, 1))
# ratings_test = ratings_test.reshape((n_samples_test, 1))

In [42]:
device

device(type='cuda', index=1)

In [5]:
# lr = 1 and T = 1000, Adam giving 77% accuracy on test data
# lr = 1.1 and T = 500, Adam giving 77.2% accuracy on test data
# lr = 1.4 and T = 250, Adam giving 76.8% accuracy on test data
# lr = 1.8 and T = 250, Adam giving 77.2% accuracy on test data
# lr = 1.6 and T = 250, Adam giving 77% accuracy on test data but making meta-gradients infinite
# lr = 500, T = 300, SGD, Dropout = 0.3 gives 75.58% accuracy on test data
# lr = 1000, T = 300, SGD, Dropout = 0.3, seed = 0 gives 76% accuracy on test data
# lr = 1000, T = 300, SGD, Dropout = 0.3, seed = 50 gives 76.18% accuracy on test data

n_factors = 64
T = 300
seed = 50

# for lr in list(range(1, 500, 5)):
for lr in [1000]:

    # model = CF4(n_users, n_items, n_factors)
    model = CollaborativeFiltering(n_users, n_items, n_factors)
    model.to(device)

    torch.manual_seed(seed)
    for layer in model.children():
        layer.reset_parameters()
    # optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    optimizer = torch.optim.SGD(model.parameters(), lr = lr)
    loss_fn = torch.nn.BCELoss(reduction = 'mean')

    model.train()
    for _ in range(T):
        y_hat = model(users, items)
        loss = loss_fn(y_hat, ratings)
        optimizer.zero_grad()
        loss.backward(retain_graph = True)
        # loss.backward()
        optimizer.step()

    model.eval()
    y_hat = model(users, items)
    print('lr: ', lr)
    print('Training accuracy: ', get_accuracy(y_hat, ratings))
    y_pred = y_hat.detach().clone().to('cpu').numpy()
    y = ratings.detach().clone().to('cpu').numpy()
    print('Training AUC: ', roc_auc_score(y, y_pred))
    print()
    loss = loss_fn(y_hat, ratings)
    meta_grad = torch.autograd.grad(loss, ratings)[0]
    print('max meta grad: ', meta_grad.max())
    print('min meta grad: ', meta_grad.min())
    print(meta_grad)
    print()
    y_hat = model(users_test, items_test)
    print('Testing accuracy: ', get_accuracy(y_hat, ratings_test))
    y_pred = y_hat.detach().clone().to('cpu').numpy()
    y = ratings_test.detach().clone().to('cpu').numpy()
    print('Testing AUC: ', roc_auc_score(y, y_pred))
    print()


lr:  1000
Training accuracy:  0.9091523885726929
Training AUC:  0.972411446309926

max meta grad:  tensor(0.0003, device='cuda:6')
min meta grad:  tensor(-inf, device='cuda:6')
tensor([ 3.1337e-06, -1.8015e-05, -4.1825e-05,  ..., -2.6589e-06,
         5.7238e-05,  1.4330e-06], device='cuda:6')

Testing accuracy:  0.7618434429168701
Training AUC:  0.8365536504587607



In [14]:
# Double factors 

# lr = 1 and T = 1000, Adam giving 77% accuracy on test data
# lr = 1.1 and T = 500, Adam giving 77.2% accuracy on test data
# lr = 1.4 and T = 250, Adam giving 76.8% accuracy on test data
# lr = 1.8 and T = 250, Adam giving 77.2% accuracy on test data
# lr = 1.6 and T = 250, Adam giving 77% accuracy on test data but making meta-gradients infinite
# lr = 500, T = 300, SGD, Dropout = 0.3 gives 75.58% accuracy on test data
# lr = 1000, T = 300, SGD, Dropout = 0.3, seed = 0 gives 76% accuracy on test data
# lr = 1000, T = 300, SGD, Dropout = 0.3, seed = 50 gives 76.18% accuracy on test data

n_factors = 128
T = 300
seed = 0

# for lr in list(range(1, 500, 5)):
for lr in [1000]:

    # model = CF4(n_users, n_items, n_factors)
    model = CollaborativeFiltering(n_users, n_items, n_factors)
    model.to(device)

    torch.manual_seed(seed)
    for layer in model.children():
        layer.reset_parameters()
    # optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    optimizer = torch.optim.SGD(model.parameters(), lr = lr)
    loss_fn = torch.nn.BCELoss(reduction = 'mean')

    model.train()
    for _ in range(T):
        y_hat = model(users, items)
        loss = loss_fn(y_hat, ratings)
        optimizer.zero_grad()
        loss.backward(retain_graph = True)
        # loss.backward()
        optimizer.step()

    model.eval()
    y_hat = model(users, items)
    print('lr: ', lr)
    print('Training accuracy: ', get_accuracy(y_hat, ratings))
    y_pred = y_hat.detach().clone().to('cpu').numpy()
    y = ratings.detach().clone().to('cpu').numpy()
    print('Training AUC: ', roc_auc_score(y, y_pred))
    print()
    loss = loss_fn(y_hat, ratings)
    meta_grad = torch.autograd.grad(loss, ratings)[0]
    print('max meta grad: ', meta_grad.max())
    print('min meta grad: ', meta_grad.min())
    print(meta_grad)
    print()
    y_hat = model(users_test, items_test)
    print('Testing accuracy: ', get_accuracy(y_hat, ratings_test))
    y_pred = y_hat.detach().clone().to('cpu').numpy()
    y = ratings_test.detach().clone().to('cpu').numpy()
    print('Testing AUC: ', roc_auc_score(y, y_pred))
    print()


lr:  1000
Training accuracy:  0.9477066993713379
Training AUC:  0.9897268408765496

max meta grad:  tensor(0.0004, device='cuda:6')
min meta grad:  tensor(-inf, device='cuda:6')
tensor([ 1.5794e-05, -3.6377e-05, -5.2107e-05,  ..., -1.5311e-07,
         4.8483e-05,  7.0595e-05], device='cuda:6')

Testing accuracy:  0.7502290606498718
Testing AUC:  0.8177104337373045



In [13]:
n_factors = 64
print(n_factors)
print(n_factors // 2)

64
32


In [17]:
# Half factors 

# lr = 1 and T = 1000, Adam giving 77% accuracy on test data
# lr = 1.1 and T = 500, Adam giving 77.2% accuracy on test data
# lr = 1.4 and T = 250, Adam giving 76.8% accuracy on test data
# lr = 1.8 and T = 250, Adam giving 77.2% accuracy on test data
# lr = 1.6 and T = 250, Adam giving 77% accuracy on test data but making meta-gradients infinite
# lr = 500, T = 300, SGD, Dropout = 0.3 gives 75.58% accuracy on test data
# lr = 1000, T = 300, SGD, Dropout = 0.3, seed = 0 gives 76% accuracy on test data
# lr = 1000, T = 300, SGD, Dropout = 0.3, seed = 50 gives 76.18% accuracy on test data

n_factors = 32
T = 300
seed = 0

# for lr in list(range(1, 500, 5)):
for lr in [1000]:

    # model = CF4(n_users, n_items, n_factors)
    model = CollaborativeFiltering(n_users, n_items, n_factors)
    model.to(device)

    torch.manual_seed(seed)
    for layer in model.children():
        layer.reset_parameters()
    # optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    optimizer = torch.optim.SGD(model.parameters(), lr = lr)
    loss_fn = torch.nn.BCELoss(reduction = 'mean')

    model.train()
    for _ in range(T):
        y_hat = model(users, items)
        loss = loss_fn(y_hat, ratings)
        optimizer.zero_grad()
        loss.backward(retain_graph = True)
        # loss.backward()
        optimizer.step()

    model.eval()
    y_hat = model(users, items)
    print('lr: ', lr)
    print('Training accuracy: ', get_accuracy(y_hat, ratings))
    y_pred = y_hat.detach().clone().to('cpu').numpy()
    y = ratings.detach().clone().to('cpu').numpy()
    print('Training AUC: ', roc_auc_score(y, y_pred))
    print()
    loss = loss_fn(y_hat, ratings)
    meta_grad = torch.autograd.grad(loss, ratings)[0]
    print('max meta grad: ', meta_grad.max())
    print('min meta grad: ', meta_grad.min())
    print(meta_grad)
    print()
    y_hat = model(users_test, items_test)
    print('Testing accuracy: ', get_accuracy(y_hat, ratings_test))
    y_pred = y_hat.detach().clone().to('cpu').numpy()
    y = ratings_test.detach().clone().to('cpu').numpy()
    print('Testing AUC: ', roc_auc_score(y, y_pred))
    print()


lr:  1000
Training accuracy:  0.8688000440597534
Training AUC:  0.9457508614463884

max meta grad:  tensor(0.0002, device='cuda:6')
min meta grad:  tensor(-inf, device='cuda:6')
tensor([-5.5593e-07, -1.4034e-05, -4.0434e-05,  ..., -3.7553e-06,
         1.1104e-05,  1.5798e-05], device='cuda:6')

Testing accuracy:  0.7748198509216309
Testing AUC:  0.8519723553800107



In [8]:
model

CollaborativeFiltering(
  (user_emb): Embedding(943, 128)
  (item_emb): Embedding(1682, 128)
)

#### **Exploring AUC metric for evaluating CF model**

In [35]:
# So far, using AUROC instead of accuracy seems promising

#### **AUC for CF2**

In [22]:
y_hat = model(users_test, items_test)
y_pred = y_hat.detach().clone().to('cpu').numpy()
y = ratings_test.detach().clone().to('cpu').numpy()
print(y_pred)
print(y)
roc_auc_score(y, y_pred)

[1.0000000e+00 6.9827717e-03 2.9729408e-06 ... 5.6153840e-01 9.9999976e-01
 9.9999976e-01]
[1. 1. 1. ... 1. 1. 1.]


0.8441157242039155

In [23]:
y_hat = model(users, items)
y_pred = y_hat.detach().clone().to('cpu').numpy()
y = ratings.detach().clone().to('cpu').numpy()
print(y_pred)
print(y)
roc_auc_score(y, y_pred)

[1.1213294e-06 9.9984813e-01 1.0000000e+00 ... 1.9173597e-01 3.6412087e-09
 8.8454279e-12]
[0. 1. 1. ... 1. 0. 0.]


0.9632291366539395

#### **AUC for original CF**

In [25]:
y_hat = model(users_test, items_test)
y_pred = y_hat.detach().clone().to('cpu').numpy()
y = ratings_test.detach().clone().to('cpu').numpy()
print(y_pred)
print(y)
roc_auc_score(y, y_pred)

[0.90360653 0.7383278  0.7618074  ... 0.9854893  0.85157144 0.9285307 ]
[1. 1. 1. ... 1. 1. 1.]


0.8366609955350572

In [26]:
y_hat = model(users, items)
y_pred = y_hat.detach().clone().to('cpu').numpy()
y = ratings.detach().clone().to('cpu').numpy()
print(y_pred)
print(y)
roc_auc_score(y, y_pred)

[2.9852560e-01 9.3463689e-01 9.8911220e-01 ... 8.1000441e-01 4.7213207e-06
 5.9991311e-02]
[0. 1. 1. ... 1. 0. 0.]


0.9726644336621912

#### **AUC for CF4**

In [71]:
y_hat = model(users_test, items_test)
y_pred = y_hat.detach().clone().to('cpu').numpy()
y = ratings_test.detach().clone().to('cpu').numpy()
print(y_pred)
print(y)
roc_auc_score(y, y_pred)

[0.98724735 0.96072394 0.62975526 ... 0.90863013 0.9980566  0.99219877]
[1. 1. 1. ... 1. 1. 1.]


0.8266789252244737

In [72]:
y_hat = model(users, items)
y_pred = y_hat.detach().clone().to('cpu').numpy()
y = ratings.detach().clone().to('cpu').numpy()
print(y_pred)
print(y)
roc_auc_score(y, y_pred)

[0.58443964 0.94244504 0.65263903 ... 0.98390967 0.01553843 0.00244195]
[0. 1. 1. ... 1. 0. 0.]


0.9771650815917055