In [1]:
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.parametrizations import orthogonal
from torch.nn import functional as F
# from IPython.display import clear_output

from dataprep import transform_indices, full_preproccessing
from utils import *


def set_random_seed(seed):
    """Seed every random number generator this notebook draws from.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU generator plus all CUDA devices), so that noise sampling and weight
    initialisation are repeatable between runs.

    Args:
        seed (int): Seed value applied to all generators.
    """
    import random  # local import: the notebook's import cell does not bring in `random`

    random.seed(seed)
    np.random.seed(seed)
    # torch.manual_seed covers the CPU generator (torch.randint, nn.init.*);
    # seeding only the CUDA generator leaves those draws unseeded.
    torch.manual_seed(seed)
    # Explicitly seed every GPU; this is a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
# %%
# The target ("answer") is a binary user-item matrix: 1 where an interaction exists, rating values ignored.
class MVDataset(Dataset):
    """Per-user dataset yielding a sparse (items x ratings) input and a binary item target.

    ``self.tensor`` is a sparse (n_users, n_items, n_ratings) tensor holding a 1 at
    every (user, item, feedback - 1) triple found in ``data``; ``self.matrix`` is the
    sparse (n_users, n_items) tensor holding a 1 at every (user, item) pair.

    Args:
        data: Table indexable by column name whose columns expose ``.values``
            (e.g. a pandas DataFrame).
        data_description (dict): Must provide the column names under ``'users'``,
            ``'items'`` and ``'feedback'`` and the sizes ``'n_users'``, ``'n_items'``
            and ``'n_ratings'``.
        augment (bool): When True, ``__getitem__`` blanks a random subset of item
            rows in the input before returning it.
    """

    def __init__(self, data, data_description, augment=False):
        useridx = data[data_description['users']].values
        itemidx = data[data_description['items']].values
        feedbackidx = data[data_description['feedback']].values
        values = np.ones(len(itemidx), dtype=np.float32)
        self.n_items = data_description['n_items']
        self.n_ratings = data_description['n_ratings']

        # Feedback is shifted by one to serve as a zero-based index along the last axis
        # (assumes feedback values are 1..n_ratings — TODO confirm against preprocessing).
        self.tensor = torch.sparse_coo_tensor(np.array([useridx, itemidx, feedbackidx-1]), torch.tensor(values),
                                            size=torch.Size((data_description["n_users"], data_description["n_items"], data_description['n_ratings'])))
        self.matrix = torch.sparse_coo_tensor(np.array([useridx, itemidx]), torch.tensor(values),
                                      size=torch.Size((data_description["n_users"], data_description["n_items"])), dtype=torch.float32)

        self.augment = augment

    def __len__(self):
        """Return the number of users (first dimension of the interaction tensor)."""
        return self.tensor.shape[0]

    def __getitem__(self, idx):
        """Return ``(input, target)`` for user ``idx`` as sparse tensors.

        The input has shape (n_items, n_ratings) and the target shape (n_items,).
        With ``augment`` enabled, a random number of randomly chosen item rows
        (fewer than 10% of the items) are zeroed in the input; the target is
        never altered.
        """
        if not self.augment:
            return self.tensor[idx], self.matrix[idx]

        n_items = self.tensor.shape[1]
        # The exclusive upper bound rounds down to 0 for fewer than 10 items, and
        # np.random.randint(0, 0) raises; fall back to "no noise" in that case.
        max_noise = int(0.1 * n_items)
        num_noise = np.random.randint(0, max_noise) if max_noise > 0 else 0
        idxs = torch.randint(0, n_items, size=(num_noise,))

        noised_input = self.tensor[idx].detach().clone().to_dense()
        noised_input[idxs] = 0  # blank every rating of the selected items

        # Convert back to sparse directly. Rebuilding the index grid by hand is easy
        # to get wrong: a mismatched meshgrid/flatten order moves values to other items.
        return noised_input.to_sparse(), self.matrix[idx]

In [2]:
# Seed the random number generators before anything else runs.
set_random_seed(42)

# Read the CSV and rename the id columns (userId -> userid, movieId -> movieid) in one step.
data = pd.read_csv('ml-1m.csv').rename(columns={'userId': 'userid', 'movieId': 'movieid'})

# %%
(training, testset_valid, holdout_valid,
 testset, holdout, data_description, data_index) = full_preproccessing(data)
# %%
# Use the first GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device:', device)

There are 6040 users
Filtered 93 invalid observations.
device: cuda:0


In [3]:
def triu_init(m):
    """Zero everything above the main diagonal of an ``nn.Linear`` weight, in place.

    Despite the name, the part that is kept is the *lower* triangle (``tril``).
    Modules that are not ``nn.Linear`` are left untouched.

    Args:
        m (nn.Module): Module to process.
    """
    if not isinstance(m, nn.Linear):
        return
    # Done under no_grad so the in-place edit of the parameter is not tracked by autograd.
    with torch.no_grad():
        m.weight.tril_()

def get_zero_grad_hook(mask):
    """Build a gradient hook that multiplies an incoming gradient by ``mask``.

    Args:
        mask (torch.Tensor): Multiplier applied elementwise to the gradient;
            entries equal to 0 zero the corresponding gradient entries.

    Returns:
        Callable taking ``grad`` and returning ``grad * mask``, suitable for
        ``Tensor.register_hook``.
    """
    # The closure captures `mask`, so the same tensor is reused on every backward pass.
    return lambda grad: grad * mask

# %%
class varindtriangularAE(nn.Module):
    """Autoencoder over an (items x ratings) input with triangular rating-mixing layers.

    All linear layers get Xavier-uniform weights. ``L`` and ``LTinv`` are square
    (n_ratings x n_ratings) layers that are additionally passed through
    ``triu_init`` after initialisation. ``vec`` is the only layer with a bias.

    Args:
        n_items (int): Size of the item axis (second-to-last input dimension).
        n_ratings (int): Size of the rating axis (last input dimension).
        hid1 (int): Hidden size along the item axis.
        hid2 (int): Hidden size along the rating axis.
    """

    def __init__(self, n_items, n_ratings, hid1, hid2):
        super().__init__()

        def xavier_linear(in_features, out_features, bias=False):
            # Create the layer, then replace its default weight init with Xavier-uniform.
            layer = nn.Linear(in_features, out_features, bias=bias)
            torch.nn.init.xavier_uniform_(layer.weight)
            return layer

        # Creation order is kept fixed: it determines both the random draws used
        # for initialisation and the order of the registered parameters.
        self.V = xavier_linear(n_items, hid1)
        self.VT = xavier_linear(hid1, n_items)
        self.W = xavier_linear(n_ratings, hid2)
        self.WT = xavier_linear(hid2, n_ratings)

        self.L = xavier_linear(n_ratings, n_ratings)
        triu_init(self.L)
        self.LTinv = xavier_linear(n_ratings, n_ratings)
        triu_init(self.LTinv)

        self.vec = xavier_linear(n_items, 1, bias=True)

        self.relu = nn.ReLU()

    def forward(self, input):
        """Map an input of shape (..., n_items, n_ratings) to scores of shape (..., n_items).

        The result is returned without a final activation.
        """
        # --- encode: mix and compress the rating axis, then compress the item axis ---
        ratings_mixed = self.relu(self.L(input))
        ratings_hidden = self.relu(self.W(ratings_mixed))
        # V works on the item axis, so move it to the last position and back again.
        items_hidden = self.V(ratings_hidden.transpose(-1, -2)).transpose(-1, -2)
        code = self.relu(items_hidden)

        # --- decode: expand the rating axis, then the item axis ---
        decoded = self.relu(self.WT(code))
        decoded = self.relu(self.LTinv(decoded))
        decoded = self.VT(decoded.transpose(-1, -2)).transpose(-1, -2)

        # --- collapse the rating axis with weights computed from the input itself ---
        rating_layer = self.vec(input.transpose(-1, -2))  # (..., n_ratings, 1)
        return torch.matmul(decoded, rating_layer).squeeze(-1)

# %%
def varindtriangular_model(h, data_description, device):
    """Assemble the triangular autoencoder together with its training objects.

    Args:
        h (tuple): ``(hid1, hid2)`` hidden sizes forwarded to ``varindtriangularAE``.
        data_description (dict): Must contain ``'n_items'`` and ``'n_ratings'``.
        device (torch.device): Device the model and the loss are moved to.

    Returns:
        tuple: ``(model, criterion, optimizer, scheduler)`` — the model, a
        ``BCEWithLogitsLoss``, an ``Adam`` optimizer with default settings and an
        ``ExponentialLR`` scheduler with ``gamma=0.95``.
    """
    hid_items, hid_ratings = h
    model = varindtriangularAE(
        data_description['n_items'],
        data_description['n_ratings'],
        hid_items,
        hid_ratings,
    ).to(device)
    loss_fn = nn.BCEWithLogitsLoss().to(device)
    adam = optim.Adam(model.parameters())
    lr_decay = optim.lr_scheduler.ExponentialLR(adam, gamma=0.95)

    # Mask the gradients of both square rating layers with a lower-triangular
    # pattern of ones, so entries above the diagonal receive zero gradient.
    for layer in (model.L, model.LTinv):
        lower_mask = torch.tril(torch.ones_like(layer.weight))
        layer.weight.register_hook(get_zero_grad_hook(lower_mask))

    return model, loss_fn, adam, lr_decay

In [4]:
# NOTE(review): this label is a fixed string; no alpha value is set in this cell.
print('Alpha: 3')
h = (512, 5)  # (hid1, hid2), unpacked in that order by varindtriangular_model
# NOTE(review): the recorded run of this cell ended with a CUDA out-of-memory error
# on a 4 GiB GPU; a smaller batch_size may be needed on comparable hardware.
training_testing_pipeline_augment(
    training,
    testset_valid,
    holdout_valid,
    testset,
    holdout,
    data_description,
    varindtriangular_model,
    h,
    device,
    MVDataset,
    batch_size=256,
    tensor_model=True,
)

Alpha: 3
Hidden sizes: (512, 5)


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.66 GiB (GPU 0; 4.00 GiB total capacity; 137.03 MiB already allocated; 701.60 MiB free; 1.81 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF