In [None]:
import numpy as np
import time
from scipy.sparse import coo_matrix, csr_matrix, vstack

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# Fail fast when no CUDA device is visible: later cells call .cuda() on the
# data tensors and on the model.
assert(torch.cuda.is_available())
# Allow TF32 kernels for matmul and cuDNN ops (lower precision in exchange
# for speed on GPUs that support TF32).
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

In [None]:
# Load the pre-extracted train / validation / test features and labels.
# Later cells slice the X* objects like scipy CSR matrices and iterate the
# y* objects as per-sample lists of genre names.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# files from a trusted source. Recent PyTorch releases may also require
# weights_only=False for non-tensor payloads such as these -- confirm.
Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest = torch.load('build/extracted.pt')

In [None]:
class Str2idx():
    """Callable lookup table mapping each key to its position in ``myset``."""

    def __init__(self, myset) -> None:
        # A key that occurs more than once keeps the index of its last
        # occurrence, because later entries overwrite earlier ones.
        self.idxDict = {key: position for position, key in enumerate(myset)}

    def __call__(self, query):
        """Return the index stored for ``query``, or -1 if it is unknown."""
        return self.idxDict.get(query, -1)

# Genres used as classification targets; a genre's position in this list is
# its class index (genre2idx returns -1 for any name not listed here).
validGenres = ['Indie', 'Action', 'Casual', 'Adventure', 'Strategy', 'Simulation', 'RPG', 'Sports', 'Massively Multiplayer', 'Racing']
genre2idx = Str2idx(validGenres)

def LabelToIdx(data):
    """Replace every genre name in ``data`` with its integer class index.

    ``data`` is modified in place: each entry is replaced by a new list of
    indices produced by ``genre2idx`` (unknown names become -1). The same
    container is returned for convenience.
    """
    for row in range(len(data)):
        data[row] = list(map(genre2idx, data[row]))
    return data

# Convert the genre names of all three splits to class indices (in place).
# NOTE: this cell is not idempotent -- running it a second time looks up the
# already-converted integers in genre2idx and turns every label into -1.
ytrain, yvalid, ytest = LabelToIdx(ytrain), LabelToIdx(yvalid), LabelToIdx(ytest)

In [None]:
def ToMultiHot(data, numValues=10, bs=4096, device="cuda"):
    """Encode per-sample class indices as batched multi-hot float tensors.

    Args:
        data: Sequence whose i-th entry is an iterable of integer class
            indices for sample i.
        numValues: Number of classes, i.e. the width of each multi-hot row.
        bs: Maximum number of rows per returned tensor.
        device: Device the batches are moved to (defaults to "cuda").

    Returns:
        A list of float32 tensors, each with at most ``bs`` rows and
        ``numValues`` columns, covering ``data`` in order.

    Negative indices are skipped. They are the "unknown label" sentinel
    (-1); used directly as a numpy index they would wrap around and wrongly
    mark the last class. Indices >= ``numValues`` still raise IndexError.
    """
    mat = np.zeros((len(data), numValues), dtype=np.float32)
    for row in range(len(data)):
        known = [label for label in data[row] if label >= 0]
        if known:
            mat[row, known] = 1

    return [
        torch.from_numpy(mat[start:start + bs]).to(device)
        for start in range(0, len(data), bs)
    ]

# Replace the index lists with lists of multi-hot target batches on the GPU
# (default batch size 4096, matching the feature batches built below).
ytrain, yvalid, ytest = ToMultiHot(ytrain), ToMultiHot(yvalid), ToMultiHot(ytest)

In [None]:
def CsrToTorchSparse(csr, bs=4096, device="cuda"):
    """Split a scipy sparse matrix into row batches of torch sparse CSR tensors.

    Args:
        csr: scipy sparse matrix; it is converted with ``tocsr()`` first,
            which returns the same object when it is already CSR.
        bs: Maximum number of rows per returned tensor.
        device: Device the batches are moved to (defaults to "cuda").

    Returns:
        A list of float32 sparse CSR tensors covering the rows of ``csr``
        in order, each with at most ``bs`` rows.
    """
    csr = csr.tocsr()
    res = []
    for start in range(0, csr.shape[0], bs):
        chunk = csr[start:start + bs]
        tensor = torch.sparse_csr_tensor(
            chunk.indptr, chunk.indices, chunk.data, chunk.shape,
            dtype=torch.float32,
        )
        res.append(tensor.to(device))
    return res

# Replace each feature matrix with a list of sparse CSR batches on the GPU.
# Both conversions use the same default batch size, so feature batch i is
# expected to line up with target batch i.
Xtrain = CsrToTorchSparse(Xtrain)
Xvalid = CsrToTorchSparse(Xvalid)
Xtest = CsrToTorchSparse(Xtest)

In [None]:
class SparseDataset(Dataset):
    """Pair two index-aligned containers of features and targets.

    Item ``i`` is the tuple ``(data[i], targets[i])`` and the dataset length
    is ``len(data)``. No check is made that both containers have the same
    length.
    """

    def __init__(self, data, targets):
        super().__init__()
        # Features (sparse CSR, per the original annotation).
        self.data = data
        # Targets (dense, per the original annotation).
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index:int):
        features = self.data[index]
        labels = self.targets[index]
        return features, labels
    

In [None]:
# Each dataset item is one pre-built (features, targets) batch, so iterating
# a dataset directly yields whole batches and no DataLoader is involved.
train_set = SparseDataset(Xtrain, ytrain)
valid_set = SparseDataset(Xvalid, yvalid)
test_set = SparseDataset(Xtest, ytest)

In [None]:
class MyMLP(nn.Module):
    """Three-layer fully connected network with a sigmoid output.

    Args:
        in_features: Size of each input row (default 20000).
        hidden1: Width of the first hidden layer (default 4096).
        hidden2: Width of the second hidden layer (default 512).
        num_classes: Number of independent output units (default 10).

    ``forward`` returns per-class probabilities in (0, 1), not logits, so it
    should be paired with a probability-based loss such as ``nn.BCELoss``.
    """

    def __init__(self, in_features=20000, hidden1=4096, hidden2=512, num_classes=10):
        super().__init__()
        # Every stage stays wrapped in nn.Sequential so the state_dict keys
        # (fc1.0.weight, fc2.0.weight, fc3.0.weight, ...) remain unchanged.
        self.fc1 = nn.Sequential(nn.Linear(in_features, hidden1), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(hidden1, hidden2), nn.ReLU())
        self.fc3 = nn.Sequential(nn.Linear(hidden2, num_classes))

    def forward(self, x):
        """Map a batch of inputs to per-class probabilities."""
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return torch.sigmoid(x)

In [None]:
# MyMLP.forward already applies a sigmoid, so the loss must consume
# probabilities. BCEWithLogitsLoss would apply a second sigmoid to the
# model output and compute the wrong loss and gradients.
# BCELoss has no parameters or buffers by default, so no .cuda() call is needed.
criterion = nn.BCELoss()
bs = 4096

def Jaccard(pred, target, thresh=0.5):
    """Return the mean per-sample precision and recall of thresholded predictions.

    Despite its name, this function returns a ``(precision, recall)`` pair,
    not a Jaccard index.

    Args:
        pred: 2-D tensor of scores; entries strictly above ``thresh`` count
            as predicted labels.
        target: 2-D multi-hot tensor with the same shape as ``pred``.
        thresh: Decision threshold applied to ``pred``.

    Returns:
        ``(precision, recall)`` as Python floats, averaged over rows.

    A row with no predicted labels contributes precision 0, and a row with
    no true labels contributes recall 0. Without this guard the 0/0 division
    would yield NaN and poison the mean for the whole batch.
    """
    hard = (pred > thresh).float()
    hits = torch.sum(hard * target, dim=1)
    n_pred = torch.sum(hard, dim=1)
    n_true = torch.sum(target, dim=1)

    raw_precision = hits / n_pred
    raw_recall = hits / n_true
    # torch.where only selects values, so the NaNs produced by 0/0 above are
    # discarded rather than propagated.
    row_precision = torch.where(n_pred > 0, raw_precision, torch.zeros_like(raw_precision))
    row_recall = torch.where(n_true > 0, raw_recall, torch.zeros_like(raw_recall))

    precision = torch.mean(row_precision).cpu().item()
    recall = torch.mean(row_recall).cpu().item()
    return precision, recall

@torch.no_grad()
def val(model):
    """Evaluate ``model`` on ``valid_set`` and return loss and metrics.

    The model is switched to eval mode and left there; the caller is
    responsible for calling ``model.train()`` again.

    Returns:
        ``(val_loss, precision, recall, f1)``. Loss, precision and recall
        are unweighted means over the batches of ``valid_set``, so a smaller
        final batch counts as much as a full one. ``f1`` is the harmonic
        mean of the averaged precision and recall.
    """
    model.eval()
    val_loss = 0.0
    precisions = []
    recalls = []
    for data, target in valid_set:
        pred = model(data)
        # detach() is the supported replacement for the legacy .data access.
        val_loss += criterion(pred, target).detach()
        p, r = Jaccard(pred, target)
        precisions.append(p)
        recalls.append(r)
    val_loss = val_loss / len(valid_set)
    precision = np.mean(precisions)
    recall = np.mean(recalls)
    # 2pr/(p+r) equals 2/(1/p + 1/r) but needs no division by p or r, so a
    # zero precision or recall no longer divides by zero. NaN inputs still
    # propagate to f1 because NaN != 0.
    denom = precision + recall
    f1 = 2 * precision * recall / denom if denom != 0 else 0.0
    return val_loss, precision, recall, f1

def train(model, optimizer, epoch, lr_scheduler=None, grad_clip=None):
    """Train for one epoch over ``train_set``, validate, and print a summary.

    Args:
        model: Network to optimise; put into train mode before the epoch.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the printed summary.
        lr_scheduler: Optional scheduler, stepped once after the epoch.
        grad_clip: Optional bound; when truthy, each gradient element is
            clamped to [-grad_clip, grad_clip] before the optimizer step.

    The reported time covers both the training pass and the validation pass.
    """
    start_time = time.time()
    model.train()
    optimizer.zero_grad()

    train_loss = 0
    for batch_inputs, batch_labels in train_set:
        loss = criterion(model(batch_inputs), batch_labels)
        train_loss += loss.data

        optimizer.zero_grad()
        loss.backward()
        if grad_clip:
            nn.utils.clip_grad_value_(model.parameters(), grad_clip)
        optimizer.step()

    if lr_scheduler:
        lr_scheduler.step()

    # Mean of the per-batch losses (batches are weighted equally).
    train_loss = train_loss / len(train_set)
    val_loss, precision, recall, f1 = val(model)
    end_time = time.time()
    print(f"Epoch: {epoch} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val prec: {precision:.4f} | Val recall: {recall:.4f} | Val F1: {f1:.4f} | time: {end_time - start_time:.1f}")

In [None]:
# No bias decay 
def create_param_groups(model):
    """Split ``model``'s parameters into a decayed and a non-decayed group.

    Weights of Linear, Conv2d and BatchNorm1d/2d layers go to the first
    group, which inherits the optimizer's default weight decay. Their biases
    go to the second group, whose weight decay is fixed at 0.

    Raises:
        AssertionError: if the model owns parameters that were not collected,
            e.g. because it contains a layer type not handled here.
    """
    weighted_layers = (nn.Linear, nn.Conv2d)
    norm_layers = (nn.BatchNorm2d, nn.BatchNorm1d)

    decayed, undecayed = [], []
    for layer in model.modules():
        if not isinstance(layer, weighted_layers + norm_layers):
            continue
        # Linear/Conv2d weights are always collected; BatchNorm weights only
        # when they exist (they are None with affine=False).
        if isinstance(layer, weighted_layers) or layer.weight is not None:
            decayed.append(layer.weight)
        if layer.bias is not None:
            undecayed.append(layer.bias)

    total = len(list(model.parameters()))
    assert total == len(decayed) + len(undecayed)
    return [{"params": decayed}, {"params": undecayed, "weight_decay": 0.0}]

In [None]:
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR

# Number of training epochs; also used as the cosine schedule length.
EP = 300
model = MyMLP().cuda()
# weight_decay here is the optimizer-wide default: it applies to the weight
# group, while the bias group from create_param_groups overrides it with 0.
optimizer = optim.SGD(
    create_param_groups(model),
    weight_decay=1e-2,
    lr = 1e-2
)
# train() steps the scheduler once per epoch, so the cosine schedule spans
# the whole run of EP epochs.
lr_scheduler = CosineAnnealingLR(optimizer, EP)

for ep in range(EP):
    # grad_clip=0.1 clamps every gradient element to [-0.1, 0.1].
    train(model, optimizer, ep, lr_scheduler, grad_clip=0.1)