In [1]:
from models import *
from datasets import H5Dataset, SparseDataset
from single_model_trainer import Trainer

In [2]:
import pandas as pd

# Load a 1000-row slice of each table. In the cells visible here only the column
# labels of these frames are used, never the values.
multi_df = pd.read_hdf('data/train_multi_targets.h5', start=1000, stop=2000)
cite_df = pd.read_hdf('data/train_cite_inputs.h5', start=1000, stop=2000)

multi_keys = list(multi_df.keys())
# Keep only the part of each CITE column label that precedes the first underscore,
# so the labels can be compared against the multi column labels.
cite_keys = [key.split('_')[0] for key in cite_df.keys()]

# Set views of the two label lists: membership checks below are O(1) instead of a
# linear scan of a list for every label (which made the matching quadratic).
multi_key_set = set(multi_keys)
cite_key_set = set(cite_keys)

# inputs: positions of labels that occur in both tables
multi_idxs_shared = [i for i, s in enumerate(multi_keys) if s in cite_key_set]
cite_idxs_shared = [i for i, s in enumerate(cite_keys) if s in multi_key_set]
# targets: positions of labels that occur in only one of the tables
multi_idxs_not_shared = [i for i, s in enumerate(multi_keys) if s not in cite_key_set]
cite_idxs_not_shared = [i for i, s in enumerate(cite_keys) if s not in multi_key_set]

In [11]:
# HYPERPARAMETERS

# Later cells branch on METHOD == 'multi' and treat every other value as the CITE case.
METHOD = 'cite'  # change this to decide which model to train

# --- loss ---
alpha = 1e-2  # weight of the sparse_loss term added to the MSE in PredLoss.loss

# --- data ---
batch_size = 512  # handed to get_dataloader for both the train and val datasets

# --- training schedule ---
# These four are forwarded positionally to trainer.train(); their exact semantics are
# defined by Trainer, which is not part of this notebook.
num_epochs = 20
eval_every = 2
patience = 1000
num_tries = 2

# --- optimisation / regularisation ---
# The first four are forwarded to the Trainer constructor; dropout goes to LinearCoder.
initial_lr = 1e-2
lr_decay_period = 10
lr_decay_gamma = 0.7
weight_decay = 1e-4
dropout = 1e-2

In [12]:
# Choose the column-index lists that belong to the selected METHOD: the shared
# columns are the model inputs, the non-shared columns are the targets.
if METHOD == 'multi':
    input_idxs, target_idxs = multi_idxs_shared, multi_idxs_not_shared
else:
    input_idxs, target_idxs = cite_idxs_shared, cite_idxs_not_shared
# Convert to tensors so they can be used to index columns of a batch tensor.
input_idxs = torch.tensor(input_idxs)
target_idxs = torch.tensor(target_idxs)

# One dataset object per split, all for the same METHOD.
train_dataset, val_dataset, test_dataset = (
    H5Dataset(split, METHOD) for split in ('train', 'val', 'test')
)

# `num` is later passed to PredLoss, which forwards it to sparse_loss as the number
# of smallest outputs to penalise.
# NOTE(review): where the values 3500 / 2564 come from is not visible here - confirm.
if METHOD == 'multi':
    num = 3500
else:
    num = 2564

In [13]:
# Model that maps the shared (input) columns to the non-shared (target) columns.
model = LinearCoder([len(input_idxs), len(target_idxs)], dropout=dropout, input_2d=False)
# Device that PredLoss moves each batch to. Fall back to the CPU when CUDA is not
# available, so that the `.to(device)` calls do not fail on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dataloader = train_dataset.get_dataloader(batch_size)
val_dataloader = val_dataset.get_dataloader(batch_size)

In [14]:
def sparse_loss(x, num):
    """
    Mean L1 norm of the `num` smallest values along the last dimension of `x`.

    `x` is typically a `(N, L)` tensor: for every row the `num` smallest entries
    (smallest by signed value, i.e. the first entries of an ascending sort) are
    selected, their L1 norm is taken, and the per-row norms are averaged.
    Inputs with more (or fewer) leading dimensions are handled the same way; the
    selection always happens over the last dimension and the mean runs over all
    remaining dimensions.

    If `num >= L`, every entry is selected and the result is the mean L1 norm of `x`.

    Args:
        x: tensor with at least one dimension; the last dimension has length `L`.
        num: number of smallest entries to keep per row.

    Returns:
        Scalar tensor holding the averaged L1 norm.
    """
    # Never ask for more entries than the last dimension actually holds.
    n = min(num, x.shape[-1])
    # Ascending sort puts the smallest (most negative) values first.
    vals, _ = x.sort(dim=-1)
    # Slice along the same (last) dimension that was sorted.
    vals = vals[..., :n]
    loss = torch.linalg.norm(vals, ord=1, dim=-1)
    return loss.mean()

class PredLoss():
    """
    Training loss and validation error for predicting one set of columns from another.

    Both callables take a batch `(x, day, y)`. Depending on `method`, either `y`
    ('multi') or `x` (anything else) is used as the source tensor; its `input_idxs`
    columns are fed to the model and its `target_idxs` columns are the regression
    targets. `day` is accepted but not used.

    The batch is moved to the notebook-level `device` before the forward pass.
    """

    def __init__(self, model, num, alpha, input_idxs, target_idxs, method):
        """
        Args:
            model: callable mapping the input columns to predictions for the target columns.
            num: number of smallest outputs per row penalised by `sparse_loss`.
            alpha: weight of the `sparse_loss` term in `loss`.
            input_idxs: column indices used as model input.
            target_idxs: column indices used as regression target.
            method: 'multi' selects `y` as the source tensor, any other value selects `x`.
        """
        self.model = model
        self.num = num
        self.alpha = alpha
        self.input_idxs = input_idxs
        self.target_idxs = target_idxs
        self.method = method

    def _predict(self, x, y):
        """Move the source tensor to `device`, split it into inputs/targets and run the model."""
        rna = y.to(device) if self.method == 'multi' else x.to(device)
        inputs, targets = rna[:, self.input_idxs], rna[:, self.target_idxs]
        return self.model(inputs), targets

    def loss(self, x, day, y):
        """Return the differentiable training loss: MSE plus `alpha` times the sparsity penalty."""
        out, targets = self._predict(x, y)
        return F.mse_loss(out, targets) + self.alpha * sparse_loss(out, self.num)

    def error(self, x, day, y):
        """Return the plain MSE between predictions and targets as a Python float."""
        # Only a float leaves this method, so no autograd graph is needed.
        with torch.no_grad():
            out, targets = self._predict(x, y)
            return F.mse_loss(out, targets).item()

# Bind the model, index tensors and loss settings into one object whose `loss` and
# `error` methods are handed to the trainer.
l = PredLoss(
    model=model,
    num=num,
    alpha=alpha,
    input_idxs=input_idxs,
    target_idxs=target_idxs,
    method=METHOD,
)

In [16]:
# Build the trainer from the model, the loss/error callables and the optimiser settings.
# Arguments stay positional because Trainer's parameter names are not visible here.
# NOTE(review): the second argument 'test' is presumably a run/checkpoint name - confirm.
# NOTE(review): the recorded run stopped with a serialization RuntimeError right after
# the epoch-0 validation - presumably while a checkpoint was being written; verify the
# output location and free disk space before re-running.
trainer = Trainer(
    model,
    'test',
    l.loss,
    l.error,
    train_dataloader,
    val_dataloader,
    initial_lr,
    lr_decay_period,
    lr_decay_gamma,
    weight_decay,
)
trainer.train(
    num_epochs,
    eval_every,
    patience,
    num_tries,
)

Using cuda for training
Training the following model for 20 epochs:

LinearCoder(
  (blocks): Sequential(
    (0): LinearBlock(
      (in_linear): Linear(in_features=18386, out_features=3664, bias=True)
      (out_linear): Linear(in_features=3664, out_features=3664, bias=True)
      (activation): ReLU()
      (out_norm): LayerNorm((3664,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.01, inplace=False)
    )
  )
) with 80805856 paramters

-------------------------------------------------------------
------------------  TRAIN - EPOCH NUM 0  -------------------
-------------------------------------------------------------


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 110/110 [01:32<00:00,  1.18it/s]


avg batch training loss for epoch 0: 16.055053

-------------------------------------------------------------
-------------------  VAL - EPOCH NUM 0  -------------------
-------------------------------------------------------------


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:13<00:00,  1.45it/s]


avg validation error for epoch 0: 0.36924


RuntimeError: [enforce fail at ..\caffe2\serialize\inline_container.cc:300] . unexpected pos 1088 vs 1019