In [1]:
import numpy as np
import os
import pandas as pd
import torch
from gmf import GMF
from data import *
import torch.nn as nn

In [2]:
from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Precision, Recall
from ignite.contrib.handlers import ProgressBar

In [3]:
USE_GPU = True

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cpu


### Data

In [4]:
path = './data/'

In [5]:
interactions = pd.read_csv(path + 't1_train.csv')
human_feats = np.load(path + 'human_feats.npy')
virus_feats = np.load(path + 'virus_feats.npy')

In [6]:
human_feats.shape

(7209, 2799)

In [7]:
virus_idxs = sorted(interactions['node1'].unique())
human_idxs = sorted(interactions['node2'].unique())
len(virus_idxs), len(human_idxs)

(172, 5225)

In [8]:
vtoi = {v : i for i, v in enumerate(virus_idxs)}
itov = {i : v for i, v in enumerate(virus_idxs)}
htoi = {h : i for i, h in enumerate(human_idxs)}
itoh = {i : h for i, h in enumerate(human_idxs)}

In [9]:
gen = SingleTaskGenerator(interactions, human_feats, virus_feats, .10)

In [10]:
train_loader = gen.create_train_loader(3)
val_loader = gen.create_val_loader(3)
test_loader = gen.create_test_loader(3)

In [11]:
next(iter(train_loader))

[tensor([[  31, 5026],
         [  21, 3962],
         [   0,   82]]),
 tensor([[0.3585, 0.8868, 0.4906,  ..., 0.0000, 0.0000, 0.0000],
         [0.5185, 0.7407, 0.5000,  ..., 0.0000, 0.0000, 0.0000],
         [0.3837, 0.5814, 0.5581,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([[0.9333, 0.5333, 0.3333,  ..., 0.0000, 0.0000, 1.0000],
         [0.9630, 0.5926, 0.8519,  ..., 0.0000, 0.0000, 1.0000],
         [0.9615, 0.6154, 0.8846,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([0, 0, 0])]

### Model

In [12]:
n_virus, n_human = len(virus_idxs), len(human_idxs)

In [13]:
config = {
    'num_virus': n_virus,
    'num_human': n_human,
    'latent_dim': 2799,
    'sparse': False
}
model = GMF(config)
model.to(device)

GMF(
  (virus): Embedding(172, 2799)
  (human): Embedding(5225, 2799)
)

In [14]:
optimizer = torch.optim.SGD(model.parameters(), 
                            lr = 1e-3,  
                            momentum=0.9, 
                            weight_decay=1e-5)
criterion = nn.MSELoss()

In [15]:
threshhold = .50

### Trainer

In [16]:
debug_loader = gen.create_debug_loader(3)

In [17]:
next(iter(debug_loader))

[tensor([[ 142, 1073],
         [   0, 2539],
         [  36, 1831]]),
 tensor([[0.3333, 0.7333, 0.4000,  ..., 0.0000, 0.0000, 1.0000],
         [0.6897, 0.7586, 0.4828,  ..., 0.0000, 0.0000, 1.0000],
         [0.9333, 0.7333, 0.4222,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([[0.1111, 0.3333, 0.2222,  ..., 0.0000, 0.0000, 1.0000],
         [0.9615, 0.6154, 0.8846,  ..., 0.0000, 0.0000, 1.0000],
         [0.9412, 0.9412, 0.8235,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([0, 0, 0])]

In [20]:
def train_batch(engine, batch):
    model.train()
    optimizer.zero_grad()
    
    x_pairs, human_feats, virus_feats, ys = batch
    v_idxs, h_idxs = x_pairs[:,0], x_pairs[:,1]
    pred = model(h_idxs, v_idxs, human_feats, virus_feats)
    print('pred: ', pred)
    loss = criterion(pred, ys)
    loss.backward()
    self.opt.step()

    if self.config['cuda'] is True:
        loss = loss.data.cpu().numpy()
    else:
        loss = loss.data.numpy()[0]
        
    return loss.item()

In [21]:
trainer = Engine(train_batch)
trainer.run(debug_loader)

torch.Size([3, 2799]) torch.Size([3, 2799])
tensor([-0.0139,  0.0118,  0.0093], dtype=torch.float64,
       grad_fn=<SumBackward2>)
pred:  tensor([], size=(0, 0, 0), dtype=torch.float64)


RuntimeError: The size of tensor a (0) must match the size of tensor b (3) at non-singleton dimension 2