In [1]:
import numpy as np
import os
import pandas as pd
import torch
from gmf import GMF
from data import *
import torch.nn as nn

In [2]:
from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Precision, Recall, Loss, RunningAverage
from ignite.contrib.handlers import ProgressBar

In [3]:
USE_GPU = True

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cpu


# Data

In [4]:
path = './data/'

In [5]:
interactions = pd.read_csv(path + 't1_train.csv')
human_feats = np.load(path + 'human_feats.npy')
virus_feats = np.load(path + 'virus_feats.npy')

In [6]:
interactions.head()

Unnamed: 0,node1,node2,edge
0,151,2841,1.0
1,151,2874,1.0
2,151,780,1.0
3,151,1183,1.0
4,155,2346,1.0


In [7]:
human_feats.shape

(7209, 2799)

In [8]:
virus_idxs = sorted(interactions['node1'].unique())
human_idxs = sorted(interactions['node2'].unique())
len(virus_idxs), len(human_idxs)

(172, 5225)

In [9]:
vtoi = {v : i for i, v in enumerate(virus_idxs)}
itov = {i : v for i, v in enumerate(virus_idxs)}
htoi = {h : i for i, h in enumerate(human_idxs)}
itoh = {i : h for i, h in enumerate(human_idxs)}

In [10]:
gen = SingleTaskGenerator(interactions, human_feats, virus_feats, .10)

In [11]:
train_loader = gen.create_train_loader(3)
val_loader = gen.create_val_loader(3)
test_loader = gen.create_test_loader(3)

In [12]:
next(iter(train_loader))

[tensor([[ 142, 3601],
         [  43, 5075],
         [  32,  990]]),
 tensor([[0.8966, 0.9655, 0.6207,  ..., 0.0000, 0.0000, 0.0000],
         [0.9365, 0.6984, 0.6190,  ..., 0.0000, 0.0000, 0.0000],
         [0.6098, 0.7805, 0.5854,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([[0.1111, 0.3333, 0.2222,  ..., 0.0000, 0.0000, 1.0000],
         [0.7258, 0.8871, 0.5000,  ..., 0.0000, 0.0000, 1.0000],
         [0.5000, 0.6290, 0.5806,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([0., 0., 0.], dtype=torch.float64)]

# Model

In [13]:
n_virus, n_human = len(virus_idxs), len(human_idxs)

In [14]:
config = {
    'num_virus': n_virus,
    'num_human': n_human,
    'latent_dim': 2799,
    'sparse': False
}
model = GMF(config)
model.to(device)

GMF(
  (virus): Embedding(172, 2799)
  (human): Embedding(5225, 2799)
)

In [15]:
optimizer = torch.optim.SGD(model.parameters(), 
                            lr = 1e-3,  
                            momentum=0.9, 
                            weight_decay=1e-5)
criterion = nn.MSELoss()

In [16]:
threshhold = .50

### Trainer

In [17]:
debug_loader = gen.create_debug_loader(3)

In [18]:
next(iter(debug_loader))

[tensor([[   1, 2540],
         [ 149, 2780],
         [  35,  143]]),
 tensor([[0.1786, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0000],
         [0.3409, 0.3636, 0.2727,  ..., 0.0000, 0.0000, 1.0000],
         [0.9577, 0.9155, 0.9014,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([[0.2273, 0.5303, 0.0758,  ..., 0.0000, 0.0000, 1.0000],
         [0.3333, 0.8889, 0.7222,  ..., 0.0000, 0.0000, 1.0000],
         [0.2667, 0.5333, 0.2667,  ..., 0.0000, 0.0000, 1.0000]],
        dtype=torch.float64),
 tensor([0., 0., 0.], dtype=torch.float64)]

In [19]:
def train_batch(engine, batch):
    model.train()
    optimizer.zero_grad()
    
    x_pairs, human_feats, virus_feats, ys = batch
    v_idxs, h_idxs = x_pairs[:,0], x_pairs[:,1]
    pred = model(h_idxs, v_idxs, human_feats, virus_feats)
#     pred = pred_help(pred)
    loss = criterion(pred, ys)
#     print(loss)
    loss.backward()
    optimizer.step()
        
    return loss.item()

In [20]:
RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

NameError: name 'trainer' is not defined

In [21]:
pbar = ProgressBar(persist=True)
pbar.attach(trainer, ['loss'])

NameError: name 'trainer' is not defined

### train eval 

In [22]:
trainer = Engine(train_batch)
trainer.run(debug_loader)

<ignite.engine.engine.State at 0x7f078c996278>

# evaluation

In [23]:
trainer = Engine(train_batch)

In [24]:
# round probabilities
def thresholded_output_transform(output):
    y_pred, y = output
    y_pred = torch.round(y_pred)
    return y_pred, y

In [25]:
def eval_fn(engine, batch):
    model.eval()
    with torch.no_grad():
        x_pairs, human_feats, virus_feats, ys = batch
        v_idxs, h_idxs = x_pairs[:,0], x_pairs[:,1]
        pred = model(h_idxs, v_idxs, human_feats, virus_feats)
        return pred, ys

In [26]:
train_evaluator = Engine(eval_fn)

In [27]:
Accuracy(output_transform=thresholded_output_transform).attach(train_evaluator, 'accuracy')
Precision(output_transform=thresholded_output_transform).attach(train_evaluator, 'precision')
Recall(output_transform=thresholded_output_transform).attach(train_evaluator, 'recall')
Loss(criterion).attach(train_evaluator, 'loss')

In [28]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    train_evaluator.run(train_loader)
    metrics = train_evaluator.state.metrics
    avg_accuracy = metrics['accuracy']
    avg_loss = metrics['loss']
    prec = metrics['precision']
    rec = metrics['recall']
    pbar.log_message(
        "Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Prec: {:.2f} Rec: {:.2f}"
        .format(engine.state.epoch, avg_accuracy, avg_loss, prec, rec))

In [29]:
RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

In [30]:
pbar = ProgressBar(persist=True)
pbar.attach(trainer, ['loss'])

In [31]:
trainer.run(train_loader)

Epoch [1/1]: [2512/2512] 100%|██████████, loss=1.75e-02 [05:15<00:00]

Training Results - Epoch: 1  Avg accuracy: 0.99 Avg loss: 0.01 Prec: 0.00 Rec: 0.00





<ignite.engine.engine.State at 0x7f078c996ba8>