# 1. Deps

In [1]:
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn

In [2]:
from data import *
from utils import *
from gmf import GMF

In [3]:
# Set to False to stay on the CPU even when CUDA is available.
USE_GPU = True

# CUDA is chosen only when it is both requested and actually present.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

print('using device:', device)

using device: cpu


### ignite

In [4]:
from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Precision, Recall, Loss, RunningAverage
from ignite.contrib.handlers import ProgressBar

# 2. Data

In [6]:
# Directory prefix (including the trailing slash) prepended to every data file name below.
path = './data/'

In [7]:
# Load the virus-human interaction table (columns seen in the preview: virusUprot, humanUprot, edge).
# NOTE(review): the preview also shows an 'Unnamed: 0' column, which suggests the CSV was
# written together with its index - confirm whether downstream code needs that column.
M = pd.read_csv(f'{path}full_train.csv')

In [8]:
M.head()

Unnamed: 0,virusUprot,humanUprot,edge
0,P03433,P49736,1.0
1,P03433,P15311,0.0
2,P03433,P11142,0.0
3,P03433,Q86U42,0.0
4,P03433,P33992,1.0


In [9]:
# Load the four lookup tables stored as JSON next to the interaction data.
# Presumably htoi/vtoi map human/virus protein IDs to integer indices and itoh/itov are
# the inverse maps - TODO confirm against the files.
htoi = loadjson(f'{path}htoi.json')
vtoi = loadjson(f'{path}vtoi.json')
itoh = loadjson(f'{path}itoh.json')
itov = loadjson(f'{path}itov.json')

In [10]:
# Only the virus feature matrix is loaded; the human one is left commented out.
# In the cells visible here, vfeats is read solely for its column count (latent_dim),
# while data_config passes 'vfeats': None to the generator.
# hfeats = np.loadtxt(f'{path}hfeats.txt')
vfeats = np.loadtxt(f'{path}vfeats.txt')

In [11]:
# Settings handed to ProteinInteractionGenerator.
# Side-information features are disabled for this run: both feature entries are None
# (an earlier draft of this cell passed the loaded hfeats / vfeats arrays instead).
data_config = {
    'interactions': M,
    'htoi': htoi,
    'vtoi': vtoi,
    'hfeats': None,
    'vfeats': None,
    'pct_test': 0.10,
    'device': device,
}

In [12]:
# Build the data generator from data_config. Per the saved output below, construction
# prints the device, the positive/negative counts and the train/val/test split sizes.
gen = ProteinInteractionGenerator(data_config)

using device:  cpu
Found 12192 positives, and 716794 negatives! 0.01672460102114444
--------
9889 in training set, 0.01674744867717341
1036 in val set, 0.15072627231020133
1267 in test set, 0.017380210976830957


In [13]:
# Small loader (batch size 4) used only to eyeball one batch in the next cell.
loader = gen.create_train_loader(4)

In [14]:
next(iter(loader))

[tensor([274,  31, 227, 334]),
 tensor([1219,  960,  366,  773]),
 tensor([0., 0., 0., 0.])]

In [15]:
# Loader fed to the trainer and evaluator engines below (batch size 32).
train_loader = gen.create_train_loader(32)

# Model - standard matrix factorization (MF)

In [16]:
M['humanUprot'].unique().shape[0]

3299

In [17]:
# Entity counts taken from the index maps; passed to GMF as num_virus / num_human via `config`.
n_v, n_h = len(vtoi), len(htoi)

In [18]:
# latent_dim is set to the number of columns of the virus feature matrix.
latent_dim = vfeats.shape[1]

In [19]:
# Hyper-parameters passed to the GMF constructor.
config = {
    'num_virus': n_v,
    'num_human': n_h,
    'latent_dim': latent_dim,
    # Kept dense for now because some optimizers do not work with sparse gradients.
    'sparse': False,
}

In [20]:
# Instantiate the model from `config`.
# NOTE(review): the saved print(model) output shows Embedding(3299, ...) for the human
# table, while a later cell reports len(htoi) == 3302 and the failing training batch
# contains index 3300. The saved outputs therefore look like they come from a model built
# with a different num_human - restart the kernel and run all cells in order to confirm.
model = GMF(config)

In [21]:
# SGD with momentum and a small weight-decay term over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-5
)
# Binary cross-entropy on the model output.
# NOTE(review): BCELoss expects inputs in [0, 1] - confirm GMF ends in a sigmoid.
criterion = nn.BCELoss()

In [22]:
# Presumably the decision cut-off for turning predicted scores into class labels.
# NOTE(review): the name is spelled "threshhold" and no other cell visible in this
# notebook reads it.
threshhold = .50

In [23]:
# Move the model onto the selected device when GPU use was requested.
# (`device` already falls back to CPU if CUDA is unavailable.)
if USE_GPU:
    model.to(device)

In [24]:
print(model)

GMF(
  (virus): Embedding(384, 2794)
  (human): Embedding(3299, 2794)
  (virus_b): Embedding(384, 1)
  (human_b): Embedding(3299, 1)
)


In [25]:
M['humanUprot'].unique().shape

(3299,)

In [30]:
len(htoi)

3302

# Trainer

In [26]:
# Debug loader with batch size 3; one batch is inspected in the next cell.
debug_loader = gen.create_debug_loader(3)

In [27]:
next(iter(debug_loader))

[tensor([281, 178,  98]), tensor([2808, 1211, 1826]), tensor([0., 0., 0.])]

In [28]:
def train_batch(engine, batch):
    """Perform one optimisation step on a single mini-batch.

    Relies on the notebook-level ``model``, ``optimizer`` and ``criterion``.

    Args:
        engine: the ignite ``Engine`` calling this function (not used here).
        batch: a 3-item sequence ``(virus indices, human indices, labels)``.

    Returns:
        The batch loss as a plain Python float.
    """
    model.train()
    optimizer.zero_grad()

    virus_idx, human_idx, labels = batch
    batch_loss = criterion(model(virus_idx, human_idx), labels)

    batch_loss.backward()
    optimizer.step()

    return batch_loss.item()

In [29]:
# quick test run
trainer = Engine(train_batch)
trainer.run(train_loader)

tensor([1868,  364,  173, 2211, 1326,  891, 2698, 2793, 1529,  489, 3073, 2486,
         928, 1589,  653,  399,  318,  443, 2215, 2327, 1505,  197,  546,  779,
        3241, 2734, 2005, 1351, 1884, 1096, 1966, 3300])


AssertionError: 

### Metrics

In [27]:
def eval_fn(engine, batch):
    """Score one batch without tracking gradients.

    Puts the notebook-level ``model`` into eval mode and runs a forward pass.

    Args:
        engine: the ignite ``Engine`` calling this function (not used here).
        batch: a 3-item sequence ``(virus indices, human indices, labels)``.

    Returns:
        ``(y_pred, y)``: the model output and the untouched labels, in the order
        ignite metrics expect.
    """
    model.eval()
    with torch.no_grad():
        virus_idx, human_idx, targets = batch
        scores = model(virus_idx, human_idx)
    return scores, targets

In [28]:
# Engine that runs eval_fn over a loader; metrics are attached to it in a later cell.
train_evaluator = Engine(eval_fn)

In [29]:
def thresholded_output_transform(output, threshold=0.5):
    """Binarise predicted probabilities so classification metrics can consume them.

    Args:
        output: ``(y_pred, y)`` pair as returned by the evaluation step, where
            ``y_pred`` holds probabilities in ``[0, 1]``.
        threshold: scores strictly greater than this value become 1, all others 0.
            The default of 0.5 gives the same labels as ``torch.round`` for
            probabilities in ``[0, 1]`` (``torch.round`` sends exactly 0.5 to 0).

    Returns:
        ``(y_pred_binary, y)`` where ``y_pred_binary`` has the dtype of ``y_pred``
        and contains only 0s and 1s; ``y`` is passed through unchanged.
    """
    y_pred, y = output
    # Strict '>' keeps a score of exactly `threshold` in the negative class.
    y_pred = (y_pred > threshold).to(y_pred.dtype)
    return y_pred, y

In [30]:
# Classification metrics operate on binarised predictions, so each one receives the
# thresholding transform; they are registered in the order accuracy, precision, recall.
for metric_name, metric_cls in (
    ('accuracy', Accuracy),
    ('precision', Precision),
    ('recall', Recall),
):
    metric_cls(output_transform=thresholded_output_transform).attach(train_evaluator, metric_name)

# The loss metric consumes the raw (un-thresholded) model output.
Loss(criterion).attach(train_evaluator, 'loss')

In [31]:
# Re-create the trainer engine (an earlier cell also assigns `trainer`); the event
# handler defined in the next cell is registered on this instance.
trainer = Engine(train_batch)

In [32]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    """Evaluate on the training loader after each epoch and log precision/recall.

    Args:
        engine: the training ``Engine``; only ``engine.state.epoch`` is read.
    """
    train_evaluator.run(train_loader)
    results = train_evaluator.state.metrics
    message = "\n Training Results - Epoch: {}  Prec: {:.2f} Rec: {:.2f}".format(
        engine.state.epoch, results['precision'], results['recall'])
    # `pbar` is looked up when the handler fires, so it may be defined in a later cell.
    pbar.log_message(message)

In [33]:
# RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

In [34]:
# Progress bar for the training engine. `persist=True` keeps the bar on screen after
# each epoch finishes. The bar must be attached to an engine to display anything;
# without the attach call it is only usable for `pbar.log_message(...)`.
pbar = ProgressBar(persist=True)
pbar.attach(trainer)

In [35]:
# Train for a single epoch; log_training_results fires when the epoch completes.
# NOTE(review): the saved output ends in a CUDA "device-side assert triggered" error.
# Such asserts are commonly caused by an out-of-range index or an invalid loss input -
# rerun on CPU (USE_GPU = False) to get a readable traceback, then clear this output.
trainer.run(train_loader, max_epochs=1)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 

RuntimeError: cuda runtime error (59) : device-side assert triggered at /opt/conda/conda-bld/pytorch_1544174967633/work/aten/src/THC/generated/../THCReduceAll.cuh:317