# 1. Deps

In [1]:
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn

In [2]:
from data import *
from utils import *
from gmf import GMF

In [3]:
# Flip to False to force CPU execution even when a GPU is present.
USE_GPU = True

# Resolve the device once; CUDA is chosen only when it is both requested
# and actually available, otherwise everything runs on the CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

print('using device:', device)

using device: cuda


### ignite

In [4]:
from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Precision, Recall, Loss, RunningAverage
from ignite.contrib.handlers import ProgressBar

# 2. Data

In [5]:
path = './data/'

In [6]:
M = pd.read_csv(f'{path}full_train.csv')

In [7]:
M.head()

Unnamed: 0,virusUprot,humanUprot,edge
0,P03433,P49736,1.0
1,P03433,P15311,0.0
2,P03433,P11142,0.0
3,P03433,Q86U42,0.0
4,P03433,P33992,1.0


In [8]:
# Lookup tables stored as JSON next to the interaction data.
# Presumably *toi maps a protein id to an integer index and ito* is the
# inverse mapping — confirm against the code that wrote these files.
htoi, vtoi = loadjson(f'{path}htoi.json'), loadjson(f'{path}vtoi.json')
itoh, itov = loadjson(f'{path}itoh.json'), loadjson(f'{path}itov.json')

In [9]:
# hfeats = np.loadtxt(f'{path}hfeats.txt')
# Only the virus feature matrix is loaded. In the cells shown here it is not
# passed to the data generator; it is read solely for its column count, which
# later sets the model's latent dimension.
vfeats = np.loadtxt(f'{path}vfeats.txt')

In [10]:
# Settings handed to ProteinInteractionGenerator.
# The precomputed feature matrices are intentionally left out for now; to
# re-enable them, replace the two None entries with:
#     'hfeats':hfeats,
#     'vfeats':vfeats,
data_config = {
    'interactions': M,
    'htoi': htoi,
    'vtoi': vtoi,
    'hfeats': None,
    'vfeats': None,
    'pct_test': 0.10,
    'device': device,
}

In [11]:
gen = ProteinInteractionGenerator(data_config)

using device:  cuda
Found 12192 positives, and 716794 negatives! 0.01672460102114444
--------
9889 in training set, 0.01674744867717341
1036 in val set, 0.15072627231020133
1267 in test set, 0.017380210976830957


In [12]:
loader = gen.create_train_loader(4)

In [13]:
next(iter(loader))

[tensor([318, 170, 116, 371], device='cuda:0'),
 tensor([2135, 3058, 2364,  256], device='cuda:0'),
 tensor([0., 0., 0., 0.], device='cuda:0')]

In [14]:
train_loader = gen.create_train_loader(32)

# Model - normal MF (matrix factorization)

In [15]:
M['humanUprot'].unique().shape[0]

3299

In [16]:
n_v, n_h = len(vtoi), len(htoi)

In [17]:
# The embedding width is taken from the number of columns of the virus
# feature matrix, so the learned embeddings have the same dimensionality as
# those features.
latent_dim = vfeats.shape[1]

In [18]:
# Hyper-parameters passed to the GMF constructor.
# 'sparse' stays False for now because some optimizers do not work with
# sparse embedding gradients.
config = {
    'num_virus': n_v,
    'num_human': n_h,
    'latent_dim': latent_dim,
    'sparse': False,
}

In [19]:
model = GMF(config)

In [20]:
# Binary cross-entropy; BCELoss expects probabilities in [0, 1], so the model
# is presumed to end in a sigmoid — confirm in gmf.py.
criterion = nn.BCELoss()

# Plain SGD with momentum and a small weight-decay (L2) term.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

In [21]:
threshhold = .50

In [22]:
# Always place the model on `device`, the same device that was handed to the
# data generator, so model parameters and batch tensors can never end up on
# different devices. `device` already encodes the USE_GPU / CUDA-availability
# decision, so no extra guard is needed (moving to the current device is a
# no-op). Assigning the result keeps the cell from echoing the model repr.
model = model.to(device)

In [23]:
print(model)

GMF(
  (virus): Embedding(384, 2794)
  (human): Embedding(3302, 2794)
  (virus_b): Embedding(384, 1)
  (human_b): Embedding(3302, 1)
)


In [24]:
M['humanUprot'].unique().shape

(3299,)

In [25]:
len(htoi)

3302

# Trainer

In [26]:
debug_loader = gen.create_debug_loader(3)

In [27]:
next(iter(debug_loader))

[tensor([ 98, 309, 350], device='cuda:0'),
 tensor([1826, 1754,  797], device='cuda:0'),
 tensor([0., 0., 0.], device='cuda:0')]

In [28]:
def train_batch(engine, batch):
    """Run a single optimisation step on one batch and report its loss.

    Args:
        engine: The ignite ``Engine`` driving the loop (not used here).
        batch: A ``(virus_idxs, human_idxs, labels)`` triple from the loader.

    Returns:
        The loss of this batch as a plain Python number.
    """
    model.train()
    virus_idx, human_idx, target = batch

    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()
    batch_loss = criterion(model(virus_idx, human_idx), target)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.item()

In [30]:
# quick test run
# Smoke test: push the small debug loader through one pass to check that the
# training step runs end to end. This `trainer` is a throwaway; a fresh Engine
# is created further down before any event handlers are registered.
trainer = Engine(train_batch)
trainer.run(debug_loader)

<ignite.engine.engine.State at 0x7ff47186a908>

### Metrics

In [31]:
def eval_fn(engine, batch):
    """Score one batch in evaluation mode without tracking gradients.

    Args:
        engine: The ignite ``Engine`` driving the loop (not used here).
        batch: A ``(virus_idxs, human_idxs, labels)`` triple from the loader.

    Returns:
        A ``(y_pred, y)`` pair: the model output for the batch and the
        labels, untouched.
    """
    model.eval()
    virus_idx, human_idx, target = batch
    with torch.no_grad():
        scores = model(virus_idx, human_idx)
    return scores, target

In [32]:
train_evaluator = Engine(eval_fn)

In [33]:
def thresholded_output_transform(output, threshold=0.5):
    """Binarise predicted probabilities so classification metrics can use them.

    Args:
        output: A ``(y_pred, y)`` pair as returned by the evaluation step,
            where ``y_pred`` holds probabilities in ``[0, 1]``.
        threshold: Scores strictly greater than this value become 1, all
            others 0. The default of 0.5 gives the same result as rounding a
            probability (``torch.round`` sends exactly 0.5 to 0 as well).
            ignite calls the transform with a single argument, so use a
            lambda or ``functools.partial`` to bind a different cut-off.

    Returns:
        ``(y_pred_binary, y)`` where ``y_pred_binary`` has the same dtype and
        device as ``y_pred`` and ``y`` is passed through unchanged.
    """
    y_pred, y = output
    # Cast the comparison back to the score dtype so the result stays a float
    # tensor of 0./1., as the rounded version was.
    y_pred = (y_pred > threshold).to(y_pred.dtype)
    return y_pred, y

In [34]:
# The classification metrics need hard 0/1 predictions, so each of them gets
# the thresholding transform; the loss is computed on the raw model output.
for _metric_name, _metric_cls in (
    ('accuracy', Accuracy),
    ('precision', Precision),
    ('recall', Recall),
):
    _metric_cls(output_transform=thresholded_output_transform).attach(
        train_evaluator, _metric_name)

Loss(criterion).attach(train_evaluator, 'loss')

In [35]:
trainer = Engine(train_batch)

In [36]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    """Evaluate on the training loader after each epoch and log the scores.

    Runs ``train_evaluator`` over ``train_loader`` and writes the resulting
    precision and recall through ``pbar``. ``pbar`` is created in a later
    cell, so that cell must have been executed before the trainer is run.

    Args:
        engine: The training engine; only ``engine.state.epoch`` is read.
    """
    train_evaluator.run(train_loader)
    scores = train_evaluator.state.metrics
    message = "\n Training Results - Epoch: {}  Prec: {:.2f} Rec: {:.2f}".format(
        engine.state.epoch, scores['precision'], scores['recall'])
    pbar.log_message(message)

In [37]:
# RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

In [38]:
# Progress bar for the training loop. It has to be attached to the engine;
# an unattached bar never renders and would only be usable through
# log_message(). persist=True keeps the finished bar visible after each epoch.
pbar = ProgressBar(persist=True)
pbar.attach(trainer)

In [39]:
trainer.run(train_loader, max_epochs=1)

KeyboardInterrupt: 