In [1]:
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

import numpy as np
import sys
import time
import utils
import my_models
import argparse
import data_loader
import data_loader_imputation
import pandas as pd
import ujson as json
import matplotlib.pyplot as plt

from sklearn import metrics

from ipdb import set_trace
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Number of full passes over the loader performed by train().
epochs = 20
# Forwarded to data_loader.get_loader(batch_size=...) when the loader is built.
batch_size = 64
# Attribute looked up on my_models in run(); that attribute's Model() is instantiated.
model_name = 'brits'

In [3]:
def train(model):
    """Optimise ``model`` with Adam for ``epochs`` passes and evaluate after each pass.

    A single loader from ``data_loader.get_loader`` is used both for the
    optimisation steps and for the per-epoch call to ``evaluate``.

    Args:
        model: object exposing ``parameters()``, ``train()`` and
            ``run_on_batch(data, optimizer)``; the latter must return a dict
            with a ``'loss'`` tensor.

    Returns:
        list with one entry per epoch: the value returned by ``evaluate``.
    """
    optimizer = optim.Adam(model.parameters(), lr = 1e-3)
    data_iter = data_loader.get_loader(batch_size = batch_size)

    auc_history = []

    for epoch in range(epochs):
        # evaluate() switches the model to eval mode, so re-enter train mode every epoch.
        model.train()
        loss_sum = 0.0

        # `step` is 1-based: it doubles as the number of batches seen so far.
        for step, batch in enumerate(data_iter, start = 1):
            batch_out = model.run_on_batch(utils.to_var(batch), optimizer)
            loss_sum += batch_out['loss'].detach().cpu().numpy()

            # Report on the first batch and every 20th batch after it.
            if (step - 1) % 20 == 0:
                percent_done = step * 100.0 / len(data_iter)
                mean_loss = loss_sum / float(step)
                print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(epoch, percent_done, mean_loss))

        # Evaluation runs after every epoch.
        auc_history.append(evaluate(model, data_iter))

    return auc_history

In [4]:
def evaluate(model, val_iter):
    """Run ``model`` over ``val_iter`` and print classification and imputation metrics.

    Every batch goes through ``utils.to_var`` and ``model.run_on_batch(data, None)``.
    The returned dict must provide the tensors ``'predictions'``, ``'labels'``,
    ``'is_train'``, ``'eval_masks'``, ``'evals'`` and ``'imputations'``.

    Samples with ``is_train == 0`` are reported as "validation", samples with
    ``is_train == 2`` as "test". MAE and MRE compare ``imputations`` with
    ``evals`` at the positions where ``eval_masks == 1``.

    Args:
        model: object exposing ``eval()`` and ``run_on_batch(data, optimizer)``.
        val_iter: iterable yielding batches.

    Returns:
        The ROC AUC computed on the validation subset.

    Raises:
        Any error from ``metrics.roc_auc_score`` is propagated unchanged (scikit-learn
        documents a ``ValueError`` when ``y_true`` holds a single class).
    """
    model.eval()

    labels_v, preds_v = [], []
    labels_t, preds_t = [], []
    evals, imputations = [], []

    # run_on_batch is called with optimizer=None, so no backward pass is expected here;
    # disabling autograd avoids building graphs that would never be used.
    with torch.no_grad():
        for data in val_iter:
            data = utils.to_var(data)
            ret = model.run_on_batch(data, None)

            pred = ret['predictions'].detach().cpu().numpy()
            label = ret['labels'].detach().cpu().numpy()
            is_train = ret['is_train'].detach().cpu().numpy()

            eval_masks = ret['eval_masks'].detach().cpu().numpy()
            eval_ = ret['evals'].detach().cpu().numpy()
            imputation = ret['imputations'].detach().cpu().numpy()

            # Keep only the positions flagged for imputation scoring.
            scored = eval_masks == 1
            evals += eval_[scored].tolist()
            imputations += imputation[scored].tolist()

            # Split labels/predictions into the validation (0) and test (2) subsets.
            in_val = is_train == 0
            in_test = is_train == 2

            labels_v += label[in_val].tolist()
            preds_v += pred[in_val].tolist()
            labels_t += label[in_test].tolist()
            preds_t += pred[in_test].tolist()

    labels_v = np.asarray(labels_v).astype('int32')
    preds_v = np.asarray(preds_v)
    labels_t = np.asarray(labels_t).astype('int32')
    preds_t = np.asarray(preds_t)

    # Compute the validation AUC once and reuse it for both the report and the return value.
    auc_v = metrics.roc_auc_score(labels_v, preds_v)
    print('AUC of validation {}'.format(auc_v))
    print('AUC of test {}'.format(metrics.roc_auc_score(labels_t, preds_t)))

    evals = np.asarray(evals)
    imputations = np.asarray(imputations)

    abs_err = np.abs(evals - imputations)
    print('MAE', abs_err.mean())
    print('MRE', abs_err.sum() / np.abs(evals).sum())

    return auc_v

In [5]:
def run():
    """Seed torch, build the model selected by ``model_name``, train it and print the best AUC.

    The model class is ``Model`` on the ``my_models`` attribute named by
    ``model_name``; it is moved to CUDA when ``torch.cuda.is_available()``.
    Prints the maximum of the per-epoch values returned by ``train``.
    """
    torch.manual_seed(3)

    model_module = getattr(my_models, model_name)
    net = model_module.Model()
    net = net.cuda() if torch.cuda.is_available() else net

    auc_per_epoch = train(net)
    print(max(auc_per_epoch))

if __name__ == '__main__':
    run()

 Progress epoch 0, 1.59%, average loss 4.8031415939331055
 Progress epoch 0, 33.33%, average loss 4.548648970467704
 Progress epoch 0, 65.08%, average loss 4.428516853146437
 Progress epoch 0, 96.83%, average loss 4.288227257181386
AUC of validation 0.7654565747349252
AUC of test 0.7872481227378315
MAE 0.5514444579232652
MRE 0.7812946540722783
 Progress epoch 1, 1.59%, average loss 3.814361572265625
 Progress epoch 1, 33.33%, average loss 3.7512806483677457
 Progress epoch 1, 65.08%, average loss 3.678295676301165
 Progress epoch 1, 96.83%, average loss 3.6205627722818345
AUC of validation 0.8101888565806092
AUC of test 0.8330857328075199
MAE 0.4645411852830483
MRE 0.6581688136369842
 Progress epoch 2, 1.59%, average loss 3.2107841968536377
 Progress epoch 2, 33.33%, average loss 3.3390966937655495
 Progress epoch 2, 65.08%, average loss 3.326813058155339
 Progress epoch 2, 96.83%, average loss 3.3011702904935745
AUC of validation 0.8412194907040268
AUC of test 0.8354221814056507
MAE 0

In [3]:
# Build a loader at notebook scope so individual batches can be inspected interactively.
data_iter = data_loader.get_loader(batch_size = batch_size)

In [4]:
# Grab a single batch from the loader; the inspection cells below read it as `data`.
data = next(iter(data_iter))

In [5]:
# List the fields stored under the batch's 'forward' entry.
data['forward'].keys()

dict_keys(['values', 'forwards', 'masks', 'deltas', 'evals', 'eval_masks', 'us'])

In [7]:
# Size of the 'values' tensor in the batch's 'forward' entry.
# NOTE(review): the recorded output has a leading size of 32 although batch_size is 64
# in the config cell — presumably left over from an earlier run; confirm.
data['forward']['values'].size()

torch.Size([32, 49, 35])

In [6]:
# Show the batch's 'is_train' flags; evaluate() selects 0 as validation and 2 as test.
data['is_train']

tensor([0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.])

In [7]:
# Show the batch's 'labels' tensor (evaluate() casts these to int32 before scoring).
data['labels']

tensor([0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [8]:
# Short handles on the 'values' and 'masks' tensors of the batch's 'forward' entry.
v = data['forward']['values']
m = data['forward']['masks']

In [9]:
# Slice the mask tensor `m`: index 10 on the first axis, index 23 on the third axis,
# everything along the second axis.
# NOTE(review): axes are presumably (sample, time step, feature) — confirm against data_loader.
m[10,:,23]

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])