In [1]:
import networkx as nx
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy
import numpy as np
import math
import pickle
from tqdm import tqdm_notebook
from multiprocessing import Pool

np.set_printoptions(suppress=True)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.optim.lr_scheduler import ExponentialLR
from torchvision import datasets, transforms

In [3]:
torch.cuda.set_device(1)

## Model Declaration

In [4]:
class ConvNet(nn.Module):
    def __init__(self, in_dim=256, out_dim=2):
        super(ConvNet, self).__init__()
        
        self.in_dim = in_dim
        self.outdim_en1 = in_dim
        self.outdim_en2 = math.ceil(self.outdim_en1 / 2)
        self.out_dim = out_dim
        
        self.model_conv = nn.Sequential(
            nn.Conv1d(in_channels=in_dim, out_channels=in_dim*2, kernel_size=2),
            nn.BatchNorm1d(in_dim*2),
            nn.ReLU(),
            nn.Conv1d(in_channels=in_dim*2, out_channels=in_dim*4, kernel_size=2),
            nn.BatchNorm1d(in_dim*4),
            nn.ReLU(),
        )
        
        self.model_fc = nn.Sequential(
            nn.Linear(in_features=self.in_dim*4, out_features=self.outdim_en1),
            nn.BatchNorm1d(self.outdim_en1),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(in_features=self.outdim_en1, out_features=self.outdim_en2),
            nn.BatchNorm1d(self.outdim_en2),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(in_features=self.outdim_en2, out_features=self.out_dim),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.model_conv(x)
        return self.model_fc(x.view(-1, self.in_dim*4))

In [5]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.01, gamma=2, logits=False, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce
    
    def forward(self, inputs, targets):
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduce=False)
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduce=False)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss
        
        if self.reduce:
            return torch.mean(F_loss)
        else:
            F_loss

In [6]:
class FocalLoss2(nn.Module):
    def __init__(self, alpha=0.01, gamma_pos=3, gamma_neg=2, logits=False, reduce=True):
        super(FocalLoss2, self).__init__()
        self.alpha = alpha
        self.gamma_pos=3
        self.gamma_neg=2
        self.logits = logits
        self.reduce = reduce
    
    def forward(self, inputs, targets):
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduce=False)
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduce=False)
        pt = torch.exp(-BCE_loss)
        gamma_diff = self.gamma_pos - self.gamma_neg
        F_loss_pos = self.alpha * targets * (1-pt)**self.gamma_pos * BCE_loss
        F_loss_pos = torch.mean(pt)**(-gamma_diff) * F_loss_pos
        F_loss_neg = self.alpha * (1 - targets) * (1-pt)**self.gamma_neg * BCE_loss
        F_loss = F_loss_pos + F_loss_neg
        
        avg_F_loss_pos = torch.sum(F_loss_pos) / torch.sum(targets)
        avg_F_loss_neg = torch.sum(F_loss_neg) / torch.sum(1-targets)
        
        if self.reduce:
            return torch.mean(F_loss), avg_F_loss_pos, avg_F_loss_neg
        else:
            return F_loss, F_loss_pos, F_loss_neg

## Virtual Adversarial Training

In [7]:
import contextlib


@contextlib.contextmanager
def _disable_tracking_bn_stats(model):

    def switch_attr(m):
        if hasattr(m, 'track_running_stats'):
            m.track_running_stats ^= True
            
    model.apply(switch_attr)
    yield
    model.apply(switch_attr)

    
def _l2_normalize(d):
    d_reshaped = d.view(d.shape[0], -1, *(1 for _ in range(d.dim() - 2)))
    d /= torch.norm(d_reshaped, dim=1, keepdim=True) + 1e-8
    return d


class VATLoss(nn.Module):

    def __init__(self, xi=1e-6, eps=0.1, ip=1):
        """VAT loss
        :param xi: hyperparameter of VAT (default: 10.0)
        :param eps: hyperparameter of VAT (default: 1.0)
        :param ip: iteration times of computing adv noise (default: 1)
        """
        super(VATLoss, self).__init__()
        self.xi = xi
        self.eps = eps
        self.ip = ip

    def forward(self, model, x):
        with torch.no_grad():
            pred = F.softmax(model(x), dim=1)

        # prepare random unit tensor
        d = torch.rand(x.shape).sub(0.5).to(x.device)
        d = _l2_normalize(d)

        with _disable_tracking_bn_stats(model):
            # calc adversarial direction
            for _ in range(self.ip):
                d.requires_grad_()
                pred_hat = model(x + self.xi * d)
                logp_hat = F.log_softmax(pred_hat, dim=1)
                #adv_distance = F.kl_div(logp_hat, pred, reduction='batchmean')   # for PyTorch v1.0
                adv_distance = F.kl_div(logp_hat, pred)         # for PyTorch v0.4
                adv_distance.backward()
                d = _l2_normalize(d.grad)
                model.zero_grad()
    
            # calc LDS
            r_adv = d * self.eps
            pred_hat = model(x + r_adv)
            logp_hat = F.log_softmax(pred_hat, dim=1)
            #lds = F.kl_div(logp_hat, pred, reduction='batchmean')    # for PyTorch v1.0
            lds = F.kl_div(logp_hat, pred)          # for PyTorch v1.0

        return lds


class VATLoss2(nn.Module):

    def __init__(self, xi=1e-6, eps_pos=100, eps_neg=1., ip=1):
        """VAT loss
        :param xi: hyperparameter of VAT (default: 10.0)
        :param eps_pos: hyperparameter of VAT (default: 100.0)
        :param eps_neg: hyperparameter of VAT (default: 0.1)
        :param ip: iteration times of computing adv noise (default: 1)
        """
        super(VATLoss2, self).__init__()
        self.xi = xi
        self.eps_pos = eps_pos
        self.eps_neg = eps_neg
        self.ip = ip

    def forward(self, model, x, y):
        with torch.no_grad():
            pred = F.softmax(model(x), dim=1)

        # prepare random unit tensor
        d = torch.rand(x.shape).sub(0.5).to(x.device)
        d = _l2_normalize(d)

        with _disable_tracking_bn_stats(model):
            # calc adversarial direction
            for _ in range(self.ip):
                d.requires_grad_()
                pred_hat = model(x + self.xi * d)
                logp_hat = F.log_softmax(pred_hat, dim=1)
                #adv_distance = F.kl_div(logp_hat, pred, reduction='batchmean')   # for PyTorch v1.0
                adv_distance = F.kl_div(logp_hat, pred)         # for PyTorch v0.4
                adv_distance.backward()
                d = _l2_normalize(d.grad)
                model.zero_grad()
    
            # calc LDS
            r_adv = d * (self.eps_pos * y + self.eps_neg * (1-y)).reshape(-1, 1, 1)
            pred_hat = model(x + r_adv)
            logp_hat = F.log_softmax(pred_hat, dim=1)
            #lds = F.kl_div(logp_hat, pred, reduction='batchmean')    # for PyTorch v1.0
            lds = F.kl_div(logp_hat, pred)          # for PyTorch v1.0

        return lds

## Parameters Settings

In [8]:
#
# Classifier
# ---------------------
## focal loss
alpha = 1
gamma_pos = 4
gamma_neg = 2
learn_rate = 1e-6
grad_clip = 0.1

#
# VAT
# ---------------------
vat_xi = 1e-6
vat_eps_pos = 1
vat_eps_neg = 0.01
vat_ip = 1

#
# Training process
# ---------------------
train_batch_size = 128
test_batch_size = 256

max_epochs = 100

## Data Preparation

In [9]:
train_data = np.load('datasets/Training_data_heter.npz', allow_pickle=True)
test_data  = np.load('datasets/Testing_data_heter.npz',  allow_pickle=True)

training_data, training_label, training_announce, training_FILTER = train_data['arr_0'], train_data['arr_1'], train_data['arr_2'], train_data['arr_3']
testing_data,  testing_label,  testing_announce,  testing_FILTER = test_data['arr_0'], test_data['arr_1'], test_data['arr_2'], test_data['arr_3']

X_train = training_data#[(training_announce == 1) & (training_FILTER == 0 )]
y_train = training_label#[(training_announce == 1) & (training_FILTER == 0 )]

X_test  = testing_data#[(testing_announce == 1) & (testing_FILTER == 0 )]
y_test  = testing_label#[(testing_announce == 1) & (testing_FILTER == 0 )]

In [10]:
# data = np.load('GRUArray_and_label_for_NewEmbedding_heter_superv_recur_focal_logisticMF.npz', allow_pickle=True)

# GPUArray = data['arr_0']
# label = data['arr_1']

# GPUArray = GPUArray[-1033905:,:,:]
# label = label[-1033905:]

# X_train, X_test, y_train, y_test = train_test_split(GPUArray, label, random_state=42)
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)

train_data = []
for i in range(len(X_train)):
    train_data.append((X_train[i], y_train[i]))
    
test_data = []
for i in range(len(X_test)):
    test_data.append((X_test[i], y_test[i]))

train_dataloader = DataLoader(train_data, shuffle=True, batch_size=train_batch_size)
test_dataloader = DataLoader(test_data, shuffle=False, batch_size=test_batch_size)

In [11]:
classifier = ConvNet(in_dim=X_train.shape[2], out_dim=2).cuda()
focal_loss = FocalLoss2(alpha, gamma_pos, gamma_neg)
optim_clsfr = optim.Adam(filter(lambda p: p.requires_grad, classifier.parameters()), 
                         lr=learn_rate)

In [12]:
vat_loss2 = VATLoss2(xi=vat_xi, eps_pos=vat_eps_pos, eps_neg=vat_eps_neg, ip=vat_ip)

In [13]:
def train(epoch, dataloader, clip_grad_norm=0):
    label_list = []
    pred_y_list = []
    
    clsf_loss_batch = []
    clsf_loss_pos_batch = []
    clsf_loss_neg_batch = []
    vat_batch = []
    for batch_idx, (data, target) in enumerate(dataloader):
        if data.size()[0] != dataloader.batch_size:
            continue
        data, target = Variable(data.cuda()), Variable(target.cuda())
        data = data.permute(0, 2, 1)
        tmp = target.reshape(-1, 1)
        onehot_target = torch.cat([1-tmp, tmp], dim=1)
        
        #
        # Update classifier on real samples
        #
        optim_clsfr.zero_grad()
        
        vat_kld = vat_loss2(classifier, data, target)
        
        pred_y = classifier(data).squeeze(-1)
        
        clsf_loss, clsf_loss_pos, clsf_loss_neg = focal_loss(pred_y, onehot_target)
        loss = clsf_loss + vat_kld
        
        loss.backward()
        
        # gradient clipping
        if clip_grad_norm > 0:
            torch.nn.utils.clip_grad_norm_(classifier.parameters(), max_norm=clip_grad_norm)
        
        optim_clsfr.step()
        
        #
        # Record the losses
        #
        pred_yy = torch.softmax(pred_y, dim=1)[:, 1]
        vat_batch.append(vat_kld)
        clsf_loss_batch.append(clsf_loss)
        if torch.sum(target) > 0:
            clsf_loss_pos_batch.append(clsf_loss_pos)
        clsf_loss_neg_batch.append(clsf_loss_neg)
        
        label_list += list(target.cpu().detach().numpy())
        pred_y_list += list(pred_yy.cpu().detach().numpy())
        
        #if batch_idx % 2000 == 0:
        #    print('  Idx {} => clsf: {}'.format(batch_idx, clsf_loss))
    
    vat_loss_avg = sum(vat_batch) / len(vat_batch)
    clsf_loss_avg = sum(clsf_loss_batch) / len(clsf_loss_batch)
    clsf_loss_pos_avg = sum(clsf_loss_pos_batch) / len(clsf_loss_pos_batch)
    clsf_loss_neg_avg = sum(clsf_loss_neg_batch) / len(clsf_loss_neg_batch)
    
    return np.array(label_list), np.array(pred_y_list), clsf_loss_avg, clsf_loss_pos_avg, clsf_loss_neg_avg, vat_loss_avg

In [14]:
def infer(dataloader):
    label_list = []
    pred_y_list = []   
    
    clsf_loss_batch = []
    clsf_loss_pos_batch = []
    clsf_loss_neg_batch = []
    for batch_idx, (data, target) in enumerate(dataloader):
        if data.size()[0] != dataloader.batch_size:
            continue
        data, target = Variable(data.cuda()), Variable(target.cuda())
         
        # Update classifier
        
        pred_y = classifier(data.permute(0, 2, 1)).squeeze(-1)
        pred_y = torch.softmax(pred_y, dim=1)[:, 1]
        
        clsf_loss, clsf_loss_pos, clsf_loss_neg = focal_loss(pred_y, target)
        clsf_loss_batch.append(clsf_loss)
        if torch.sum(target) > 0:
            clsf_loss_pos_batch.append(clsf_loss_pos)
        clsf_loss_neg_batch.append(clsf_loss_neg)
        
        label_list += list(target.cpu().detach().numpy())
        pred_y_list += list(pred_y.cpu().detach().numpy())
    
    clsf_loss_avg = sum(clsf_loss_batch) / len(clsf_loss_batch)
    clsf_loss_pos_avg = sum(clsf_loss_pos_batch) / len(clsf_loss_pos_batch)
    clsf_loss_neg_avg = sum(clsf_loss_neg_batch) / len(clsf_loss_neg_batch)
    
    return np.array(label_list), np.array(pred_y_list), clsf_loss_avg, clsf_loss_pos_avg, clsf_loss_neg_avg

In [15]:
def evaluate(y_true, y_pred):
    prec = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return prec, recall, f1

In [None]:
train_history_loss = []
train_history_auc = []
max_thres = 0.
max_train_auc = 0.


print('Parameter Setting ----------------------------------------------------------------------')
print('Model = VAT + Conv1D')
print('conv1d use activation = {}'.format(True))
print('graph_emdeding = Heter_AutoEncoder')
print('alpha = {}'.format(alpha))
print('gamma_pos = {}'.format(gamma_pos))
print('gamma_neg = {}'.format(gamma_neg))
print('learn_rate = {}'.format(learn_rate))
print('grad_clip = {}'.format(grad_clip))
print('train_batch_size = {}'.format(train_batch_size))
print('test_batch_size = {}'.format(test_batch_size))
print('max_epochs = {}'.format(max_epochs))
print('vat_xi = {}'.format(vat_xi))
print('vat_eps_pos = {}'.format(vat_eps_pos))
print('vat_eps_neg = {}'.format(vat_eps_neg))
print('vat_ip = {}'.format(vat_ip))

print('\n')


for epoch in range(max_epochs):
    print('Epoch {} -------------------------------------------------------------------------'.format(epoch))
    
    classifier.train()
    label_train, pred_y_train, clsf_loss_train, clsf_loss_pos_train, \
        clsf_loss_neg_train, vat_loss_train = train(epoch, train_dataloader, clip_grad_norm=grad_clip)
    
    auc_train = roc_auc_score(label_train, pred_y_train)
    train_history_loss.append(clsf_loss_train)
    train_history_auc.append(auc_train)
    print('    Training => auc: {:.6f}, clsf_pos: {}, clsf_neg: {}, vat_loss: {}'.
          format(auc_train, clsf_loss_pos_train, clsf_loss_neg_train, vat_loss_train))
    thres = np.min(pred_y_train[label_train==1]) - 1e-6
    print("                Threshold is set to {}".format(thres))
    y_predict_bin = np.array(pred_y_train > thres, dtype=int)
    prec_train, recall_train, f1_train = evaluate(label_train, y_predict_bin)
    print('                prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}'.
          format(prec_train, recall_train, f1_train))
    
    if epoch % 1 == 0:
        #
        # Testing
        # ------------------------------------------------------------------------------------                
        with torch.no_grad():
            classifier.eval()
            label_test, pred_y_test, clsf_loss_test, clsf_loss_pos_test, clsf_loss_neg_test = infer(test_dataloader)    
        
        auc = roc_auc_score(label_test, pred_y_test)
        
        print("            Min. Probailities on test set with label 1: {}".
              format(np.min(pred_y_test[label_test==1])))
        y_predict_bin = np.array(pred_y_test > thres, dtype=int)
        prec, recall, f1 = evaluate(label_test, y_predict_bin)
                
        print('    Testing ==> auc: {:.6f}, prec: {:.4f}, rec: {:.4f}, F1score: {:.4f}, clsf_loss: {}'.
              format(auc, prec, recall, f1, clsf_loss_test))
        
        if auc_train > max_train_auc:
            max_train_auc = auc_train if auc_train > max_train_auc else max_train_auc
            torch.save({'epoch': epoch,
                        'model_state_dict': classifier.state_dict(),
                        'optimizer_state_dict': optim_clsfr.state_dict(),
                        'loss': focal_loss,
                       }, 
                       'saved_models/VATConv_heter_clsfr_xi{}_eps{}{}_focal{}{}_BestAUC'.
                       format(int(-math.log10(vat_xi)), int(-math.log10(vat_eps_pos)), 
                              int(-math.log10(vat_eps_neg)), gamma_pos, gamma_neg))
#         if thres > max_thres:
#             max_thres = thres if thres > max_thres else max_thres
#             torch.save({'epoch': epoch,
#                         'model_state_dict': classifier.state_dict(),
#                         'optimizer_state_dict': optim_clsfr.state_dict(),
#                         'loss': focal_loss,
#                        }, 
#                        'saved_models/VATConv_heter_clsfr_xi{}_eps{}{}_focal{}{}_BestThres'.
#                        format(int(-math.log10(vat_xi)), int(-math.log10(vat_eps_pos)), 
#                               int(-math.log10(vat_eps_neg)), gamma_pos, gamma_neg))
        
        

Parameter Setting ----------------------------------------------------------------------
Model = VAT + Conv1D
conv1d use activation = True
graph_emdeding = Heter_AutoEncoder
alpha = 1
gamma_pos = 4
gamma_neg = 2
learn_rate = 1e-06
grad_clip = 0.1
train_batch_size = 128
test_batch_size = 256
max_epochs = 100
vat_xi = 1e-06
vat_eps_pos = 1
vat_eps_neg = 0.01
vat_ip = 1


Epoch 0 -------------------------------------------------------------------------
    Training => auc: 0.902151, clsf_pos: 0.05799376964569092, clsf_neg: 0.06469666957855225, vat_loss: 0.00036723678931593895
                Threshold is set to 0.40893207328224185
                prec: 0.0002, rec: 1.0000, F1score: 0.0005
            Min. Probailities on test set with label 1: 0.41912752389907837
    Testing ==> auc: 0.983554, prec: 0.0001, rec: 1.0000, F1score: 0.0002, clsf_loss: 0.09506320208311081
Epoch 1 -------------------------------------------------------------------------
    Training => auc: 0.999005, clsf_pos: 

                prec: 0.0010, rec: 1.0000, F1score: 0.0020
            Min. Probailities on test set with label 1: 0.27536672353744507
    Testing ==> auc: 0.999371, prec: 0.0270, rec: 0.9348, F1score: 0.0525, clsf_loss: 0.024252744391560555
Epoch 16 -------------------------------------------------------------------------
    Training => auc: 0.999987, clsf_pos: 0.0002690958499442786, clsf_neg: 7.628564162587281e-06, vat_loss: 5.279059223539662e-06
                Threshold is set to 0.28218261735343936
                prec: 0.1416, rec: 1.0000, F1score: 0.2480
            Min. Probailities on test set with label 1: 0.2730822265148163
    Testing ==> auc: 0.955520, prec: 0.6029, rec: 0.8913, F1score: 0.7193, clsf_loss: 0.024065060541033745
Epoch 17 -------------------------------------------------------------------------
    Training => auc: 0.999979, clsf_pos: 0.0002940803824458271, clsf_neg: 7.187012670328841e-06, vat_loss: 4.168924988334766e-06
                Threshold is set to 0

            Min. Probailities on test set with label 1: 0.27043774724006653
    Testing ==> auc: 0.999302, prec: 0.0473, rec: 0.8913, F1score: 0.0898, clsf_loss: 0.02289268933236599
Epoch 32 -------------------------------------------------------------------------
    Training => auc: 0.999997, clsf_pos: 0.00030870907357893884, clsf_neg: 7.891410859883763e-06, vat_loss: 3.3118055853265105e-07
                Threshold is set to 0.27534569637680056
                prec: 0.4291, rec: 1.0000, F1score: 0.6006
            Min. Probailities on test set with label 1: 0.2707546651363373
    Testing ==> auc: 0.999496, prec: 0.0745, rec: 0.8913, F1score: 0.1376, clsf_loss: 0.022894669324159622
Epoch 33 -------------------------------------------------------------------------
    Training => auc: 0.999997, clsf_pos: 0.000272607256192714, clsf_neg: 6.819471309427172e-06, vat_loss: 3.335921121561114e-07
                Threshold is set to 0.27900786535644534
                prec: 0.4291, rec: 1.000

    Testing ==> auc: 0.998380, prec: 0.0972, rec: 0.8913, F1score: 0.1752, clsf_loss: 0.02276485227048397
Epoch 48 -------------------------------------------------------------------------
    Training => auc: 0.999998, clsf_pos: 0.000306069094222039, clsf_neg: 7.81291691964725e-06, vat_loss: 1.317240929665786e-07
                Threshold is set to 0.2754160298576355
                prec: 0.5093, rec: 1.0000, F1score: 0.6749
            Min. Probailities on test set with label 1: 0.26967892050743103
    Testing ==> auc: 0.999350, prec: 0.0728, rec: 0.8913, F1score: 0.1346, clsf_loss: 0.02276068925857544
Epoch 49 -------------------------------------------------------------------------
    Training => auc: 0.999997, clsf_pos: 0.0002831048332154751, clsf_neg: 7.342282060562866e-06, vat_loss: 1.5522482499363832e-07
                Threshold is set to 0.277246965335846
                prec: 0.4308, rec: 1.0000, F1score: 0.6022
            Min. Probailities on test set with label 1: 0.2698

    Training => auc: 0.999994, clsf_pos: 0.00033315233304165304, clsf_neg: 8.580759640608449e-06, vat_loss: 8.944492435603024e-08
                Threshold is set to 0.2734947337379456
                prec: 0.2646, rec: 1.0000, F1score: 0.4184
            Min. Probailities on test set with label 1: 0.2694893181324005
    Testing ==> auc: 0.999390, prec: 0.0746, rec: 0.9130, F1score: 0.1379, clsf_loss: 0.02273397333920002
Epoch 65 -------------------------------------------------------------------------
    Training => auc: 0.999999, clsf_pos: 0.0002197997528128326, clsf_neg: 6.138680419098819e-06, vat_loss: 9.73192726405614e-08
                Threshold is set to 0.2840290798416138
                prec: 0.7219, rec: 1.0000, F1score: 0.8385
            Min. Probailities on test set with label 1: 0.26930513978004456
    Testing ==> auc: 0.998732, prec: 0.3796, rec: 0.8913, F1score: 0.5325, clsf_loss: 0.022724933922290802
Epoch 66 ----------------------------------------------------------