In [1]:
import os
import sys
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tqdm
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Run on the GPU when CUDA is available to this process, otherwise on the CPU.
_backend_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_backend_name)
print('the device is %s' % device)

from myLibs.myEval import myEval

the device is cuda


In [2]:
# --- Data split -------------------------------------------------------------
# Seed passed to np.random.seed before each shuffle of the labeled pool.
shuffle_seed = 37
# Number of positive / negative samples taken for the labeled training set;
# the remaining samples of each class go to the validation set.
pos_size = neg_size = 250
# NOTE(review): not referenced by the visible cells -- confirm before relying on it.
train_size = 1000
# Batch size shared by every DataLoader in this notebook.
BATCHSIZE = 500

# --- Run --------------------------------------------------------------------
num_epochs = 10
# Tag used for the TensorBoard run comment and the saved model file names.
prefix = "20210413-test"
print(prefix)

20210413-test


In [4]:


# Labeled training pool: a feature matrix plus a flattened label vector.
label_feature = pd.read_csv("myData/Train/labeled.csv").values
label_target  = pd.read_csv("myData/Train/true_label.csv").values.reshape(-1)

# Split the pool by class. Boolean-mask indexing returns copies, so the
# in-place shuffles below do not touch label_feature / label_target.
is_pos = label_target == 1
is_neg = label_target == 0
pos_feature, pos_label = label_feature[is_pos], label_target[is_pos]
neg_feature, neg_label = label_feature[is_neg], label_target[is_neg]

# The RNG is re-seeded with the same value before every shuffle; the intent is
# that the feature rows and the labels of one class get the same permutation.
for class_array in (pos_feature, pos_label, neg_feature, neg_label):
    np.random.seed(shuffle_seed)
    np.random.shuffle(class_array)

# The first pos_size / neg_size samples of each class form the training set,
# everything after that forms the validation set.
train_x = np.array([*pos_feature[:pos_size], *neg_feature[:neg_size]])
train_y = np.array([*pos_label[:pos_size], *neg_label[:neg_size]])
valid_x = np.array([*pos_feature[pos_size:], *neg_feature[neg_size:]])
valid_y = np.array([*pos_label[pos_size:], *neg_label[neg_size:]])

# Insert a singleton axis at position 1 (the model printed further down starts
# with Conv1d(1, ...)), and cast the labels to int64 for CrossEntropyLoss.
trainDataset = TensorDataset(
    torch.Tensor(train_x[:, None, :]),
    torch.Tensor(train_y).long(),
)
labeledLoader = DataLoader(dataset=trainDataset, batch_size=BATCHSIZE, shuffle=True)

validDataset = TensorDataset(
    torch.Tensor(valid_x[:, None, :]),
    torch.Tensor(valid_y).long(),
)
validLoader = DataLoader(dataset=validDataset, batch_size=BATCHSIZE, shuffle=False)

In [6]:
# Unlabeled pool: features only, shaped like the labeled data with a singleton
# axis inserted at position 1.
unlabel_data = pd.read_csv("myData/Train/unlabel.csv").values
unlabeled_tensor = torch.Tensor(unlabel_data[:, None, :])
unlabeledDataset = TensorDataset(unlabeled_tensor)
unlabeledLoader = DataLoader(
    dataset=unlabeledDataset,
    batch_size=BATCHSIZE,
    shuffle=True,
)

In [7]:
from myLibs.myDis import NetD
from myLibs.myGen import NetG
# Both optimizers share the same Adam hyper-parameters.
adam_settings = {"lr": 0.00095, "betas": (0.5, 0.999), "weight_decay": .01}

# Classifier/discriminator first, generator second (same construction order
# as before), both moved to the selected device.
modelCD, modelG = NetD().to(device), NetG().to(device)

# Supervised loss on the class logits of the labeled batch.
criterionC = nn.CrossEntropyLoss()

optimizerCD = optim.Adam(modelCD.parameters(), **adam_settings)
optimizerG = optim.Adam(modelG.parameters(), **adam_settings)

In [8]:
# Show both architectures, one after the other.
print(modelCD, modelG, sep="\n")

CNN1d_5layer(
  (cnn1): Sequential(
    (0): Conv1d(1, 10, kernel_size=(3,), stride=(1,))
    (1): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Tanh()
    (3): Dropout(p=0.6, inplace=False)
  )
  (cnn2): Sequential(
    (0): Conv1d(10, 40, kernel_size=(3,), stride=(1,))
    (1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Tanh()
    (3): Dropout(p=0.6, inplace=False)
  )
  (cnn3): Sequential(
    (0): Conv1d(40, 200, kernel_size=(3,), stride=(1,))
    (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (linear1): Sequential(
    (0): Linear(in_features=13000, out_features=2, bias=True)
  )
)
NetG(
  (Linear1): Sequential(
    (0): Linear(in_features=30, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Dropout(p=0.6, inplace=False)
  )
  (line

In [9]:
from tensorboardX import SummaryWriter
writer = SummaryWriter(comment="%s" % prefix)

maxacc = 0
# "Best so far" checkpoints are only considered from this epoch index onward.
# NOTE(review): while num_epochs is smaller than this value the checkpoint
# branch below never runs -- lower it if intermediate checkpoints are wanted.
checkpoint_min_epoch = 1000

global_step = 1
iter_labeled = iter(labeledLoader)

# NOTE: the per-epoch evaluation below calls the five-argument myEval imported
# from myLibs.myEval. A later cell defines a four-argument function with the
# same name, so re-running this cell after that one fails with a TypeError.

try:
    for epoch in tqdm.tqdm(range(num_epochs)):
        # One pass over the unlabeled loader per epoch; labeled batches are
        # drawn from a separate iterator that is restarted when exhausted.
        for step, unlabeled_batch in enumerate(unlabeledLoader):

            ############################################################  Classifier/Discriminator
            modelCD.train()
            modelG.eval()

            optimizerCD.zero_grad()

            ## label
            try:
                x_labeled, y_labeled = next(iter_labeled)
            except StopIteration:
                iter_labeled = iter(labeledLoader)
                x_labeled, y_labeled = next(iter_labeled)
            x_labeled, y_labeled = x_labeled.to(device), y_labeled.to(device)

            # modelCD returns a pair; the second element is used as class logits.
            _, outClassLabeled = modelCD(x_labeled)
            lossLabeled = criterionC(outClassLabeled, y_labeled)

            ## unlabel
            x_unlabeled = unlabeled_batch[0].to(device)
            _, outClassUnlabeled = modelCD(x_unlabeled)

            # With log Z = logsumexp(logits), log Z - softplus(log Z) equals
            # log(Z / (Z + 1)); the unlabeled term below is -0.5 times its mean.
            logz_unlabeled = torch.logsumexp(outClassUnlabeled, dim=1)
            lossUnlabeled = -0.5 * torch.mean(logz_unlabeled) + 0.5 * torch.mean(F.softplus(logz_unlabeled))

            ## Fake
            # The generator is not updated in this step, so its samples are
            # produced without autograd tracking. This avoids a useless
            # backward pass through modelG and leaves the modelCD update
            # unchanged.
            fakeNoise1 = torch.randn(x_unlabeled.size(0), 30, device=device)
            with torch.no_grad():
                # Affine rescale of the generator output: (g + 1) / 2.
                x_Fake1 = (modelG(fakeNoise1) + 1.0) / 2
            _, outClassFake1 = modelCD(x_Fake1)

            # softplus(log Z) equals -log(1 / (Z + 1)).
            logz_fake1 = torch.logsumexp(outClassFake1, dim=1)
            lossFake = 0.5 * torch.mean(F.softplus(logz_fake1))

            ## loss
            totalLoss = lossLabeled + lossUnlabeled + lossFake

            ## optimization
            writer.add_scalar("training_loss/supervised", lossLabeled.item(), global_step)
            writer.add_scalar("training_loss/unsupervised", (lossUnlabeled + lossFake).item(), global_step)
            writer.add_scalar("training_loss/D_Loss", totalLoss.item(), global_step)

            totalLoss.backward()
            optimizerCD.step()

            ############################################################  Generator
            modelCD.eval()
            modelG.train()
            optimizerG.zero_grad()

            fakeNoise2 = torch.randn(x_unlabeled.size(0), 30, device=device)
            x_Fake2 = (modelG(fakeNoise2) + 1.0) / 2

            ## loss
            # The generator is trained to match the batch mean of modelCD's
            # first output on real vs. generated samples. The real-batch mean
            # does not depend on modelG, so it is computed without autograd
            # tracking.
            with torch.no_grad():
                y_pred_unlabeled, _ = modelCD(x_unlabeled)
            y_pred_fake, _ = modelCD(x_Fake2)
            mom_real = torch.mean(y_pred_unlabeled, dim=0)
            mom_fake = torch.mean(y_pred_fake, dim=0)
            diff = mom_fake * 100 - mom_real * 100
            lossG = torch.mean(diff * diff)

            ## optimization
            writer.add_scalar("training_loss/G_loss", lossG.item(), global_step)
            lossG.backward()
            optimizerG.step()

            global_step += 1

        # Accuracy on the labeled training loader, logged once per epoch.
        train_loss, train_accuracy = myEval(modelCD, criterionC, device, labeledLoader, False)
        writer.add_scalar("accuracy/train", train_accuracy, epoch)

        if epoch >= checkpoint_min_epoch and train_accuracy > maxacc:
            os.makedirs('./model_save', exist_ok=True)
            torch.save(modelCD.state_dict(), './model_save/%s_%s.pt' % (prefix, epoch))
            maxacc = train_accuracy
finally:
    # Flush and close the event file even when training is interrupted.
    writer.flush()
    writer.close()

100%|██████████| 10/10 [01:15<00:00,  7.55s/it]


In [10]:
# Persist the final classifier/discriminator weights. The target directory is
# created first so the save does not fail when ./model_save is missing.
os.makedirs('./model_save', exist_ok=True)
torch.save(modelCD.state_dict(), './model_save/%s.pt' % prefix)

### 

## Performance on the test data

In [11]:
# from models.model_20210222_CNN3 import model3_1
# modelCD     = model3_1().to(device)
# modelCD.load_state_dict(torch.load('./model_save/20210319-CNN3-net8_train_size4000_batch_size500_1368.pt'))

In [12]:
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.metrics import f1_score 
import math
import matplotlib.pyplot as plt

def myEval(model, device, test_loader, display = False):
    """Collect true labels and positive-class scores over a data loader.

    The model is switched to eval mode and run without gradient tracking.
    For each batch the second element of the model's return value is treated
    as class logits; the score of a sample is the two-class softmax
    probability of column 1 computed from the first two logit columns.

    Parameters
    ----------
    model : callable returning a pair whose second element is a
        (batch, >=2) logit tensor; must provide ``eval()``.
    device : torch.device the batches are moved to before the forward pass.
    test_loader : iterable yielding ``(data, target)`` tensor pairs.
    display : unused; kept so existing callers keep working.

    Returns
    -------
    (target_list, output_list) : two Python lists of equal length holding the
        targets and the positive-class probabilities (floats in [0, 1]).
    """
    model.eval()

    target_list = []
    output_list = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            _, output = model(data)
            # torch.softmax subtracts the row maximum internally, so large
            # logits cannot overflow the way exp(a) / (exp(a) + exp(b)) does.
            # float64 keeps the precision of the previous Python-float math.
            positive_prob = torch.softmax(output[:, :2].double(), dim=1)[:, 1]
            target_list += target.cpu().tolist()
            output_list += positive_prob.cpu().tolist()

    return target_list, output_list


def evaluation_df(pred_score, labeled_y):
    """Tabulate binary-classification metrics over a sweep of thresholds.

    Scores are binarised at thresholds 0.05, 0.10, ..., 0.95 (a score below
    the threshold becomes class 0, anything else class 1) and compared with
    the true labels.

    Parameters
    ----------
    pred_score : iterable of float scores for the positive class.
    labeled_y : iterable of true labels (0 or 1), same length as pred_score.

    Returns
    -------
    pandas.DataFrame with one row per threshold and the columns
    ['threshold', 'TP', 'FP', 'TN', 'FN', 'sen', 'spe', 'Acc', 'AUC', 'MCC', 'F1'].
    Ratios whose denominator is zero are reported as NaN.
    """
    scores = np.asarray(list(pred_score), dtype=float)
    y_true = list(labeled_y)

    # ROC/AUC is independent of the decision threshold: compute it once
    # instead of once per row.
    fpr, tpr, _ = roc_curve(y_true, scores)
    auc_val = auc(fpr, tpr)

    def _ratio(numerator, denominator):
        # Explicit zero-denominator handling instead of a bare except.
        return numerator / denominator if denominator else np.nan

    def TP_table(threshold):
        # "not below the threshold" (rather than ">=") keeps the original
        # rule that any score failing `score < threshold` is predicted as 1.
        y_pred = np.where(scores < threshold, 0, 1)

        # labels=[0, 1] forces a 2x2 matrix even if a class is absent, and
        # Python ints avoid silent int64 overflow in the MCC product below.
        tn, fp, fn, tp = (int(v) for v in confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel())

        sensitivity = _ratio(tp, tp + fn)
        specificity = _ratio(tn, tn + fp)
        accuracy    = _ratio(tp + tn, tp + tn + fp + fn)

        F1 = f1_score(y_true, y_pred)

        mcc_den = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
        MCC = _ratio(tp * tn - fp * fn, mcc_den ** 0.5)

        return [threshold, tp, fp, tn, fn, sensitivity, specificity, accuracy, auc_val, MCC, F1]

    rows = [TP_table(i / 20) for i in range(1, 20)]

    return pd.DataFrame(rows, columns=['threshold', 'TP', 'FP', 'TN', 'FN', 'sen', 'spe', 'Acc', 'AUC', 'MCC', 'F1'])


In [13]:
# Load the held-out test split.
# NOTE: this rebinds label_feature / label_target, which the training-data
# cell also assigns; re-run that cell before reusing the training split.
label_feature = pd.read_csv("myData/Test/labeled.csv").values
label_target  = pd.read_csv("myData/Test/true_label.csv").values.reshape(-1)

testDataset = TensorDataset(torch.Tensor(label_feature[:, np.newaxis, :]), torch.Tensor(label_target).long())
# Evaluation gains nothing from shuffling; keep the order deterministic,
# consistent with validLoader.
testLoader = DataLoader(dataset=testDataset, batch_size=BATCHSIZE, shuffle=False)

# Positive-class scores for every test sample, then the threshold sweep table.
label, pred = myEval(modelCD, device, testLoader)
evaluation_df(pred, label)

Unnamed: 0,threshold,TP,FP,TN,FN,sen,spe,Acc,AUC,MCC,F1
0,0.05,1257,3222,1669,78,0.941573,0.341239,0.469965,0.789877,0.258345,0.432405
1,0.1,1243,3076,1815,92,0.931086,0.37109,0.491166,0.789877,0.26905,0.439689
2,0.15,1236,3003,1888,99,0.925843,0.386015,0.501767,0.789877,0.274578,0.443488
3,0.2,1235,2952,1939,100,0.925094,0.396442,0.509798,0.789877,0.281196,0.447302
4,0.25,1228,2906,1985,107,0.91985,0.405847,0.516062,0.789877,0.283001,0.449077
5,0.3,1223,2869,2022,112,0.916105,0.413412,0.521201,0.789877,0.284939,0.450709
6,0.35,1218,2809,2082,117,0.91236,0.42568,0.530035,0.789877,0.290271,0.454308
7,0.4,1216,2773,2118,119,0.910861,0.43304,0.535496,0.789877,0.294177,0.456799
8,0.45,1212,2741,2150,123,0.907865,0.439583,0.539994,0.789877,0.296187,0.458396
9,0.5,1210,2706,2185,125,0.906367,0.446739,0.545294,0.789877,0.299997,0.460865
