In [25]:
cd BiS495/GNN

[Errno 2] No such file or directory: 'BiS495/GNN'
/work/home/nhkim/BiS495/GNN


In [26]:
import os
import logging
import numpy as np
import pandas as pd
import random
import adabound
import pickle
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import roc_auc_score

import torch

# Env
from utils import *
from model_GAT import *
from options import parse_args
from test_model import test

In [27]:
opt = parse_args()
# opt.lin_input_dim = 958
# opt.lin_input_dim = 1437
# opt.lin_input_dim = 1916
opt.act_type = 'SM'
opt.optimizer_type = 'adabound'
opt.num_epochs = 100
opt.lr = 0.00005

device = torch.device('cpu')

----------------- Options ---------------
                 act_type: none                          
               adj_thresh: 0.05                          
                    alpha: 0.2                           
               batch_size: 32                            
                  cnv_dim: 0                             
                  dropout: 0.2                           
                 final_lr: 0.1                           
                  gpu_ids: 3,4,5                         
                input_dim: 1                             
                label_dim: 2                             
               lambda_cox: 1                             
               lambda_nll: 1                             
               lambda_reg: 0.0003                        
            lin_input_dim: 958                           
                       lr: 0.0001                        
                lr_policy: linear                        
                model_dir: ./p

In [28]:
def acc_test(out, lb):
    score = 0
    out = out.flatten()
    # print(out)
    out = np.where(out > 0.5, 1.0, 0.0)
    lb = np.array(lb.flatten())
    # print(lb)
    score += np.sum(out == lb)
    return score

In [29]:
def auroc_score(out, lb):
    out = out.flatten()
    lb = np.array(lb.flatten())
    # print(out)
    # print(lb)
    if isinstance(out, torch.Tensor):
        out = out.detach().cpu().numpy()
    if isinstance(lb, torch.Tensor):
        lb = lb.detach().cpu().numpy()
    # roc_auc_score는 실제 레이블과 예측 확률을 받아 AUROC 값을 반환
    auroc = roc_auc_score(lb, out)
    
    return auroc

In [30]:
def preprocess(labels):
    n = len(labels)
    one_hot = torch.zeros(n, 2)

    for i in range(n):
        if labels[i] == 0:
            one_hot[i] = torch.tensor([1, 0])
        elif labels[i] == 1:
            one_hot[i] = torch.tensor([0, 1])
    
    return one_hot

In [31]:
class MLP(nn.Module):
    def __init__(self, input_dim=479, hidden_dim=[128, 64, 32, 8], output_dim=2):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim[0])
        self.fc2 = nn.Linear(hidden_dim[0], hidden_dim[1])
        self.fc3 = nn.Linear(hidden_dim[1], hidden_dim[2])
        self.fc4 = nn.Linear(hidden_dim[2], hidden_dim[3])
        self.fc5 = nn.Linear(hidden_dim[3], output_dim)
        self.relu = nn.LeakyReLU(0.01)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)
        x = torch.softmax(x, dim=-1)

        return x

In [32]:
def initialize_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        nn.init.zeros_(m.bias)

In [33]:
def train_model(train_loader):
    model = MLP()
    model.to(device)
    model.apply(initialize_weights)
    max_acc = 0
    all_outs = np.array([], dtype=np.int64)
    all_labels = np.array([], dtype=np.int64)
    optimizer = torch.optim.Adam(params = model.parameters(), lr = opt.lr)
    criterion = torch.nn.CrossEntropyLoss()
    
    for epoch in range(opt.num_epochs):
        model.train()
        running_loss = 0.0
        running_acc = 0.0
        running_auroc = 0.0
        
        for i, (features, labels) in enumerate(train_loader):
            features = features.float().to(device)
            labels = labels.float().to(device)

            optimizer.zero_grad()

            features = features.view(features.size(0), -1).to(device)
            
            outputs = model(features)

            # print('outputs: ', outputs)
            # print('labels: ', labels)

            labels_change = preprocess(labels)

            loss = criterion(outputs, labels_change)
            # loss.requires_grad = True
            
            #outs = torch.sigmoid(outputs)

            outs = (outputs[:, 0] < 0.5).long()

            acc = acc_test(outs, labels)
            
            all_outs = np.concatenate((all_outs, outs))
            all_labels = np.concatenate((all_labels, labels.view(-1)))

            # print('loss: ', loss)
            # print('acc: ' , acc)
            # print('auroc: ', auroc)
            # print('grad: ', gradients)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            running_acc += acc.item()
        
        for param_group in optimizer.param_groups:
            current_lr = param_group['lr']
            # print(f"Current Learning Rate: {current_lr}")

        auroc = auroc_score(all_outs, all_labels)

        print(f'Epoch [{epoch+1}/{opt.num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Acc: {running_acc/len(train_loader):.4f}, Auroc: {auroc:.4f}')
        # print('outputs: ', outputs.view(-1))
        # print('labels: ', labels.view(-1))
        if running_acc > max_acc:
            best_model = model
            max_acc = running_acc

    print('Training complete')

    return best_model

In [34]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for i, (features, labels) in enumerate(test_loader):
            features = features.float().to(device)
            features = features.view(features.size(0), -1).to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
            print(len(predictions))
            
    return predictions

In [35]:
def test_model(best_model, test_loader, te_labels):
    model = best_model
    preds_real = inference(model, test_loader, device)

    preds_real = np.array(preds_real)
    preds = (preds_real[:, 0] < 0.5).astype(int)
    # print(preds_real)
    # print(len(preds_real))
    # print(preds)
    # print(te_labels.view(-1))

    acc = acc_test(preds, te_labels)
    auroc = auroc_score(preds, te_labels)

    print(f'Acc: {acc/len(te_labels):.4f}, Auroc: {auroc:.4f}')

    return acc

In [36]:
acc_dnn = 0

tr_features, tr_labels, te_features, te_labels, adj_matrix = load_csv_data(4, opt)
train_dataset = TensorDataset(tr_features, tr_labels)
train_loader = DataLoader(dataset=train_dataset, batch_size=opt.batch_size, shuffle=True)
te_dataset = TensorDataset(te_features, te_labels)
test_loader = DataLoader(te_dataset, batch_size=opt.batch_size, shuffle=False)

model = MLP()
device = torch.device('cpu')
model.to(device)
model.apply(initialize_weights)

best_model = train_model(train_loader)
acc_dnn = acc_dnn + test_model(best_model, test_loader, te_labels)

print(f"accuracy of DNN: {acc_dnn/5.0:.4f}")

Loading data from: /work/home/nhkim/BiS495/GNN/input_features_labels/split4
Training features and labels: torch.Size([576, 479, 1]) torch.Size([576, 1])


Testing features and labels: torch.Size([145, 479, 1]) torch.Size([145, 1])
Adjacency matrix: torch.Size([479, 479])
Number of edges: tensor(43797)
Epoch [1/100], Loss: 0.7361, Acc: 16.1111, Auroc: 0.4993
Epoch [2/100], Loss: 0.7015, Acc: 16.6667, Auroc: 0.5089
Epoch [3/100], Loss: 0.6921, Acc: 16.8889, Auroc: 0.5105
Epoch [4/100], Loss: 0.6935, Acc: 16.8333, Auroc: 0.5144
Epoch [5/100], Loss: 0.7014, Acc: 15.2222, Auroc: 0.5061
Epoch [6/100], Loss: 0.6868, Acc: 16.7222, Auroc: 0.5090
Epoch [7/100], Loss: 0.6910, Acc: 16.6667, Auroc: 0.5098
Epoch [8/100], Loss: 0.6831, Acc: 18.5556, Auroc: 0.5183
Epoch [9/100], Loss: 0.6811, Acc: 17.7222, Auroc: 0.5216
Epoch [10/100], Loss: 0.6783, Acc: 18.5000, Auroc: 0.5268
Epoch [11/100], Loss: 0.6806, Acc: 18.1667, Auroc: 0.5300
Epoch [12/100], Loss: 0.6751, Acc: 18.0000, Auroc: 0.5321
Epoch [13/100], Loss: 0.6735, Acc: 18.6111, Auroc: 0.5358
Epoch [14/100], Loss: 0.6769, Acc: 18.3333, Auroc: 0.5380
Epoch [15/100], Loss: 0.6739, Acc: 18.6667, Auroc

In [37]:
# acc_dnn = 0

# for k in range(1, 6):
#     tr_features, tr_labels, te_features, te_labels, adj_matrix = load_csv_data(k, opt)
#     train_dataset = TensorDataset(tr_features, tr_labels)
#     train_loader = DataLoader(dataset=train_dataset, batch_size=opt.batch_size, shuffle=True)
#     te_dataset = TensorDataset(te_features, te_labels)
#     test_loader = DataLoader(te_dataset, batch_size=opt.batch_size, shuffle=False)

#     model = MLP()
#     device = torch.device('cpu')
#     model.to(device)
#     model.apply(initialize_weights)

#     best_model = train_model(train_loader)
#     acc_dnn = acc_dnn + test_model(best_model, test_loader, te_labels)

# print(f"accuracy of DNN: {acc_dnn/5.0:.4f}")