# 2022/09/08

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils.prune as prune
import torch.nn.functional as F

from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchmetrics import CohenKappa

from scipy import io
import os
import wandb

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [4]:
# Hyperparameter grid for the sweep. Every entry lists the candidate values for one
# hyperparameter; single-element lists pin that hyperparameter to a fixed value.
parameters_dict = {
    name: {'values': candidates}
    for name, candidates in (
        ('hidden_size', [16, 32, 64]),
        ('batch_size', [128]),
        ('optimizer', ['adam']),
        ('epochs', [400]),
        ('learning_rate', [0.001, 0.01]),
        ('chop', [32, 64, 128]),
    )
}

# 'grid' search method over the parameter lists above.
sweep_config = {
    'method': 'grid',
    'parameters': parameters_dict,
}

# Register the sweep configuration with Weights & Biases under the given project.
# The returned sweep id is later handed to wandb.agent() to launch the runs.
sweep_id = wandb.sweep(sweep_config, project="BEEE_KA_twosec_divide_DY_1007_1016")
# Module-level handle to wandb.config. NOTE(review): train() rebinds its own local
# `config` from wandb.config after wandb.init(), so this global does not appear to
# be read by the training code shown in this notebook.
config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: gx3rk3qm
Sweep URL: https://wandb.ai/goldenyoo/BEEE_KA_twosec_divide_DY_1007_1016/sweeps/gx3rk3qm


In [5]:
def load_mat_file(chop, option, data_dir='C:/Users/Peter/Desktop/CGX/data/tmp'):
    """Load one `.mat` data file and return its contents as tensors on `device`.

    The file `<data_dir>/DY_KA_<chop>_twosec_data_<option>.mat` must contain the
    arrays K1, K2, A1, A2, Y1 and Y2. The "1" and "2" arrays are concatenated
    along the first dimension, and the labels are shifted down by one.

    Args:
        chop: Value interpolated into the file name (the sweep passes 32, 64 or 128).
        option: 1 or 2; selects the `..._data_1.mat` or `..._data_2.mat` file.
            train() uses option 1 as training data and option 2 as test data.
        data_dir: Directory that holds the `.mat` files. Defaults to the path
            originally hard-coded in this notebook.

    Returns:
        Tuple `(k, a, y)`: float tensors `k` and `a`, and a long tensor `y` whose
        values are the stored labels minus 1. All three live on `device`.

    Raises:
        ValueError: If `option` is neither 1 nor 2.
    """
    # Without this check an unexpected option left `file_name` unbound and the
    # function failed later with an unrelated UnboundLocalError.
    if option not in (1, 2):
        raise ValueError(f"option must be 1 or 2, got {option!r}")

    file_name = f'{data_dir}/DY_KA_{chop}_twosec_data_{int(option)}.mat'
    mat_file = io.loadmat(file_name)

    def _as_float(key):
        return torch.as_tensor(mat_file[key], dtype=torch.float32)

    def _as_long(key):
        return torch.as_tensor(mat_file[key], dtype=torch.long)

    # K and A feature arrays: class 1 block followed by class 2 block.
    k_train = torch.cat([_as_float('K1'), _as_float('K2')], dim=0)
    a_train = torch.cat([_as_float('A1'), _as_float('A2')], dim=0)

    # Labels for class 1 and class 2, in the same order as the features.
    y_train = torch.cat([_as_long('Y1'), _as_long('Y2')], dim=0)
    # Shift labels down by one so they become 0-based class indices
    # (presumably stored as 1/2 in the file - TODO confirm).
    y_train = y_train - 1

    return k_train.to(device), a_train.to(device), y_train.to(device)

In [6]:
def build_dataset(batch_size, k_train, a_train, y_train, k_test, a_test, y_test):
    """Wrap the train/test tensors in DataLoaders, holding out 20% of train for validation.

    Args:
        batch_size: Mini-batch size for the training and test loaders.
        k_train, a_train, y_train: Training tensors sharing the same first dimension.
        k_test, a_test, y_test: Test tensors sharing the same first dimension.

    Returns:
        Tuple `(train_DL, valid_DL, test_DL)`:
        - train_DL: shuffled, incomplete last batch dropped, over a random 80% split.
        - valid_DL: the remaining samples delivered as a single batch.
        - test_DL: the test tensors in their stored order.
    """
    # TensorDataset requires all tensors to agree on their first dimension.
    full_train = TensorDataset(k_train, a_train, y_train)
    held_out = TensorDataset(k_test, a_test, y_test)

    # 80/20 random split of the training data into fit / validation subsets.
    n_total = len(full_train)
    n_fit = int(n_total * 0.8)
    n_valid = n_total - n_fit
    fit_subset, valid_subset = random_split(full_train, [n_fit, n_valid])

    loaders = (
        DataLoader(fit_subset, batch_size=batch_size, shuffle=True, drop_last=True),
        # batch_size == subset size, so one batch covers the whole validation split.
        DataLoader(valid_subset, batch_size=n_valid, shuffle=False),
        DataLoader(held_out, batch_size=batch_size),
    )
    return loaders

In [7]:
def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer named by `optimizer` for the parameters of `network`.

    Args:
        network: Module whose `parameters()` are to be optimized.
        optimizer: Optimizer name, either "sgd" (momentum 0.9) or "adam".
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        A `torch.optim.SGD` or `torch.optim.Adam` instance.

    Raises:
        ValueError: If `optimizer` is not a supported name. Previously the name
            string itself was returned, which only failed later when the caller
            tried to use it as an optimizer.
    """
    params = network.parameters()
    if optimizer == "sgd":
        return optim.SGD(params, lr=learning_rate, momentum=0.9)
    if optimizer == "adam":
        return optim.Adam(params, lr=learning_rate)
    raise ValueError(
        f"Unsupported optimizer {optimizer!r}; expected 'sgd' or 'adam'"
    )

In [8]:
input_size = 19  # size of the feature dimension fed to each LSTM at every time step
n_class = 2      # number of output logits produced by the classifier head

dtype = torch.float


class TextLSTM(nn.Module):
  """Two-branch LSTM classifier.

  The K and A inputs are each run through their own single-layer LSTM. The
  outputs at the last time step are concatenated and passed through two linear
  layers (with a ReLU in between) to produce `n_class` logits.

  Args:
    hidden_size: Hidden size of each LSTM branch.
  """

  def __init__(self, hidden_size):
    super(TextLSTM, self).__init__()

    self.lstm_1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
    self.lstm_2 = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
    self.fc_1 = nn.Linear(hidden_size*2, hidden_size*2)
    self.fc_2 = nn.Linear(hidden_size*2, n_class)

  def forward(self, K_and_A):
    """Compute class logits for a batch.

    Args:
      K_and_A: Tuple `(k, a)` of tensors shaped (batch, input_size, seq_len).
        The inputs are not modified.

    Returns:
      Tensor of shape (batch, n_class) with unnormalized logits, on the same
      device as the module's parameters.
    """
    (k, a) = K_and_A

    # (batch, feature, time) -> (time, batch, feature), the layout nn.LSTM expects
    # when batch_first is False. permute() returns a view, so the caller's tensors
    # keep their shape; the earlier in-place transpose_() calls changed them and
    # made a second forward pass on the same tensors fail.
    k = k.permute(2, 0, 1)
    a = a.permute(2, 0, 1)

    outputs1, _ = self.lstm_1(k)
    outputs2, _ = self.lstm_2(a)

    # Keep only the last time step of each branch and join along the feature axis.
    outputs = torch.cat((outputs1[-1], outputs2[-1]), dim=1)

    x = self.fc_1(outputs)
    # Final output layer producing the class logits.
    return self.fc_2(F.relu(x))


In [9]:
def train(config=None):
    """Run one sweep trial: train a TextLSTM, then evaluate it on the test file.

    Hyperparameters (`chop`, `hidden_size`, `optimizer`, `learning_rate`,
    `batch_size`, `epochs`) are read from `wandb.config` after the run is
    initialized. Per epoch, the mean training loss and the validation accuracy
    are logged together in a single `wandb.log` call. After training, accuracy
    and Cohen's kappa are computed once over the whole test set and logged.

    Args:
        config: Optional configuration forwarded to `wandb.init`.
    """
    # Initialize a new wandb run
    with wandb.init(config=config):
        config = wandb.config

        # load_mat_file already returns tensors on `device`, so no further
        # device transfers are needed below.
        k_train, a_train, y_train = load_mat_file(config.chop, 1)
        k_test, a_test, y_test = load_mat_file(config.chop, 2)

        model = TextLSTM(hidden_size=config.hidden_size).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = build_optimizer(model, config.optimizer, config.learning_rate)
        # Multiply the base learning rate by 0.95 ** epoch.
        scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                lr_lambda=lambda epoch: 0.95 ** epoch)

        train_DL, valid_DL, test_DL = build_dataset(
            config.batch_size,
            k_train, a_train, y_train,
            k_test, a_test, y_test,
        )

        for epoch in range(config.epochs):
            model.train()
            running_loss = 0.0
            samples_seen = 0
            for k_mb, a_mb, y_mb in train_DL:
                # Forward
                output = model((k_mb, a_mb))

                # Cost. reshape(-1) flattens the labels to 1-D even for a batch of
                # one sample, where squeeze() would produce a 0-dim tensor.
                loss = criterion(output, y_mb.reshape(-1))

                # Backpropagate
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                n_mb = y_mb.size(0)
                running_loss += loss.item() * n_mb
                samples_seen += n_mb

            # Mean loss over the samples actually processed this epoch. The train
            # loader drops its incomplete last batch, so dividing by the dataset
            # size would under-report the loss.
            loss_e = running_loss / max(samples_seen, 1)

            # Validation
            model.eval()
            correct = 0
            with torch.no_grad():
                for k_valid, a_valid, y_valid in valid_DL:
                    prediction = model((k_valid, a_valid)).argmax(dim=1)
                    correct += prediction.eq(y_valid.view_as(prediction)).sum().item()
            valid_acc = correct / max(len(valid_DL.dataset), 1)

            # One log call per epoch keeps both metrics on the same wandb step.
            wandb.log({"loss": loss_e, "Validation accuracy": valid_acc})

            if epoch % 100 == 0:
                print(f"Epoch: {epoch}, train loss: {round(loss_e,3)}")
                print(f"Validation accuracy: {round(valid_acc,3)}")

            scheduler.step()

        # Final evaluation: gather predictions from every test batch first so that
        # accuracy and kappa describe the whole test set rather than single batches.
        model.eval()
        all_predictions = []
        all_labels = []
        with torch.no_grad():
            for k_mb, a_mb, y_mb in test_DL:
                prediction = model((k_mb, a_mb)).argmax(dim=1)
                all_predictions.append(prediction)
                all_labels.append(y_mb.view_as(prediction))

        if not all_predictions:
            # Nothing to evaluate; as before, no test metrics are logged.
            return

        predictions = torch.cat(all_predictions)
        labels = torch.cat(all_labels)

        test_acc = predictions.eq(labels).sum().item() / len(test_DL.dataset)
        print(f"Evaluation accuracy: {round(test_acc,3)}")

        cohenkappa = CohenKappa(num_classes=2).to(device)
        peter_kappa = cohenkappa(predictions, labels)

        wandb.log({"Evaluation accuracy": test_acc, "Kappa": peter_kappa.item()})
                

In [10]:
# Start a sweep agent in this kernel, using train() as the function executed for
# the runs of sweep `sweep_id`.
# NOTE(review): the output recorded below reports
# NameError("name 'hidden_k' is not defined"), yet no `hidden_k` appears in the code
# of this notebook as saved. The output presumably comes from an earlier version of
# a cell - restart the kernel and run all cells to confirm the current code works.
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: vjaycj1i with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	chop: 32
[34m[1mwandb[0m: 	epochs: 400
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgoldenyoo[0m. Use [1m`wandb login --relogin`[0m to force relogin


Run vjaycj1i errored: NameError("name 'hidden_k' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run vjaycj1i errored: NameError("name 'hidden_k' is not defined")
[34m[1mwandb[0m: Agent Starting Run: snlhxa5t with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	chop: 32
[34m[1mwandb[0m: 	epochs: 400
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


In [None]:
# Explicitly finish the current wandb run, if one is still open.
# NOTE(review): train() wraps each run in `with wandb.init(...)`, so this is
# presumably only needed after an interrupted sweep - confirm.
wandb.finish()