## 공통

In [592]:
!pip install torch
!pip install matplotlib



In [593]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import TensorDataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.model_selection import RepeatedKFold

from itertools import product

## 데이터 정리

In [594]:
np.random.seed(0)

USE_COLAB = False

PREFIX = ''

path = 'result/model.pt'

DEBUG = False

if USE_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    PREFIX = '/content/drive/My Drive/'

In [595]:
# finger_data 폴더에서 다운로드

x = np.load(PREFIX + 'data/x.npy')
y = np.load(PREFIX + 'data/y.npy')

# sequence length
length = x.shape[1]
# dimension
dim = x.shape[2]

tv_x, test_x, tv_y, test_y = train_test_split(x, y, test_size=0.25, random_state=0)
#train_x, validation_x, train_y, validation_y = train_test_split(train_x, train_y, test_size=0.33, random_state=0)

min_val = np.empty(dim)
max_val = np.empty(dim)

for i in range(dim):
    feature = tv_x[:,:,i]
    min_val[i] = feature.min()
    max_val[i] = feature.max()

    if min_val[i] == max_val[i]:
        max_val[i] = 1
        min_val[i] = 0

#print(min_val)
#print(max_val - min_val)

tv_x = (tv_x - min_val) / (max_val - min_val)
test_x = (test_x - min_val) / (max_val - min_val)

In [596]:
pos_weight =  (tv_y == 0).sum() / (tv_y == 1).sum()

## Basic data augmentation

In [597]:
# augmenting functions
def jitter(x, y, n_jitter, scale):
  js = np.random.choice(len(x), size=n_jitter)

  jittered_arr = np.empty((n_jitter, length, dim))
  answer_arr = np.empty(n_jitter)
  jitter_original_arr = np.empty((n_jitter, length, dim))

  for i in range(len(js)):
    ji = js[i]
    data = x[ji]

    j = np.random.normal(scale=scale, size=(length, dim))
    jittered = data + j

    jittered_arr[i] = jittered
    answer_arr[i] = y[ji]
    jitter_original_arr[i] = data

  return jittered_arr, answer_arr, jitter_original_arr

def scale(x, y, n_scale, scale):
  ss = np.random.choice(len(x), size=n_scale)

  scaled_arr = np.empty((n_scale, length, dim))
  answer_arr = np.empty(n_scale)
  scale_original_arr = np.empty((n_scale, length, dim))

  for i in range(len(ss)):
    si = ss[i]
    data = x[si]

    s = np.random.normal(loc=1, scale=scale, size=(1, dim))
    scaled = data * s

    scaled_arr[i] = scaled
    answer_arr[i] = y[si]
    scale_original_arr[i] = data

  return scaled_arr, answer_arr, scale_original_arr


In [598]:
# n_jitter = 5
# n_scale = 5

# jittered_arr, jitter_original_arr = jitter(train_x, n_jitter)
# scaled_arr, scale_original_arr = scale(train_x, n_scale)

# for i in [0, 1, 2]:
#   fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 4))
#   ax1.plot(jittered_arr[i,:,0])
#   ax1.plot(jitter_original_arr[i,:,0])
#   ax2.plot(scaled_arr[i,:,0])
#   ax2.plot(scale_original_arr[i,:,0])
#   plt.show()

## GAN data augmentation

## Basic RNN

In [599]:
# basic Vanilla RNN
class VanillaRNN(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, dropout, device):
    super(VanillaRNN, self).__init__()
    self.device = device
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.d = 1
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout, bidirectional=False).to(self.device)
    self.bn = nn.BatchNorm1d(self.d * hidden_size).to(self.device)
    self.fc = nn.Sequential(nn.Linear(self.d * hidden_size, 1)).to(self.device)

  def forward(self, x):
    h0 = torch.zeros(self.num_layers*self.d, x.size()[0], self.hidden_size).to(self.device) # 초기 hidden state
    
    out, _ = self.rnn(x, h0)
    out = out[:,-1]

    # normalization + layering
    out = self.fc(
        self.bn(out)
      )
    return out

# LSTM
class LSTM(nn.Module): # LSTM with 1 seq
  def __init__(self, input_size, hidden_size, num_layers, dropout, device):
    super(LSTM, self).__init__()
    self.device = device
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.d = 1

    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout, bidirectional=False).to(self.device)
    self.bn = nn.BatchNorm1d(self.d * hidden_size).to(self.device)
    self.fc = nn.Sequential(nn.Linear(self.d * hidden_size, 1)).to(self.device)

  def forward(self, x):
    h0 = torch.zeros(self.num_layers*self.d, x.size()[0], self.hidden_size).to(self.device) # 초기 hidden state
    c0 = torch.zeros(self.num_layers*self.d, x.size()[0], self.hidden_size).to(self.device) # 초기 cell state
    
    out, _ = self.lstm(x, (h0, c0))
    out = out[:,-1]

    # normalization + layering
    out = self.fc(
        self.bn(out)
      )
    return out

# GRU
class GRU(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, dropout, device):
    super(GRU, self).__init__()
    self.input_size = input_size
    self.device = device
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.d = 1

    self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout, bidirectional=False).to(self.device)
    self.bn = nn.BatchNorm1d(self.d * hidden_size).to(self.device)
    self.fc = nn.Sequential(nn.Linear(self.d * hidden_size, 1)).to(self.device)

  def forward(self, x):
    h0 = torch.zeros(self.num_layers*self.d, x.size()[0], self.hidden_size).to(self.device) # 초기 hidden state
    
    out, _ = self.gru(x, h0)
    out = out[:,-1]

    # normalization + layering
    out = self.fc(
        self.bn(out)
      )
    return out

## Attention-based RNN

## 결과 종합

In [600]:
def to_dataloader(args, x, y, device):
  x_tensor = torch.Tensor(x).to(device) # transform to torch tensor
  y_tensor = torch.Tensor(y).to(device)

  dataset = TensorDataset(x_tensor, y_tensor) # create your datset
  return DataLoader(dataset, batch_size=args['batch_size']) # create your dataloader

In [601]:
def augment_jitter(n_jitter, alpha, train_x, train_y):
    jittered_x, jittered_y, _ = jitter(train_x, train_y, n_jitter, alpha)

    ax = np.empty((train_x.shape[0] + n_jitter, train_x.shape[1], train_x.shape[2]))
    ay = np.empty(train_x.shape[0] + n_jitter)

    for i in range(len(train_x)):
        ax[i] = train_x[i]
        ay[i] = train_y[i]

    for i in range(n_jitter):
        ax[i+len(train_x)] = jittered_x[i]
        ay[i+len(train_x)] = jittered_y[i]

    return ax, ay

In [602]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(pos_weight))

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device,'is ready')

cpu is ready


In [603]:
def f1(cf):
    tp = cf[1, 1]
    fp = cf[0, 1]
    fn = cf[1, 0]

    if tp == 0:
        return 0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    return 2 * recall * precision / (recall + precision)   

In [604]:
def train_model(model, optimizer, args, train_x, train_y, validation_x, validation_y, device):
    train_dataloader = to_dataloader(args, train_x, train_y, device)
    validation_dataloader = to_dataloader(args, validation_x, validation_y, device)
    running_loss = []

    val_acc = []
    train_acc = []

    best_acc = -1

    for epoch in range(args['num_epochs']):
        if DEBUG:
            if (epoch % 1000 == 0 and epoch!=0) or epoch==args['num_epochs']-1:
                print('epoch: {}'.format(epoch))
                plt.plot(train_acc)
                plt.plot(val_acc)
                plt.legend(['train', 'validation'])
                plt.show()
    
        # train mode
        model.train()

        for seq, target in train_dataloader:
            out = model(seq)
            loss = criterion(out, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss.append(loss.item())
        
        # eval mode
        model.eval()

        if epoch % 10 == 0 and epoch > 0:
            val_cf = np.zeros((2, 2))
            train_cf = np.zeros((2, 2))

            with torch.no_grad():
                for seq, target in validation_dataloader:
                    out = model(seq)
                    result = (out > 0.5).float()
                    cf = confusion_matrix(target, result)

                    val_cf += cf

                for seq, target in train_dataloader:
                    out = model(seq)
                    result = (out > 0.5).float()
                    cf = confusion_matrix(target, result)

                    train_cf += cf                 

                val_f1 = f1(val_cf)
                train_f1 = f1(train_cf)

                val_acc.append(val_f1)
                train_acc.append(train_f1)

                # saving best model
                if val_f1 > best_acc:
                    best_acc = val_f1

                    torch.save(model, path)

    model = torch.load(path)
    model.eval()

    print("f1:", best_acc)

    return model, best_acc

In [605]:
def feature_importance(model, x, y, feature_idx, device):
    feature_size = x.shape[2]
    data_size = x.shape[0]
    n_repeat = 30
    importance = np.empty(feature_size)

    x_tensor = torch.Tensor(x).to(device)
    y_tensor = torch.Tensor(y).to(device)
    out = model(x_tensor)
    result = (out > 0.5).float()

    cf = confusion_matrix(y_tensor, result)

    total_score = f1(cf)

    for feature in range(feature_size):
        feature_score_sum = 0

        for i in range(n_repeat):
            random_idx = np.random.permutation(data_size)

            x_shuffle = x.copy()
            for j in range(data_size):
                k = random_idx[j]
                x_shuffle[j,:,feature] = x[k,:,feature]

            x_shuffle_tensor = torch.Tensor(x_shuffle).to(device)

            out = model(x_shuffle_tensor)
            result = (out > 0.5).float()

            cf = confusion_matrix(y_tensor, result)

            feature_score = f1(cf)

            feature_score_sum += feature_score

        average_score = feature_score_sum / n_repeat
        importance[feature] = total_score - average_score
    
    dtype = [('idx', int), ('importance', float)]
    sorted_importance = np.empty(feature_size, dtype=dtype)

    for i in range(feature_size):
        sorted_importance[i] = (feature_idx[i], importance[i])

    return np.sort(sorted_importance, order='importance')

In [606]:
def select_k_feature(k, model, train_x, validation_x, validation_y, feature_idx, device):
    sorted_importance = feature_importance(model, validation_x, validation_y, feature_idx, device)

    selected = sorted_importance[-k:]

    #print(selected)

    feature_selected = []
    for i in range(k):
        feature_selected.append(selected[i][0])

    feature_selected = sorted(feature_selected)

    train_x_selected = train_x[:, :, feature_selected]
    validation_x_selected = validation_x[:, :, feature_selected]

    return feature_selected, train_x_selected, validation_x_selected

In [607]:
def hyperparameter_search(args, train_x, train_y, validation_x, validation_y, device):
    def create_model(args, feature_size, device):
        if args['model'] == 'GRU':
            return GRU(feature_size, args['hidden_size'], args['num_layers'], args['dropout'], device)
        elif args['model'] == 'LSTM':
            return LSTM(feature_size, args['hidden_size'], args['num_layers'], args['dropout'], device)
        elif args['model'] == 'RNN':
            return VanillaRNN(feature_size, args['hidden_size'], args['num_layers'], args['dropout'], device)

        return None
    
    augmented_x, augmented_y = augment_jitter(args['n_jitter'], args['jitter_alpha'], train_x, train_y)
    augmented_y = np.reshape(augmented_y, (-1, 1))

    validation_x_tensor = torch.Tensor(validation_x).to(device)

    n_ensemble = 5

    models = []
    y_pred = []

    for i in range(n_ensemble):
        model = create_model(args, dim, device)
        optimizer = optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
        model, accuracy = train_model(model, optimizer, args, augmented_x, augmented_y, validation_x, validation_y, device)
        models.append(model)

        out = model(validation_x_tensor)
        result = (out > 0.5).float()
        y_pred.append(result)

    result = torch.empty(len(validation_x))

    for i in range(len(validation_x)):
        one_count = 0
        zero_count = 0

        for j in range(len(y_pred)):
            if y_pred[j][i] == 0:
                zero_count += 1
            else:
                one_count += 1

        if zero_count > one_count:
            result[i] = 0.0
        else:
            result[i] = 1.0

    cf = confusion_matrix(validation_y, result)

    feature_score = f1(cf)

    print('best accuracy: {}'.format(feature_score))

    return feature_score

In [608]:
args_list = {
    'batch_size': [1024],
    'lr': [1e-4],
    'num_epochs': [4000],
    'weight_decay': [0],
    'dropout': [0.5],
    'hidden_size': [16],
    'num_layers': [2],
    'n_jitter': [1],
    'jitter_alpha': [0.005],
    'model': ['LSTM', 'GRU'],
}

In [609]:
values = list(args_list.values())
hyperparameters = list(product(*values))

for hyperparameter in hyperparameters:
    args = dict()

    for i in range(len(args_list)):
        key = list(args_list.keys())[i]
        args[key] = hyperparameter[i]

    print(args)

    n_splits = 3
    n_repeats = 5

    kf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=0)

    ba_sum = 0

    for train_idx, val_idx in kf.split(tv_x):
        train_x, val_x = tv_x[train_idx], tv_x[val_idx]
        train_y, val_y = tv_y[train_idx], tv_y[val_idx]

        best_accuracy = hyperparameter_search(args, train_x, train_y, val_x, val_y, device)
        ba_sum += best_accuracy

    with open(PREFIX+'result/result.csv','a') as f:
        f.write('{},{},{},{}'.format(args['n_jitter'], args['jitter_alpha'], args['model'], ba_sum/n_splits/n_repeats))
        f.write('\n')

{'batch_size': 1024, 'lr': 0.0001, 'num_epochs': 4000, 'weight_decay': 0, 'dropout': 0.5, 'hidden_size': 16, 'num_layers': 2, 'n_jitter': 1, 'jitter_alpha': 0.005, 'model': 'LSTM'}
f1: 0.625
f1: 0.5161290322580646
f1: 0.6206896551724139
f1: 0.5161290322580646
f1: 0.6206896551724139
best accuracy: 0.6206896551724139
f1: 0.5
f1: 0.5
f1: 0.5
f1: 0.47058823529411764
f1: 0.4
best accuracy: 0.5
f1: 0.5945945945945945
f1: 0.7058823529411765
f1: 0.6206896551724138
f1: 0.631578947368421
f1: 0.64
best accuracy: 0.6666666666666667
f1: 0.5714285714285714
f1: 0.64
f1: 0.6086956521739131
f1: 0.631578947368421
f1: 0.64
best accuracy: 0.6666666666666667
f1: 0.6
f1: 0.56
f1: 0.46153846153846156
f1: 0.5454545454545455
f1: 0.5925925925925926
best accuracy: 0.6666666666666667
f1: 0.6206896551724139
f1: 0.5945945945945945
f1: 0.7058823529411764
f1: 0.5714285714285715
f1: 0.5925925925925926
best accuracy: 0.5806451612903226
f1: 0.56
f1: 0.56
f1: 0.5333333333333333
f1: 0.5185185185185185
f1: 0.53333333333333