In [1]:
import import_ipynb
import sys
sys.path.append('C:\\Users\\USER\\JupyterProjects\\conv_biLSTM_attention')
from Stock_Dataset import StockDataset

importing Jupyter notebook from Stock_Dataset.ipynb


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import argparse
from CONV_Att_BILSTM import Conv_attLSTM
import numpy as np
import time
from metric import metric_acc as ACC
from metric import metric_mcc as MCC
import matplotlib.pyplot as plt
import csv
import os
from Stock_dataloader_csv import stock_csv_read
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef



def train(conv_attLSTM, lstm_optimizer, Partition, args):
    """Run one training epoch over ``Partition['train']``.

    Args:
        conv_attLSTM: model exposing ``init_hidden()`` and returning a 3-tuple
            from its forward call; only the first element (the prediction) is
            used here.
        lstm_optimizer: optimizer that updates the model parameters.
        Partition: mapping with a ``'train'`` dataset yielding ``(x, y)`` pairs.
        args: namespace providing ``batch_size``, ``device`` and ``loss_fn``.

    Returns:
        ``(conv_attLSTM, mean_loss)`` where ``mean_loss`` is the sum of the
        per-batch losses divided by the number of batches.
    """
    loader = DataLoader(
        Partition['train'],
        batch_size=args.batch_size,
        shuffle=False,   # batches are drawn in dataset order
        drop_last=True,  # an incomplete final batch is discarded
    )
    conv_attLSTM.train()

    batch_losses = []
    for features, labels in loader:
        lstm_optimizer.zero_grad()

        targets = labels.squeeze().float().to(args.device)
        features = features.to(args.device)

        # Re-initialise the recurrent state for every batch, on the right device.
        conv_attLSTM.hidden = [state.to(args.device) for state in conv_attLSTM.init_hidden()]

        # The second and third outputs (attention_weight / attn_applied in the
        # original code) are not needed for the optimisation step.
        yhat, _, _ = conv_attLSTM(features)

        batch_loss = args.loss_fn(yhat, targets)
        batch_loss.backward()
        lstm_optimizer.step()  # parameter update

        batch_losses.append(batch_loss.item())

    mean_loss = sum(batch_losses) / len(loader)
    return conv_attLSTM, mean_loss


def validation(conv_attLSTM, partition, args):
    """Compute the mean validation loss over ``partition['val']``.

    The model is switched to eval mode and gradients are disabled; no
    parameters are updated.

    Args:
        conv_attLSTM: model exposing ``init_hidden()`` and returning a 3-tuple
            from its forward call; only the first element (the prediction) is
            used here.
        partition: mapping with a ``'val'`` dataset yielding ``(x, y)`` pairs.
        args: namespace providing ``batch_size``, ``device`` and ``loss_fn``.

    Returns:
        ``(conv_attLSTM, val_loss)`` where ``val_loss`` is the sum of the
        per-batch losses divided by the number of batches.
    """
    valloader = DataLoader(partition['val'],
                           batch_size=args.batch_size,
                           shuffle=False, drop_last=True)
    conv_attLSTM.eval()

    val_loss = 0.0
    with torch.no_grad():
        for x, y in valloader:
            true_y = y.squeeze().float().to(args.device)
            x = x.to(args.device)

            # Move each freshly initialised hidden tensor to the target device.
            # (Previously the list was filled with the model object itself
            # instead of the hidden tensors.)
            conv_attLSTM.hidden = [hidden.to(args.device) for hidden in conv_attLSTM.init_hidden()]

            # Only the prediction is needed to compute the loss.
            yhat, _, _ = conv_attLSTM(x)

            loss = args.loss_fn(yhat, true_y)
            val_loss += loss.item()

    val_loss = val_loss / len(valloader)
    return conv_attLSTM, val_loss


def test(conv_attLSTM, partition, args):
    """Evaluate the model on ``partition['test']``.

    Predictions are binarised at 0.5, and accuracy / Matthews correlation
    coefficient are computed per batch and then averaged over the batches.
    NOTE(review): averaging per-batch MCC is not the same as the MCC of the
    pooled predictions; the per-batch behaviour is kept as-is.

    Args:
        conv_attLSTM: model exposing ``init_hidden()`` and returning a 3-tuple
            from its forward call; only the first element (the prediction) is
            used here.
        partition: mapping with a ``'test'`` dataset yielding ``(x, y)`` pairs.
        args: namespace providing ``batch_size`` and ``device``.

    Returns:
        ``(ACC_metric, MCC_metric)``: batch-averaged accuracy and MCC.
    """
    testloader = DataLoader(partition['test'],
                            batch_size=args.batch_size,
                            shuffle=False, drop_last=True)
    conv_attLSTM.eval()

    ACC_metric = 0.0
    MCC_metric = 0.0
    with torch.no_grad():
        for x, y in testloader:
            true_y = y.squeeze().float().to(args.device)
            x = x.to(args.device)

            conv_attLSTM.hidden = [hidden.to(args.device) for hidden in conv_attLSTM.init_hidden()]

            yhat, _, _ = conv_attLSTM(x)

            # Threshold the model output into hard 0/1 predictions. No autograd
            # bookkeeping is needed here: we are inside no_grad() and the
            # result goes straight to NumPy.
            output_ = torch.where(yhat >= 0.5, 1.0, 0.0)

            perc_y_pred = output_.detach().cpu().numpy()
            perc_y_true = true_y.detach().cpu().numpy()

            ACC_metric += accuracy_score(perc_y_true, perc_y_pred)
            MCC_metric += matthews_corrcoef(perc_y_true, perc_y_pred)

    ACC_metric = ACC_metric / len(testloader)
    MCC_metric = MCC_metric / len(testloader)

    return ACC_metric, MCC_metric



importing Jupyter notebook from CONV_Att_BILSTM.ipynb
importing Jupyter notebook from attention.ipynb
importing Jupyter notebook from metric.ipynb


In [3]:

# ====== Random seed initialisation ====== #
seed = 666
for seed_fn in (np.random.seed, torch.manual_seed):
    seed_fn(seed)

# ========= Experiment setting ========== #
parser = argparse.ArgumentParser()
args = parser.parse_args("")  # no CLI flags are parsed; args is used as a plain settings namespace
if torch.cuda.is_available():
    args.device = 'cuda'
else:
    args.device = 'cpu'

# Directory under which the result files are written.
args.save_file_path = r"D:\conv_bilstm_results"

# ====== Training hyperparameters ======= #
vars(args).update(
    batch_size=64,
    dropout=0.2,
    use_bn=True,
    # NOTE(review): the original note here mentioned cross entropy for
    # classification, but the loss actually configured is nn.L1Loss.
    loss_fn=nn.L1Loss(),
    optim='Adam',
    lr=5e-4,
    l2=1e-5,  # passed to the optimizer as weight_decay
    epoch=100,
)

# ============= Model ================== #
args.Conv_attLSTM = Conv_attLSTM

# ====== att_lstm hyperparameters ======= #
vars(args).update(
    x_frames=10,
    y_frames=1,
    input_dim=64,
    hid_dim=64,
    output_dim=1,
    attention_head=1,
    attn_size=10,
    num_layers=1,
    attLSTM_x_frames=1,
)


In [4]:
## Run script: for every stock file, train / validate / test the model on each
## cross-validation split and write per-fold and per-stock results to disk.


def _build_model(args):
    """Create a fresh model from the hyperparameters in ``args`` and move it to ``args.device``."""
    model = args.Conv_attLSTM(args.input_dim, args.hid_dim, args.output_dim, args.num_layers, args.batch_size,
                              args.dropout, args.use_bn, args.attention_head, args.attn_size, activation="ReLU")
    model.to(args.device)
    return model


def _build_optimizer(model, args):
    """Return the optimizer selected by ``args.optim``; raise ValueError for an unknown name."""
    optimizer_classes = {'SGD': optim.SGD, 'RMSprop': optim.RMSprop, 'Adam': optim.Adam}
    if args.optim not in optimizer_classes:
        raise ValueError('In-valid optimizer choice')
    return optimizer_classes[args.optim](model.parameters(), lr=args.lr, weight_decay=args.l2)


args.data_list = os.listdir("./data/kdd17/ourpped")

# exist_ok=True everywhere below so that the cell survives a re-run. Stale
# checkpoints from an earlier run are harmless: the checkpoint of the best
# epoch is always (re)written during the current run before it is loaded.
os.makedirs(args.save_file_path, exist_ok=True)

with open(os.path.join(args.save_file_path, 'CONV_ATTBILSTM_result_t.csv'), 'w', encoding='utf-8', newline='') as f:
    wr = csv.writer(f)
    # Header now has one column per value written in the summary row below.
    wr.writerow(["model", "stock", "entire_exp_time", "avg_test_ACC", "std_test_ACC", "avg_test_MCC"])

    for data_file in args.data_list:

        stock = data_file.split('.')[0]

        est = time.time()
        setattr(args, 'symbol', stock)
        args.new_file_path = os.path.join(args.save_file_path, "CONV_ATTBILSTM_" + args.symbol)
        os.makedirs(args.new_file_path, exist_ok=True)

        csv_read = stock_csv_read(data_file, args.x_frames, args.y_frames)
        split_data_list = csv_read.cv_split()

        ACC_cv = []
        MCC_cv = []
        with open(os.path.join(args.new_file_path, str(args.symbol) + 'test_acc_list' + '.csv'), 'w', newline='') as alist:
            www = csv.writer(alist)
            www.writerow(["acc_list"])

            for i, fold in enumerate(split_data_list):
                args.split_file_path = os.path.join(args.new_file_path, str(i) + "th_iter")
                os.makedirs(args.split_file_path, exist_ok=True)

                # Each split is indexed as (train, validation, test) data.
                trainset = StockDataset(fold[0])
                valset = StockDataset(fold[1])
                testset = StockDataset(fold[2])

                partition = {'train': trainset, 'val': valset, 'test': testset}

                conv_attLSTM = _build_model(args)
                lstm_optimizer = _build_optimizer(conv_attLSTM, args)

                # ===== List for epoch-wise data ====== #
                train_losses = []
                val_losses = []
                # ===================================== #
                for epoch in range(args.epoch):
                    ts = time.time()
                    conv_attLSTM, train_loss = train(conv_attLSTM, lstm_optimizer, partition, args)
                    conv_attLSTM, val_loss = validation(conv_attLSTM, partition, args)
                    te = time.time()

                    # Save a checkpoint on the first epoch and whenever the
                    # validation loss strictly improves on every earlier epoch.
                    if not val_losses or val_loss < min(val_losses):
                        torch.save(conv_attLSTM.state_dict(),
                                   os.path.join(args.split_file_path, str(epoch) + 'conv_attLSTM' + '.pt'))

                    train_losses.append(train_loss)
                    val_losses.append(val_loss)

                    print('Epoch {}, Loss(train/val) {:2.5f}/{:2.5f}. Took {:2.2f} sec'
                          .format(epoch, train_loss, val_loss, te - ts))

                # Index of the (first) epoch with the lowest validation loss;
                # for 10 epochs this is a value in 0..9.
                best_epoch = val_losses.index(min(val_losses))

                # Reload the best checkpoint into a fresh model for testing.
                conv_attLSTM = _build_model(args)
                conv_attLSTM.load_state_dict(
                    torch.load(os.path.join(args.split_file_path, str(best_epoch) + 'conv_attLSTM' + '.pt'),
                               map_location=args.device))

                # Use dedicated names so the imported ACC / MCC metric
                # functions are not overwritten by float results.
                test_acc, test_mcc = test(conv_attLSTM, partition, args)
                print('ACC: {}, MCC: {}'.format(test_acc, test_mcc))
                www.writerow([test_acc])
                with open(os.path.join(args.split_file_path, str(best_epoch) + 'Epoch_test_metric' + '.csv'), 'w') as fd:
                    print('ACC: {}, MCC: {}'.format(test_acc, test_mcc), file=fd)

                result = {
                    'train_losses': train_losses,
                    'val_losses': val_losses,
                    'ACC': test_acc,
                    'MCC': test_mcc,
                }

                fig, ax = plt.subplots()
                ax.plot(result['train_losses'])
                ax.plot(result['val_losses'])
                ax.legend(['train_losses', 'val_losses'], fontsize=15)
                ax.set_xlabel('epoch', fontsize=15)
                ax.set_ylabel('loss', fontsize=15)
                ax.grid()
                fig.savefig(os.path.join(args.split_file_path, str(args.symbol) + '_fig' + '.png'))
                plt.close(fig)  # avoid accumulating open figures across folds

                ACC_cv.append(result['ACC'])
                MCC_cv.append(result['MCC'])

        # Computed after all folds so it is defined even if there were none.
        entire_exp_time = time.time() - est

        ACC_cv_ar = np.array(ACC_cv)
        acc_avg = np.mean(ACC_cv_ar)
        acc_std = np.std(ACC_cv_ar)
        # Average MCC over all folds (previously only the last fold's MCC was
        # written under the "avg_test_MCC" column).
        mcc_avg = np.mean(np.array(MCC_cv))

        wr.writerow(["CONV_BILSTM_ATTENTION", args.symbol, entire_exp_time, acc_avg, acc_std, mcc_avg])


Epoch 0, Loss(train/val) 0.50062/0.49768. Took 1.24 sec
Epoch 1, Loss(train/val) 0.49911/0.49486. Took 0.06 sec
Epoch 2, Loss(train/val) 0.49681/0.48942. Took 0.07 sec
Epoch 3, Loss(train/val) 0.49398/0.48101. Took 0.06 sec
Epoch 4, Loss(train/val) 0.48972/0.47244. Took 0.05 sec
Epoch 5, Loss(train/val) 0.48475/0.46525. Took 0.05 sec
Epoch 6, Loss(train/val) 0.47447/0.45977. Took 0.06 sec
Epoch 7, Loss(train/val) 0.46061/0.45686. Took 0.06 sec
Epoch 8, Loss(train/val) 0.45111/0.45538. Took 0.06 sec
Epoch 9, Loss(train/val) 0.46156/0.45173. Took 0.05 sec
Epoch 10, Loss(train/val) 0.45237/0.45402. Took 0.05 sec
Epoch 11, Loss(train/val) 0.44313/0.45399. Took 0.05 sec
Epoch 12, Loss(train/val) 0.44117/0.45431. Took 0.06 sec
Epoch 13, Loss(train/val) 0.44329/0.45416. Took 0.06 sec
Epoch 14, Loss(train/val) 0.43770/0.45624. Took 0.06 sec
Epoch 15, Loss(train/val) 0.43038/0.45707. Took 0.06 sec
Epoch 16, Loss(train/val) 0.42149/0.45656. Took 0.05 sec
Epoch 17, Loss(train/val) 0.41013/0.45564