In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import gc
import random
import warnings
import pickle
import importlib
warnings.filterwarnings(action='ignore')
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
torch.manual_seed(1015)
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

import sklearn
from sklearn.neighbors import KNeighborsRegressor

# Add the parent folder to the system path so that modules can be imported from it.
import os,sys,inspect
# Locate the directory this code lives in.  A plain script defines `__file__`;
# a notebook kernel does not, and the file name reported for a cell's frame is
# generated by the kernel (newer ipykernel versions use a temporary directory),
# so in that case fall back to the kernel's working directory.
try:
    currentdir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# Prepend so the project's own packages take precedence during import.
sys.path.insert(0, parentdir)


from utils.preprocess_utils import *
from utils.train_utils import *
from train_morning.models.model_cycle.cycle_lstm import LSTMModel_cycle

In [3]:

print('all use')

# Baseline run of the ablation study: train on every x_time channel and record
# the best validation MSE as the final ('ALL') entry of `valid_errors`.
#
# `valid_errors` is created by the leave-one-feature-out cell (the one that
# starts with `valid_errors = []`), so that cell has to run first.  Check up
# front instead of failing with a NameError only after training has finished.
if 'valid_errors' not in globals():
    raise NameError(
        "name 'valid_errors' is not defined; run the leave-one-feature-out "
        "ablation cell before this one"
    )

# Number of channels in x_time; the ablation cell contributes one entry per
# channel, so this is also the index of the 'ALL' entry in `valid_errors`.
n_time_features = 12

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that were produced locally by this project.
with open('data/preprocess/l_data_list.pkl', 'rb') as f:
    data_list = pickle.load(f)

data_list = append_cycle_size(data_list)

# numpy2tensor is expected to return the nine splits in the same order as
# data_list (time inputs, non-time inputs, targets for train/valid/test).
(train_time, train_notime, train_y,
 valid_time, valid_notime, valid_y,
 test_time, test_notime, test_y) = numpy2tensor(data_list)


model = LSTMModel_cycle(input_size=n_time_features, hidden_size=32, no_time_size=4).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
# `size_average` is deprecated; reduction='mean' is the equivalent setting.
criterion = nn.MSELoss(reduction='mean')

train_error = []
valid_error = []
# Start from +inf so the first epoch always becomes the initial best.
hist = {'best_val_error': float('inf'),
        'best_val_epoch': 0}

num_epochs = 400
for t in range(num_epochs):
    # Full-batch forward pass; index 1 on the last axis of y is the target.
    train_pred = model(train_time, train_notime)
    loss = criterion(train_pred, train_y[:, :, 1])
    # Store a plain float: appending the tensor would keep every epoch's
    # autograd graph alive for the whole run.
    train_error.append(loss.item())

    # Validation uses the same (pre-update) weights as the training loss and
    # needs no gradients.
    # NOTE(review): the model stays in training mode here, as before; switch
    # to model.eval() for this pass if the model contains dropout/batch-norm.
    with torch.no_grad():
        valid_pred = model(valid_time, valid_notime)
        valid_mse = criterion(valid_pred, valid_y[:, :, 1]).item()
    valid_error.append(valid_mse)

    # `>=` keeps the latest epoch when several epochs tie for the best error.
    if hist['best_val_error'] >= valid_mse:
        hist['best_val_error'] = valid_mse
        hist['best_val_epoch'] = t

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0 and t != 0:
        print(f"{t} Epochs train MSE: {loss.item():1.5f} // valid MSE: {valid_mse:1.5f}")

# Drop any 'ALL' entry left by a previous run of this cell so that re-running
# it does not keep growing the list, then record this run's result.
del valid_errors[n_time_features:]
valid_errors.append(hist['best_val_error'])

# Release the model and return cached GPU memory to the driver.
del model
gc.collect()
torch.cuda.empty_cache()

all use
10 Epochs train MSE: 0.02888 // valid MSE: 0.01917
20 Epochs train MSE: 0.02249 // valid MSE: 0.01341
30 Epochs train MSE: 0.02229 // valid MSE: 0.01281
40 Epochs train MSE: 0.02090 // valid MSE: 0.01221
50 Epochs train MSE: 0.02025 // valid MSE: 0.01218
60 Epochs train MSE: 0.01960 // valid MSE: 0.01200
70 Epochs train MSE: 0.01846 // valid MSE: 0.01161
80 Epochs train MSE: 0.01619 // valid MSE: 0.01072
90 Epochs train MSE: 0.01381 // valid MSE: 0.00944
100 Epochs train MSE: 0.01251 // valid MSE: 0.00889
110 Epochs train MSE: 0.01151 // valid MSE: 0.00858
120 Epochs train MSE: 0.01064 // valid MSE: 0.00825
130 Epochs train MSE: 0.00956 // valid MSE: 0.00772
140 Epochs train MSE: 0.00874 // valid MSE: 0.00726
150 Epochs train MSE: 0.00804 // valid MSE: 0.00723
160 Epochs train MSE: 0.00751 // valid MSE: 0.00707
170 Epochs train MSE: 0.00719 // valid MSE: 0.00719
180 Epochs train MSE: 0.00663 // valid MSE: 0.00682
190 Epochs train MSE: 0.00632 // valid MSE: 0.00664
200 Epochs tr

In [4]:
# Information on the 12 channels of x_time:
# 'card_use', 'holiday', 'day_corona',
# 'ondo', 'subdo', 'rain_snow',
# 'dayofyear_sin', 'dayofyear_cos', 'weekday_sin', 'weekday_cos',
# 'flow_trend', 'flow_cycle'

In [5]:
# Row labels for the ablation results table.  The first twelve entries are the
# x_time channel names (each appears to label the run in which that channel was
# left out); the trailing 'ALL' labels the run that used every channel.
features_name = [
    'card_use',
    'holiday',
    'day_corona',
    'ondo',
    'subdo',
    'rain_snow',
    'dayofyear_sin',
    'dayofyear_cos',
    'weekday_sin',
    'weekday_cos',
    'flow_trend',
    'flow_cycle',
    'ALL',
]

In [6]:
# Pair every row label with the best validation MSE collected in
# `valid_errors`; as the cell's last expression the table is rendered as output.
pd.DataFrame(dict(features_name=features_name, valid_errors=valid_errors))

Unnamed: 0,features_name,valid_errors
0,card_use,0.005314
1,holiday,0.004789
2,day_corona,0.005328
3,ondo,0.006173
4,subdo,0.005097
5,rain_snow,0.00505
6,dayofyear_sin,0.005409
7,dayofyear_cos,0.005255
8,weekday_sin,0.005182
9,weekday_cos,0.00536


In [2]:
# Leave-one-feature-out ablation: for every x_time channel, train a fresh model
# on the remaining channels and record the best validation MSE of that run.
# `valid_errors[i]` therefore belongs to the run in which channel i was removed.
# NOTE(review): the training loop below is duplicated in the all-features cell;
# a shared helper would keep the two in sync.
valid_errors = []
n_time_features = 12  # number of channels on the last axis of x_time

for feature_idx in range(n_time_features):
    print(feature_idx)
    # Indices of the channels that stay in the model for this run.
    features = [idx for idx in range(n_time_features) if idx != feature_idx]

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load pickles that were produced locally by this project.
    # NOTE(review): loading and append_cycle_size look loop-invariant; hoist
    # them out of the loop once it is confirmed that append_cycle_size and
    # numpy2tensor do not modify their inputs in place.
    with open('data/preprocess/l_data_list.pkl', 'rb') as f:
        data_list = pickle.load(f)

    data_list = append_cycle_size(data_list)
    (train_time, train_notime, train_y,
     valid_time, valid_notime, valid_y,
     test_time, test_notime, test_y) = data_list

    # Keep only the selected channels of the time inputs; the non-time inputs
    # and the targets are passed through unchanged.
    data_list = [train_time[:, :, features], train_notime, train_y,
                 valid_time[:, :, features], valid_notime, valid_y,
                 test_time[:, :, features], test_notime, test_y]

    (train_time, train_notime, train_y,
     valid_time, valid_notime, valid_y,
     test_time, test_notime, test_y) = numpy2tensor(data_list)


    # The input width follows the number of channels actually kept.
    model = LSTMModel_cycle(input_size=len(features), hidden_size=32, no_time_size=4).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    # `size_average` is deprecated; reduction='mean' is the equivalent setting.
    criterion = nn.MSELoss(reduction='mean')

    train_error = []
    valid_error = []
    # Start from +inf so the first epoch always becomes the initial best.
    hist = {'best_val_error': float('inf'),
            'best_val_epoch': 0}

    num_epochs = 400
    for t in range(num_epochs):
        # Full-batch forward pass; index 1 on the last axis of y is the target.
        train_pred = model(train_time, train_notime)
        loss = criterion(train_pred, train_y[:, :, 1])
        # Store a plain float: appending the tensor would keep every epoch's
        # autograd graph alive for the whole run.
        train_error.append(loss.item())

        # Validation uses the same (pre-update) weights as the training loss
        # and needs no gradients.
        # NOTE(review): the model stays in training mode here, as before;
        # switch to model.eval() for this pass if it has dropout/batch-norm.
        with torch.no_grad():
            valid_pred = model(valid_time, valid_notime)
            valid_mse = criterion(valid_pred, valid_y[:, :, 1]).item()
        valid_error.append(valid_mse)

        # `>=` keeps the latest epoch when several epochs tie for the best.
        if hist['best_val_error'] >= valid_mse:
            hist['best_val_error'] = valid_mse
            hist['best_val_epoch'] = t

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if t % 10 == 0 and t != 0:
            print(f"{t} Epochs train MSE: {loss.item():1.5f} // valid MSE: {valid_mse:1.5f}")

    valid_errors.append(hist['best_val_error'])

    # Release this run's model and return cached GPU memory before the next run.
    del model
    gc.collect()
    torch.cuda.empty_cache()

chs train MSE: 0.01909 // valid MSE: 0.01154
60 Epochs train MSE: 0.01725 // valid MSE: 0.01085
70 Epochs train MSE: 0.01571 // valid MSE: 0.01033
80 Epochs train MSE: 0.01439 // valid MSE: 0.00982
90 Epochs train MSE: 0.01311 // valid MSE: 0.00914
100 Epochs train MSE: 0.01196 // valid MSE: 0.00854
110 Epochs train MSE: 0.01068 // valid MSE: 0.00814
120 Epochs train MSE: 0.00974 // valid MSE: 0.00776
130 Epochs train MSE: 0.00886 // valid MSE: 0.00736
140 Epochs train MSE: 0.00788 // valid MSE: 0.00702
150 Epochs train MSE: 0.00724 // valid MSE: 0.00683
160 Epochs train MSE: 0.00596 // valid MSE: 0.00611
170 Epochs train MSE: 0.00536 // valid MSE: 0.00585
180 Epochs train MSE: 0.00488 // valid MSE: 0.00568
190 Epochs train MSE: 0.00451 // valid MSE: 0.00570
200 Epochs train MSE: 0.00422 // valid MSE: 0.00563
210 Epochs train MSE: 0.00395 // valid MSE: 0.00561
220 Epochs train MSE: 0.00377 // valid MSE: 0.00565
230 Epochs train MSE: 0.00347 // valid MSE: 0.00560
240 Epochs train MSE: 0