In [None]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

import joblib
import pickle
import optuna

warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
def sliding_windows(data, lookback_length, forecast_length):
    """Cut a sequence into (history, future) window pairs.

    For every split position ``p`` with at least ``lookback_length`` items
    before it and ``forecast_length`` items from it onwards, the history
    window is ``data[p - lookback_length:p]`` and the future window is
    ``data[p:p + forecast_length]``.

    Args:
        data: Sliceable sequence (e.g. a NumPy array) indexed along its first axis.
        lookback_length: Number of leading items in each history window.
        forecast_length: Number of items in each future window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays holding the stacked history and
        future windows, in order of increasing split position.
    """
    split_points = range(lookback_length, len(data) - forecast_length + 1)

    history = [data[p - lookback_length:p] for p in split_points]
    future = [data[p:p + forecast_length] for p in split_points]

    return np.array(history), np.array(future)


def get_data_loader(X, y, batch_size):
    """Split windows into train/validation sets and wrap them in DataLoaders.

    The split is 80/20 and unshuffled (``shuffle=False``), so the original
    sample order is preserved in both parts. Neither DataLoader shuffles.

    Args:
        X: Input windows; the size of its last axis is reported as ``input_size``.
        y: Targets aligned with ``X`` along the first axis.
        batch_size: Batch size used for both loaders.

    Returns:
        Tuple ``(train_dl, val_dl, input_size)``.
    """
    x_train, x_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, shuffle=False
    )

    def _as_loader(features, targets):
        # Convert to float tensors and batch without shuffling.
        dataset = TensorDataset(torch.Tensor(features), torch.Tensor(targets))
        return DataLoader(dataset, batch_size=batch_size)

    train_dl = _as_loader(x_train, y_train)
    val_dl = _as_loader(x_val, y_val)

    return train_dl, val_dl, x_train.shape[-1]

In [None]:
# Read the sales table, transpose it so that each original CSV column becomes
# a row, replace missing values with 0 and drop the first six rows of the
# transposed frame.
# NOTE(review): the six dropped rows are presumably the id/metadata columns of
# the CSV — confirm against the file.
sales_train_val = (
    pd.read_csv('sales_train_validation.csv')
    .transpose()
    .fillna(0)
    [6:]
)

# Scaler mapping every column to the [0, 1] range; it is fitted in a later cell.
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
#timesteps = 14
#x_train = []
#y_train = []
#for i in range(timesteps, 1913):
#    x_train.append(sales_train_val[i-timesteps:i])
#    y_train.append(sales_train_val[i][0:30490])

In [None]:
#x_train=np.array(x_train, dtype='float16')
#y_train=np.array(y_train, dtype='float16')

In [None]:
#x_train.shape,y_train.shape

In [None]:
# Replace the row labels with a plain 0..n-1 index (the old labels are discarded).
sales_train_val.reset_index(drop=True, inplace=True)

# Collect, for every column labelled 14..1912, a slice of its first (up to)
# 30490 rows.
# NOTE(review): `scale_cols` and `timesteps` are not used again in the cells
# visible here, and 1913 / 30490 are hard-coded — confirm whether this loop is
# still needed.
scale_cols = []
timesteps = 14
for i in range(timesteps, 1913):
    scale_cols.append(sales_train_val[i][0:30490])

scale_cols = np.array(scale_cols, dtype='float16')

# Lookback period & forecasting period
max_prediction_length = 20
lookback_length = 60
# Number of leading rows used for training; the last `max_prediction_length`
# rows are held out.
training_data_max = len(sales_train_val) - max_prediction_length

# Training data: fit the scaler on the training rows only and scale them.
sales_train_val_p = sales_train_val.iloc[:training_data_max, :]
training_sales_train_val = scaler.fit_transform(sales_train_val_p)

In [None]:
# The LSTM only predicts the value one step ahead, so the windows are built
# with a forecast length of 1. Each target window is the slice data[i:i+1],
# so `y` keeps a length-1 axis at position 1.
x, y = sliding_windows(training_sales_train_val, lookback_length, 1)

Model 정의

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head on the final hidden states.

    The final hidden state of every LSTM layer is concatenated per sample and
    projected to ``num_classes`` outputs.

    Args:
        num_classes: Number of output values per sample.
        input_size: Number of features per time step.
        hidden_size: Hidden state size of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size * num_layers, num_classes)

    def forward(self, x):
        """Run a batch of sequences through the network.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        batch_size = x.size(0)

        # Zero initial hidden/cell states on the same device and dtype as the input.
        h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                          device=x.device, dtype=x.dtype)
        c_0 = torch.zeros_like(h_0)

        # Propagate input through LSTM; only the final hidden state is used.
        _, (h_out, _) = self.lstm(x, (h_0, c_0))

        # h_out is (num_layers, batch, hidden) even with batch_first=True.
        # Bring the batch axis to the front before flattening; a plain
        # view(-1, hidden * num_layers) would combine hidden states of
        # different samples into one row whenever num_layers > 1.
        h_out = h_out.permute(1, 0, 2).reshape(batch_size, self.num_layers * self.hidden_size)

        return self.fc(h_out)

In [None]:
def _align_target(output, target):
    """Reshape ``target`` to ``output``'s shape when both hold the same number of elements.

    Without this, a target such as ``(batch, 1, features)`` against an output
    of ``(batch, features)`` is silently broadcast by the loss function, which
    then compares every prediction with every target in the batch.
    """
    if target.shape != output.shape and target.numel() == output.numel():
        return target.reshape(output.shape)
    return target


def train(log_interval, model, train_dl, val_dl, optimizer, criterion, epoch):
    """Train ``model`` and checkpoint the weights with the best validation loss.

    Whenever the mean validation loss improves, the model's ``state_dict`` is
    written to ``'lstm_saved_model.pth'`` in the working directory.

    Args:
        log_interval: Losses are printed every ``log_interval`` epochs.
        model: Network to train; moved to the GPU when CUDA is available.
        train_dl: Iterable of ``(inputs, target)`` training batches.
        val_dl: Iterable of ``(inputs, target)`` validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss function called as ``criterion(output, target)``.
        epoch: Number of epochs to run.

    Returns:
        The lowest mean validation loss seen (``np.inf`` if it never improved).
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # Move the model once up front instead of once per batch.
        model = model.cuda()

    best_loss = np.inf
    for epoch_idx in range(epoch):
        train_loss = 0.0
        model.train()
        for inputs, target in train_dl:

            if use_cuda:
                inputs, target = inputs.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, _align_target(output, target))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # validation: no gradients are needed, so skip building the autograd graph
        valid_loss = 0.0
        model.eval()
        with torch.no_grad():
            for inputs, target in val_dl:

                if use_cuda:
                    inputs, target = inputs.cuda(), target.cuda()

                output = model(inputs)
                loss = criterion(output, _align_target(output, target))
                valid_loss += loss.item()

        if ( epoch_idx % log_interval == 0 ):
            print(f'\n Epoch {epoch_idx} \t Training Loss: {train_loss / len(train_dl)} \t Validation Loss: {valid_loss / len(val_dl)} \n')

        if best_loss > (valid_loss / len(val_dl)):
            print(f'Validation Loss Decreased({best_loss:.6f}--->{(valid_loss / len(val_dl)):.6f}) \t Saving The Model')
            best_loss = (valid_loss / len(val_dl))
            torch.save(model.state_dict(), 'lstm_saved_model.pth')

    return best_loss


def smape(a, f):
    """Symmetric mean absolute percentage error, in percent.

    Each term is ``2 * |f - a| / (|a| + |f|)``. Terms where both the actual
    and the forecast are zero are counted as 0 instead of producing NaN from
    a 0/0 division. The result is the mean over all elements (after
    broadcasting ``a`` against ``f``), multiplied by 100.

    Args:
        a: Actual values (array-like).
        f: Forecast values (array-like), broadcastable against ``a``.

    Returns:
        SMAPE as a float in the range [0, 200].

    Raises:
        ValueError: If there are no observations to average over.
    """
    actual = np.asarray(a, dtype=float)
    forecast = np.asarray(f, dtype=float)

    numerator = 2.0 * np.abs(forecast - actual)
    denominator = np.abs(actual) + np.abs(forecast)
    if denominator.size == 0:
        raise ValueError('smape() requires at least one observation')

    # Divide only where the denominator is non-zero; remaining terms stay 0.
    ratio = np.divide(numerator, denominator,
                      out=np.zeros_like(denominator), where=denominator != 0)
    return 100.0 * ratio.mean()

In [None]:
# Scratch check of the window-shifting step: drop the first row of the lookback
# window, add a leading axis, append one row of `y_for_metric` along axis 1 and
# display the resulting shape.
# NOTE(review): `x_for_metric` and `y_for_metric` are not defined in any cell
# visible here; this cell fails on a fresh kernel unless they are created first.
aa = x_for_metric
tmp = np.append( np.expand_dims(aa[1:, :], 0), np.expand_dims(y_for_metric[2, :], (0,2)), axis=1)
tmp.shape

In [None]:
def objective(trial):
    """Optuna objective: train an LSTM with sampled hyper-parameters and score it by SMAPE.

    Reads the notebook-level globals ``x``, ``y`` (training windows),
    ``max_prediction_length``, ``x_for_metric`` and ``y_for_metric``.
    NOTE(review): ``x_for_metric`` / ``y_for_metric`` are not defined in any
    cell visible here; they must exist in the kernel before this runs.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        SMAPE between ``y_for_metric`` and the model's predictions (lower is better).
    """
    cfg = {
            'batch_size' : trial.suggest_categorical('batch_size',[64, 128, 256, 512]),
            # Learning rate sampled log-uniformly from [1e-3, 1e-1].
            # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
            'learning_rate' : trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True),
            'hidden_size': trial.suggest_categorical('hidden_size',[16, 32, 64, 128, 256, 512, 1024]),
            # Step defaults to 1; passing it positionally is deprecated.
            'num_layers': trial.suggest_int('num_layers', 1, 5),
        }

    # Same weight initialisation for every trial.
    torch.manual_seed(42)

    log_interval = 5
    num_classes = 1 # fixed constant, deliberately not part of the search space
    num_epochs = 10 # fixed constant, deliberately not part of the search space

    train_dl, val_dl, input_size = get_data_loader(x, y,  cfg['batch_size'])

    model = LSTM(
        num_classes = num_classes,
        input_size = input_size,
        hidden_size = cfg['hidden_size'],
        num_layers = cfg['num_layers']
    )

    if torch.cuda.is_available():
        model = model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=cfg['learning_rate'])
    criterion = torch.nn.MSELoss()
    best_loss = train(log_interval, model, train_dl, val_dl, optimizer, criterion, num_epochs)

    print('best loss for the trial = ', best_loss)
    predict_data = []

    # Lookback window used for inference, with a leading batch axis of size 1.
    # It is shifted forward one step per iteration below.
    # Assumes x_for_metric is (lookback_length, 1) and y_for_metric is
    # (max_prediction_length, 1) — TODO confirm.
    x_pred = np.expand_dims(x_for_metric, 0)

    # NOTE(review): the model evaluated here holds the weights from the last
    # epoch, not the best checkpoint that train() saved to disk.
    model.eval()
    with torch.no_grad():
        for step in range(max_prediction_length):

            # Slide the window: drop the oldest time step and append
            # y_for_metric[step].
            # NOTE(review): this appends the observed value rather than the
            # model's previous forecast, and it does so before predicting —
            # confirm this alignment is intended.
            x_pred = np.append( x_pred[:, 1:, :], np.expand_dims(y_for_metric[step, :], (0,2)), axis=1)
            xt_pred = torch.Tensor(x_pred)

            if torch.cuda.is_available():
                xt_pred = xt_pred.cuda()
            # generate the next forecast
            yt_pred = model(xt_pred)
            # tensor to array
            y_pred = yt_pred.cpu().detach().numpy()

            # save the forecast
            predict_data.append(y_pred)

    # Stack the forecasts into a column vector. No inverse scaling is applied,
    # so the metric is computed on the values exactly as the model emits them.
    predict_data = np.array(predict_data).reshape(-1, 1)
    SMAPE = smape(y_for_metric, predict_data)

    print(f' \nSMAPE : {SMAPE}')


    return SMAPE

In [None]:
# Seed the sampler so its random draws are repeatable from run to run
# (the objective already seeds torch for every trial).
sampler = optuna.samplers.TPESampler(seed=42)
#   sampler = optuna.samplers.SkoptSampler()

# model.load_state_dict(torch.load('lstm_saved_model.pth'))

# Minimise the SMAPE returned by `objective` over 5 trials.
study = optuna.create_study(sampler=sampler, direction='minimize')
study.optimize(objective, n_trials=5)