In [17]:
import numpy as np
import random
import os, errno
import sys
from tqdm import trange



import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

torch.manual_seed(100)
class lstm_encoder(nn.Module):
    ''' Encodes time-series sequence '''

    def __init__(self, input_size, hidden_size, num_layers = 3):
        
        '''
        : param input_size:     the number of features in the input X
        : param hidden_size:    the number of features in the hidden state h
        : param num_layers:     number of recurrent layers (i.e., 2 means there are
        :                       2 stacked LSTMs)
        '''
        
        super(lstm_encoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # define LSTM layer
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size,
                            num_layers = num_layers)

    def forward(self, x_input):
        
        '''
        : param x_input:               input of shape (seq_len, # in batch, input_size)
        : return lstm_out, hidden:     lstm_out gives all the hidden states in the sequence;
        :                              hidden gives the hidden state and cell state for the last
        :                              element in the sequence 
        '''
        
        lstm_out, self.hidden = self.lstm(x_input.view(x_input.shape[0], x_input.shape[1], self.input_size))
        
        return lstm_out, self.hidden     
    
    def init_hidden(self, batch_size):
        
        '''
        initialize hidden state
        : param batch_size:    x_input.shape[1]
        : return:              zeroed hidden state and cell state 
        '''
        
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device='cuda'),
                torch.zeros(self.num_layers, batch_size, self.hidden_size, device='cuda'))


class lstm_decoder(nn.Module):
    ''' Decodes hidden state output by encoder '''
    
    def __init__(self, input_size, hidden_size, num_layers = 3):

        '''
        : param input_size:     the number of features in the input X
        : param hidden_size:    the number of features in the hidden state h
        : param num_layers:     number of recurrent layers (i.e., 2 means there are
        :                       2 stacked LSTMs)
        '''
        
        super(lstm_decoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size,
                            num_layers = num_layers)
        self.linear = nn.Linear(hidden_size, input_size)           

    def forward(self, x_input, encoder_hidden_states):
        
        '''        
        : param x_input:                    should be 2D (batch_size, input_size)
        : param encoder_hidden_states:      hidden states
        : return output, hidden:            output gives all the hidden states in the sequence;
        :                                   hidden gives the hidden state and cell state for the last
        :                                   element in the sequence 
 
        '''
        
        lstm_out, self.hidden = self.lstm(x_input.unsqueeze(0), encoder_hidden_states)
        output = self.linear(lstm_out.squeeze(0))     
        
        return output, self.hidden

class lstm_seq2seq(nn.Module):
    ''' train LSTM encoder-decoder and make predictions '''
    
    def __init__(self, input_size, hidden_size):

        '''
        : param input_size:     the number of expected features in the input X
        : param hidden_size:    the number of features in the hidden state h
        '''

        super(lstm_seq2seq, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        self.encoder = lstm_encoder(input_size = input_size, hidden_size = hidden_size).cuda()
        self.decoder = lstm_decoder(input_size = input_size, hidden_size = hidden_size).cuda()
        

    def train_model(self, input_tensor, target_tensor, n_epochs, target_len, batch_size, training_prediction = 'recursive', teacher_forcing_ratio = 0.5, learning_rate = 0.01, dynamic_tf = False):
        
        '''
        train lstm encoder-decoder
        
        : param input_tensor:              input data with shape (seq_len, # in batch, number features); PyTorch tensor    
        : param target_tensor:             target data with shape (seq_len, # in batch, number features); PyTorch tensor
        : param n_epochs:                  number of epochs 
        : param target_len:                number of values to predict 
        : param batch_size:                number of samples per gradient update
        : param training_prediction:       type of prediction to make during training ('recursive', 'teacher_forcing', or
        :                                  'mixed_teacher_forcing'); default is 'recursive'
        : param teacher_forcing_ratio:     float [0, 1) indicating how much teacher forcing to use when
        :                                  training_prediction = 'teacher_forcing.' For each batch in training, we generate a random
        :                                  number. If the random number is less than teacher_forcing_ratio, we use teacher forcing.
        :                                  Otherwise, we predict recursively. If teacher_forcing_ratio = 1, we train only using
        :                                  teacher forcing.
        : param learning_rate:             float >= 0; learning rate
        : param dynamic_tf:                use dynamic teacher forcing (True/False); dynamic teacher forcing
        :                                  reduces the amount of teacher forcing for each epoch
        : return losses:                   array of loss function for each epoch
        '''
        
        #
        
        # initialize array of losses 
        losses = np.full(n_epochs, np.nan)

        optimizer = optim.Adam(self.parameters(), lr = learning_rate)
        criterion = nn.MSELoss().cuda()
        
        # calculate number of batch iterations
        n_batches = int(input_tensor.shape[1] / batch_size)

        with trange(n_epochs) as tr:
            for it in tr:
                
                batch_loss = 0.
                batch_loss_tf = 0.
                batch_loss_no_tf = 0.
                num_tf = 0
                num_no_tf = 0

                for b in range(n_batches):
                    # select data 
                    input_batch = input_tensor[:, b: b + batch_size, :].cuda()
                    target_batch = target_tensor[:, b: b + batch_size, :].cuda()
                    # outputs tensor
                    outputs = torch.zeros(target_len, batch_size, input_batch.shape[2])

                    # initialize hidden state
                    encoder_hidden = self.encoder.init_hidden(batch_size)

                    # zero the gradient
                    optimizer.zero_grad()

                    # encoder outputs
                    encoder_output, encoder_hidden = self.encoder(input_batch)

                    # decoder with teacher forcing
                    decoder_input = input_batch[-1, :, :]   # shape: (batch_size, input_size)
                    decoder_hidden = encoder_hidden

                    if training_prediction == 'recursive':
                        # predict recursively
                        for t in range(target_len): 
                            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                            outputs[t] = decoder_output
                            decoder_input = decoder_output

                    if training_prediction == 'teacher_forcing':
                        # use teacher forcing
                        if random.random() < teacher_forcing_ratio:
                            for t in range(target_len): 
                                decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                                outputs[t] = decoder_output
                                decoder_input = target_batch[t, :, :]

                        # predict recursively 
                        else:
                            for t in range(target_len): 
                                decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                                outputs[t] = decoder_output
                                decoder_input = decoder_output

                    if training_prediction == 'mixed_teacher_forcing':
                        # predict using mixed teacher forcing
                        for t in range(target_len):
                            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                            outputs[t] = decoder_output
                            
                            # predict with teacher forcing
                            if random.random() < teacher_forcing_ratio:
                                decoder_input = target_batch[t, :, :]
                            
                            # predict recursively 
                            else:
                                decoder_input = decoder_output

                    # compute the loss 
                    loss = criterion(outputs.cuda(), target_batch.cuda())
                    batch_loss += loss.item()
                    
                    # backpropagation
                    loss.backward()
                    optimizer.step()

                # loss for epoch 
                batch_loss /= n_batches 
                losses[it] = batch_loss

                # dynamic teacher forcing
                if dynamic_tf and teacher_forcing_ratio > 0:
                    teacher_forcing_ratio = teacher_forcing_ratio - 0.02 

                # progress bar 
                tr.set_postfix(loss="{0:.3f}".format(batch_loss))
                    
        return losses

    def predict(self, input_tensor, target_len):
        
        '''
        : param input_tensor:      input data (seq_len, input_size); PyTorch tensor 
        : param target_len:        number of target values to predict 
        : return np_outputs:       np.array containing predicted values; prediction done recursively 
        '''

        # encode input_tensor
        input_tensor = input_tensor.unsqueeze(1)     # add in batch size of 1
        encoder_output, encoder_hidden = self.encoder(input_tensor)

        # initialize tensor for predictions
        outputs = torch.zeros(target_len, input_tensor.shape[2])

        # decode input_tensor
        decoder_input = input_tensor[-1, :, :]
        decoder_hidden = encoder_hidden
        
        for t in range(target_len):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[t] = decoder_output.squeeze(0)
            decoder_input = decoder_output
            
        np_outputs = outputs.detach().numpy()
        
        return np_outputs


In [18]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np


def remove_max(x):
    x[x.argmax()] = np.median(x)
    print(x.argmax(), ":", x[x.argmax()], "=>", np.median(x))
    return x

def remove_min(x):
    
    x[x.argmin()] = np.median(x)
    print(x.argmin(), ":", x[x.argmin()], "=>", np.median(x))
    return x

def groupby_datapoint(df,
                      new_index_column_name='id',
                      old_index_column_name='승인일자',
                      target='sales',
                      option='ymdh'):
    if option == 'ymdh':
        # yyyymmddh
        df[new_index_column_name] = df[old_index_column_name].astype(str) + '0' + df['a'].astype(str) + '0000'
    elif option == 'ymd':
        # yyyymmdd
        df[new_index_column_name] = df[old_index_column_name].astype(str)
    elif option == 'ym':
        # yyyymm
        df[new_index_column_name] = df[old_index_column_name].apply(lambda x: str(x)[:6] + '01')
    else:
        print("Option value must be ymdh, ymd, ym")
        raise ValueError
        
    df_tm = df.reset_index(drop=True)[[new_index_column_name, target]]
    df_tm = df_tm.groupby([new_index_column_name])[target].sum().reset_index()
    df_tm.sort_values(by=new_index_column_name, inplace=True)
    df_tm.set_index([new_index_column_name], inplace=True)
    print("Group by {} {} -> {}".format(option, df.shape, df_tm.shape))
    df_tm.index = pd.to_datetime(df_tm.index)
    return df_tm 


def windowed_dataset(y, input_window = 5, output_window = 1, stride = 1, num_features = 1):
  
    '''
    create a windowed dataset
    
    : param y:                time series feature (array)
    : param input_window:     number of y samples to give model 
    : param output_window:    number of future y samples to predict  
    : param stide:            spacing between windows   
    : param num_features:     number of features (i.e., 1 for us, but we could have multiple features)
    : return X, Y:            arrays with correct dimensions for LSTM
    :                         (i.e., [input/output window size # examples, # features])
    '''
  
    L = y.shape[0]
    num_samples = (L - input_window - output_window) // stride + 1

    X = np.zeros([input_window, num_samples, num_features])
    Y = np.zeros([output_window, num_samples, 1])    
    
    for ff in np.arange(num_features):
        for ii in np.arange(num_samples):
            start_x = stride * ii
            end_x = start_x + input_window
            X[:, ii, ff] = y[start_x:end_x, ff]

            start_y = stride * ii + input_window
            end_y = start_y + output_window 
            if ff==0:
                Y[:, ii, ff] = y[start_y:end_y, ff]

    return X, Y


def numpy_to_torch(x):
    return torch.from_numpy(x).type(torch.Tensor)



In [19]:
import pandas as pd

def load_feather(path):
    return pd.read_feather(path)



In [20]:
#from preprocessing import groupby_datapoint, windowed_dataset, numpy_to_torch, remove_max
from sklearn.preprocessing import normalize, MinMaxScaler, StandardScaler
#from dataloader import load_feather
#from model import lstm_seq2seq
import argparse
#import torch
import pandas as pd
import re

if __name__ == '__main__':
    
#     args = argparse.ArgumentParser()
#     args.add_argument("--data_path", type=str, default='/home/workspace/data/.train/.task149/train/train_a.feather')
#     args.add_argument("--model_path", type=str, default='model.pth')
#     args.add_argument("--result_path", type=str, default='pred.txt')

#     args.add_argument("--epochs", type=int, default=100)
#     args.add_argument("--batch", type=int, default=128)
#     args.add_argument("--lr", type=float, default=0.0001)
    
#     config= args.parse_args()
    MODEL_PATH = 'model.pth'
    #RESULT_PATH = config.result_path

    EPOCHS = 90 #config.epochs 
    BATCH_SIZE = 128 #config.batch
    LR = 0.001 #config.lr
   
    IN_WINDOW_SIZE = 7*8
    OUT_WINDOW_SIZE = 7*5
    STRIDE = 7
    
    NUM_FEATURE = 1
    TARGET_COL = 'sales'   
    HIDDEN_SIZE = 128
    list1 = ['1','2','3','4','5']
    list2 = ['a','b','c','d']
    for u,r in enumerate(list2):
        sub_df = pd.read_csv('/home/workspace/data/.train/.task149/submission_sample/submission_'+r+'.csv')
        sub_len = sub_df.shape[0]
        
        sub_len = int(sub_len/5)
        for i, a in enumerate(list1):
            DATA_PATH = r+'_business_time'+a+'_edit.csv'
            #'/home/workspace/user-workspace/a_business.csv'
            #'
            #'/home/workspace/data/.train/.task149/train/train_a.feather'



            #     train_df = load_feather(DATA_PATH)

            #     # Preprocessing
            #     train_ts = groupby_datapoint(train_df, 'id', '승인일자', TARGET_COL, 'ymdh')
            #     print(train_ts)
            #     train_ts = train_ts[55:]
            #     train_amt = train_ts[TARGET_COL].values.reshape(-1, 1)
            #print(train_amt)
            # Load data
            train_df = pd.read_csv(DATA_PATH)
            if r =='c':
                BATCH_SIZE = 16 #config.batch
                STRIDE = 7
                train_df = train_df[305:] # 수정
                
            else:
                BATCH_SIZE = 32 #config.batch
                STRIDE = 7
                train_df = train_df[4:] # 수정
                LR = 0.001
            #11
            
            train_df = train_df.drop([train_df.columns[0],train_df.columns[1]],axis=1)

            # Preprocessing

            train_amt = train_df[TARGET_COL].values.reshape(-1, 1)

            #2019/1/12토요일
            #2020/8/1 - 토요일
            #train_amt = remove_max(train_amt) # convert max value to median  
            #train_amt = remove_min(train_amt) # convert max value to median
            
  

            scaler = MinMaxScaler().fit(train_amt)
            #train_amt = (train_amt - train_amt.min(axis=0))/(train_amt.max(axis=0) - train_amt.min(axis=0))


            train_amt = scaler.transform(train_amt)

            # split x_train, x_test
            x_train = train_amt[:-IN_WINDOW_SIZE] # Input for train
            x_test = train_amt[-IN_WINDOW_SIZE:]  # Input for test prediction
            x_train, y_train = windowed_dataset(x_train,
                                                IN_WINDOW_SIZE,
                                                OUT_WINDOW_SIZE,
                                                STRIDE,
                                                NUM_FEATURE)
            #y_train = y_train.reshape(175,-1)

            # to torch
            x_train = numpy_to_torch(x_train)
            y_train = numpy_to_torch(y_train)
            x_test = numpy_to_torch(x_test)

            # train
            model = lstm_seq2seq(input_size = x_train.shape[2], hidden_size = HIDDEN_SIZE).cuda()

            loss = model.train_model(x_train, y_train,
                                     n_epochs = EPOCHS,
                                     target_len = OUT_WINDOW_SIZE,
                                     batch_size = BATCH_SIZE, 
                                     training_prediction = 'mixed_teacher_forcing',
                                     teacher_forcing_ratio = 0.8, 
                                     learning_rate = LR,
                                     dynamic_tf = False)
            
            torch.save(model.state_dict(), 'model'+r+'_'+a+'.pth')

     

100%|██████████| 90/90 [00:05<00:00, 15.60it/s, loss=0.023]
100%|██████████| 90/90 [00:05<00:00, 15.78it/s, loss=0.019]
100%|██████████| 90/90 [00:05<00:00, 15.18it/s, loss=0.019]
100%|██████████| 90/90 [00:05<00:00, 15.32it/s, loss=0.022]
100%|██████████| 90/90 [00:05<00:00, 15.30it/s, loss=0.004]
100%|██████████| 90/90 [00:05<00:00, 15.29it/s, loss=0.021]
100%|██████████| 90/90 [00:05<00:00, 15.25it/s, loss=0.016]
100%|██████████| 90/90 [00:05<00:00, 15.14it/s, loss=0.014]
100%|██████████| 90/90 [00:05<00:00, 15.81it/s, loss=0.017]
100%|██████████| 90/90 [00:05<00:00, 15.18it/s, loss=0.018]
100%|██████████| 90/90 [00:05<00:00, 15.68it/s, loss=0.026]
100%|██████████| 90/90 [00:05<00:00, 15.41it/s, loss=0.036]
100%|██████████| 90/90 [00:06<00:00, 14.97it/s, loss=0.048]
100%|██████████| 90/90 [00:05<00:00, 15.61it/s, loss=0.045]
100%|██████████| 90/90 [00:05<00:00, 15.55it/s, loss=0.049]
100%|██████████| 90/90 [00:05<00:00, 15.02it/s, loss=0.027]
100%|██████████| 90/90 [00:05<00:00, 15.