<a href="https://colab.research.google.com/github/johnnyff/bigcontest2021/blob/main/damn_cnnlstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime so files under /content/drive
# can be read. This import only exists on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import warnings
from tqdm import tqdm

import datetime as dt

warnings.filterwarnings(action='ignore') 

In [None]:
# Load the contest spreadsheet from the mounted Drive. header=1 uses the
# sheet's second row (0-based index 1) as the column labels.
df = pd.read_excel('/content/drive/MyDrive/bigcontest/data/bigcontest_dataset.xlsx', header = 1)
df  # notebook display of the loaded frame

In [None]:
# Column names for the dataset: six leading columns (record id, timestamp
# parts and the prediction target) followed by six numbered groups that each
# repeat the same seven measurement names with the group number as suffix.
columns = ['num', 'year', 'month', 'day', 'hour', 'target'] + [
    f'{measurement}_{group}'
    for group in range(1, 7)
    for measurement in (
        'average_rain', 'a_rain', 'b_rain', 'c_rain', 'd_rain',
        'e_level', 'd_level',
    )
]

In [None]:
# Assign the names in `columns` to the frame; pandas requires the list length
# to equal the number of columns in `df`.
df.columns =columns
# NOTE: everything below is a disabled experiment kept for reference. It would
# overwrite each 'hour' cell with a datetime built from year/month/day/hour,
# turning hour 24 into hour 0 of the next day and, when that date is invalid,
# falling back to day 1 of the following month.
# NOTE(review): the fallback does not handle month 12 rolling into a new year.
# df
# for i in range(len(df)):
#   year= df['year'][i]
#   month= df['month'][i]
#   day= df['day'][i]
#   hour= df['hour'][i]
#   if (hour == 24):
#       day +=1
#       hour = 0
#   try:
#     df['hour'][i] = dt.datetime(year, month, day, hour)
#   except (ValueError):
#     day = 1
#     month += 1
#     df['hour'][i] = dt.datetime(year, month, day, hour)

# df


  


In [None]:
df.columns

In [None]:
# Remove the record-id and timestamp-part columns in place, leaving 'target'
# and the measurement columns.
# NOTE(review): because the drop is in place, re-running this cell raises
# KeyError (the labels are already gone) unless the frame is reloaded first.
df.drop(['num','year','month','day','hour'],axis =1 , inplace = True)
df  # notebook display of the reduced frame

In [None]:
from sklearn.preprocessing import MinMaxScaler
from pandas.plotting import register_matplotlib_converters
from torch import nn, optim
import torch


In [None]:
# Everything except the last 160 rows is used for training/validation.
# NOTE(review): 160 is hard-coded here and again for df_test; keep them in sync.
df_train = df[:-160]
df_train.head()  # notebook preview

In [None]:
# The last 160 rows are held out as the test portion (complement of df_train).
df_test = df[-160 : ]
df_test.head()  # notebook preview


In [None]:
def create_sequences(data, seq_length):
    """Cut `data` into sliding windows and the row that follows each window.

    Args:
        data: object indexed positionally through ``.iloc`` (e.g. a pandas
            DataFrame or Series).
        seq_length: number of consecutive rows in every window.

    Returns:
        A pair ``(windows, targets)`` of NumPy arrays. ``windows[k]`` holds
        rows ``k .. k + seq_length - 1`` and ``targets[k]`` holds row
        ``k + seq_length``. Both arrays are empty when `data` has no more
        than `seq_length` rows.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data.iloc[start:start + seq_length] for start in window_starts]
    targets = [data.iloc[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)


In [None]:
# Number of leading df_train rows used for fitting (80 %, truncated to an int);
# the remaining rows become the validation split.
train_cnt = int(len(df_train)*0.8)
df  # notebook display of the full frame

In [None]:
# Chronological split of df_train: the first train_cnt rows are used for
# fitting, the rest for validation. Labels come from the 'target' column and
# features are every other column.
y_train = df_train['target'][:train_cnt]
y_val = df_train['target'][train_cnt:]
X = df_train.drop("target", axis =1)  # returns a copy; df_train keeps 'target'
X_train = X[:train_cnt]
X_val = X[train_cnt:]

# Sanity check: both shapes should report the same number of rows.
print(X_train.shape, y_train.shape)

In [None]:
def make_Tensor(array):
    """Wrap a NumPy array as a torch tensor and cast it to float32.

    Args:
        array: ``numpy.ndarray`` of a dtype supported by ``torch.from_numpy``.

    Returns:
        A ``torch.float32`` tensor with the same shape as `array`.
    """
    tensor = torch.from_numpy(array)
    return tensor.to(torch.float32)

In [None]:
# Rebind the pandas splits to float32 torch tensors (via their NumPy values).
# NOTE(review): the names are overwritten, so re-running this cell without
# re-running the split cell fails because tensors have no `.values` attribute
# of the pandas kind.
X_train = make_Tensor(X_train.values)
X_val = make_Tensor(X_val.values)
y_train = make_Tensor(y_train.values)
y_val = make_Tensor(y_val.values)


In [None]:
class CovidPredictor(nn.Module):
    """Conv1d front end followed by an LSTM and a linear head giving one value.

    ``forward`` flattens its input into a single one-channel signal, passes it
    through a kernel-size-2 convolution (which shortens it by one element),
    cuts the result into LSTM time steps of ``n_features`` values each and maps
    the LSTM output at the last time step to a single prediction.

    Args:
        n_features: width of one LSTM time step (``nn.LSTM`` ``input_size``).
            The flattened input length minus one must be a multiple of it.
        n_hidden: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, n_features, n_hidden, n_layers):
        super().__init__()
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        # Created lazily by reset_hidden_state(); None means "not initialised".
        self.hidden = None
        self.c1 = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=2, stride=1)  # 1D CNN layer
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers
        )
        self.linear = nn.Linear(in_features=n_hidden, out_features=1)

    def reset_hidden_state(self):
        """Zero the LSTM hidden and cell state; call before each new sample."""
        # forward() always feeds the LSTM a batch of exactly one sequence, so
        # the state's batch dimension is 1. new_zeros keeps the state on the
        # same device and dtype as the model parameters.
        reference = self.linear.weight
        shape = (self.n_layers, 1, self.n_hidden)
        self.hidden = (reference.new_zeros(shape), reference.new_zeros(shape))

    def forward(self, sequences):
        """Predict one value for one sample.

        Args:
            sequences: tensor holding a single sample; it is flattened, so any
                shape is accepted as long as it has at least two elements.

        Returns:
            Tensor of shape ``(1, 1)`` with the prediction.

        Raises:
            RuntimeError: if the convolved signal cannot be split into steps
                of ``n_features`` values.
        """
        if self.hidden is None:
            self.reset_hidden_state()
        # (batch=1, channels=1, length) is the layout nn.Conv1d expects.
        conv_out = self.c1(sequences.reshape(1, 1, -1))
        # nn.LSTM (batch_first=False) expects (time, batch, input_size).
        lstm_out, self.hidden = self.lstm(
            conv_out.reshape(-1, 1, self.n_features),
            self.hidden
        )
        last_time_step = lstm_out[-1]
        y_pred = self.linear(last_time_step)
        return y_pred

In [None]:
def train_model(model, train_data, train_labels, val_data=None, val_labels=None, num_epochs=100, verbose=10, patience=10):
    """Train `model` one sample at a time with L1 loss and Adam (lr=0.001).

    Args:
        model: module exposing ``reset_hidden_state()`` and returning a
            prediction whose first element is compared with the label.
        train_data: iterable of per-sample tensors; each is given a leading
            dimension of size 1 before being passed to the model.
        train_labels: indexable tensor of labels aligned with `train_data`.
        val_data: optional validation samples; None disables validation and
            early stopping.
        val_labels: labels aligned with `val_data`.
        num_epochs: maximum number of passes over `train_data`.
        verbose: print the losses every `verbose` epochs; 0 or None disables
            printing.
        patience: every `patience` epochs (except epoch 0) the validation loss
            is compared with the one `patience` epochs earlier and training
            stops if it increased; 0 or None disables early stopping.

    Returns:
        ``(model, train_hist, val_hist)`` where the histories are lists of
        per-epoch mean losses as Python floats (``val_hist`` is empty when no
        validation data is given).
    """
    loss_fn = torch.nn.L1Loss()
    optimiser = torch.optim.Adam(model.parameters(), lr=0.001)
    train_hist = []
    val_hist = []

    def sample_loss(seq, label):
        # The hidden state must be reset for every sample so that no state
        # (or autograd graph) leaks from one sample into the next.
        model.reset_hidden_state()
        y_pred = model(torch.unsqueeze(seq, 0))
        # Flatten both sides so a (1,) prediction and a 0-dim label have the
        # same shape; the loss value is unchanged.
        return loss_fn(y_pred[0].float().reshape(-1), label.reshape(-1))

    for t in range(num_epochs):

        epoch_loss = 0.0

        for idx, seq in enumerate(train_data):
            loss = sample_loss(seq, train_labels[idx])  # loss for one step

            # update weights
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()

            epoch_loss += loss.item()

        train_loss = epoch_loss / len(train_data)
        train_hist.append(train_loss)

        # Guard against verbose=0/None, which would otherwise divide by zero.
        should_log = bool(verbose) and t % verbose == 0

        if val_data is None:
            if should_log:
                print(f'Epoch {t} train loss: {train_loss}')
            continue

        val_loss = 0.0
        with torch.no_grad():
            for val_idx, val_seq in enumerate(val_data):
                # .item() keeps the history as plain floats, like train_hist.
                val_loss += sample_loss(val_seq, val_labels[val_idx]).item()

        val_loss /= len(val_data)
        val_hist.append(val_loss)

        # Print the losses every `verbose` epochs.
        if should_log:
            print(f'Epoch {t} train loss: {train_loss} val loss: {val_loss}')

        # Check for early stopping every `patience` epochs.
        if patience and t != 0 and t % patience == 0:
            # Stop if the validation loss increased over the last window.
            if val_hist[t - patience] < val_hist[t]:
                print('\n Early Stopping')
                break

    return model, train_hist, val_hist

In [None]:
# Build the network: a single-layer LSTM with 4 hidden units.
# NOTE(review): n_features becomes the LSTM input_size; confirm it matches the
# per-step width that the model's forward() actually feeds to the LSTM.
model = CovidPredictor(
    n_features=1,
    n_hidden=4,
    n_layers=1
)

In [None]:
# Train on the training split and track loss on the validation split.
# Losses are printed every 10 epochs. With patience=50 and num_epochs=100
# (epochs 0..99) the early-stopping comparison happens only once, at epoch 50.
model, train_hist, val_hist = train_model(
    model,
    X_train,
    y_train,
    X_val,
    y_val,
    num_epochs=100,
    verbose=10,
    patience=50
)