<a href="https://colab.research.google.com/github/johnnyff/bigcontest2021/blob/logscale/dam_cnnlstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from, and
# save the model checkpoint to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import warnings
from tqdm import tqdm
import torch
import datetime as dt
import torch.nn as nn

# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas / PyTorch warnings that can point at
# real problems; consider narrowing the filter to specific categories.
warnings.filterwarnings(action='ignore') 

In [None]:
# Load the competition workbook. header=1 is zero-based, so the second row of
# the sheet supplies the column labels.
df = pd.read_excel(
    '/content/drive/MyDrive/bigcontest/data/bigcontest_dataset.xlsx',
    header=1,
)
df

In [None]:
# Replacement column names: six leading columns, then the same seven
# measurement names repeated with the suffixes _1 through _6.
columns = ['num', 'year', 'month', 'day', 'hour', 'target'] + [
    f'{measure}_{suffix}'
    for suffix in range(1, 7)
    for measure in (
        'average_rain',
        'a_rain',
        'b_rain',
        'c_rain',
        'd_rain',
        'e_level',
        'd_level',
    )
]

In [None]:
# Replace the header read from the sheet with the names listed in `columns`.
df.columns =columns



  


In [None]:
# Display the column index to confirm the rename.
df.columns

In [None]:
# Remove the four calendar columns in place, then display the frame.
df.drop(columns=['year', 'month', 'day', 'hour'], inplace=True)
df

In [None]:
# Pick the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fixed seed (777) for the CPU generator and, when present, every CUDA device.
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)
    print(torch.cuda.get_device_name(0))

In [None]:
import torch
from torch.nn import Module, LSTM, Linear
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.autograd import Variable

In [None]:
# Everything except the final 160 rows becomes the training frame.
train_data = df.iloc[:-160]
# Preview the training rows whose 'num' equals 1.
train_data[train_data['num'] == 1]

In [None]:
# Fit a min-max scaler on every training column except the first one.
# fit() returns the scaler itself, so construction and fitting are chained.
scaler = MinMaxScaler().fit(train_data.iloc[:, 1:])
scaler

In [None]:
def sliding_windows(data, seq_length, group_ids=range(1, 26), transformer=None):
    """Build left-padded sliding windows, one sample per row of every group.

    Rows are grouped by the ``num`` column. Within a group the remaining
    columns are scaled with ``transformer``; column 0 of the scaled matrix is
    the target and all other columns are the features. For every row ``t`` one
    sample is produced: its input is the ``seq_length`` feature rows ending at
    ``t`` and its label is the target of row ``t``. Windows that would reach
    before the first row of the group are padded by repeating the first
    feature row.

    This yields exactly ``len(group)`` samples per group. It never emits
    duplicate warm-up samples and never drops the last row of a group.

    Args:
        data: DataFrame with a ``num`` column followed by the target column
            and the feature columns. It is not modified.
        seq_length: Number of time steps per window (must be >= 1).
        group_ids: Values of ``num`` to process, in order. Ids that have no
            rows in ``data`` are skipped. Defaults to 1..25.
        transformer: Object with a ``transform(frame)`` method returning a 2-D
            array. Defaults to the notebook-level ``scaler``.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays with shapes
        ``(n_samples, seq_length, n_features)`` and ``(n_samples,)``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")
    if transformer is None:
        # Fall back to the scaler fitted at notebook level.
        transformer = scaler

    x = []
    y = []
    for group_id in group_ids:
        # drop() without inplace returns a new frame, so the caller's data is
        # left untouched and no write happens on a filtered slice.
        part = data[data['num'] == group_id].drop(columns=['num'])
        if part.empty:
            # A missing group contributes no samples instead of crashing.
            continue

        scaled = np.asarray(transformer.transform(part))
        targets = scaled[:, 0]
        features = scaled[:, 1:]

        # Prepend seq_length - 1 copies of the first feature row so that the
        # window ending at row t always has seq_length rows available.
        padding = np.repeat(features[:1], seq_length - 1, axis=0)
        padded = np.concatenate([padding, features], axis=0)

        for t in range(len(features)):
            x.append(padded[t:t + seq_length])
            y.append(targets[t])
    return np.array(x), np.array(y)

In [None]:
# Window length passed to sliding_windows.
seq_length = 3
# Build the window array x and the target array y from the training frame.
x, y = sliding_windows(train_data, seq_length)




In [None]:
# Display the shape of the window array returned by sliding_windows.
x.shape

In [None]:
# Display the shape of the target array returned by sliding_windows.
y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Random 80/20 split of the windows; the fixed random_state keeps it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1422,
)
print(X_train.shape, X_test.shape)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Wrap each NumPy split in a torch.Tensor (PyTorch's default tensor type).
X_train_t, X_test_t, y_train_t, y_test_t = (
    torch.Tensor(split) for split in (X_train, X_test, y_train, y_test)
)

In [None]:
train_batch_size = 5
test_batch_size = 1

# Training batches are reshuffled on every pass; the evaluation loader keeps
# the dataset order. Both use three worker processes.
train_loader = DataLoader(
    TensorDataset(X_train_t, y_train_t),
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=3,
)
test_loader = DataLoader(
    TensorDataset(X_test_t, y_test_t),
    batch_size=test_batch_size,
    shuffle=False,
    num_workers=3,
)

In [None]:
class convLSTM(nn.Module):
    """1-D convolution front end followed by a bidirectional LSTM regressor.

    The expected input is a float tensor of shape
    ``(batch, seq_len, n_features)``. ``Conv1d`` uses the ``seq_len`` time
    steps as channels and slides along the feature axis with stride 2. The
    LSTM then runs over the ``seq_len`` steps of each sample independently
    (``batch_first=True``), and two linear layers map the output of the last
    step to one scalar per sample.

    Args:
        n_features: Number of input features per time step.
        n_hidden: Hidden size of the LSTM, per direction.
        n_layers: Number of stacked LSTM layers.
        seq_len: Window length, used as the Conv1d channel count. Defaults to
            3, the value that was previously hard-coded.
    """

    def __init__(self, n_features, n_hidden, n_layers, seq_len=3):
        super(convLSTM, self).__init__()
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.seq_len = seq_len
        # Add a 1D CNN layer; the channel axis is the time axis of the window.
        self.c1 = nn.Conv1d(
            in_channels=seq_len,
            out_channels=seq_len,
            kernel_size=3,
            padding=1,
            stride=2,
        )
        # Conv1d output width for kernel 3, padding 1, stride 2:
        # floor((n_features + 2*1 - 3) / 2) + 1. This equals n_features // 2
        # only for even n_features, so compute it exactly.
        conv_out_width = (n_features - 1) // 2 + 1
        self.lstm = nn.LSTM(
            input_size=conv_out_width,
            hidden_size=n_hidden,
            num_layers=n_layers,
            bidirectional=True,
            # Inputs arrive as (batch, seq, feature). Without batch_first the
            # LSTM would treat the batch axis as time and mix samples.
            batch_first=True,
        )

        self.linear1 = nn.Linear(n_hidden * 2, n_hidden)
        self.linear2 = nn.Linear(n_hidden, 1)
        torch.nn.init.xavier_uniform_(self.linear1.weight)
        torch.nn.init.xavier_uniform_(self.linear2.weight)

    def reset_hidden_state(self, batch_size=1):
        """Store a zeroed ``(h_0, c_0)`` pair in ``self.hidden``.

        The tensors have shape ``(2 * n_layers, batch_size, n_hidden)`` and
        live on the same device as the model weights. ``forward`` does not
        read this attribute; it always starts the LSTM from a zero state.

        Args:
            batch_size: Batch dimension of the state tensors. Defaults to 1.
        """
        state_shape = (2 * self.n_layers, batch_size, self.n_hidden)
        target_device = self.linear1.weight.device
        self.hidden = (
            torch.zeros(state_shape, device=target_device),
            torch.zeros(state_shape, device=target_device),
        )

    def forward(self, sequences):
        """Return one prediction per sample as a tensor of shape ``(batch,)``.

        Args:
            sequences: Tensor of shape ``(batch, seq_len, n_features)``.
        """
        sequences = self.c1(sequences)
        # No initial state is passed, so every call starts from zeros; the
        # final state is kept on the module for inspection only.
        lstm_out, self.hidden_cell = self.lstm(sequences)
        y_pred = self.linear1(lstm_out)
        y_pred = self.linear2(y_pred)
        # (batch, seq_len, 1) -> (batch, seq_len); keep the last time step.
        y_pred = y_pred.view(len(sequences), -1)
        return y_pred[:, -1]

In [None]:
# Build the network and move it to the selected device.
model = convLSTM(n_features=42, n_hidden=120, n_layers=1)
model = model.to(device)

# Mean-squared-error criterion and an Adam optimizer over all model weights.
loss_function = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model

In [None]:
# Smoke test: run the first batch of test_loader through the model, print the
# raw output and stop after that single batch.
for seq, labels in test_loader:
  seq = seq.to(device)
  print(model(seq))
  break

In [None]:
# Step-decay schedule: the learning rate is multiplied by gamma (0.8) after
# every step_size (250) calls to epoch_scheduler.step().
epoch_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 250, gamma=0.8, verbose = True)


In [None]:
# Training loop with periodic validation, checkpointing and early stopping.
epochs = 2000
patience = 100      # validation rounds without improvement before stopping
min_val_loss = 9999
counter = 0
batch_loss = []     # summed training loss, one entry per epoch
val_loss = []       # validation RMSE in original target units, one per round
preds = []
gts = []
for i in range(epochs):
    model.train()
    mse_train = 0
    for seq, labels in train_loader:
        seq = seq.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        # The model starts its LSTM from a zero state on every forward call,
        # so no hidden state needs to be prepared here.
        y_pred = model(seq)
        loss = loss_function(y_pred, labels)
        loss.backward()
        mse_train += loss.item()
        optimizer.step()
    batch_loss.append(mse_train)
    epoch_scheduler.step()

    # Validation and the early-stopping bookkeeping run every fifth epoch.
    if i % 5 != 0:
        continue

    model.eval()
    # Start fresh each round so preds/gts always describe the latest pass
    # instead of growing across the whole run.
    preds = []
    gts = []
    mse_val = 0.0   # sum of squared errors in original target units
    with torch.no_grad():
        for seq, labels in test_loader:
            seq = seq.to(device)
            labels = labels.to(device)
            y_pred = model(seq)

            # inverse_transform needs complete rows (target + features). The
            # scaler works column by column, so the feature values only act
            # as filler and do not influence the recovered target column.
            filler = seq[:, -1, :]
            gt_batch = scaler.inverse_transform(
                torch.cat([labels.unsqueeze(1), filler], dim=1).cpu().numpy()
            )[:, 0]
            pred_batch = scaler.inverse_transform(
                torch.cat([y_pred.unsqueeze(1), filler], dim=1).cpu().numpy()
            )[:, 0]

            mse_val += float(((pred_batch - gt_batch) ** 2).sum())
            gts.extend(gt_batch.tolist())
            preds.extend(pred_batch.tolist())

    # Guard against an empty loader; otherwise divide by the sample count.
    total = max(len(gts), 1)
    val_rmse = (mse_val / total) ** 0.5
    val_loss.append(val_rmse)

    print("iteration : {} , rsme : {}".format(i, val_rmse))
    # Compare on the same scale that is reported and stored in val_loss.
    if min_val_loss > val_rmse:
        min_val_loss = val_rmse
        print("Saving...")
        torch.save(model.state_dict(), "/content/drive/MyDrive/bigcontest/danm_convlstm_model_2days.pt")
        counter = 0
    else:
        counter += 1

    if counter >= patience:
        break
    
    

In [None]:
# Report the best (lowest) validation value recorded during training and plot
# the whole validation curve.
print(min(val_loss))
plt.plot(val_loss)

In [None]:
# Evaluate the current model on test_loader in original target units.
model.eval()
with torch.no_grad():
    # Reset both lists so they stay aligned with each other. Previously only
    # `preds` was reset and `gts` kept the entries added by earlier cells.
    gts = []
    preds = []
    mse_val = 0     # sum of squared errors in original target units
    total = 0       # number of evaluated samples
    for seq, labels in test_loader:
        seq = seq.to(device)
        labels = labels.to(device)
        y_pred = model(seq)

        # inverse_transform needs complete rows (target + features). The
        # scaler works column by column, so the feature values only act as
        # filler and do not influence the recovered target column.
        filler = seq[:, -1, :]
        gt_batch = scaler.inverse_transform(
            torch.cat([labels.unsqueeze(1), filler], dim=1).cpu().numpy()
        )[:, 0]
        pred_batch = scaler.inverse_transform(
            torch.cat([y_pred.unsqueeze(1), filler], dim=1).cpu().numpy()
        )[:, 0]

        mse_val += float(((pred_batch - gt_batch) ** 2).sum())
        total += len(gt_batch)
        gts.extend(gt_batch.tolist())
        preds.extend(pred_batch.tolist())
    

### Checking with the entire dataset

In [None]:
# Score every window in x against y and collect the results in original
# target units.
model.eval()
with torch.no_grad():
    # One forward pass per window (batch size 1); window[np.newaxis] adds the
    # batch axis without assuming a particular window length.
    scaled_pred = np.array([
        model(torch.FloatTensor(window[np.newaxis]).to(device)).item()
        for window in x
    ])

# inverse_transform needs complete rows (target + features). The scaler works
# column by column, so the feature values only act as filler and do not
# influence the recovered target column.
filler = x[:, -1, :]
final_gt = scaler.inverse_transform(np.column_stack([y, filler]))[:, 0].tolist()
final_pred = scaler.inverse_transform(np.column_stack([scaled_pred, filler]))[:, 0].tolist()
# Sum of squared errors; the next cell turns it into an RMSE.
final_loss = float(np.sum((np.array(final_pred) - np.array(final_gt)) ** 2))

In [None]:

# Root-mean-square error over all scored windows: square root of the summed
# loss divided by the number of ground-truth values.
rmse = (final_loss/len(final_gt))**0.5
rmse

In [None]:
# Side-by-side table of predictions and ground-truth values.
# NOTE(review): the 'GT ' column name carries a trailing space; confirm that
# is intended before any code selects the column by name.
feed = pd.DataFrame({'predict': final_pred, 'GT ' : final_gt})
feed

In [None]:
# Overlay the ground-truth and predicted series on one labelled chart.
plt.plot(final_gt, label ='gt')
plt.plot(final_pred,label = 'pred')
plt.legend()
plt.show()