In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import time
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Bidirectional, GRU
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
from numpy import loadtxt
from torch.utils.data import TensorDataset, DataLoader
from keras.models import load_model
from time import sleep
from tqdm import tqdm, trange
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim


In [None]:
# Read the CSV by a path relative to the working directory; the "Date" column is
# parsed into datetimes while loading.
dataset_path = 'testbed_flat_tms_up.csv'
dataset = pd.read_csv(
    dataset_path,
    parse_dates=["Date"],
)
# Displayed as the cell output: (number of rows, number of columns).
dataset.shape

In [None]:
# Index the table by its "Date" column.
# The guard keeps this cell re-runnable: after the first execution "Date" is the
# index and no longer a column, so an unguarded set_index(['Date']) would raise
# KeyError when the cell is executed a second time.
if 'Date' in dataset.columns:
    dataset = dataset.set_index(['Date'])
dataset.head()

In [None]:
# Split the rows in their stored order: the first 70 % for training, the next 10 %
# for validation and every remaining row for testing (no shuffling).
total_steps = len(dataset)
train_size = int(total_steps * 0.7)
val_size = int(total_steps * 0.1)

val_end = train_size + val_size  # first row position that belongs to the test split
train_df = dataset[:train_size]
val_df = dataset[train_size:val_end]
test_df = dataset[val_end:]

In [None]:
# Re-wrap the training slice in its own DataFrame object (values are unchanged).
train_df = pd.DataFrame(data=train_df)

In [None]:
# Re-wrap the test slice in its own DataFrame object (values are unchanged).
test_df = pd.DataFrame(data=test_df)

In [None]:
# Re-wrap the validation slice in its own DataFrame object (values are unchanged).
val_df = pd.DataFrame(data=val_df)

In [None]:
# Min-max scaling to the [0, 1] range. The scaler is fitted on the training split
# only; the same fitted transform is then applied to the test and validation splits.
sc = MinMaxScaler(feature_range=(0, 1)).fit(train_df)

# Each transformed split is wrapped in a new DataFrame built without explicit
# index/columns, so the scaled frames carry default integer labels.
train_df, test_df, val_df = (
    pd.DataFrame(sc.transform(split)) for split in (train_df, test_df, val_df)
)

In [None]:
def create_dataset(X, y, seq_len=1):
    """Cut a row-ordered table into (input window, next-row target) pairs.

    Pair ``i`` uses rows ``i .. i + seq_len - 1`` of ``X`` as input and row
    ``i + seq_len`` of ``y`` as target, so ``len(X) - seq_len`` pairs are built.

    Args:
        X: pandas DataFrame or Series holding the input rows.
        y: pandas DataFrame or Series holding the target rows; it is indexed by
            position, so it must be at least as long as ``X``.
        seq_len: number of consecutive rows in each input window.

    Returns:
        Tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``(n_pairs, seq_len, ...)`` and ``ys`` has shape ``(n_pairs, ...)`` where
        ``...`` is the shape of one row. When the input is too short to form a
        single pair, both arrays are empty but keep those trailing dimensions.

    Raises:
        IndexError: if ``y`` has fewer rows than a window target requires.
    """
    # Convert once up front instead of creating a pandas slice for every window.
    x_values = X.to_numpy()
    y_values = y.to_numpy()

    Xs, ys = [], []
    for i in tqdm(range(len(x_values) - seq_len)):
        Xs.append(x_values[i:i + seq_len])
        ys.append(y_values[i + seq_len])

    if not Xs:
        # np.array([]) would collapse to shape (0,); keep the window/feature axes
        # so downstream tensor code still sees arrays of the expected rank.
        empty_x = np.empty((0, seq_len) + x_values.shape[1:], dtype=x_values.dtype)
        empty_y = np.empty((0,) + y_values.shape[1:], dtype=y_values.dtype)
        return empty_x, empty_y
    return np.array(Xs), np.array(ys)

In [None]:
class LSTM_TM(nn.Module):
    """LSTM forecaster that maps a window of past rows to one predicted row.

    Args:
        input_dim: number of features per time step; the output has the same size.
        hidden_dim: size of the LSTM hidden state.
        seq_len: window length. Stored on the module only; the forward pass does
            not read it.
        n_layer: number of stacked LSTM layers (default 1, matching the original
            single-layer model).
        dropout: dropout probability between stacked LSTM layers. ``nn.LSTM``
            applies dropout only after non-final layers, so the value is passed
            on only when ``n_layer > 1``. With a single layer it never had any
            effect and merely triggered a UserWarning at construction time.
    """

    def __init__(self, input_dim, hidden_dim, seq_len, n_layer=1, dropout=0.2):
        super(LSTM_TM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        self.n_layer = n_layer

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layer,
            batch_first=True,
            # Inter-layer dropout is meaningless (and warned about) for one layer.
            dropout=dropout if n_layer > 1 else 0.0)
        # Projects a hidden state back to the feature dimension.
        self.fc = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Predict the row that follows each input window.

        Args:
            x: batched float tensor laid out as (batch, time, input_dim), as
                required by ``batch_first=True``.

        Returns:
            Tensor of shape (batch, input_dim) computed from the last time step.
        """
        hidden_seq, _ = self.lstm(x)
        # Only the final time step is returned, so project just that slice rather
        # than running the linear layer over the whole sequence.
        return self.fc(hidden_seq[:, -1])

In [None]:
# Small constant added to the labels wherever they appear in a denominator.
EPS = 1e-8


def rse(preds, labels):
    """Sum of squared errors divided by the sum of squared (labels + EPS)."""
    squared_error = (preds - labels).pow(2).sum()
    squared_scale = (labels + EPS).pow(2).sum()
    return squared_error / squared_scale


def mae(preds, labels):
    """Mean absolute error over all elements."""
    return (preds - labels).abs().mean()


def mse(preds, labels):
    """Mean squared error over all elements."""
    return (preds - labels).pow(2).mean()


def mape(preds, labels):
    """Mean absolute error relative to (labels + EPS), as a fraction (not percent)."""
    relative_error = (preds - labels) / (labels + EPS)
    return relative_error.abs().mean()


def rmse(preds, labels):
    """Square root of the mean squared error."""
    return (preds - labels).pow(2).mean().sqrt()


def calc_metrics(preds, labels):
    """Return the tuple (rse, mae, mse, mape, rmse) for one prediction/label pair."""
    return tuple(metric(preds, labels) for metric in (rse, mae, mse, mape, rmse))


In [None]:
class Logger:
    """Per-run training log.

    Writes TensorBoard scalars and a CSV metric history into ``logdir``, saves the
    model whenever the validation loss improves, and tracks how many epochs have
    passed without improvement so the caller can stop early.
    """

    def __init__(self, logdir):
        """Create ``logdir`` if necessary and open a TensorBoard writer in it."""
        print('---> logdir:', logdir)
        os.makedirs(logdir, exist_ok=True)

        self.writer = SummaryWriter(log_dir=logdir)
        self.logdir = logdir
        self.best_model_save_path = os.path.join(logdir, 'best_model.pth')

        # Early-stopping state.
        self.min_val_loss = np.inf  # best validation loss seen so far
        self.patience = 0           # consecutive epochs without improvement
        self.stop = False           # set once the patience limit is reached

        # One pandas Series per recorded epoch.
        self.metrics = []

    def summary(self, m, model, epoch, patience):
        """Record one epoch of metrics and update the early-stopping state.

        Args:
            m: mapping of metric name to value; must contain ``train_loss`` and
                ``val_loss``.
            model: module whose ``state_dict`` is saved when ``val_loss`` improves.
            epoch: step index used for the TensorBoard scalars.
            patience: number of non-improving epochs after which ``self.stop``
                becomes True.

        Returns:
            A one-line progress description string.
        """
        record = pd.Series(m)
        self.metrics.append(record)

        improved = record.val_loss < self.min_val_loss
        if not improved:
            self.patience += 1
        else:
            # New best: checkpoint the weights and reset the counter.
            torch.save(model.state_dict(), self.best_model_save_path)
            self.min_val_loss = record.val_loss
            self.patience = 0

        history = pd.DataFrame(self.metrics)
        description = 'train loss: {:.5f} val_loss: {:.5f} | best val_loss: {:.5f} patience: {}'.format(
            record.train_loss, record.val_loss, self.min_val_loss, self.patience)

        # The full history is rewritten every epoch.
        history.round(6).to_csv('{}/train_metrics.csv'.format(self.logdir))

        self.writer.add_scalar('Loss/Train', record.train_loss, epoch)
        self.writer.add_scalar('Loss/Val', record.val_loss, epoch)

        if patience <= self.patience:
            self.stop = True
        return description

In [None]:
def training(model, optimizer, x, y):
    """Run one optimisation step on a single mini-batch.

    Uses the module-level ``lossfn`` as the training objective and
    ``calc_metrics`` for the reported metrics.

    Args:
        model: network to train; switched to train mode here.
        optimizer: optimizer bound to ``model``'s parameters.
        x: input batch.
        y: target batch. If the prediction's rank differs from ``y``'s, the
            prediction is reshaped to ``y.shape``.

    Returns:
        Tuple of Python floats ``(loss, rse, mae, mse, mape, rmse)`` for the batch.
    """
    model.train()

    # The batch may live on the CPU while the model was moved to a GPU; put both
    # tensors on the model's device so the forward pass does not fail.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x, y = x.to(first_param.device), y.to(first_param.device)

    optimizer.zero_grad()
    predict = model(x)
    if len(predict.size()) != len(y.size()):
        predict = torch.reshape(predict, y.shape)

    loss = lossfn(predict, y)
    loss.backward()
    optimizer.step()

    # The metrics are only reported, never back-propagated, so compute them on the
    # detached prediction without recording an autograd graph.
    with torch.no_grad():
        b_rse, b_mae, b_mse, b_mape, b_rmse = calc_metrics(predict.detach(), y)
    return loss.item(), b_rse.item(), b_mae.item(), b_mse.item(), b_mape.item(), b_rmse.item()


def evaluating(model, x, y):
    """Score one mini-batch without updating the model.

    Uses the module-level ``lossfn`` and ``calc_metrics``. Predictions are clamped
    to the range [0, 10e10] before scoring.

    Args:
        model: network to evaluate; switched to eval mode here.
        x: input batch.
        y: target batch.

    Returns:
        Tuple of Python floats ``(loss, rse, mae, mse, mape, rmse)`` for the batch.
    """
    model.eval()

    # Put the batch on the model's device so evaluation also works on a GPU model.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x, y = x.to(first_param.device), y.to(first_param.device)

    # No gradients are needed here; do not rely on the caller to disable them.
    with torch.no_grad():
        predict = model(x)
        predict = torch.clamp(predict, min=0., max=10e10)

        loss = lossfn(predict, y)
        b_rse, b_mae, b_mse, b_mape, b_rmse = calc_metrics(predict, y)

    return loss.item(), b_rse.item(), b_mae.item(), b_mse.item(), b_mape.item(), b_rmse.item()


def testing(model, test_loader, out_seq_len):
    """Predict over a whole loader and score the concatenated result.

    Args:
        model: network to evaluate; switched to eval mode here.
        test_loader: iterable yielding ``(x, y)`` batches.
        out_seq_len: accepted for interface compatibility; not used.

    Returns:
        Tuple ``(test_met_df, y_real, yhat)``: a one-row DataFrame with columns
        ``rse, mae, mse, mape, rmse`` (index named ``t``), the concatenated
        targets, and the concatenated predictions with negative values set to 0.
    """
    model.eval()
    first_param = next(model.parameters(), None)

    outputs = []
    targets = []
    # Inference only: disable autograd here instead of relying on the caller.
    with torch.no_grad():
        for x, y in test_loader:
            if first_param is not None:
                # Batches may be on the CPU while the model sits on a GPU.
                x, y = x.to(first_param.device), y.to(first_param.device)
            outputs.append(model(x))
            targets.append(y)

    yhat = torch.cat(outputs, dim=0)
    y_real = torch.cat(targets, dim=0)

    # Negative predictions are replaced by zero before scoring.
    yhat = torch.clamp(yhat, min=0.0)

    if len(yhat.size()) != len(y_real.size()):
        yhat = torch.reshape(yhat, y_real.shape)

    test_met = [[value.item() for value in calc_metrics(yhat, y_real)]]
    test_met_df = pd.DataFrame(test_met, columns=['rse', 'mae', 'mse', 'mape', 'rmse']).rename_axis('t')
    return test_met_df, y_real, yhat


In [None]:
# --- Experiment configuration -------------------------------------------------
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Every scaled column is fed to the model, so the feature count is read from the
# data instead of being hard-coded (the original constant was 144).
input_dim = train_df.shape[1]
hidden_dim = 100
lossfn = torch.nn.MSELoss()
seq_length = [24, 12, 6, 3]   # input window lengths to compare
runs = [1, 2, 3, 4, 5]        # repetitions per window length

batch_size = 64
epochs = 500
early_stop_patience = 50      # epochs without validation improvement before stopping
parent_logs_path = 'logs'
# Kept under its own name: assigning this label to `dataset` would overwrite the
# DataFrame loaded earlier in the notebook and break re-running the earlier cells.
dataset_name = 'abilene'
# Order of the values returned by training() / evaluating().
metric_names = ('loss', 'rse', 'mae', 'mse', 'mape', 'rmse')


def _make_loader(features, targets):
    """Wrap two NumPy arrays in an ordered (unshuffled) float32 DataLoader."""
    tensors = TensorDataset(torch.from_numpy(features).float(),
                            torch.from_numpy(targets).float())
    return DataLoader(tensors, batch_size=batch_size, shuffle=False)


# --- Train and test one model per (window length, run) ---------------------------
for seq_len in seq_length:
    print('***seq_len***', seq_len)

    # The windows depend only on seq_len, so build them once per window length
    # and share them across all runs instead of recomputing them for every run.
    x_train, y_train = create_dataset(train_df, train_df, seq_len)
    x_test, y_test = create_dataset(test_df, test_df, seq_len)
    x_val, y_val = create_dataset(val_df, val_df, seq_len)

    train_loader = _make_loader(x_train, y_train)
    val_loader = _make_loader(x_val, y_val)
    test_loader = _make_loader(x_test, y_test)

    logdir = 'lstm_data_{}_seq_{}'.format(dataset_name, seq_len)

    for run in runs:
        print('<--run-->', run)

        # Seed each repetition with its run number: runs still differ from one
        # another, but re-executing the notebook reproduces the same initial weights.
        torch.manual_seed(run)

        model = LSTM_TM(input_dim, hidden_dim, seq_len=seq_len)
        model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.0001)

        logger = Logger(logdir=os.path.join(parent_logs_path, logdir, 'run_{}'.format(run)))

        print('|--- Training ---|')
        iterator = trange(epochs)
        tmps_train = time.time()

        for epoch in iterator:
            train_hist = {name: [] for name in metric_names}
            for x, y in train_loader:
                for name, value in zip(metric_names, training(model, optimizer, x, y)):
                    train_hist[name].append(value)

            val_hist = {name: [] for name in metric_names}
            with torch.no_grad():
                for x, y in val_loader:
                    for name, value in zip(metric_names, evaluating(model, x, y)):
                        val_hist[name].append(value)

            # Epoch-level metrics are the plain mean of the per-batch values.
            m = {'train_' + name: np.mean(values) for name, values in train_hist.items()}
            m.update({'val_' + name: np.mean(values) for name, values in val_hist.items()})

            description = logger.summary(m, model, epoch, patience=early_stop_patience)

            if logger.stop:
                break

            description = 'Epoch: {} '.format(epoch) + description
            iterator.set_description(description)

        # Finalise the progress bar when the loop ended through early stopping.
        iterator.close()

        tmps_t2 = time.time() - tmps_train
        print("Train_Temps = %f" % tmps_t2)

        # Test the checkpoint with the lowest validation loss, not the last epoch.
        model.load_state_dict(torch.load(logger.best_model_save_path, map_location=device))

        print('|--- Testing ---|')
        tmps_test = time.time()
        with torch.no_grad():
            test_met_df, y_real, yhat = testing(model, test_loader, out_seq_len=1)
            tmps_test2 = time.time() - tmps_test
            print("Test_Temps = %f" % tmps_test2)
        print('Test metric: ', test_met_df)
        test_met_df.to_csv(os.path.join(logger.logdir, 'test_metrics.csv'))
        np.save(os.path.join(logger.logdir, 'y_real_data'), y_real.detach().cpu().numpy())
        np.save(os.path.join(logger.logdir, 'y_pred_data'), yhat.detach().cpu().numpy())

        # Flush and release this run's TensorBoard event file before the next run.
        logger.writer.close()
        


In [None]:
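# NOTE(review): disabled Keras CNN+LSTM experiment, kept for reference only. It relies on
# Conv1D, CONV_WIDTH, compile_and_fit, conv_window, val_performance and performance,
# none of which are defined in the visible part of this notebook.
#cnn_lstm_model = Sequential([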
    #Conv1D(filters=32, 
           #kernel_size=(CONV_WIDTH,), 
           #activation='relu'),
    #LSTM(32, return_sequences=True),
    #LSTM(32, return_sequences=True),
    #Dense(1)
#])

#history = compile_and_fit(cnn_lstm_model, conv_window)

#val_performance['CNN+LSTM'] = cnn_lstm_model.evaluate(conv_window.val)
#performance['CNN+LSTM'] = cnn_lstm_model.evaluate(conv_window.test, verbose=0)