In [8]:
from torch import nn
import torch
import pandas as pd
import numpy as np
import csv
import random
import sklearn.model_selection
from torch.utils.tensorboard.writer import SummaryWriter
import tqdm
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import os

In [9]:
def read_csv(filename):
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        data = []
        temp = []
        i = 0
        for row in csv_reader:
            if i>0:
                if row[2] =='' or row[3]=='' or row[4]=='':
                    if temp != []:
                        data.append(temp)
                    temp = []
                    continue
                temp.append([np.float64(row[2]),np.float64(row[3]),np.float64(row[4]),np.float32(row[1].split()[1].split(':')[0])])
            i +=1
        if len(temp)>0:
            data.append(temp)
    return data

In [10]:

class AirDataset_all(Dataset):

    def __init__(self, mode ='train'):
        self.mode = mode
        self.path = [os.path.join(f'dataset/data_train/input', f) for f in os.listdir(f'dataset/data_train/input')]
        self.delay = 36
        self.future = 24

        self.train = []
        self.test = []
        self.pre_process()
    def __len__(self):
        if self.mode =='train':
            return len(self.train)
        else:
            return len(self.test)
    def pre_process(self):
        data = []
        for path in self.path:
            raw = read_csv(path)
            for period in raw:
                if len(period)>=(self.delay+self.future):
                    for i in range(len(period)-self.delay-self.future+1):
                        data.append(np.stack(period[i:i+self.delay+self.future]))
                        
            train, test = sklearn.model_selection.train_test_split(data, test_size=0.2,shuffle=False)
            self.train += train
            self.test += test
            data = []
    def __getitem__(self, idx):
        if self.mode == 'train':
            input = torch.FloatTensor(self.train[idx][0:self.delay,:])
            output = torch.FloatTensor([self.train[idx][self.delay:,0]]).T.squeeze(-1)
        if self.mode == 'test':
            input = torch.FloatTensor(self.test[idx][0:self.delay,:])
            output = torch.FloatTensor([self.test[idx][self.delay:,0]]).T.squeeze(-1)
        return input, output

def get_loader_all():
    train_loader  = DataLoader(dataset=AirDataset_all(mode='train'), 
                               drop_last=True, 
                               shuffle=True,
                               batch_size=16)
    
    dev_loader  = DataLoader(dataset=AirDataset_all(mode='test'), 
                             drop_last=True, 
                             shuffle=False,
                             batch_size=16)

    return train_loader, dev_loader


In [11]:

class LSTM(nn.Module):

    def __init__(self):
        super().__init__()
        self.lstm  = nn.LSTM(4, 64, 2,bidirectional = True, batch_first=True)
        self.lstm1  = nn.LSTM(64, 128, 2,bidirectional = True, batch_first=True)
        self.fc1 = nn.Linear(256, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

        self.delay = 24
        self.future = 24
    def forward(self, x):
        _, (h, c) = self.lstm(x)
        a = h[-1,:,:]
        a = a.unsqueeze(1).repeat(1, self.future, 1)
        a, (hidden_state, cell_state) = self.lstm1(a)
        a = self.fc1(a)
        a = self.tanh(a)
        a = self.fc2(a)
        a = x[:,-self.future:,0]+a[:,:,0]
        a = nn.ReLU()(a)
        return a
    def compute_loss(self, inp, desire):
        output = self(inp)
        loss = torch.mean(torch.abs((desire-output)/desire))
        return loss 


In [12]:

def get_ckpt_folder():
    folder = f'ckpt/predict_24/checkpoints'
    if not os.path.exists(folder):
        os.makedirs(folder)
    return folder

def get_logs_folder():
    return get_ckpt_folder().replace('checkpoints', 'logs')

def compute_mdape(y, y_hat):
    return np.median(torch.abs((y-y_hat)/y))

def compute_mape(y, y_hat):
    return torch.mean(torch.abs((y-y_hat)/y))

def compute_mae(y, y_hat):
    return torch.mean(torch.abs(y-y_hat))

def compute_rmse(y, y_hat):
    return torch.sqrt(torch.mean(torch.pow(y-y_hat,2)))
        
def compute_r2(y, y_hat):
    return 1- (torch.sum(torch.pow(y-y_hat,2)))/(torch.sum(torch.pow(y-torch.mean(y),2)))
        
def compute_metrics(x, y, y_hat):
    # initialize metrics
    metrics = {}
    # MDAPE
    metrics['MDAPE'] = compute_mdape(y, y_hat)
    # MAPE
    metrics['MAPE'] = compute_mape(y, y_hat, )
    # MAE
    metrics['MAE'] = compute_mae(y, y_hat, )
    # RMSE
    metrics['RMSE'] = compute_rmse(y, y_hat, )
    # R2
    metrics['R2'] = compute_r2(y, y_hat, )
    return metrics

In [13]:
class Trainer:

    def __init__(self):

        # clear cache
        self.clear_cache()
        # get loader
        self.train_loader, self.dev_loader = get_loader_all()
        # get model
        self.model = LSTM().to('cuda:0')
        # get optimizer
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=1e-4)
        # get writer
        self.writer = SummaryWriter(get_logs_folder())
        # get iteration
        self.iteration = 0
        # get epoch
        self.num_epoch = 5
        self.limit_train_batch = -1
        self.log_iter = 10
        self.eval_iter = 1
    def train_step(self, batch, batch_idx):
        # extract data
        x, y = batch
        x = x.to('cuda:0')
        y = y.to('cuda:0')
        self.optimizer.zero_grad()
        # compute loss
        loss_dict = {'loss': self.model.compute_loss(x, y)}
        # backward and update weight
        loss_dict['loss'].backward()
        # clip grad norm
        # self.clip_grad_norm()
        self.optimizer.step()
        return loss_dict

    def validation_step(self, batch, batch_idx, mode='dev'):
        with torch.no_grad():
            # extract data
            x, y = batch
            x = x.to('cuda:0')

            # compute loss
            y_hat_device = self.model(x)
            y_hat = y_hat_device.detach().cpu().numpy()

            # compute metrics
            metrics = compute_metrics(x, y, y_hat)
        cleaned_metrics = {}
        for key in metrics:
            cleaned_metrics[f'{mode}:{key}'] = metrics[key]
        return cleaned_metrics

    def limit_train_batch_hook(self, batch_idx):
        if self.limit_train_batch > 0:
            if batch_idx > self.limit_train_batch:
                return True
        return False

    def limit_val_batch_hook(self, batch_idx):
        if self.limit_val_batch > 0:
            if batch_idx > self.limit_val_batch:
                return True
        return False

    def get_checkpoint_path(self):
        ckpt_folder = get_ckpt_folder()
        ckpt_name = 'predict24'
        return os.path.join(ckpt_folder, ckpt_name) + '.ckpt'

    def clear_cache(self):
        ckpt_folder = get_ckpt_folder()
        logs_folder = get_logs_folder()
        if self.clear_cache:
            os.system(f'rm -rf {ckpt_folder} {logs_folder}')

    # def clip_grad_norm(self):
    #     torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

    def write_dev_metric_to_tensorboard(self, epoch, metrics):
        # compute average
        for key in metrics:
            metrics[key] = np.mean(metrics[key])
        # display
        print('Evaluate epoch:{}: MDAPE={:0.2f} MAPE={:0.2f},  MAE={:0.2f}, RMSE={:0.2f}, R2={:0.2f}' \
            .format(epoch, metrics['dev:MDAPE'], metrics['dev:MAPE'], metrics['dev:MAE'], metrics['dev:RMSE'],metrics['dev:R2']))
        # write to tensorboard
        self.writer.add_scalars('validation metric', metrics, epoch)

    def write_train_metric_to_tensorboard(self, loss_dicts):
        for key in loss_dicts:
            loss_dicts[key] = np.mean(loss_dicts[key])
        self.writer.add_scalars('training metric', loss_dicts, self.iteration)
        
    def load_checkpoint(self):
        # self.epoch = 0
        path = self.get_checkpoint_path()
        if os.path.exists(path):
            checkpoint = torch.load(path)
            print('[+] checkpoint loaded:', path)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            # self.epoch = checkpoint['epoch']
            self.iteration = checkpoint['iteration']


    def fit(self):
        # load checkpoint
        print(self.model)
        print('Trainable parameters:', sum(p.numel() for p in self.model.parameters() if p.requires_grad))
        print('Non-trainable parameters:', sum(p.numel() for p in self.model.parameters() if not p.requires_grad))
        self.load_checkpoint()
        for epoch in range(1,self.num_epoch):
            # train
            loss_dicts = None
            self.model.train()
            with tqdm.tqdm(self.train_loader, unit="it") as pbar:
                pbar.set_description(f'Epoch {epoch}')
                for batch_idx, batch in enumerate(pbar):

                    # perform training step
                    loss_dict = self.train_step(batch, batch_idx)
                    if loss_dicts is None:
                        loss_dicts = {}
                        for key in loss_dict:
                            loss_dicts[key] = []
                    for key in loss_dict:
                        loss_dicts[key].append(float(loss_dict[key].detach().cpu()))

                    # limit train batch hook
                    if self.limit_train_batch_hook(batch_idx):
                        break

                    # set postfix
                    kwargs = {}
                    for key in loss_dict:
                        kwargs[key] = float(loss_dict[key].detach().cpu())
                    pbar.set_postfix(**kwargs)

                    # log
                    # self.epoch = epoch
                    self.iteration += 1
                    if self.iteration % self.log_iter == 0:
                        self.write_train_metric_to_tensorboard(loss_dicts)
                        loss_dicts = None

            ##########################################################################################
            # evaluate
            if (epoch+1) % self.eval_iter == 0:
                self.model.eval()
                metrics = {}
                with tqdm.tqdm(self.dev_loader, unit="it") as pbar:
                    pbar.set_description(f'Evaluate epoch - dev {epoch}')
                    for batch_idx, batch in enumerate(pbar):
                        # validate
                        batch_metrics = self.validation_step(batch, batch_idx, mode='dev')
                        # accumulate valilation metrics
                        for key in batch_metrics:
                            if key not in metrics.keys():
                                metrics[key] = []
                        for key in batch_metrics:
                            metrics[key] += [batch_metrics[key]]
                        pbar.set_postfix(MDAPE=np.mean(metrics['dev:MDAPE']))
                self.write_dev_metric_to_tensorboard(epoch, metrics)

            # save checkpoint
            self.save_checkpoint(epoch)
    def save_checkpoint(self, epoch):
        # save checkpoint
        torch.save({
            'iteration': self.iteration,
            # 'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            }, self.get_checkpoint_path())
        print('[+] checkpoint saved')

        os.system('cp {} {}'.format(self.get_checkpoint_path(), self.get_checkpoint_path().replace('.ckpt', f'_epoch_{epoch}.ckpt')))
        print('[+] checkpoint copied')

In [15]:
trainer = Trainer()
trainer.fit()

LSTM(
  (lstm): LSTM(4, 64, num_layers=2, batch_first=True, bidirectional=True)
  (lstm1): LSTM(64, 128, num_layers=2, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=256, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (tanh): Tanh()
)
Trainable parameters: 745601
Non-trainable parameters: 0


Epoch 1: 100%|██████████| 3387/3387 [01:03<00:00, 53.49it/s, loss=0.298]
Evaluate epoch - dev 1: 100%|██████████| 847/847 [00:06<00:00, 134.94it/s, MDAPE=0.473]


Evaluate epoch:1: MDAPE=0.47 MAPE=0.66,  MAE=23.03, RMSE=28.37, R2=-2.46
[+] checkpoint saved
[+] checkpoint copied


Epoch 2: 100%|██████████| 3387/3387 [01:00<00:00, 55.98it/s, loss=0.226]
Evaluate epoch - dev 2: 100%|██████████| 847/847 [00:04<00:00, 202.73it/s, MDAPE=0.452]


Evaluate epoch:2: MDAPE=0.45 MAPE=0.62,  MAE=22.24, RMSE=27.55, R2=-2.15
[+] checkpoint saved
[+] checkpoint copied


Epoch 3: 100%|██████████| 3387/3387 [00:58<00:00, 57.67it/s, loss=0.237]
Evaluate epoch - dev 3: 100%|██████████| 847/847 [00:04<00:00, 195.28it/s, MDAPE=0.453]


Evaluate epoch:3: MDAPE=0.45 MAPE=0.60,  MAE=21.90, RMSE=27.09, R2=-2.04
[+] checkpoint saved
[+] checkpoint copied


Epoch 4: 100%|██████████| 3387/3387 [00:59<00:00, 56.95it/s, loss=0.347]
Evaluate epoch - dev 4: 100%|██████████| 847/847 [00:04<00:00, 200.66it/s, MDAPE=0.441]


Evaluate epoch:4: MDAPE=0.44 MAPE=0.59,  MAE=21.41, RMSE=26.57, R2=-1.91
[+] checkpoint saved
[+] checkpoint copied
