In [17]:
from torch import nn
import torch
import pandas as pd
import numpy as np
import csv
import random
import sklearn.model_selection
from torch.utils.tensorboard.writer import SummaryWriter
import tqdm
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import os
torch.backends.cudnn.benchmark = True

In [30]:
df = pd.read_csv('dataset/data_train/location_input.csv')
lo_mean = df['longitude'].mean()
lo_std = df['longitude'].std()
la_mean = df['latitude'].mean()
la_std = df['latitude'].std()
def location(filename):
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        i = 0
        station = {}
        for row in csv_reader:
            if i>0:
                station[row[1]] = [(np.float64(row[2])-lo_mean)/lo_std,  (np.float64(row[3])-la_mean)/la_std]
            i +=1

    return station 
# def location(filename):
#     with open(filename) as csv_file:
#         csv_reader = csv.reader(csv_file, delimiter=',')
#         i = 0
#         station = {}
#         for row in csv_reader:
#             if i>0:
#                 station[row[1]] = [(np.float64(row[2])),  (np.float64(row[3]))]
#             i +=1

#     return station 

def read_csv(filename):
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        data = []
        temp = []
        i = 0
        for row in csv_reader:
            if i>0:
                if row[2] =='' or row[3]=='' or row[4]=='':
                    if temp != []:
                        data.append(temp)
                    temp = []
                    continue
                temp.append([np.float64(row[2]),np.float64(row[3]),np.float64(row[4]),np.float32(row[1].split()[1].split(':')[0]),np.float64(row[0])])
            i +=1
        if len(temp)>0:
            data.append(temp)
    return data


def read_out(filename):
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        data = []
        temp = []
        i = 0
        for row in csv_reader:
            if i>0:
                if row[2] =='':
                    if temp != []:
                        data.append(temp)
                    temp = []
                    continue
                temp.append([np.float64(row[2]),np.float64(row[0])])
            i +=1
    return data

def pre_process():
    path_in = [os.path.join(f'dataset/data_train/input', f) for f in os.listdir(f'dataset/data_train/input')]
    path_out = [os.path.join(f'dataset/data_train/output', f) for f in os.listdir(f'dataset/data_train/output')]
    input_location = location('dataset/data_train/location_input.csv')
    output_location = location('dataset/data_train/location_output.csv')
    inp = {}
    inp_local = {}
    for path1 in path_in:
        locate = os.path.basename(path1).split('.')[0]
        raw = read_csv(path1)
        for period in raw:
            if len(period)>=(24+24):
                for i in range(len(period)-24-23):
                    key  = str(int(period[i][-1]))
                    if key not in inp.keys():
                        inp[key] = []
                        inp_local[key] = []
                    inp[key] += [np.stack(period[i:i+24+24])]
                    inp_local[key] += [input_location[locate]]
                    
    out = {}
    out_local = {}
    for path1 in path_out:
        locate = os.path.basename(path1).split('.')[0]
        raw = read_out(path1)
        for period in raw:
            if len(period)>=(24+24):
                for i in range(len(period)-24-23):
                    key  = str(int(period[i][-1]))
                    if key not in out.keys():
                        out[key] = []
                        out_local[key] = []
                    out[key] += [np.stack(period[i+24:i+24+24])]
                    out_local[key] += [output_location[locate]]            
    data = []
    temp = 0
    input_temp = 0
    input_local_temp = 0
    output_temp = 0
    output_local_temp = 0
    for state in out.keys():
        if state in inp.keys():
            input_temp = np.stack(inp[state])[:,:,:-1]
            input_local_temp = np.stack(inp_local[state])
            for station in range(len(out[state])):
                output_temp = np.stack(out[state][station])[:,0]
                output_local_temp = np.stack(out_local[state][station])         
                data += [[input_temp, input_local_temp, output_temp, output_local_temp]]

    
    return data


In [36]:

class AirDataset(Dataset):

    def __init__(self, mode ='train'):
        self.mode = mode
        self.data = []
        self.delay = 36
        self.future = 24
        self.pre_process()
        self.train = 0
        self.test = 0

        self.split_data()
        
            
    def __len__(self):
        if self.mode =='train':
            return len(self.train)
        else:
            return len(self.test)
    def pre_process(self):
        path_in = [os.path.join(f'dataset/data_train/input', f) for f in os.listdir(f'dataset/data_train/input')]
        path_out = [os.path.join(f'dataset/data_train/output', f) for f in os.listdir(f'dataset/data_train/output')]
        input_location = location('dataset/data_train/location_input.csv')
        output_location = location('dataset/data_train/location_output.csv')
        inp = {}
        inp_local = {}
        for path1 in path_in:
            locate = os.path.basename(path1).split('.')[0]
            raw = read_csv(path1)
            for period in raw:
                if len(period)>=(self.delay):
                    for i in range(len(period)-self.delay+1):
                        key  = str(int(period[i][-1])+self.delay)
                        if key not in inp.keys():
                            inp[key] = []
                            inp_local[key] = []
                        inp[key] += [np.stack(period[i:i+self.delay])]
                        inp_local[key] += [input_location[locate]]
                        
        out = {}
        out_local = {}
        for path1 in path_out:
            locate = os.path.basename(path1).split('.')[0]
            raw = read_out(path1)
            for period in raw:
                if len(period)>=(self.future):
                    for i in range(len(period)-self.future-+1):
                        key  = str(int(period[i][-1]))
                        if key not in out.keys():
                            out[key] = []
                            out_local[key] = []
                        out[key] += [np.stack(period[i:i++self.future])]
                        out_local[key] += [output_location[locate]]            
        data = []
        temp = 0
        input_temp = 0
        input_local_temp = 0
        output_temp = 0
        output_local_temp = 0
        for state in out.keys():
            if state in inp.keys():
                input_temp = np.stack(inp[state])[:,:,:-1]
                input_local_temp = np.stack(inp_local[state])
                for station in range(len(out[state])):
                    output_temp = np.stack(out[state][station])[:,0]
                    output_local_temp = np.stack(out_local[state][station])         
                    data += [[input_temp, input_local_temp, output_temp, output_local_temp]]
        self.data = data
    def split_data(self):
        random.Random(0).shuffle(self.data)
        self.train, self.test = sklearn.model_selection.train_test_split(self.data, test_size=0.2, shuffle=True)

    def __getitem__(self, idx):
        if self.mode == 'train':
            [input_temp, input_local_temp, output_temp, output_local_temp] = self.train[idx]
            input_temp = torch.FloatTensor(input_temp[:,0:self.delay,:])
            input_local_temp = torch.FloatTensor(input_local_temp)
            output_temp = torch.FloatTensor(output_temp)
            output_local_temp = torch.FloatTensor(output_local_temp)
        if self.mode == 'test':
            [input_temp, input_local_temp, output_temp, output_local_temp] = self.test[idx]
            input_temp = torch.FloatTensor(input_temp[:,0:self.delay,:])
            input_local_temp = torch.FloatTensor(input_local_temp)
            output_temp = torch.FloatTensor(output_temp)
            output_local_temp = torch.FloatTensor(output_local_temp)
        return input_temp, input_local_temp, output_temp, output_local_temp

def get_loader():
    train_loader  = DataLoader(dataset=AirDataset(mode='train'), 
                               drop_last=True, 
                               shuffle=True,
                               batch_size=1)
    
    dev_loader  = DataLoader(dataset=AirDataset(mode='test'), 
                             drop_last=True, 
                             shuffle=False,
                             batch_size=1)

    return train_loader, dev_loader
# train_loader, dev_loader = get_loader()


In [37]:

class LSTM(nn.Module):

    def __init__(self):
        super().__init__()
        self.lstm  = nn.LSTM(4, 64, 2,bidirectional = True, batch_first=True)
        self.lstm1  = nn.LSTM(64, 128, 2,bidirectional = True, batch_first=True)
        self.fc1 = nn.Linear(256, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

        self.delay = 24
        self.future = 24
    def forward(self, x):
        _, (h, c) = self.lstm(x)
        a = h[-1,:,:]
        a = a.unsqueeze(1).repeat(1, self.future, 1)
        a, (hidden_state, cell_state) = self.lstm1(a)
        # a = x.reshape((-1, self.seq_len, self.hidden_size))
        a = self.fc1(a)
        a = self.tanh(a)
        a = self.fc2(a)
        a = x[:,-self.future:,0]+a[:,:,0]
        a = nn.ReLU()(a)
        return a

    
model = LSTM()
checkpoint = torch.load('ckpt/predict_24/checkpoints/predict24_epoch_4.ckpt', map_location='cuda:0')

model.load_state_dict(checkpoint['model_state_dict'])

class REG(nn.Module):

    def __init__(self, bias=True):
        super(REG, self).__init__()
        self.predict24 = model.requires_grad_(False)
        self.adj_transform1 = nn.Linear(4, 10)
        self.adj_transform2 = nn.Linear(10, 1)
        self.linear1 = nn.Linear(24,100)
        self.linear2 = nn.Linear(100,24)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
    def forward(self, x, x_l, y_l, return_adj=False):
        a = self.predict24(x)
        S, _ = a.shape
        
        # construct adjacency matrix
        y_l = y_l.expand([S,2])
        adj = torch.cat((x_l, y_l), dim=-1)
        
        # non-linear function
        adj = self.adj_transform1(adj)
        adj = self.tanh(adj)
        adj = self.adj_transform2(adj)
        out = torch.mm(a.transpose(-1,0), adj)
        
        # activation
        out = out.transpose(-1,0)
        out = out.relu()
        if return_adj:
            return out, adj
        return out
    def compute_loss(self, x, x_l, y_l, desire):
        output = self(x, x_l, y_l)
        loss = torch.mean(torch.abs((desire-output)/desire))
        return loss 


In [38]:
def get_ckpt_folder():
    folder = f'ckpt/model/checkpoints'
    if not os.path.exists(folder):
        os.makedirs(folder)
    return folder

def get_logs_folder():
    return get_ckpt_folder().replace('checkpoints', 'logs')

def compute_mdape(y, y_hat):
    return np.median(torch.abs((y-y_hat)/y))

def compute_mape(y, y_hat):
    return torch.mean(torch.abs((y-y_hat)/y))

def compute_mae(y, y_hat):
    return torch.mean(torch.abs(y-y_hat))

def compute_rmse(y, y_hat):
    return torch.sqrt(torch.mean(torch.pow(y-y_hat,2)))
        
def compute_r2(y, y_hat):
    return 1- (torch.sum(torch.pow(y-y_hat,2)))/(torch.sum(torch.pow(y-torch.mean(y),2)))
        
def compute_metrics(x, y, y_hat):
    # initialize metrics
    metrics = {}
    # MDAPE
    metrics['MDAPE'] = compute_mdape(y, y_hat)
    # MAPE
    metrics['MAPE'] = compute_mape(y, y_hat, )
    # MAE
    metrics['MAE'] = compute_mae(y, y_hat, )
    # RMSE
    metrics['RMSE'] = compute_rmse(y, y_hat, )
    # R2
    metrics['R2'] = compute_r2(y, y_hat, )
    return metrics

In [39]:
class Trainer:
    
    def __init__(self):

        # clear cache
        self.clear_cache()
        # get loader
        self.train_loader, self.dev_loader = get_loader()
        # get model
        self.model = REG().to('cuda:0')
        # get optimizer
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=1e-4)
        # get writer
        self.writer = SummaryWriter(get_logs_folder())
        # get iteration
        self.iteration = 0
        # get epoch
        self.num_epoch = 10
        self.limit_train_batch = 5000
        self.log_iter = 100
        self.eval_iter = 1
    def train_step(self, batch, batch_idx):
        # extract data
        x, x_l, y, y_l = batch
        x = x.to('cuda:0').squeeze(0)
        y = y.to('cuda:0').squeeze(0)
        x_l = x_l.to('cuda:0').squeeze(0)
        y_l = y_l.to('cuda:0').squeeze(0)
        self.optimizer.zero_grad()
        # compute loss
        loss_dict = {'loss': self.model.compute_loss(x, x_l, y_l, y)}
        # backward and update weight
        loss_dict['loss'].backward()
        # clip grad norm
        # self.clip_grad_norm()
        self.optimizer.step()
        return loss_dict

    def validation_step(self, batch, batch_idx, mode='dev'):
        with torch.no_grad():
            # extract data
            x, x_l, y, y_l = batch
            x = x.to('cuda:0')[0]
            x_l = x_l.to('cuda:0')[0]
            y_l = y_l.to('cuda:0')[0]
            # compute loss
            y_hat_device = self.model(x, x_l, y_l)
            y_hat = y_hat_device.detach().cpu().numpy()

            # compute metrics
            metrics = compute_metrics(x, y, y_hat)
        cleaned_metrics = {}
        for key in metrics:
            cleaned_metrics[f'{mode}:{key}'] = metrics[key]
        return cleaned_metrics

    def limit_train_batch_hook(self, batch_idx):
        if self.limit_train_batch > 0:
            if batch_idx > self.limit_train_batch:
                return True
        return False

    def limit_val_batch_hook(self, batch_idx):
        if self.limit_val_batch > 0:
            if batch_idx > self.limit_val_batch:
                return True
        return False

    def get_checkpoint_path(self):
        ckpt_folder = get_ckpt_folder()
        ckpt_name = 'mlp'
        return os.path.join(ckpt_folder, ckpt_name) + '.ckpt'

    def load_checkpoint(self):
        # self.epoch = 0
        path = self.get_checkpoint_path()
        if os.path.exists(path):
            checkpoint = torch.load(path)
            print('[+] checkpoint loaded:', path)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            # self.epoch = checkpoint['epoch']
            self.iteration = checkpoint['iteration']

    def clear_cache(self):
        ckpt_folder = get_ckpt_folder()
        logs_folder = get_logs_folder()
        if self.clear_cache:
            os.system(f'rm -rf {ckpt_folder} {logs_folder}')

    def write_dev_metric_to_tensorboard(self, epoch, metrics):
        # compute average
        for key in metrics:
            metrics[key] = np.mean(metrics[key])
        # display
        print('Evaluate epoch:{}: MDAPE={:0.2f} MAPE={:0.2f},  MAE={:0.2f}, RMSE={:0.2f}, R2={:0.2f}' \
            .format(epoch, metrics['dev:MDAPE'], metrics['dev:MAPE'], metrics['dev:MAE'], metrics['dev:RMSE'],metrics['dev:R2']))
        # write to tensorboard
        self.writer.add_scalars('validation metric', metrics, epoch)

    def write_train_metric_to_tensorboard(self, loss_dicts):
        for key in loss_dicts:
            loss_dicts[key] = np.mean(loss_dicts[key])
        self.writer.add_scalars('training metric', loss_dicts, self.iteration)

    def fit(self):
        # load checkpoint
        print(self.model)
        print('Trainable parameters:', sum(p.numel() for p in self.model.parameters() if p.requires_grad))
        print('Non-trainable parameters:', sum(p.numel() for p in self.model.parameters() if not p.requires_grad))
        self.load_checkpoint()
        for epoch in range(1,self.num_epoch):
            self.train_loader, self.dev_loader = get_loader()
            # train
            if epoch > 3:
                self.model.predict24.requires_grad_(True)
                self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=1e-5)
            loss_dicts = None
            self.model.train()
            with tqdm.tqdm(self.train_loader, unit="it") as pbar:
                pbar.set_description(f'Epoch {epoch}')
                for batch_idx, batch in enumerate(pbar):

                    # perform training step
                    loss_dict = self.train_step(batch, batch_idx)
                    if loss_dicts is None:
                        loss_dicts = {}
                        for key in loss_dict:
                            loss_dicts[key] = []
                    for key in loss_dict:
                        loss_dicts[key].append(float(loss_dict[key].detach().cpu()))

                    # limit train batch hook
                    if self.limit_train_batch_hook(batch_idx):
                        break

                    # set postfix
                    kwargs = {}
                    for key in loss_dict:
                        kwargs[key] = float(loss_dict[key].detach().cpu())
                    pbar.set_postfix(**kwargs)

                    # log
                    # self.epoch = epoch
                    self.iteration += 1
                    if self.iteration % self.log_iter == 0:
                        self.write_train_metric_to_tensorboard(loss_dicts)
                        loss_dicts = None

            ##########################################################################################
            # evaluate
            if (epoch+1) % self.eval_iter == 0:
                self.model.eval()
                metrics = {}
                with tqdm.tqdm(self.dev_loader, unit="it") as pbar:
                    pbar.set_description(f'Evaluate epoch - dev {epoch}')
                    for batch_idx, batch in enumerate(pbar):
                        # validate
                        batch_metrics = self.validation_step(batch, batch_idx, mode='dev')
                        # accumulate valilation metrics
                        for key in batch_metrics:
                            if key not in metrics.keys():
                                metrics[key] = []
                        for key in batch_metrics:
                            metrics[key] += [batch_metrics[key]]
                        pbar.set_postfix(MDAPE=np.mean(metrics['dev:MDAPE']))
                self.write_dev_metric_to_tensorboard(epoch, metrics)
            self.save_checkpoint(epoch)
    def save_checkpoint(self, epoch):
        # save checkpoint
        torch.save({
            'iteration': self.iteration,
            # 'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            }, self.get_checkpoint_path())
        print('[+] checkpoint saved')

        os.system('cp {} {}'.format(self.get_checkpoint_path(), self.get_checkpoint_path().replace('.ckpt', f'_epoch_{epoch}.ckpt')))
        print('[+] checkpoint copied')

In [40]:
trainer = Trainer()
trainer.fit()

REG(
  (predict24): LSTM(
    (lstm): LSTM(4, 64, num_layers=2, batch_first=True, bidirectional=True)
    (lstm1): LSTM(64, 128, num_layers=2, batch_first=True, bidirectional=True)
    (fc1): Linear(in_features=256, out_features=64, bias=True)
    (fc2): Linear(in_features=64, out_features=1, bias=True)
    (relu): ReLU()
    (tanh): Tanh()
  )
  (adj_transform1): Linear(in_features=4, out_features=10, bias=True)
  (adj_transform2): Linear(in_features=10, out_features=1, bias=True)
  (linear1): Linear(in_features=24, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=24, bias=True)
  (sigmoid): Sigmoid()
  (tanh): Tanh()
)
Trainable parameters: 4985
Non-trainable parameters: 745601


Epoch 1:  20%|██        | 5001/24724 [00:33<02:13, 148.00it/s, loss=0.195] 
Evaluate epoch - dev 1: 100%|██████████| 6181/6181 [00:35<00:00, 174.90it/s, MDAPE=0.494]


Evaluate epoch:1: MDAPE=0.49 MAPE=0.57,  MAE=25.37, RMSE=29.74, R2=-3.21
[+] checkpoint saved
[+] checkpoint copied


Epoch 2:  20%|██        | 5001/24724 [00:29<01:57, 168.42it/s, loss=0.314] 
Evaluate epoch - dev 2: 100%|██████████| 6181/6181 [00:32<00:00, 187.59it/s, MDAPE=0.501]


Evaluate epoch:2: MDAPE=0.50 MAPE=0.57,  MAE=26.43, RMSE=30.71, R2=-3.51
[+] checkpoint saved
[+] checkpoint copied


Epoch 3:  20%|██        | 5001/24724 [00:33<02:13, 147.26it/s, loss=0.532] 
Evaluate epoch - dev 3: 100%|██████████| 6181/6181 [00:33<00:00, 183.94it/s, MDAPE=0.497]


Evaluate epoch:3: MDAPE=0.50 MAPE=0.57,  MAE=25.68, RMSE=30.01, R2=-3.27
[+] checkpoint saved
[+] checkpoint copied


Epoch 4:  20%|██        | 5001/24724 [01:32<06:05, 54.00it/s, loss=0.275]
Evaluate epoch - dev 4: 100%|██████████| 6181/6181 [00:40<00:00, 153.20it/s, MDAPE=0.453]


Evaluate epoch:4: MDAPE=0.45 MAPE=0.52,  MAE=22.19, RMSE=26.58, R2=-2.78
[+] checkpoint saved
[+] checkpoint copied


Epoch 5:  20%|██        | 5001/24724 [01:37<06:25, 51.13it/s, loss=0.404] 
Evaluate epoch - dev 5: 100%|██████████| 6181/6181 [00:39<00:00, 154.97it/s, MDAPE=0.441]


Evaluate epoch:5: MDAPE=0.44 MAPE=0.51,  MAE=21.77, RMSE=26.23, R2=-2.71
[+] checkpoint saved
[+] checkpoint copied


Epoch 6:  20%|██        | 5001/24724 [01:38<06:29, 50.64it/s, loss=0.509]
Evaluate epoch - dev 6: 100%|██████████| 6181/6181 [00:40<00:00, 153.55it/s, MDAPE=0.439]


Evaluate epoch:6: MDAPE=0.44 MAPE=0.50,  MAE=21.53, RMSE=25.96, R2=-2.68
[+] checkpoint saved
[+] checkpoint copied


Epoch 7:  20%|██        | 5001/24724 [01:39<06:31, 50.32it/s, loss=0.294] 
Evaluate epoch - dev 7: 100%|██████████| 6181/6181 [00:40<00:00, 151.33it/s, MDAPE=0.443]


Evaluate epoch:7: MDAPE=0.44 MAPE=0.50,  MAE=21.74, RMSE=26.10, R2=-2.91
[+] checkpoint saved
[+] checkpoint copied


Epoch 8:  20%|██        | 5001/24724 [01:39<06:33, 50.07it/s, loss=0.664] 
Evaluate epoch - dev 8: 100%|██████████| 6181/6181 [00:40<00:00, 152.81it/s, MDAPE=0.439]


Evaluate epoch:8: MDAPE=0.44 MAPE=0.49,  MAE=22.13, RMSE=26.52, R2=-2.94
[+] checkpoint saved
[+] checkpoint copied


Epoch 9:  20%|██        | 5001/24724 [01:39<06:32, 50.30it/s, loss=0.474] 
Evaluate epoch - dev 9: 100%|██████████| 6181/6181 [00:40<00:00, 151.51it/s, MDAPE=0.431]


Evaluate epoch:9: MDAPE=0.43 MAPE=0.49,  MAE=21.71, RMSE=26.08, R2=-2.72
[+] checkpoint saved
[+] checkpoint copied


In [29]:
# tensorboard dev upload --logdir /home/dat/aqp/ckpt/logs --name 'lstm-parallel' 

In [30]:
# checkpoint = torch.load('/home/dat/aqp/ckpt/checkpoints/mlp_epoch_19.ckpt')
# model.load_state_dict(checkpoint['model_state_dict'])
    