In [18]:
# Make the project folder the working directory so the relative paths used by
# later cells (e.g. 'test_data', './tps_df.pkl') resolve against it.
# NOTE(review): the path is hard-coded to one Google Drive layout and presumably
# requires Drive to be mounted first — adjust for your environment.
%cd '/content/drive/MyDrive/dev/aiac/TRBAIAC-TRAFFIC'

/content/drive/MyDrive/dev/aiac/TRBAIAC-TRAFFIC


In [21]:
# Import Packages
import torch, os
import torch.nn as nn
import torch.utils.data as utils
import torch.nn.functional as F
import numpy as np
import pandas as pd
import time
from torch.autograd import Variable
import math
import copy

In [26]:
# Download training, testing and shapefile data

# Fetch the three archives from Google Drive by file id using the `gdown` CLI.
# NOTE(review): this first id presumably resolves to train.zip — its download
# log is not part of the saved cell output; confirm.
!gdown 'https://drive.google.com/uc?id=1E_qqe7kfvfApM4hCOBMPoXhyEPyrUJkN'
# Saved as test.zip according to the logged output.
!gdown 'https://drive.google.com/uc?id=1j-3-lHegY--FDHKZvz86HBtilV2dsP-i'
# Saved as Geoshapefile.zip according to the logged output.
!gdown 'https://drive.google.com/uc?id=1AhAb3yEVxcHSaz0ADlyyYwNH2Gqmr-um'
# Extract each archive into the current working directory.
!unzip 'train.zip' -d '.'
!unzip 'test.zip' -d '.'
!unzip 'Geoshapefile.zip' -d '.'

Downloading...
From: https://drive.google.com/uc?id=1j-3-lHegY--FDHKZvz86HBtilV2dsP-i
To: /content/drive/MyDrive/dev/aiac/TRBAIAC-TRAFFIC/test.zip
100% 459k/459k [00:00<00:00, 73.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1AhAb3yEVxcHSaz0ADlyyYwNH2Gqmr-um
To: /content/drive/MyDrive/dev/aiac/TRBAIAC-TRAFFIC/Geoshapefile.zip
100% 53.4k/53.4k [00:00<00:00, 61.0MB/s]
Archive:  test.zip
  inflating: ./test_data/tps_5.pkl   
  inflating: ./test_data/tps_2.pkl   
  inflating: ./test_data/tps_6.pkl   
  inflating: ./test_data/tps_4.pkl   
  inflating: ./test_data/tps_15.pkl  
  inflating: ./test_data/tps_7.pkl   
  inflating: ./test_data/tps_1.pkl   
  inflating: ./test_data/tps_12.pkl  
  inflating: ./test_data/tps_8.pkl   
  inflating: ./test_data/tps_3.pkl   
  inflating: ./test_data/tps_9.pkl   
  inflating: ./test_data/tps_13.pkl  
  inflating: ./test_data/tps_11.pkl  
  inflating: ./test_data/tps_10.pkl  
  inflating: ./test_data/tps_14.pkl  
Archive:  Geoshapefile.zip
  

In [2]:
# Data Preparation
def reshape_data(rawdata):
    """Pivot long-format records into one row per time stamp, one column per segment.

    Args:
        rawdata: DataFrame with at least the columns ``time``, ``segmentID``
            and ``TrafficIndex_GP``.

    Returns:
        DataFrame whose first column ``TIME`` lists the unique time stamps in
        order of first appearance, followed by one column per unique segment
        (named ``str(segmentID)``, also in order of first appearance). A
        segment without a record at some time stamp gets NaN there; when a
        segment has several records for the same time stamp the first wins.
    """
    wide = pd.DataFrame()
    wide['TIME'] = rawdata.time.unique()

    for segment_id in rawdata.segmentID.unique():
        belongs_to_segment = rawdata['segmentID'] == segment_id
        segment_rows = rawdata.loc[belongs_to_segment, ['time', 'TrafficIndex_GP']]
        # Keep only the first record per time stamp for this segment.
        segment_rows = segment_rows.drop_duplicates(subset=['time'])
        segment_series = segment_rows.set_index('time')['TrafficIndex_GP'].rename(str(segment_id))
        # Left join on the TIME column keeps the row order of `wide`.
        wide = wide.join(segment_series, on='TIME')

    return wide

def load_data(filepath, start_time, end_time, freq):
    """Load a pickled long-format table and align it to a regular time grid.

    Args:
        filepath: Path of the pickle file read with ``pd.read_pickle``.
            NOTE(review): unpickling executes arbitrary code — only load
            trusted files.
        start_time: First time stamp of the grid (passed to ``pd.date_range``).
        end_time: Last time stamp of the grid (passed to ``pd.date_range``).
        freq: Grid frequency string (passed to ``pd.date_range``).

    Returns:
        DataFrame indexed by ``TIME`` (strings formatted as
        ``%Y-%m-%d %H:%M:%S``) with one column per segment. Grid time stamps
        that are absent from the file yield rows of NaN.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    segment_matrix = reshape_data(pd.read_pickle(filepath))
    # Both sides are normalised to the same string format so the join keys match.
    segment_matrix['TIME'] = pd.to_datetime(segment_matrix['TIME']).dt.strftime(timestamp_format)

    time_grid = pd.DataFrame(pd.date_range(start=start_time, end=end_time, freq=freq))
    time_grid.columns = ['TIME']
    time_grid['TIME'] = pd.to_datetime(time_grid['TIME']).dt.strftime(timestamp_format)

    aligned = time_grid.set_index('TIME').join(segment_matrix.set_index('TIME'))
    return aligned

def prepare_dataloader(matrix, n_col, seq_len=36, pred_len=12, BATCH_SIZE=32, device='cpu'):
    """Cut a time-indexed matrix into sliding windows and wrap them in DataLoaders.

    Each sample consists of ``seq_len`` consecutive rows (all columns) as input
    and the following ``pred_len`` rows restricted to the first ``n_col``
    columns as label. Samples are split chronologically 80% / 10% / 10% into
    train / validation / test.

    Args:
        matrix: DataFrame with one row per time step and one column per feature.
        n_col: Number of leading columns used as prediction targets.
        seq_len: Number of input time steps per sample.
        pred_len: Number of label time steps per sample.
        BATCH_SIZE: Batch size of all three loaders.
        device: Device the tensors are moved to before being wrapped.

    Returns:
        Tuple ``(train_dataloader, valid_dataloader, test_dataloader)``. The
        train and validation loaders shuffle and drop the last incomplete
        batch; the test loader does neither.

    Raises:
        ValueError: If no window free of missing values can be built.
    """
    speed_matrix = matrix.to_numpy(dtype=float)
    n_time = speed_matrix.shape[0]

    data_set = []
    label_set = []

    # "+ 1" keeps the final window whose label ends exactly on the last row.
    for start in range(n_time - seq_len - pred_len + 1):
        window = speed_matrix[start: start + seq_len]
        label_window = speed_matrix[start + seq_len: start + seq_len + pred_len, :n_col]

        # The whole input window is fed to the model, so every cell of it has
        # to be present. (The previous `data[:n_col]` sliced rows rather than
        # columns and could let NaNs through when n_col < seq_len.)
        if np.isnan(window).any() or np.isnan(label_window).any():
            continue

        data_set.append(window)
        label_set.append(label_window)

    if not data_set:
        raise ValueError(
            'No complete window found: need at least seq_len + pred_len = {} consecutive '
            'rows without missing values.'.format(seq_len + pred_len))

    data = np.stack(data_set)
    label = np.stack(label_set)

    # Chronological split; no shuffling across the split boundaries.
    train_ind = int(len(data) * 0.8)
    valid_ind = int(len(data) * 0.9)

    def _to_dataset(inputs, labels):
        """Convert a pair of numpy arrays to a float32 TensorDataset on `device`."""
        return utils.TensorDataset(
            torch.as_tensor(inputs, dtype=torch.float32).to(device),
            torch.as_tensor(labels, dtype=torch.float32).to(device),
        )

    train_dataset = _to_dataset(data[:train_ind], label[:train_ind])
    valid_dataset = _to_dataset(data[train_ind:valid_ind], label[train_ind:valid_ind])
    test_dataset = _to_dataset(data[valid_ind:], label[valid_ind:])

    train_dataloader = utils.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

    return train_dataloader, valid_dataloader, test_dataloader

In [3]:
'''
Adapted from https://github.com/gordicaleksa/pytorch-original-transformer/blob/main/models/definitions/transformer_model.py
'''
# Encoder Architecture
class Encoder(nn.Module):
    """Stack of encoder layers followed by a final layer normalisation."""

    def __init__(self, encoder_layer, number_of_layers):
        """Clone ``encoder_layer`` ``number_of_layers`` times.

        Args:
            encoder_layer: Prototype ``EncoderLayer``; every layer of the stack
                is a deep copy of it.
            number_of_layers: Depth of the stack.
        """
        super().__init__()
        assert isinstance(encoder_layer, EncoderLayer), f'Expected EncoderLayer got {type(encoder_layer)}.'
        self.encoder_layers = get_clones(encoder_layer, number_of_layers)
        self.norm = nn.LayerNorm(encoder_layer.model_dimension)

    def forward(self, src_embeddings_batch, src_mask):
        """Run the embeddings through every layer in order, then normalise."""
        hidden = src_embeddings_batch
        for layer in self.encoder_layers:
            hidden = layer(hidden, src_mask)
        normalised = self.norm(hidden)
        return normalised

class EncoderLayer(nn.Module):
    """One encoder layer: self-attention and a feed-forward net, each wrapped in ``SublayerLogic``."""

    def __init__(self, model_dimension, dropout_probability, multi_headed_attention, pointwise_net):
        """Store the attention and feed-forward modules and build their two wrappers.

        Args:
            model_dimension: Width of the representations.
            dropout_probability: Dropout rate used by the sublayer wrappers.
            multi_headed_attention: Attention module used for self-attention.
            pointwise_net: Feed-forward module applied after the attention.
        """
        super().__init__()
        # One wrapper for the attention sublayer, one for the feed-forward sublayer.
        sublayer_count = 2
        sublayer_prototype = SublayerLogic(model_dimension, dropout_probability)
        self.sublayers = get_clones(sublayer_prototype, sublayer_count)

        self.multi_headed_attention = multi_headed_attention
        self.pointwise_net = pointwise_net

        self.model_dimension = model_dimension

    def forward(self, src_representations_batch, src_mask):
        """Apply self-attention followed by the feed-forward net."""

        def self_attend(normed_src):
            # Query, key and value all come from the same (normalised) source batch.
            return self.multi_headed_attention(query=normed_src, key=normed_src, value=normed_src, mask=src_mask)

        attention_wrapper, feed_forward_wrapper = self.sublayers
        attended = attention_wrapper(src_representations_batch, self_attend)
        return feed_forward_wrapper(attended, self.pointwise_net)

# Decoder Architecture
class Decoder(nn.Module):
    """Stack of decoder layers followed by a final layer normalisation."""

    def __init__(self, decoder_layer, number_of_layers):
        """Clone ``decoder_layer`` ``number_of_layers`` times.

        Args:
            decoder_layer: Prototype ``DecoderLayer``; every layer of the stack
                is a deep copy of it.
            number_of_layers: Depth of the stack.
        """
        super().__init__()
        assert isinstance(decoder_layer, DecoderLayer), f'Expected DecoderLayer got {type(decoder_layer)}.'

        self.decoder_layers = get_clones(decoder_layer, number_of_layers)
        self.norm = nn.LayerNorm(decoder_layer.model_dimension)

    def forward(self, trg_embeddings_batch, src_representations_batch, trg_mask, src_mask):
        """Run the target embeddings through every layer in order, then normalise.

        Every layer receives the same encoder output ``src_representations_batch``.
        """
        hidden = trg_embeddings_batch
        for layer in self.decoder_layers:
            hidden = layer(hidden, src_representations_batch, trg_mask, src_mask)
        normalised = self.norm(hidden)
        return normalised

class DecoderLayer(nn.Module):
    """One decoder layer: target self-attention, source attention and a feed-forward net."""

    def __init__(self, model_dimension, dropout_probability, multi_headed_attention, pointwise_net):
        """Build the three sublayer wrappers and two independent attention modules.

        Args:
            model_dimension: Width of the representations.
            dropout_probability: Dropout rate used by the sublayer wrappers.
            multi_headed_attention: Prototype attention module; it is deep-copied
                twice so self-attention and source attention get separate weights.
            pointwise_net: Feed-forward module applied last.
        """
        super().__init__()
        # Wrappers for: target self-attention, source attention, feed-forward.
        sublayer_count = 3
        sublayer_prototype = SublayerLogic(model_dimension, dropout_probability)
        self.sublayers = get_clones(sublayer_prototype, sublayer_count)

        self.trg_multi_headed_attention = copy.deepcopy(multi_headed_attention)
        self.src_multi_headed_attention = copy.deepcopy(multi_headed_attention)
        self.pointwise_net = pointwise_net

        self.model_dimension = model_dimension

    def forward(self, trg_representations_batch, src_representations_batch, trg_mask, src_mask):
        """Apply the three sublayers in order and return the updated target batch."""
        encoder_output = src_representations_batch

        def attend_to_target(normed_trg):
            return self.trg_multi_headed_attention(query=normed_trg, key=normed_trg, value=normed_trg, mask=trg_mask)

        def attend_to_source(normed_trg):
            # Queries come from the target; keys and values from the encoder output.
            return self.src_multi_headed_attention(query=normed_trg, key=encoder_output, value=encoder_output, mask=src_mask)

        self_attention_wrapper, source_attention_wrapper, feed_forward_wrapper = self.sublayers
        hidden = self_attention_wrapper(trg_representations_batch, attend_to_target)
        hidden = source_attention_wrapper(hidden, attend_to_source)
        hidden = feed_forward_wrapper(hidden, self.pointwise_net)
        return hidden

In [4]:
# Helper Modules
class SublayerLogic(nn.Module):
    """Residual wrapper: ``x + dropout(sublayer(layer_norm(x)))``."""

    def __init__(self, model_dimension, dropout_probability):
        """Create the layer norm (over ``model_dimension``) and the dropout."""
        super().__init__()
        self.norm = nn.LayerNorm(model_dimension)
        self.dropout = nn.Dropout(p=dropout_probability)

    def forward(self, representations_batch, sublayer_module):
        """Normalise the input, apply ``sublayer_module``, drop out, add the residual.

        Args:
            representations_batch: Input tensor; also used as the residual.
            sublayer_module: Callable taking the normalised tensor.
        """
        normalised = self.norm(representations_batch)
        transformed = sublayer_module(normalised)
        residual_update = self.dropout(transformed)
        return representations_batch + residual_update

class DecoderGenerator(nn.Module):
    """Output head: linear projection to ``vocab_size`` values squashed by a sigmoid."""

    def __init__(self, model_dimension, vocab_size):
        """Create the projection from ``model_dimension`` to ``vocab_size``."""
        super().__init__()
        self.linear = nn.Linear(model_dimension, vocab_size)

    def forward(self, trg_representations_batch):
        """Return the sigmoid of the projected representations (values in (0, 1))."""
        projected = self.linear(trg_representations_batch)
        return torch.sigmoid(projected)

class PositionwiseFeedForwardNet(nn.Module):
    """Two-layer feed-forward net: expand by ``width_mult``, ReLU, dropout, project back."""

    def __init__(self, model_dimension, dropout_probability, width_mult=4):
        """Create both linear layers plus the dropout and activation modules.

        Args:
            model_dimension: Input and output width.
            dropout_probability: Dropout rate applied after the activation.
            width_mult: Hidden width as a multiple of ``model_dimension``.
        """
        super().__init__()
        hidden_dimension = width_mult * model_dimension
        self.linear1 = nn.Linear(model_dimension, hidden_dimension)
        self.linear2 = nn.Linear(hidden_dimension, model_dimension)

        self.dropout = nn.Dropout(p=dropout_probability)
        self.relu = nn.ReLU()

    def forward(self, representations_batch):
        """Apply linear1 -> ReLU -> dropout -> linear2."""
        expanded = self.linear1(representations_batch)
        activated = self.relu(expanded)
        regularised = self.dropout(activated)
        return self.linear2(regularised)

In [5]:
# Multihead Attention
class MultiHeadedAttention(nn.Module):
    """Scaled dot-product attention computed in parallel over several heads."""

    def __init__(self, model_dimension, number_of_heads, dropout_probability, log_attention_weights):
        """Create the projection layers and attention helpers.

        Args:
            model_dimension: Width of the representations; must be divisible
                by ``number_of_heads``.
            number_of_heads: Number of attention heads.
            dropout_probability: Dropout rate applied to the attention weights.
            log_attention_weights: When truthy, the weights of the latest
                forward pass are kept in ``self.attention_weights``.
        """
        super().__init__()
        assert model_dimension % number_of_heads == 0, 'Model dimension must be divisible by the number of heads.'

        self.head_dimension = model_dimension // number_of_heads
        self.number_of_heads = number_of_heads

        # Three independent projections (query, key, value) cloned from one prototype.
        projection_prototype = nn.Linear(model_dimension, model_dimension)
        self.qkv_nets = get_clones(projection_prototype, 3)
        self.out_projection_net = nn.Linear(model_dimension, model_dimension)

        self.attention_dropout = nn.Dropout(p=dropout_probability)
        self.softmax = nn.Softmax(dim=-1)

        self.log_attention_weights = log_attention_weights
        self.attention_weights = None

    def attention(self, query, key, value, mask):
        """Compute softmax(Q K^T / sqrt(head_dimension)) V.

        Positions where ``mask`` is False/0 receive a score of -inf before the
        softmax. Returns the attended values and the (dropped-out) weights.
        """
        scale = math.sqrt(self.head_dimension)
        scores = torch.matmul(query, key.transpose(-2, -1)) / scale
        if mask is not None:
            blocked = mask == 0
            scores.masked_fill_(blocked, float("-inf"))

        weights = self.attention_dropout(self.softmax(scores))
        attended = torch.matmul(weights, value)

        return attended, weights

    def forward(self, query, key, value, mask):
        """Project the inputs, attend per head, merge the heads and project the result."""
        batch_size = query.shape[0]

        def split_into_heads(projected):
            # (batch, seq, model_dim) -> (batch, heads, seq, head_dim)
            return projected.view(batch_size, -1, self.number_of_heads, self.head_dimension).transpose(1, 2)

        query_net, key_net, value_net = self.qkv_nets
        query = split_into_heads(query_net(query))
        key = split_into_heads(key_net(key))
        value = split_into_heads(value_net(value))

        attended, weights = self.attention(query, key, value, mask)

        if self.log_attention_weights:
            self.attention_weights = weights

        # (batch, heads, seq, head_dim) -> (batch, seq, heads * head_dim)
        merged_dimension = self.number_of_heads * self.head_dimension
        merged = attended.transpose(1, 2).reshape(batch_size, -1, merged_dimension)

        return self.out_projection_net(merged)

In [6]:
# Input Modules
class Embedding(nn.Module):
    """Linear embedding of continuous feature vectors into the model dimension.

    Unlike a token-lookup embedding, the input here is a real-valued vector
    per time step which is projected by a linear layer.
    """

    def __init__(self, vocab_size, model_dimension):
        """Create the projection.

        Args:
            vocab_size: Number of input features per time step.
            model_dimension: Width of the produced embeddings.
        """
        super().__init__()
        self.embeddings_table = nn.Linear(vocab_size, model_dimension)
        self.model_dimension = model_dimension

    def forward(self, token_ids_batch):
        """Project the input and scale it by ``sqrt(model_dimension)``.

        Args:
            token_ids_batch: Tensor with three dimensions whose last dimension
                holds the ``vocab_size`` features.

        Raises:
            AssertionError: If the input does not have exactly three dimensions.
        """
        # The message now describes the 3-D shape that the check actually enforces.
        assert token_ids_batch.ndim == 3, \
            f'Expected (batch size, sequence length, number of features), got {token_ids_batch.shape}'
        embeddings = self.embeddings_table(token_ids_batch)
        return embeddings * math.sqrt(self.model_dimension)

class PositionalEncoding(nn.Module):
    """Fuse sinusoidal position codes into the embeddings via concatenation and a linear layer."""

    def __init__(self, model_dimension, dropout_probability, expected_max_sequence_length):
        """Precompute the sinusoidal table and create the fusion layer.

        Args:
            model_dimension: Width of the embeddings and of the position codes.
            dropout_probability: Dropout rate applied to the fused output.
            expected_max_sequence_length: Number of positions in the table.
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout_probability)

        positions = torch.arange(0, expected_max_sequence_length).unsqueeze(1)
        exponents = -torch.arange(0, model_dimension, 2, dtype=torch.float) / model_dimension
        frequencies = torch.pow(10000., exponents)
        angles = positions * frequencies

        # Even columns carry the sine, odd columns the cosine of the same angle.
        table = torch.zeros(expected_max_sequence_length, model_dimension)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)

        # Registered as a buffer: part of the module state but not a trainable parameter.
        self.register_buffer('positional_encodings_table', table)
        self.w2 = nn.Linear(model_dimension * 2, model_dimension)

    def forward(self, embeddings_batch):
        """Concatenate position codes to the embeddings, project back and apply dropout."""
        assert embeddings_batch.ndim == 3 and embeddings_batch.shape[-1] == self.positional_encodings_table.shape[1], \
            f'Expected (batch size, max token sequence length, model dimension) got {embeddings_batch.shape}'

        batch_size, sequence_length = embeddings_batch.shape[0], embeddings_batch.shape[1]
        codes = self.positional_encodings_table[:sequence_length]
        # Broadcast the same codes to every sample of the batch.
        codes = codes.unsqueeze(0).expand(batch_size, -1, -1)
        fused = self.w2(torch.cat((embeddings_batch, codes), dim=2))
        return self.dropout(fused)

def get_clones(module, num_of_deep_copies):
    """Return an ``nn.ModuleList`` holding ``num_of_deep_copies`` independent deep copies of ``module``."""
    clones = nn.ModuleList()
    for _ in range(num_of_deep_copies):
        clones.append(copy.deepcopy(module))
    return clones

In [7]:
# Transformer
class Transformer(nn.Module):
    """Encoder-decoder transformer that forecasts several steps autoregressively.

    The source window is encoded once. The decoder is then seeded with the
    last observed time step and repeatedly fed its own predictions until the
    requested number of steps has been produced.
    """

    def __init__(self, model_dimension, input_length, src_vocab_size, trg_vocab_size, number_of_heads, number_of_layers, dropout_probability, log_attention_weights=False):
        """Assemble embeddings, encoder, decoder and output head.

        Args:
            model_dimension: Width of all internal representations.
            input_length: Number of positions in both positional-encoding tables.
            src_vocab_size: Number of features per source time step.
            trg_vocab_size: Number of features per target time step.
            number_of_heads: Attention heads per attention module.
            number_of_layers: Depth of both the encoder and the decoder.
            dropout_probability: Dropout rate used throughout.
            log_attention_weights: Forwarded to ``MultiHeadedAttention``.
        """
        super().__init__()
        self.src_embedding = Embedding(src_vocab_size, model_dimension)
        self.trg_embedding = Embedding(trg_vocab_size, model_dimension)

        self.src_pos_embedding = PositionalEncoding(model_dimension, dropout_probability, input_length)
        self.trg_pos_embedding = PositionalEncoding(model_dimension, dropout_probability, input_length)

        # Prototypes only: the layers deep-copy what they need, and the stacks
        # deep-copy the layers, so no weights end up shared.
        mha = MultiHeadedAttention(model_dimension, number_of_heads, dropout_probability, log_attention_weights)
        pwn = PositionwiseFeedForwardNet(model_dimension, dropout_probability)
        encoder_layer = EncoderLayer(model_dimension, dropout_probability, mha, pwn)
        decoder_layer = DecoderLayer(model_dimension, dropout_probability, mha, pwn)

        self.encoder = Encoder(encoder_layer, number_of_layers)
        self.decoder = Decoder(decoder_layer, number_of_layers)

        self.decoder_generator = DecoderGenerator(model_dimension, trg_vocab_size)
        self.init_params()

    def init_params(self, default_initialization=False):
        """Xavier-initialise every parameter with more than one dimension.

        Args:
            default_initialization: When True, keep PyTorch's default initialisation.
        """
        if default_initialization:
            return
        for parameter in self.parameters():
            if parameter.dim() > 1:
                nn.init.xavier_uniform_(parameter)

    def forward(self, src_token_ids_batch, trg_token_ids_batch, src_mask, trg_mask, num_pred):
        """Encode the source window and decode ``num_pred`` future steps.

        Args:
            src_token_ids_batch: Source window; the first dimension is the batch.
            trg_token_ids_batch: Tensor whose last time step seeds the decoder.
            src_mask: Mask applied in encoder self-attention and decoder
                source attention.
            trg_mask: Accepted for interface compatibility but not used; a
                fresh causal mask is built for every decoding step.
            num_pred: Number of steps to predict.

        Returns:
            The ``num_pred`` predicted steps concatenated along dimension 1.
        """
        batch_size = src_token_ids_batch.shape[0]
        src_representations_batch = self.encode(src_token_ids_batch, src_mask)
        return self.greedy_decoding(src_representations_batch, trg_token_ids_batch, src_mask, batch_size, num_pred)

    def encode(self, src_token_ids_batch, src_mask):
        """Embed the source, add positional information and run the encoder stack."""
        src_embeddings_batch = self.src_embedding(src_token_ids_batch)
        src_embeddings_batch = self.src_pos_embedding(src_embeddings_batch)
        return self.encoder(src_embeddings_batch, src_mask)

    def decode(self, trg_token_ids_batch, src_representations_batch, trg_mask, src_mask):
        """Run the decoder stack on the target sequence and apply the output head.

        Returns the head's sigmoid outputs for every target position.
        """
        trg_embeddings_batch = self.trg_embedding(trg_token_ids_batch)
        trg_embeddings_batch = self.trg_pos_embedding(trg_embeddings_batch)
        trg_representations_batch = self.decoder(trg_embeddings_batch, src_representations_batch, trg_mask, src_mask)
        return self.decoder_generator(trg_representations_batch)

    def greedy_decoding(self, src_representations_batch, inputs_decode, src_mask, batch_size, num_ahead):
        """Generate ``num_ahead`` steps, feeding each prediction back as decoder input.

        Args:
            src_representations_batch: Encoder output.
            inputs_decode: Tensor whose last time step seeds the decoder.
            src_mask: Mask for the decoder's source attention.
            batch_size: Batch size used to build the causal masks.
            num_ahead: Number of steps to generate.

        Returns:
            The generated steps (the seed step is stripped).
        """
        # Masks are created on the device the model is actually running on;
        # the former hard-coded `.cuda()` failed on CPU-only machines.
        device = src_representations_batch.device

        trg_token_ids_batch = inputs_decode[:, -1, :].unsqueeze(1)
        for step in range(num_ahead):
            trg_mask = get_trg_mask(batch_size, step + 1).to(device)
            # Only the newest position is a new prediction.
            next_step = self.decode(trg_token_ids_batch, src_representations_batch, trg_mask, src_mask)[:, -1:, :]
            trg_token_ids_batch = torch.cat((trg_token_ids_batch, next_step), 1)

        return trg_token_ids_batch[:, 1:, :]

def get_src_mask(batch_size, step_size):
    """Return an all-True boolean mask of shape ``(batch_size, 1, 1, step_size)``."""
    flat_mask = torch.ones(batch_size * step_size, dtype=torch.bool)
    return flat_mask.view(batch_size, 1, 1, -1)

def get_trg_mask(batch_size, step_size):
    """Return a causal boolean mask of shape ``(batch_size, 1, step_size, step_size)``.

    Entry ``[b, 0, i, j]`` is True exactly when ``j <= i``, i.e. position ``i``
    may attend to itself and to earlier positions only.
    """
    # Lower-triangular (diagonal included) == transposed upper-triangular.
    causal_mask = torch.tril(torch.ones((1, 1, step_size, step_size), dtype=torch.bool))
    # All-True padding mask; broadcasting it adds the batch dimension.
    padding_mask = torch.ones(batch_size * step_size, dtype=torch.bool).view(batch_size, 1, 1, -1)
    return padding_mask & causal_mask

In [8]:
# Train Process
def TrainModel_Transformer(model, src_mask, trg_mask, train_dataloader, valid_dataloader, num_ahead, num_epochs, learning_rate, patience, min_delta):
    """Train ``model`` with RMSprop on an MSE loss, using validation-based early stopping.

    Args:
        model: Module called as ``model(inputs, inputs, src_mask, trg_mask, num_ahead)``.
        src_mask: Source mask forwarded to the model unchanged.
        trg_mask: Target mask forwarded to the model unchanged.
        train_dataloader: Iterable of ``(inputs, labels)`` training batches.
        valid_dataloader: Iterable of ``(inputs, labels)`` validation batches.
        num_ahead: Number of steps the model is asked to predict.
        num_epochs: Maximum number of epochs.
        learning_rate: RMSprop learning rate.
        patience: Number of consecutive epochs without sufficient improvement
            after which training stops.
        min_delta: Minimum decrease of the validation loss that counts as an
            improvement.

    Returns:
        A deep copy of the model taken at the epoch with the best validation
        loss, in eval mode. If ``num_epochs`` is 0 the untouched ``model``
        itself is returned.

    Raises:
        ValueError: If either data loader yields no batches.
    """
    if len(train_dataloader) == 0 or len(valid_dataloader) == 0:
        raise ValueError('Both the training and the validation data loader must yield at least one batch.')

    loss_MSE = torch.nn.MSELoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    # Follow the model's own device instead of assuming CUDA whenever it is available.
    device = next(model.parameters()).device

    def _batch_loss(inputs, labels):
        """Run one forward pass and return the MSE between predictions and labels."""
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs, inputs, src_mask, trg_mask, num_ahead)
        # Squeeze only if the shapes disagree: an unconditional squeeze would
        # corrupt correctly shaped labels that contain a size-1 dimension
        # (e.g. a batch of one).
        if labels.shape != outputs.shape:
            labels = torch.squeeze(labels)
        return loss_MSE(outputs, labels)

    pre_time = time.time()

    # Variables for Early Stopping
    is_best_model = 0
    patient_epoch = 0
    best_model = model
    min_loss_epoch_valid = 10000.0

    for epoch in range(num_epochs):

        # Re-enable dropout: the validation pass below switches to eval mode,
        # and without this every epoch after the first would train in eval mode.
        model.train()
        total_train_loss = 0.0
        for inputs, labels in train_dataloader:
            loss_train = _batch_loss(inputs, labels)
            optimizer.zero_grad()
            loss_train.backward()
            optimizer.step()
            total_train_loss += loss_train.item()

        model.eval()
        total_valid_loss = 0.0
        # No gradients are needed for validation; skipping them saves memory.
        with torch.no_grad():
            for inputs_val, labels_val in valid_dataloader:
                total_valid_loss += _batch_loss(inputs_val, labels_val).item()

        avg_losses_epoch_train = total_train_loss / float(len(train_dataloader))
        avg_losses_epoch_valid = total_valid_loss / float(len(valid_dataloader))

        # Early Stopping
        if epoch == 0:
            # The first epoch always establishes the baseline.
            is_best_model = 1
            if avg_losses_epoch_valid < min_loss_epoch_valid:
                min_loss_epoch_valid = avg_losses_epoch_valid
        elif min_loss_epoch_valid - avg_losses_epoch_valid > min_delta:
            is_best_model = 1
            min_loss_epoch_valid = avg_losses_epoch_valid
            patient_epoch = 0
        else:
            is_best_model = 0
            patient_epoch += 1

        if is_best_model:
            # Snapshot the weights; keeping a plain reference would make the
            # "best" model silently follow all later updates.
            best_model = copy.deepcopy(model)
        elif patient_epoch >= patience:
            print('Early Stopped at Epoch:', epoch)
            break

        cur_time = time.time()

        print('Epoch: {}, train_loss: {}, valid_loss: {}, time: {}, best model: {}'.format(
            epoch,
            np.around(avg_losses_epoch_train, decimals=8),
            np.around(avg_losses_epoch_valid, decimals=8),
            np.around([cur_time - pre_time], decimals=2),
            is_best_model))
        pre_time = cur_time

    return best_model

In [9]:
# --- Data / training configuration -------------------------------------------
# Number of features per time step; used below as both the source and the
# target "vocab" size of the Transformer.
NUM_COL = 87
# Batch size for the data loaders and for the masks built for training.
BATCH_SIZE = 40
# Number of past time steps fed to the model (seq_len / input_length).
INPUT_LEN = 36
# Number of future time steps to predict (pred_len / num_ahead).
PRED_LEN = 12
# Learning rate handed to the optimizer in TrainModel_Transformer.
LEARNING_RATE = 1e-5
# Upper bound on training epochs (early stopping may end training sooner).
NUM_EPOCHS = 100
# Minimum drop in validation loss that counts as an improvement.
MIN_DELTA = 5e-4
# Consecutive non-improving epochs tolerated before early stopping.
PATIENCE = 10

# --- Model architecture -------------------------------------------------------
# Width of all internal representations (model_dimension).
BASELINE_MODEL_DIMENSION = 512
# Attention heads per attention module; must divide the model dimension.
BASELINE_MODEL_NUMBER_OF_HEADS = 8
# Number of layers in both the encoder and the decoder stack.
BASELINE_MODEL_NUMBER_OF_LAYERS = 6
# Dropout probability used throughout the model.
BASELINE_MODEL_DROPOUT_PROB = 0.3

In [10]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Boolean form of the same check; printed so the cell output records whether a GPU was found.
use_gpu = torch.cuda.is_available()
print (use_gpu)

True


In [12]:
!ls

drive  sample_data


In [None]:
# Load the training table and align it to a regular 15-minute grid.
data_matrix = load_data("./tps_df.pkl", "2020-01-01 00:00:00.000", "2020-05-31 23:45:00.000", freq="15min")
input_dim = data_matrix.shape[-1]

# The model's input and output layers are sized by NUM_COL. Fail early with a
# readable message rather than with a matrix-shape error deep inside the model.
if input_dim != NUM_COL:
    raise ValueError(
        'The data has {} feature columns but the model is configured for NUM_COL = {}.'.format(input_dim, NUM_COL))

train_dataloader, valid_dataloader, _ = prepare_dataloader(data_matrix, input_dim, BATCH_SIZE=BATCH_SIZE, seq_len=INPUT_LEN, pred_len=PRED_LEN, device=device)

# Masks sized for full training batches.
src_mask = get_src_mask(BATCH_SIZE, INPUT_LEN).to(device)
trg_mask = get_trg_mask(BATCH_SIZE, PRED_LEN).to(device)

transformer = Transformer(
    model_dimension=BASELINE_MODEL_DIMENSION,
    input_length=INPUT_LEN,
    src_vocab_size=NUM_COL,
    trg_vocab_size=NUM_COL,
    number_of_heads=BASELINE_MODEL_NUMBER_OF_HEADS,
    number_of_layers=BASELINE_MODEL_NUMBER_OF_LAYERS,
    dropout_probability=BASELINE_MODEL_DROPOUT_PROB
).to(device)

# Train with early stopping; the returned model is used for inference below.
transformer_best_model = TrainModel_Transformer(transformer, src_mask, trg_mask, train_dataloader, valid_dataloader, num_ahead=PRED_LEN, learning_rate=LEARNING_RATE, min_delta=MIN_DELTA, num_epochs=NUM_EPOCHS, patience=PATIENCE)


Epoch: 0, train_loss: 0.019574839621782303, valid_loss: 0.002029919996857643, time: [112.93], best model: 1
Epoch: 1, train_loss: 0.009758319705724716, valid_loss: 0.0011948499595746398, time: [100.51], best model: 1
Epoch: 2, train_loss: 0.005870840046554804, valid_loss: 0.0009037100244313478, time: [97.74], best model: 0
Epoch: 3, train_loss: 0.005188569892197847, valid_loss: 0.0010080799693241715, time: [100.21], best model: 0
Epoch: 4, train_loss: 0.004755950067192316, valid_loss: 0.0008686999790370464, time: [99.13], best model: 0
Epoch: 5, train_loss: 0.004484639968723059, valid_loss: 0.0007540400256402791, time: [100.55], best model: 0
Epoch: 6, train_loss: 0.004312850069254637, valid_loss: 0.000651800015475601, time: [98.67], best model: 1
Epoch: 7, train_loss: 0.0039853500202298164, valid_loss: 0.0014126800233498216, time: [99.67], best model: 0
Epoch: 8, train_loss: 0.0038302799221128225, valid_loss: 0.0006085400236770511, time: [97.77], best model: 0
Epoch: 9, train_loss: 0.

In [19]:
import os 

In [20]:
'''
Step1: read and reshape data
Step2: feed into trained model
Step3: convert output to JSON file
Here we take one testing data as an example to convert it to JSON file.
In this challenge, you should convert all 15 predicting results to one JSON file.
Please check https://colab.research.google.com/drive/1Hkt3kQuh7WzwUTnLgKCcvfPAV1CUK8lF?usp=sharing for more information of the expected result.
'''

# Column order of the exported table.
out_cols = ['11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
       '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34',
       '35', '36', '37', '38', '39', '40', '42', '43', '44', '45', '46', '47',
       '49', '50', '51', '52', '53', '54', '55', '58', '59', '60', '61', '62',
       '63', '64', '65', '66', '67', '68', '69', '70', '73', '74', '75', '76',
       '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88',
       '89', '90', '91', '92', '93', '94', '95', '96', '97', '99', '100',
       '102', '103', '104', '106']

# First and last time stamp to be predicted for each test file.
horizon = {'tps_1.pkl':['2020-06-02 06:15:00', '2020-06-02 09:00:00'],
           'tps_2.pkl':['2020-06-03 07:15:00', '2020-06-03 10:00:00'],
           'tps_3.pkl':['2020-06-04 08:15:00', '2020-06-04 11:00:00'],
           'tps_4.pkl':['2020-06-05 09:15:00', '2020-06-05 12:00:00'],
           'tps_5.pkl':['2020-06-06 10:15:00', '2020-06-06 13:00:00'],
           'tps_6.pkl':['2020-06-07 11:15:00', '2020-06-07 14:00:00'],
           'tps_7.pkl':['2020-06-08 12:15:00', '2020-06-08 15:00:00'],
           'tps_8.pkl':['2020-06-09 13:15:00', '2020-06-09 16:00:00'],
           'tps_9.pkl':['2020-06-10 14:15:00', '2020-06-10 17:00:00'],
           'tps_10.pkl':['2020-06-11 15:15:00', '2020-06-11 18:00:00'],
           'tps_11.pkl':['2020-06-12 16:15:00', '2020-06-12 19:00:00'],
           'tps_12.pkl':['2020-06-13 17:15:00', '2020-06-13 20:00:00'],
           'tps_13.pkl':['2020-06-14 18:15:00', '2020-06-14 21:00:00'],
           'tps_14.pkl':['2020-06-15 19:15:00', '2020-06-15 22:00:00'],
           'tps_15.pkl':['2020-06-16 20:15:00', '2020-06-16 23:00:00'],
           }

# Walk the files in the chronological order of `horizon` rather than in the
# arbitrary order returned by os.listdir; unrelated files in the folder are
# ignored instead of raising a KeyError.
test_files = [name for name in horizon if os.path.isfile(os.path.join('test_data', name))]
missing_files = [name for name in horizon if name not in test_files]
if missing_files:
    print('Warning: no test file found for', missing_files)

BATCH_SIZE_TEST = 1
# The masks are identical for every file, so they are built once.
src_mask_test = get_src_mask(BATCH_SIZE_TEST, INPUT_LEN).to(device)
trg_mask_test = get_trg_mask(BATCH_SIZE_TEST, PRED_LEN).to(device)

# Inference only: disable dropout and skip gradient bookkeeping.
transformer_best_model.eval()
predictions = []
with torch.no_grad():
    for test_file in test_files:
        test_path = os.path.join('test_data', test_file)
        print(test_path)
        # NOTE(review): read_pickle executes arbitrary code — only load trusted files.
        tps_raw = pd.read_pickle(test_path)

        # NOTE(review): the column order comes from the order in which segments
        # first appear in this file; it is assumed to match the column order
        # the model was trained on — confirm.
        reshaped_tps_df = reshape_data(tps_raw).set_index('TIME')
        reshaped_tps_value = np.expand_dims(reshaped_tps_df.values, axis=0)
        reshaped_tps_value = torch.from_numpy(reshaped_tps_value).float().to(device)

        transformer_out = transformer_best_model(reshaped_tps_value, reshaped_tps_value, src_mask_test, trg_mask_test, PRED_LEN).squeeze(0)

        output_df = pd.DataFrame(transformer_out.cpu().numpy(), columns=reshaped_tps_df.columns)
        st, et = horizon[test_file]
        print(st, et)
        # Seconds since the Unix epoch as floats; independent of the platform's
        # int width and of the datetime resolution pandas uses internally.
        timestamps = pd.date_range(start=st, end=et, freq='15min')
        output_df.index = (timestamps - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)

        predictions.append(output_df)

# Concatenate once instead of growing the frame inside the loop.
out_df = pd.concat(predictions) if predictions else pd.DataFrame(columns=out_cols)
# Keep the documented column order; unexpected extra columns go last.
out_df = out_df.reindex(columns=out_cols + [col for col in out_df.columns if col not in out_cols])
out_df.to_json('./traffic_forecasting_transformer.json')

test_data/tps_1.pkl
2020-06-02 06:15:00 2020-06-02 09:00:00
test_data/tps_2.pkl




2020-06-03 07:15:00 2020-06-03 10:00:00
test_data/tps_3.pkl




2020-06-04 08:15:00 2020-06-04 11:00:00
test_data/tps_4.pkl




2020-06-05 09:15:00 2020-06-05 12:00:00
test_data/tps_5.pkl




2020-06-06 10:15:00 2020-06-06 13:00:00
test_data/tps_6.pkl




2020-06-07 11:15:00 2020-06-07 14:00:00
test_data/tps_7.pkl




2020-06-08 12:15:00 2020-06-08 15:00:00
test_data/tps_8.pkl




2020-06-09 13:15:00 2020-06-09 16:00:00
test_data/tps_9.pkl




2020-06-10 14:15:00 2020-06-10 17:00:00
test_data/tps_10.pkl




2020-06-11 15:15:00 2020-06-11 18:00:00
test_data/tps_11.pkl




2020-06-12 16:15:00 2020-06-12 19:00:00
test_data/tps_12.pkl




2020-06-13 17:15:00 2020-06-13 20:00:00
test_data/tps_13.pkl




2020-06-14 18:15:00 2020-06-14 21:00:00
test_data/tps_14.pkl




2020-06-15 19:15:00 2020-06-15 22:00:00
test_data/tps_15.pkl




2020-06-16 20:15:00 2020-06-16 23:00:00


