In [2]:
import os
import pandas as pd
import numpy as np
from tqdm import trange, tqdm

from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

from pandas import read_csv
from scipy import stats

window_size = 192
stride_size = 24
target_window_size = 24
num_covariates = 3
train_start = '2011-01-01 00:00:00'
train_end = '2014-08-31 23:00:00'
test_start = '2014-08-25 00:00:00' #need additional 7 days as given info
test_end = '2014-09-07 23:00:00'

def prep_data(data, covariates, data_start, train = True):
    time_len = data.shape[0]
    input_size = window_size-stride_size
    windows_per_series = np.full((num_series), (time_len-input_size-target_window_size) // stride_size)
    if train: windows_per_series -= (data_start+stride_size-1) // stride_size
    total_windows = np.sum(windows_per_series)
    x_input = np.zeros((total_windows, window_size, 1 + num_covariates), dtype='float32')
    label = np.zeros((total_windows, target_window_size, 1 + num_covariates), dtype='float32')
    v_input = np.zeros((total_windows, 2), dtype='float32')
    count = 0
    for series in trange(num_series): # for each time series
        for i in range(windows_per_series[series]):
            if train:
                window_start = stride_size*i+data_start[series]
            else:
                window_start = stride_size*i
            window_end = window_start+window_size
            target_window_end = window_end+target_window_size
            x_input[count, :, 0] = data[window_start:window_end, series]
            x_input[count, :, 1:1+num_covariates] = covariates[window_start:window_end, :]
            label[count, :, 0] = data[window_end:target_window_end, series]
            label[count,:, 1:1+num_covariates] = covariates[window_end:target_window_end, :]
            nonzero_sum = (x_input[count, 1:input_size, 0]!=0).sum()
            if nonzero_sum == 0:
                v_input[count, 0] = 0
            else:
                v_input[count, 0] = np.true_divide(x_input[count, :input_size, 0].sum(),nonzero_sum)+1
                x_input[count, :, 0] = x_input[count, :, 0]/v_input[count, 0]
                label[count, :, 0] = label[count, :, 0]/v_input[count, 0]
            count += 1
    return x_input, v_input, label

def gen_covariates(times, num_covariates):
    covariates = np.zeros((times.shape[0], num_covariates))
    for i, input_time in enumerate(times):
        covariates[i, 0] = input_time.weekday()
        covariates[i, 1] = input_time.hour
        covariates[i, 2] = input_time.month
    return covariates[:, :num_covariates]

In [12]:
name = 'LD2011_2014.txt'
save_name = 'elect'
save_path = os.path.join('/home/vikash/data', save_name)

In [14]:
name = 'LD2011_2014.txt'
save_name = 'elect'
save_path = os.path.join('data', save_name)

if not os.path.exists(save_path):
    os.makedirs(save_path)
csv_path = os.path.join(save_path, name)
if not os.path.exists(csv_path):
    zipurl = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'
    with urlopen(zipurl) as zipresp:
        with ZipFile(BytesIO(zipresp.read())) as zfile:
            zfile.extractall(save_path)

data_frame = pd.read_csv(csv_path, sep=";", index_col=0, parse_dates=True, decimal=',')
data_frame = data_frame.resample('1H',label = 'left',closed = 'right').sum()[train_start:test_end]
data_frame.fillna(0, inplace=True) # (32304, 370)
# generate covariates (has both train and test limits)
covariates = gen_covariates(data_frame[train_start:test_end].index, num_covariates) # (32304, 3)

In [15]:
cov_dims = pd.DataFrame(covariates).nunique().tolist()
train_data = data_frame[train_start:train_end]
test_data = data_frame[test_start:test_end]

In [16]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(train_data)
train_target_df = pd.DataFrame(scaler.transform(train_data), index=train_data.index, columns=train_data.columns)
test_target_df = pd.DataFrame(scaler.transform(test_data), index=test_data.index, columns=test_data.columns)

In [17]:
train_data = train_target_df.values
test_data = test_target_df.values
data_start = (train_data!=0).argmax(axis=0) #find first nonzero value in each time series
total_time = data_frame.shape[0] #32304
num_series = data_frame.shape[1] #370

In [18]:
X_train, v_train, y_train = prep_data(train_data, covariates, data_start)
X_test, v_test, y_test = prep_data(test_data, covariates, data_start, train=False)

100%|██████████| 370/370 [00:08<00:00, 42.24it/s]
100%|██████████| 370/370 [00:00<00:00, 12310.65it/s]


In [22]:
y_train.shape

(388731, 24, 4)

In [24]:
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import RandomSampler

class TrainDataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label
        self.train_len = self.data.shape[0]

    def __len__(self):
        return self.train_len

    def __getitem__(self, index):
        # return time series sequence, current covariates, label sequence, future covariates
        return (self.data[index,:,0], self.data[index,:,1:1+num_covariates], self.label[index,:,0], self.label[index,:,1:1+num_covariates])

class TestDataset(Dataset):
    def __init__(self, data, v, label):
        self.data = data
        self.v = v
        self.label = label
        self.test_len = self.data.shape[0]

    def __len__(self):
        return self.test_len

    def __getitem__(self, index):
        # return time series sequence, current covariates, normalizing stats, label sequence, future covariates
        return (self.data[index,:,0], self.data[index,:,1:1+num_covariates], self.v[index], self.label[index,:,0], self.label[index,:,1:1+num_covariates])

In [25]:
train_batch_size = 8

train_set = TrainDataset(X_train, y_train)
test_set = TestDataset(X_test, v_test, y_test)
train_loader = DataLoader(train_set, batch_size=train_batch_size, drop_last=True)
test_loader = DataLoader(test_set, batch_size=len(test_set), sampler=RandomSampler(test_set))

In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [27]:
class ResidualBlock(nn.Module):
    def __init__(self, input_dim, d, stride=1, num_filters=35, p=0.2, k=2, weight_norm=True):
        super(ResidualBlock, self).__init__()
        self.k, self.d, self.dropout_fn = k, d, nn.Dropout(p)
        
        self.conv1 = nn.Conv1d(input_dim, num_filters, kernel_size=k, dilation=d)
        self.conv2 = nn.Conv1d(num_filters, num_filters, kernel_size=k, dilation=d)
        if weight_norm:
            self.conv1, self.conv2 = nn.utils.weight_norm(self.conv1), nn.utils.weight_norm(self.conv2)
        
        self.downsample = nn.Conv1d(input_dim, num_filters, 1) if input_dim != num_filters else None
    
    def forward(self, x):
        out = self.dropout_fn(F.relu(self.conv1(x.float())))
        out = self.dropout_fn(F.relu(self.conv2(out)))
        
        residual = x if self.downsample is None else self.downsample(x)
        return F.relu(out + residual[:,:,-out.shape[2]:])

class FutureResidual(nn.Module):
    def __init__(self, in_features):
        super(FutureResidual, self).__init__()
        self.net = nn.Sequential(nn.Linear(in_features=in_features, out_features=in_features),
#                                  nn.BatchNorm1d(in_features),
                                 nn.ReLU(),
                                 nn.Linear(in_features=in_features, out_features=in_features),)
#                                  nn.BatchNorm1d(in_features),)
        
    def forward(self, lag_x, x):
        out = self.net(x.squeeze())
        return F.relu(torch.cat((lag_x, out), dim=2))

class DeepTCN(nn.Module):
    def __init__(self, cov_dims=cov_dims, num_class=num_series, embedding_dim=20, dilations=[1,2,4,8,16,24,32], p=0.25, device=torch.device('cuda')):
        super(DeepTCN, self).__init__()
        self.input_dim, self.cov_dims, self.embeddings, self.device = 1+(len(cov_dims)*embedding_dim), cov_dims, [], device
        for cov in cov_dims:
            self.embeddings.append(nn.Embedding(num_class, embedding_dim, device=device))
        
        self.encoder = nn.ModuleList()
        for d in dilations:
            self.encoder.append(ResidualBlock(input_dim=self.input_dim, num_filters=self.input_dim, d=d))
        self.decoder = FutureResidual(in_features=self.input_dim-1)
        self.mlp = nn.Sequential(nn.Linear(1158, 8), nn.BatchNorm1d(8), nn.SiLU(), nn.Dropout(p), nn.Linear(8,1), nn.ReLU())
    
    def forward(self, x, current_cov, next_cov):
        current_cov_embeddings, next_cov_embeddings = [], []
        for cov_idx, cov_dim in enumerate(self.cov_dims):
            current_cov_embeddings.append(self.embeddings[cov_idx](current_cov[:,:,cov_idx].to(self.device).long()))
            next_cov_embeddings.append(self.embeddings[cov_idx](next_cov[:,:,cov_idx].to(self.device).long()))
        embed_concat = torch.cat(current_cov_embeddings, dim=2).to(self.device)
        next_cov_concat = torch.cat(next_cov_embeddings, dim=2).to(self.device)
        
        encoder_input = torch.cat((x.unsqueeze(2), embed_concat), dim=2)
        encoder_input = encoder_input.permute(0, 2, 1)
        
        for layer in self.encoder:
            encoder_input = layer(encoder_input)
        encoder_output = encoder_input.permute(0, 2, 1)
        encoder_output = torch.reshape(encoder_output, (encoder_output.shape[0], 1, -1))
        encoder_output = torch.repeat_interleave(encoder_output, next_cov_concat.shape[1], dim=1)

        decoder_output = self.decoder(lag_x=encoder_output, x=next_cov_concat)
        t, n = decoder_output.size(0), decoder_output.size(1)
        decoder_output = decoder_output.view(t * n, -1)
        output = self.mlp(decoder_output.float())
        output = output.view(t, n, -1)
        
        return output.squeeze()

In [11]:
import torch.optim as optim
from tqdm import trange, tqdm

def train(model, device=torch.device('cuda'), num_epochs = 1, learning_rate = 1e-3):
    train_len = len(train_loader)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_summary = np.zeros((train_len * num_epochs))
    loss_fn = F.mse_loss
    
    for epoch in range(num_epochs):
        model.train()
        loss_epoch = np.zeros(len(train_loader))

        pbar = tqdm(train_loader)
        for (ts_data_batch, current_covs_batch, labels_batch, next_covs_batch) in pbar:
            optimizer.zero_grad()
            
            loss = torch.zeros(1, device=device, dtype=torch.float32)
            out = model(ts_data_batch.to(device), current_covs_batch.to(device), next_covs_batch.to(device))
            loss = loss_fn(out.float(), labels_batch.squeeze().to(device).float())
            
            pbar.set_description(f"Loss:{loss.item()}")
            loss.backward()
            optimizer.step()
        
        loss_summary[epoch * train_len:(epoch + 1) * train_len] = loss.cpu().detach()
        
    return loss_summary, optimizer

def evaluate(model, optimizer, device=torch.device('cuda')):
    results = []

    with torch.no_grad():
        model.eval()
        loss_epoch = np.zeros(len(train_loader))

        pbar = tqdm(test_loader)
        for (ts_data_batch, current_covs_batch, v_batch, labels_batch, next_covs_batch) in pbar:
            optimizer.zero_grad()

            out = model(ts_data_batch.to(device), current_covs_batch.to(device), next_covs_batch.to(device))
            results.append(out.squeeze(0).cpu())

    predictions = torch.cat(results)
    criterion = nn.MSELoss()
    test_rmse = torch.sqrt(criterion(predictions, labels_batch)).item()
    return test_rmse

In [12]:
model = DeepTCN(device=torch.device('cuda')).cuda()

In [13]:
loss, optimizer = train(model, num_epochs=2)

Loss:0.00854562409222126: 100%|██████████| 48591/48591 [13:06<00:00, 61.82it/s]   
Loss:0.009649071842432022: 100%|██████████| 48591/48591 [13:01<00:00, 62.16it/s]  


In [14]:
evaluate(model, optimizer, device=torch.device('cuda'))

100%|██████████| 1/1 [00:00<00:00, 12.43it/s]


0.15075178444385529