In [267]:
import torch
import torchvision
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader, Dataset, IterableDataset
from torch.utils.data import random_split
from torch import optim
import math
from math import sqrt
import time 
%matplotlib inline

In [2]:
def get_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
print(get_device())

cuda


## Prepare data

In [3]:
# Load the ETTh1 CSV into a DataFrame; the path is relative to the working directory.
data = pd.read_csv('data/ETTh1.csv')

In [4]:
# Report the number of rows, then preview the first three rows of the table.
print('dataset length: ',len(data))
data.head(3)

dataset length:  17420


Unnamed: 0,date,HUFL,HULL,MUFL,MULL,LUFL,LULL,OT
0,2016-07-01 00:00:00,5.827,2.009,1.599,0.462,4.203,1.34,30.531
1,2016-07-01 01:00:00,5.693,2.076,1.492,0.426,4.142,1.371,27.787001
2,2016-07-01 02:00:00,5.157,1.741,1.279,0.355,3.777,1.218,27.787001


### dataset class

In [5]:
class EttDataset(Dataset): # add scaling
    """Sliding-window dataset over a DataFrame that has a ``date`` column.

    Every column after the first one is used as a feature (so ``date`` is
    expected to be the first column), and the ``date`` column is expanded into
    the calendar features ``[month, day, weekday, hour]``.

    Args:
        data: source DataFrame; ``date`` must be parseable by ``pd.to_datetime``.
        predict_for: number of steps to predict (``pred_len``).
        flag: index (0, 1 or 2) into the hard-coded row borders below
            (presumably train / validation / test -- confirm with the caller).

    Each item is the tuple ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` of NumPy
    arrays: ``seq_x`` holds ``seq_len`` rows, ``seq_y`` holds the last
    ``label_len`` rows of ``seq_x`` followed by ``pred_len`` further rows, and
    the ``*_mark`` arrays hold the calendar features of the same rows.
    """
    def __init__(self, data, predict_for=24*4, flag=0):
        self.seq_len = 24*4*4
        self.label_len = 24*4
        self.pred_len = predict_for
        self.flag = flag
        self.data = data

        # Row borders of the three partitions. Partitions 1 and 2 start
        # seq_len rows early so that their first window has a full history.
        border1s = [0, 12*30*24 - self.seq_len, 12*30*24+4*30*24 - self.seq_len]
        border2s = [12*30*24, 12*30*24+4*30*24, 12*30*24+8*30*24]
        border1 = border1s[self.flag]
        border2 = border2s[self.flag]

        cols_data = self.data.columns[1:]
        data_vals = self.data[cols_data].values

        # Work on a freshly parsed Series instead of mutating a slice of the
        # caller's frame (the slice-then-assign pattern triggers pandas'
        # SettingWithCopyWarning), and use the vectorised .dt accessors rather
        # than a per-row apply.
        dates = pd.to_datetime(self.data['date'].iloc[border1:border2])
        stamp = pd.DataFrame({
            'month': dates.dt.month,
            'day': dates.dt.day,
            'weekday': dates.dt.weekday,
            'hour': dates.dt.hour,
        })
        # The per-row apply produced int64 values; the .dt accessors may yield
        # int32, so pin the dtype explicitly.
        self.data_stamp = stamp.to_numpy(dtype='int64')
        self.data_y = data_vals[border1:border2]
        self.data_x = data_vals[border1:border2]

    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last label_len rows of the input.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        # Count only start positions for which both the input window and the
        # prediction window fit inside the selected partition. Returning the
        # raw row count would let indices near the end produce truncated or
        # empty slices.
        return max(0, len(self.data_x) - self.seq_len - self.pred_len + 1)

In [6]:
# 3:1:1 split in row order: the first 60% of rows for training, the next 20%
# for validation, and whatever remains for testing.
train_size = int(0.6 * len(data))
val_size = int(0.2 * len(data))
test_size = len(data) - train_size - val_size
train_d = data[:train_size]
val_d = data[train_size:train_size + val_size]
test_d = data[-test_size:]

In [7]:
# Show the row counts of the train, validation and test splits.
len(train_d), len(val_d), len(test_d)

(10452, 3484, 3484)

## Model classes

### Embedding

In [279]:
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding.

    A ``(1, max_len, d_model)`` table is built once and stored as the buffer
    ``pe``: even columns hold sines and odd columns hold cosines of the
    position scaled by geometrically decreasing frequencies.

    Args:
        d_model: width of the encoding (odd widths are supported).
        max_len: number of positions in the table.
    """
    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        # A fresh tensor does not require grad, and register_buffer keeps it
        # out of the parameter list, so no explicit flag is needed.
        pe = torch.zeros(max_len, d_model).float()

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so trim the frequencies to match.
        pe[:, 1::2] = torch.cos(position * div_term[:d_model // 2])

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the encodings of the first ``x.size(1)`` positions.

        Only the size of dimension 1 of ``x`` is read; the result has shape
        ``(1, min(x.size(1), max_len), d_model)``.
        """
        return self.pe[:, :x.size(1)]

class TokenEmbedding(nn.Module):
    """Embed each time step with a width-3, circularly padded 1-D convolution.

    Args:
        c_in: number of input channels (size of the last axis of the input).
        d_model: number of output channels.
    """
    def __init__(self, c_in, d_model):
        super().__init__()
        self.tokenConv = nn.Conv1d(c_in, d_model, kernel_size=3,
                                   padding=1, padding_mode='circular')
        # The convolution is the only Conv1d in this module, so initialise it
        # directly.
        nn.init.kaiming_normal_(self.tokenConv.weight, mode='fan_in',
                                nonlinearity='leaky_relu')

    def forward(self, x):
        """Map a 3-D input with ``c_in`` on the last axis to ``d_model`` on the last axis."""
        # Conv1d convolves over the last axis, so move channels to axis 1 and
        # back again afterwards.
        channels_first = x.permute(0, 2, 1)
        convolved = self.tokenConv(channels_first)
        return convolved.transpose(1, 2)

class FixedEmbedding(nn.Module):
    """Non-trainable embedding whose rows are sinusoidal encodings of the index.

    Args:
        c_in: number of distinct indices (rows of the table).
        d_model: embedding width (odd widths are supported).
    """
    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        # Freezing happens through ``requires_grad=False`` on the Parameter
        # below; the plain tensor needs no flag of its own.
        w = torch.zeros(c_in, d_model).float()

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so trim the frequencies to match.
        w[:, 1::2] = torch.cos(position * div_term[:d_model // 2])

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        """Look up the fixed rows for integer indices ``x``; the result is detached."""
        return self.emb(x).detach()

class TemporalEmbedding(nn.Module):
    """Sum of per-field embeddings of integer calendar features.

    The last axis of the input is read as ``[month, day, weekday, hour]``,
    plus a fifth ``minute`` column when ``freq == 't'``.

    Args:
        d_model: embedding width.
        embed_type: ``'fixed'`` uses ``FixedEmbedding``; any other value uses
            a trainable ``nn.Embedding``.
        freq: ``'t'`` additionally creates the minute embedding.
    """
    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super().__init__()

        embedding_cls = FixedEmbedding if embed_type == 'fixed' else nn.Embedding

        # Table sizes: 4 minute buckets, 24 hours, 7 weekdays, 32 day slots
        # and 13 month slots.
        if freq == 't':
            self.minute_embed = embedding_cls(4, d_model)
        self.hour_embed = embedding_cls(24, d_model)
        self.weekday_embed = embedding_cls(7, d_model)
        self.day_embed = embedding_cls(32, d_model)
        self.month_embed = embedding_cls(13, d_model)

    def forward(self, x):
        """Embed every calendar column of ``x`` and return the element-wise sum."""
        fields = x.long()

        total = self.hour_embed(fields[:, :, 3])
        total = total + self.weekday_embed(fields[:, :, 2])
        total = total + self.day_embed(fields[:, :, 1])
        total = total + self.month_embed(fields[:, :, 0])

        # The minute embedding exists only when the module was built with freq='t'.
        if hasattr(self, 'minute_embed'):
            return total + self.minute_embed(fields[:, :, 4])
        return total + 0.

class TimeFeatureEmbedding(nn.Module):
    """Linear projection of time-feature vectors to ``d_model``.

    Args:
        d_model: output width of the projection.
        embed_type: accepted but not used by this class.
        freq: frequency code that selects the input width; an unknown code
            raises ``KeyError``.
    """
    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super().__init__()

        # Number of time-feature columns expected for each frequency code.
        features_per_freq = dict(h=4, t=5, s=6, m=1, a=1, w=2, d=3, b=3)
        self.embed = nn.Linear(features_per_freq[freq], d_model)

    def forward(self, x):
        """Project the last axis of ``x`` to ``d_model``."""
        projected = self.embed(x)
        return projected

class DataEmbedding(nn.Module):
    """Sum of value, positional and temporal embeddings, followed by dropout.

    Args:
        c_in: number of input channels handed to ``TokenEmbedding``.
        d_model: embedding width shared by the three components.
        embed_type: ``'timeF'`` selects ``TimeFeatureEmbedding`` for the time
            marks; any other value is forwarded to ``TemporalEmbedding``.
        freq: frequency code forwarded to the temporal embedding.
        dropout: dropout probability applied to the summed embedding.
    """
    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Embed the values ``x`` and their time marks ``x_mark``.

        ``x`` is a 3-D tensor with ``c_in`` on the last axis and ``x_mark``
        holds the matching time features; the result has ``d_model`` on the
        last axis.
        """
        # The value embedding is evaluated exactly once. The former debug
        # print ran the convolution a second time and dumped a full tensor on
        # every forward pass.
        x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark)
        return self.dropout(x)

In [306]:
class FixedEmbedder(nn.Module):
    """Non-trainable embedding whose rows are sinusoidal encodings of the index.

    Args:
        c_in: number of distinct indices (rows of the table).
        d_model: embedding width (odd widths are supported).
    """
    def __init__(self, c_in, d_model):
        super(FixedEmbedder, self).__init__()

        # Freezing happens through ``requires_grad=False`` on the Parameter
        # below; the plain tensor needs no flag of its own.
        w = torch.zeros(c_in, d_model).float()

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so trim the frequencies to match.
        w[:, 1::2] = torch.cos(position * div_term[:d_model // 2])

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        """Look up the fixed rows for integer indices ``x``; the result is detached."""
        return self.emb(x).detach()

class Embedder(nn.Module):
    """Sum of value, positional and hourly calendar embeddings, then dropout.

    The value embedding is a width-3, circularly padded ``Conv1d``. The
    positional and calendar embeddings are constant sinusoidal lookup tables
    with the same values as the ``FixedEmbedder`` weights. The tables are
    built once and stored as non-persistent buffers, so they follow the module
    across devices and stay out of ``state_dict``.

    NOTE(review): unlike ``TokenEmbedding``, ``conv1`` keeps PyTorch's default
    initialisation (no Kaiming init), so the two value embeddings differ even
    under the same seed -- confirm whether that is intended.

    Args:
        c_in: number of input channels (size of the last axis of ``x``).
        d_model: embedding width (odd widths are supported).
        embed_type: accepted but not used by this class.
        freq: accepted but not used; the calendar columns are always read as
            ``[month, day, weekday, hour]``.
        dropout: dropout probability applied to the summed embedding.
    """
    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(Embedder, self).__init__()
        self.d_model = d_model
        self.c_in = c_in
        self.dropout = nn.Dropout(p=dropout)
        self.conv1 = nn.Conv1d(in_channels=self.c_in, out_channels=self.d_model, kernel_size=3, padding=1, padding_mode='circular')

        # Build the constant tables once here instead of on every forward.
        max_len = 5000
        hour_size = 24; weekday_size = 7; day_size = 32; month_size = 13
        self.register_buffer('position_table', self._sinusoid_table(max_len, d_model).unsqueeze(0), persistent=False)
        self.register_buffer('hour_table', self._sinusoid_table(hour_size, d_model), persistent=False)
        self.register_buffer('weekday_table', self._sinusoid_table(weekday_size, d_model), persistent=False)
        self.register_buffer('day_table', self._sinusoid_table(day_size, d_model), persistent=False)
        self.register_buffer('month_table', self._sinusoid_table(month_size, d_model), persistent=False)

    @staticmethod
    def _sinusoid_table(n_rows, d_model):
        """Return an ``(n_rows, d_model)`` table with sines in even and cosines in odd columns."""
        table = torch.zeros(n_rows, d_model).float()
        position = torch.arange(0, n_rows).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        table[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one cosine column fewer than sine columns.
        table[:, 1::2] = torch.cos(position * div_term[:d_model // 2])
        return table

    def forward(self, x, x_mark):
        """Embed values ``x`` (``c_in`` on the last axis) and calendar marks ``x_mark``.

        ``x_mark`` is cast to integers and its last axis is read as
        ``[month, day, weekday, hour]``. The result has ``d_model`` on the
        last axis.
        """
        # value
        value_embedding = self.conv1(x.permute(0, 2, 1)).transpose(1,2)
        # position
        position_embedding = self.position_table[:, :x.size(1)]
        # timestamp (hourly data: there is no minute column to embed)
        marks = x_mark.long()
        lookup = nn.functional.embedding
        hour_x = lookup(marks[:,:,3], self.hour_table)
        weekday_x = lookup(marks[:,:,2], self.weekday_table)
        day_x = lookup(marks[:,:,1], self.day_table)
        month_x = lookup(marks[:,:,0], self.month_table)
        temporal_embedding = hour_x + weekday_x + day_x + month_x

        x = value_embedding + position_embedding + temporal_embedding

        return self.dropout(x)