# TCN Experiments

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.nn.utils import weight_norm

## Basics

### Transposing Input Data

In [2]:
# Toy input: a 3x5 grid holding the integers 1..15, filled row by row.
x = np.arange(1, 16).reshape(3, 5)

x

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [3]:
# Swapping the last two axes turns an array whose rows are channels into one
# whose rows are timesteps (one vector per timestep), i.e. a time series layout.

y = np.transpose(x, (-1, -2))
y

array([[ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14],
       [ 5, 10, 15]])

### Adaptive Max Pooling

In [4]:
# Adaptive max pooling reduces the length of the last dimension to a fixed size.
# Each channel is split up into n chunks and only the maximum entry of each chunk
# is retained in the output.

n = 2
m = nn.AdaptiveMaxPool1d(n)
# Named pool_input rather than `input` so the Python builtin `input` is not
# shadowed for the rest of the kernel session.
pool_input = torch.randn(1, 10, 8)

output = m(pool_input)
print(pool_input)
print(output)

print(output.size())

tensor([[[-0.6738,  0.1459, -1.9430,  0.5504, -0.9537,  0.2500, -1.2200,
          -0.5923],
         [ 0.0101, -1.0994, -2.0720, -0.9472,  1.0134, -1.0683,  2.1990,
           1.4274],
         [ 0.1794, -0.9806,  3.2110,  1.0176, -1.8194, -0.1435,  0.2970,
          -1.5161],
         [ 0.5059, -1.4812, -0.6248,  0.3844, -0.2665, -1.2313, -0.1889,
           1.5237],
         [ 1.1059,  0.6436, -0.2907,  1.0372,  1.1967,  1.0620,  1.3572,
          -1.9616],
         [ 0.5541, -1.0523,  0.3244,  1.3744, -1.2967, -0.8231,  1.6053,
           2.8394],
         [ 0.2907,  1.4062, -0.2424, -0.4257,  0.2236,  0.1361, -1.0883,
          -2.3522],
         [ 0.4982, -1.7123, -0.8709, -1.0169,  0.6790, -0.4194, -1.7055,
          -3.0911],
         [-0.1077, -0.2264,  0.0804,  0.5738, -0.6949,  0.5811, -0.5590,
           0.2913],
         [ 0.8036,  0.3029,  0.1105, -1.6600, -0.9068, -0.8718,  2.2352,
           0.6868]]])
tensor([[[0.5504, 0.2500],
         [0.0101, 2.1990],
         [3.21

In [5]:
# With a single output chunk the pooled tensor keeps a trailing dimension of size 1.
# squeeze(-1) simply removes that last dimension of the tensor after max pooling.

n = 1
m = nn.AdaptiveMaxPool1d(n)
# Named pool_input rather than `input` so the Python builtin `input` is not
# shadowed for the rest of the kernel session.
pool_input = torch.randn(1, 10, 8)

output = m(pool_input)
print(pool_input)
print(output)

print(output.squeeze(-1))


tensor([[[ 0.8550, -0.3290, -0.5676, -0.7550,  0.8608,  0.5145, -0.4325,
           0.1836],
         [-1.1026, -1.4264, -0.7803, -1.3181, -0.3238,  0.6133, -0.1001,
          -0.2332],
         [ 1.0694, -0.9311, -0.5157,  2.1294, -0.4560, -1.7986, -0.4551,
          -0.1024],
         [-0.8484, -1.1227, -0.2158, -0.1050,  1.4975,  0.2288, -0.2153,
           0.4944],
         [-0.3289, -1.3043, -0.9896, -0.0110,  0.7123, -0.3579,  1.5000,
          -1.8310],
         [-1.7235,  1.2236, -0.0754, -0.0234, -0.0733,  1.0821, -0.0297,
           0.1330],
         [-2.1905,  1.4125, -0.9819,  0.1323,  1.1706, -0.1566,  1.2730,
           1.8208],
         [-0.2982,  0.3976,  0.9994,  0.6967,  0.5351,  0.5108, -1.6749,
           0.5342],
         [-0.7169,  0.1687,  0.7361,  0.7410, -1.2018, -1.7414, -0.4973,
          -1.8722],
         [ 0.3125,  0.1573,  0.0790,  1.2682,  0.3986, -1.1823, -1.4031,
           1.3306]]])
tensor([[[0.8608],
         [0.6133],
         [2.1294],
         [1

## Testing Networks

### Chomp1d 

In [6]:
# Chomp1d just drops the last chomp_size entries of every channel.
# Its only purpose is to strip the padding left at the end of a layer.

x = torch.randn(1, 10, 10)

print(x)
# Show the effect of chomping 3, 5 and 7 trailing entries.
for chomp_size in (3, 5, 7):
    print(x[:, :, :-chomp_size])

tensor([[[ 0.3662, -0.4530,  0.9133, -1.8705,  1.0084, -0.1480, -0.2611,
          -0.3330,  0.5459,  0.4545],
         [ 0.1157,  0.8037, -0.3166,  0.3968,  0.5129, -0.4063,  0.0884,
          -1.2360,  0.1519,  0.0514],
         [ 0.5537, -0.3588,  0.7492, -1.3030, -0.2966, -0.5220, -1.1743,
          -0.3231, -0.9828,  0.4961],
         [-0.5783, -0.6165,  0.2878, -1.7264,  0.2994,  0.9694,  0.5157,
          -0.1501, -0.2851, -0.9014],
         [ 0.1430, -0.5479,  0.0938, -1.5846,  0.4975, -1.0417, -0.4890,
          -0.3011, -1.9909, -0.2687],
         [ 1.9303, -0.2923, -1.8888,  0.1315, -0.5930, -1.5550, -0.4376,
           0.4946, -1.4731,  0.0102],
         [ 0.5294,  0.2273, -0.2997,  0.6690,  0.5647, -0.5846, -0.9580,
          -1.0514,  3.0484,  0.3631],
         [ 0.4330,  0.2814,  1.4383,  1.9686, -1.6647,  0.6168, -1.3011,
          -0.1112,  0.1016,  2.3426],
         [-0.1750, -0.0091, -0.9142, -0.0672,  0.4026,  0.3471, -0.8517,
          -0.8561,  1.0335,  0.0267],
 

### Conv1d

In [7]:
# Let's apply a 1D convolution to some test data to figure out how it works

class TestConv(nn.Module):
    """Minimal wrapper around a single ``nn.Conv1d`` used to inspect its behaviour.

    The convolution is exposed as ``conv1`` and also wrapped in ``net`` (an
    ``nn.Sequential``). Its weights are redrawn from N(0, 0.01) on construction.
    ``dropout`` is accepted by the constructor but not used by this class.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super().__init__()

        conv = nn.Conv1d(
            n_inputs,
            n_outputs,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )
        self.conv1 = conv
        self.net = nn.Sequential(conv)

        self.init_weights()

    def init_weights(self):
        """Overwrite the convolution weights in place with N(0, 0.01) samples."""
        self.conv1.weight.data.normal_(mean=0, std=0.01)

    def forward(self, x):
        """Run ``x`` through the wrapped convolution and return the result."""
        return self.net(x)

In [8]:
# Start simple: a single output channel and no dilation.

x = torch.randn(1, 10, 10)

conv_kwargs = dict(n_inputs=10, n_outputs=1, kernel_size=3, stride=1, dilation=1, padding=1, dropout=0.2)
TestBlock = TestConv(**conv_kwargs)
# Show the learnable parameters: weight first, then bias.
for param in (TestBlock.conv1.weight, TestBlock.conv1.bias):
    print(param)

Parameter containing:
tensor([[[ 0.0007,  0.0065, -0.0023],
         [ 0.0245, -0.0144,  0.0042],
         [ 0.0037,  0.0118, -0.0015],
         [-0.0019, -0.0157, -0.0021],
         [ 0.0118, -0.0229, -0.0047],
         [ 0.0112, -0.0127, -0.0013],
         [ 0.0061,  0.0007,  0.0066],
         [ 0.0020, -0.0095,  0.0263],
         [ 0.0060,  0.0053,  0.0217],
         [-0.0015,  0.0028,  0.0194]]], requires_grad=True)
Parameter containing:
tensor([-0.1789], requires_grad=True)


In [9]:
# Input tensor followed by the convolution output for that input.

conv_out = TestBlock(x)
print(x)
print(conv_out)

tensor([[[ 7.3331e-01,  1.9775e+00,  3.0415e-01,  7.1883e-01, -2.1124e-01,
          -8.8180e-02, -6.1135e-01,  6.5577e-01,  1.2377e+00, -2.5969e-01],
         [-5.0379e-02,  1.4218e+00,  3.0685e-01, -1.3138e-01, -9.0272e-01,
          -1.1392e+00,  6.6706e-01,  2.3474e-01, -1.3778e+00,  5.0836e-01],
         [ 8.0168e-01,  8.5237e-02, -2.7927e-01,  5.0594e-01,  9.0535e-01,
           4.9328e-01,  2.3420e-01, -1.2688e+00, -7.2514e-01,  2.0129e+00],
         [-5.9111e-01, -4.3180e-01, -1.0639e+00,  2.1870e-01,  1.1672e+00,
           5.3729e-01, -5.5616e-01,  6.1319e-01, -5.6282e-01,  6.1455e-01],
         [ 1.6820e+00,  1.5387e+00, -1.6835e-01, -1.7795e-01, -1.2743e+00,
           3.6689e-01,  1.0651e-01,  8.3296e-01, -2.2056e-01,  7.3387e-01],
         [ 7.6575e-01,  1.5882e+00, -7.9935e-01, -1.8805e+00, -4.3445e-02,
          -5.1776e-01, -1.4095e-01,  6.0506e-01, -5.2607e-01, -3.0991e-01],
         [-6.1063e-01,  1.4187e+00, -1.3465e+00,  8.7434e-01, -1.0748e+00,
          -1.1487e-

In [10]:
# In theory each output is a weighted sum of a window of inputs plus the bias.
# Recompute one output by hand to see which inputs feed which output.

pos = 0
kernel = TestBlock.conv1.weight.data[0]

# Accumulate channel by channel, tap by tap (10 channels x 3 kernel taps),
# starting the window at timestep `pos`.
total = sum(
    x[0, channel, pos + tap] * kernel[channel, tap]
    for channel in range(0, 10)
    for tap in range(0, 3)
)

print()
print(total+TestBlock.conv1.bias.data)



tensor([-0.2203])


## Padding

In [11]:
# With padding enabled, the first and last `padding` outputs are computed
# partly from padded values.

# Choosing padding = (k-1)*d means the first output only ever sees the first
# datapoint, which keeps the connection causal.

ker = 3
dil = 1
causal_padding = (ker - 1) * dil

TestBlock2 = TestConv(
    n_inputs=10,
    n_outputs=1,
    kernel_size=ker,
    stride=1,
    dilation=dil,
    padding=causal_padding,
    dropout=0.2,
)
for param in (TestBlock2.conv1.weight, TestBlock2.conv1.bias):
    print(param)

Parameter containing:
tensor([[[-0.0145, -0.0032, -0.0024],
         [ 0.0120, -0.0060, -0.0052],
         [ 0.0136, -0.0106,  0.0066],
         [ 0.0027,  0.0073, -0.0032],
         [ 0.0171,  0.0111, -0.0165],
         [-0.0176, -0.0067, -0.0045],
         [-0.0115,  0.0017, -0.0021],
         [ 0.0096, -0.0184, -0.0031],
         [ 0.0225, -0.0038,  0.0088],
         [-0.0105,  0.0046,  0.0075]]], requires_grad=True)
Parameter containing:
tensor([0.1179], requires_grad=True)


In [12]:
# The first output that depends only on real input datapoints appears after
# as many output nodes as there are padded entries.

pos = 0
total = 0

# Walk the kernel row by row (one row per input channel) and tap by tap.
for channel, taps in enumerate(TestBlock2.conv1.weight.data[0]):
    for offset, weight in enumerate(taps):
        total += x[0, channel, pos + offset] * weight

print()
print(TestBlock2(x))
print(total + TestBlock2.conv1.bias.data)


tensor([[[0.0946, 0.0300, 0.1850, 0.0602, 0.1594, 0.1434, 0.1472, 0.1506,
          0.1457, 0.0904, 0.1002, 0.1646]]], grad_fn=<ConvolutionBackward0>)
tensor([0.1850])


In [13]:
# Slicing the output the same way Chomp1d does discards as many nodes at the
# end of the layer as were added by the padding.

padded_out = TestBlock2(x)
chomp = (ker - 1) * dil

print(padded_out)
print(padded_out[:, :, :-chomp])

tensor([[[0.0946, 0.0300, 0.1850, 0.0602, 0.1594, 0.1434, 0.1472, 0.1506,
          0.1457, 0.0904, 0.1002, 0.1646]]], grad_fn=<ConvolutionBackward0>)
tensor([[[0.0946, 0.0300, 0.1850, 0.0602, 0.1594, 0.1434, 0.1472, 0.1506,
          0.1457, 0.0904]]], grad_fn=<SliceBackward0>)


In [14]:
# Alternatively, and perhaps preferably, a class can be used to apply single sided padding.
# In this way Chomp1d will not be required.

class CausalConv1d(nn.Conv1d):
    """1D convolution that pads only on the left of the time axis.

    The underlying ``nn.Conv1d`` is built with ``padding=0``; ``forward``
    left-pads the input with ``(kernel_size - 1) * dilation`` zeros before
    convolving, so no output position depends on later input positions.
    With ``stride=1`` the output has the same length as the input.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: kernel size, as an int or a 1-tuple.
        stride: convolution stride (defaults to 1).
        dilation: kernel dilation, as an int or a 1-tuple.
        groups: passed through to ``nn.Conv1d``.
        bias: passed through to ``nn.Conv1d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):

        super(CausalConv1d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding=0,
            dilation=dilation,
            groups=groups,
            bias=bias)

        # nn.Conv1d stores kernel_size and dilation as 1-tuples, so reading them
        # back works whether the caller passed an int or a tuple.
        self.__padding = (self.kernel_size[0] - 1) * self.dilation[0]

    def forward(self, input):
        """Left-pad ``input`` along its last dimension, then apply the convolution."""
        return super(CausalConv1d, self).forward(F.pad(input, (self.__padding, 0)))
    
    
class TestConv(nn.Module):
    """Minimal wrapper around a single ``CausalConv1d`` for inspecting its behaviour.

    This redefines the earlier ``TestConv`` from this notebook without the
    ``padding`` argument, since the causal convolution pads itself. The
    convolution is exposed as ``conv1`` and wrapped in ``net``; its weights are
    redrawn from N(0, 0.01) on construction. ``dropout`` is accepted but unused.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, dropout=0.2):
        super().__init__()

        conv = CausalConv1d(
            n_inputs,
            n_outputs,
            kernel_size,
            stride=stride,
            dilation=dilation,
        )
        self.conv1 = conv
        self.net = nn.Sequential(conv)

        self.init_weights()

    def init_weights(self):
        """Overwrite the convolution weights in place with N(0, 0.01) samples."""
        self.conv1.weight.data.normal_(mean=0, std=0.01)

    def forward(self, x):
        """Run ``x`` through the wrapped causal convolution and return the result."""
        return self.net(x)
    

In [15]:
# With the causal convolution neither explicit padding nor a Chomp1d step
# afterwards should be necessary.

x = torch.randn(1, 10, 10)

causal_kwargs = dict(n_inputs=10, n_outputs=1, kernel_size=3, stride=1, dilation=1, dropout=0.2)
TestBlock = TestConv(**causal_kwargs)
for param in (TestBlock.conv1.weight, TestBlock.conv1.bias):
    print(param)

Parameter containing:
tensor([[[-0.0070,  0.0017,  0.0119],
         [ 0.0022, -0.0087, -0.0082],
         [ 0.0130,  0.0043,  0.0047],
         [ 0.0023,  0.0184,  0.0095],
         [ 0.0006,  0.0016, -0.0123],
         [ 0.0100, -0.0029,  0.0079],
         [-0.0185,  0.0037, -0.0012],
         [-0.0050,  0.0050, -0.0001],
         [ 0.0008, -0.0035,  0.0016],
         [ 0.0186,  0.0035,  0.0163]]], requires_grad=True)
Parameter containing:
tensor([0.0650], requires_grad=True)


In [16]:
# Once again the first two output nodes are partly based on padding.
# The last node, however, now depends only on real inputs, and the output
# length matches the input length.

pos = 7
total = 0

# For every channel, pair the three inputs starting at `pos` with that
# channel's three kernel weights and accumulate the products in order.
for channel, channel_weights in enumerate(TestBlock.conv1.weight.data[0]):
    window = x[0, channel, pos:pos + 3]
    for sample, weight in zip(window, channel_weights):
        total += sample * weight

print()
print(TestBlock(x))
print(total + TestBlock.conv1.bias.data)


tensor([[[0.0853, 0.0617, 0.1216, 0.0573, 0.0288, 0.0154, 0.0635, 0.1616,
          0.1304, 0.1122]]], grad_fn=<ConvolutionBackward0>)
tensor([0.1122])


## Full Temporal Block

In [17]:
# Here we have used our custom padding function and changed the activation to Leaky ReLU for now.

class TemporalBlock(nn.Module):
    """Residual block made of two weight-normalised causal convolutions.

    Each convolution is followed by a LeakyReLU and dropout. The block input is
    added to that result (through a 1x1 convolution when the channel counts
    differ) and a final LeakyReLU is applied to the sum.

    Args:
        n_inputs: number of input channels.
        n_outputs: number of output channels of both convolutions.
        kernel_size: kernel size of both convolutions.
        stride: stride of both convolutions.
            NOTE(review): the residual path is never strided, so the addition
            in ``forward`` presumably only lines up for stride=1 - confirm
            before using other values.
        dilation: dilation of both convolutions.
        dropout: dropout probability applied after each activation.

    Attributes set here: ``out_shape`` is the number of output channels.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, dropout=0.2):

        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(CausalConv1d(n_inputs, n_outputs, kernel_size, stride=stride, dilation=dilation))
        self.relu1 = nn.LeakyReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = weight_norm(CausalConv1d(n_outputs, n_outputs, kernel_size, stride=stride, dilation=dilation))
        self.relu2 = nn.LeakyReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.relu1, self.dropout1,
                                 self.conv2, self.relu2, self.dropout2)

        # A 1x1 convolution matches the channel count of the residual path when needed.
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.LeakyReLU()

        self.out_shape= n_outputs

        self.init_weights()

    @staticmethod
    def _init_conv_weight(conv, std=0.01):
        """Draw the effective weight of ``conv`` from N(0, std).

        After ``weight_norm`` the ``weight`` attribute is recomputed from
        ``weight_g`` and ``weight_v`` before every forward pass, so writing to
        ``weight`` directly would be silently discarded. Instead ``weight_v`` is
        sampled and ``weight_g`` is set to its per-output-channel norm, which
        makes the recomputed weight equal to the sampled values.
        """
        with torch.no_grad():
            if hasattr(conv, "weight_g") and hasattr(conv, "weight_v"):
                conv.weight_v.normal_(0, std)
                conv.weight_g.copy_(conv.weight_v.norm(p=2, dim=(1, 2), keepdim=True))
            else:
                # Not weight-normalised: the plain weight parameter is authoritative.
                conv.weight.normal_(0, std)

    def init_weights(self):
        """Initialise both convolutions and the optional 1x1 convolution from N(0, 0.01)."""
        self._init_conv_weight(self.conv1)
        self._init_conv_weight(self.conv2)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Return ``LeakyReLU(net(x) + residual)`` where the residual is ``x`` or its 1x1 projection."""
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

In [18]:
# Smoke test: a TemporalBlock mapping 10 input channels to 5 output channels.

x = torch.randn(1, 10, 10)

TestBlock = TemporalBlock(10, 5, 5, stride=1, dilation=1, dropout=0.2)

block_out = TestBlock(x)
print(block_out)

tensor([[[-1.4150e-03, -2.2201e-03, -1.9480e-03, -1.2050e-03, -2.0838e-03,
          -2.8109e-03, -5.4681e-04, -2.2198e-03, -1.7906e-03,  3.2647e-01],
         [-1.8582e-03, -1.8292e-03, -1.5116e-03, -1.3464e-03, -1.4731e-03,
          -5.0217e-04, -1.4533e-03, -1.5321e-03, -1.4126e-03, -1.1384e-03],
         [ 1.7094e-01,  1.7947e-01,  9.2855e-02,  1.0368e-01, -1.1050e-04,
           2.2344e-01,  2.0613e-01, -5.2247e-04, -3.5436e-04,  8.3818e-01],
         [-1.1638e-03, -1.5325e-03, -1.6374e-03, -7.4738e-04, -3.0139e-03,
          -2.1216e-03,  1.4400e-01,  1.1600e-01, -2.7609e-03, -1.9362e-03],
         [ 2.9261e-01,  2.8148e-01,  2.3685e-01,  2.7892e-01,  2.8824e-01,
           1.9851e-01,  3.6106e-01,  5.7606e-01,  2.7030e-01,  2.5556e-01]]],
       grad_fn=<LeakyReluBackward0>)


## Testing Full Network

In [19]:
class TemporalConvNet(nn.Module):
    """Stack of ``TemporalBlock`` layers whose dilation doubles at every level.

    Level ``i`` uses dilation ``2 ** i`` and stride 1. The first level takes
    ``num_inputs`` channels; every later level takes the previous level's
    output channels. ``out_shape`` is the ``out_shape`` of the last block.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()

        # widths[i] -> widths[i + 1] are the in/out channel counts of level i.
        widths = [num_inputs, *num_channels]
        self.layers = [
            TemporalBlock(widths[level], widths[level + 1], kernel_size,
                          stride=1, dilation=2 ** level, dropout=dropout)
            for level in range(len(num_channels))
        ]

        self.out_shape = self.layers[-1].out_shape
        self.network = nn.Sequential(*self.layers)

    def forward(self, x):
        """Pass ``x`` through all temporal blocks in order."""
        return self.network(x)

In [20]:
# Smoke test of TemporalConvNet: 6 input channels, four levels ending in 1 channel.

x = torch.randn(1, 6, 10)

TestNet = TemporalConvNet(6, [5, 10, 5, 1], kernel_size=3, dropout=0.2)

net_out = TestNet(x)
print(x)
print(net_out)

tensor([[[ 0.8211,  0.8633,  0.7954,  1.0118,  0.9933, -0.2309,  0.8287,
          -0.1591, -2.1312,  2.2352],
         [ 0.0847,  0.0975, -0.7539,  1.6902, -1.4064, -0.0913, -1.1369,
          -0.1919, -0.1626, -0.9789],
         [ 0.1559,  0.5798, -1.8434,  0.1558, -0.0197,  1.5330, -1.6641,
           0.8156,  0.9160,  0.2828],
         [-0.9291, -1.5002,  0.2783,  0.1893,  0.7349, -1.7096,  0.9961,
           0.4132,  0.8228,  0.4827],
         [ 3.1243, -1.4045,  1.1466,  0.4907,  1.2401,  0.9644, -0.3407,
           0.8001, -0.0688, -0.3414],
         [-0.2107, -0.8109,  1.0384, -1.2542,  0.1058,  0.5608,  0.3836,
           0.1921, -0.6354, -0.5298]]])
tensor([[[0.5795, 0.5785, 0.5435, 0.1827, 0.5482, 0.5436, 0.5465, 0.5481,
          0.1840, 0.1839]]], grad_fn=<LeakyReluBackward0>)


# TCN Test

In [21]:
# A batch of 5 datapoints, each a data series with 6 channels over 10 timesteps.

batch_size, n_channels, n_steps = 5, 6, 10
x = torch.randn(batch_size, n_channels, n_steps)
x

tensor([[[-2.4862e+00,  1.3014e+00,  1.4611e+00, -8.1100e-01, -4.2103e-01,
           2.3076e+00, -6.1092e-01, -1.8902e+00, -6.6616e-01,  1.1922e+00],
         [-5.6652e-02,  4.5527e-01,  6.2742e-01,  4.1564e-01,  3.9542e-01,
          -1.5785e-01, -3.0933e-01,  5.1140e-01,  2.5879e-01, -1.2419e+00],
         [-3.4514e-01,  1.4932e+00,  1.5529e-01,  4.7902e-01,  3.1193e-01,
          -7.4366e-01, -6.3157e-01, -1.2738e+00, -1.0162e-01, -1.0890e+00],
         [-4.8824e-01, -5.8031e-01, -8.4179e-01,  9.7163e-02, -9.7490e-01,
          -3.7526e-01, -1.0742e+00,  9.3186e-02,  1.2080e+00,  1.8556e+00],
         [-1.1773e+00,  1.2767e+00,  2.1428e+00, -1.3469e+00,  2.7301e-01,
           5.8773e-01,  1.4812e+00,  1.5616e+00,  1.7440e-01, -4.3413e-01],
         [-7.0214e-01,  1.6590e+00,  7.9579e-01, -5.3867e-01, -7.1562e-01,
          -7.9563e-01,  5.6138e-02, -2.6339e-01,  8.5440e-01,  1.7991e+00]],

        [[ 5.7744e-02, -6.3469e-01, -1.5595e-01,  8.4911e-01, -2.6266e+00,
           1.6591

In [22]:
class TCN(nn.Module):
    """Temporal convolutional network followed by a linear read-out layer.

    The ``TemporalConvNet`` output is flattened over its last two dimensions
    and mapped to ``num_outputs`` values by a single linear layer.

    Args:
        history_length: number of timesteps in each input series; together with
            the network's ``out_shape`` it fixes the input width of the linear layer.
        num_inputs: number of input channels.
        num_channels: output channel count of each temporal block.
        kernel_size: kernel size used by every temporal block.
        dropout: dropout probability used by every temporal block.
        num_outputs: width of the final linear layer (defaults to 2, the
            previously hard-coded value).
    """

    def __init__(self, history_length, num_inputs, num_channels, kernel_size=3, dropout=0.2, num_outputs=2):

        super(TCN, self).__init__()

        self.tcn = TemporalConvNet(num_inputs, num_channels, kernel_size=kernel_size, dropout=dropout)
        # The linear layer consumes every channel at every timestep.
        self.linear = nn.Linear(self.tcn.out_shape*history_length, num_outputs)
        self.init_weights()

    def init_weights(self):
        """Draw the linear layer's weights from N(0, 0.01)."""
        self.linear.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Run the temporal network, flatten the last two dimensions and apply the linear layer.

        The notebook calls this with a (batch, num_inputs, history_length) tensor.
        """
        y = self.tcn(x)

        return self.linear(y.flatten(-2,-1))


In [23]:
# Smoke test of the full TCN on a batch of 5 series (6 channels, 10 timesteps).

x = torch.randn(5, 6, 10)

TestTCN = TCN(10, 6, [5, 10, 5, 3], kernel_size=3, dropout=0.2)

tcn_out = TestTCN(x)
print(x)
print(tcn_out)

tensor([[[ 0.4484,  0.2694, -0.1080,  0.5048,  0.2528, -0.4251,  0.4190,
           1.0363,  0.3881,  0.8975],
         [-0.2410,  0.5322, -0.5830,  0.2108, -0.6551, -0.7796,  0.5090,
           0.8757, -0.2601, -0.9079],
         [ 0.1467, -1.0168,  1.7070, -1.0367, -0.9253,  0.9366,  1.1271,
           1.3953,  0.5355,  0.4172],
         [-0.3138, -1.5123,  1.0982, -0.5915,  0.1300, -1.1624,  0.8005,
          -0.7134,  1.0289, -0.4027],
         [-0.3494,  0.1402, -0.4686,  0.1226,  0.3346,  1.5586,  0.2866,
           0.4353, -0.5142, -2.1595],
         [ 0.5346,  0.2737,  1.2471, -1.0877,  0.3483,  0.0458, -0.5418,
           0.2794,  0.9913, -0.5336]],

        [[-1.2928,  0.2680,  0.3462, -0.9636,  0.8960, -0.0047,  1.4130,
          -1.1548, -0.5173,  0.1223],
         [ 0.2855,  1.2524,  0.4575,  1.1822, -1.0817,  0.1718,  0.8343,
           0.3039,  0.4843,  1.0413],
         [ 0.1196, -2.5525, -1.5409,  0.6379,  0.1822, -0.1177,  0.0065,
           0.2777, -0.1573,  0.3910],