In [1]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm


class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` positions along the third axis of the input.

    Args:
        chomp_size: Number of trailing positions to remove. A value of 0
            leaves the input length unchanged.
    """

    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return ``x`` without its last ``chomp_size`` positions, as a contiguous tensor."""
        # `x[:, :, :-0]` is the same as `x[:, :, :0]` and would yield an empty
        # tensor, so a zero chomp has to be handled separately.
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    """Residual block built from two weight-normalised dilated 1-D convolutions.

    Each convolution is followed by a ``Chomp1d(padding)``, a ReLU and a
    Dropout. The block output is ``ReLU(net(x) + res)``, where ``res`` is the
    input itself or, when ``n_inputs != n_outputs``, the input passed through
    a kernel-size-1 convolution.

    Args:
        n_inputs: Channels of the incoming tensor.
        n_outputs: Channels produced by both convolutions.
        kernel_size: Kernel size of both convolutions.
        stride: Stride of both convolutions.
        dilation: Dilation of both convolutions.
        padding: Padding given to both convolutions and chomped off afterwards.
        dropout: Dropout probability used after each ReLU.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super().__init__()
        # Both convolutions share every setting except their input width.
        conv_kwargs = dict(stride=stride, padding=padding, dilation=dilation)

        # First conv stage.
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size, **conv_kwargs))
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second conv stage.
        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size, **conv_kwargs))
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(
            self.conv1, self.chomp1, self.relu1, self.dropout1,
            self.conv2, self.chomp2, self.relu2, self.dropout2,
        )

        # A 1x1 convolution adapts the residual only when the widths differ.
        if n_inputs == n_outputs:
            self.downsample = None
        else:
            self.downsample = nn.Conv1d(n_inputs, n_outputs, 1)
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Fill the convolution weights with samples from N(0, 0.01)."""
        # NOTE(review): conv1/conv2 are wrapped by weight_norm, which presumably
        # recomputes `.weight` from weight_g/weight_v on every forward pass, so
        # this in-place init may have no lasting effect on them - confirm.
        for conv in (self.conv1, self.conv2):
            conv.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Apply both conv stages, add the residual path, then a final ReLU."""
        out = self.net(x)
        if self.downsample is None:
            res = x
        else:
            res = self.downsample(x)
        return self.relu(out + res)


class TemporalConvNet(nn.Module):
    """Stack of ``TemporalBlock`` layers whose dilation doubles at every level.

    Level ``i`` uses dilation ``2 ** i`` and padding
    ``(kernel_size - 1) * 2 ** i``; every block uses stride 1.

    Args:
        num_inputs: Channels of the input fed to the first block.
        num_channels: Output channels of each block, one entry per level.
        kernel_size: Kernel size shared by all blocks.
        dropout: Dropout probability shared by all blocks.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()
        # widths[k] -> widths[k + 1] is the channel change performed by level k.
        widths = [num_inputs] + list(num_channels)
        blocks = []
        for level, (in_channels, out_channels) in enumerate(zip(widths[:-1], widths[1:])):
            dilation_size = 2 ** level
            blocks.append(
                TemporalBlock(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    dilation=dilation_size,
                    padding=(kernel_size - 1) * dilation_size,
                    dropout=dropout,
                )
            )

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every block in order."""
        return self.network(x)

In [28]:
from torch import nn


class TCN(nn.Module):
    """``TemporalConvNet`` followed by a linear layer on the final time step.

    Args:
        input_size: Channels of the input sequence.
        output_size: Size of the linear layer's output.
        num_channels: Output channels of each temporal block; the last entry
            is the input width of the linear layer.
        kernel_size: Kernel size passed to the temporal blocks.
        dropout: Dropout probability passed to the temporal blocks.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        super().__init__()
        self.tcn = TemporalConvNet(
            input_size,
            num_channels,
            kernel_size=kernel_size,
            dropout=dropout,
        )
        self.linear = nn.Linear(num_channels[-1], output_size)
        self.init_weights()

    def init_weights(self):
        """Draw the linear layer's weights from N(0, 0.01)."""
        nn.init.normal_(self.linear.weight, mean=0, std=0.01)

    def forward(self, x):
        """Return the linear read-out of the last position along the third axis."""
        features = self.tcn(x)
        last_step = features[:, :, -1]
        return self.linear(last_step)

In [29]:
import torch
import numpy as np
from torch.autograd import Variable


def data_generator(N, seq_length, seed=None):
    """Generate a data set for the adding problem.

    Every sample has two channels of length ``seq_length``: uniform random
    values in [0, 1) and a 0/1 mask that marks exactly two positions. The
    target is the sum of the two marked values.

    Args:
        N: Number of samples in the set.
        seq_length: Length of each sequence; must be at least 2 so that two
            distinct positions can be drawn.
        seed: Optional integer seed (0 <= seed < 2**32). When given, the data
            is drawn from private torch/NumPy generators, so the same seed
            always yields the same set and the global RNG state is untouched.
            When ``None`` (default), the global torch and NumPy RNGs are used.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(N, 2, seq_length)`` (values
        in channel 0, mask in channel 1) and ``Y`` has shape ``(N, 1)``.
    """
    if seed is None:
        X_num = torch.rand([N, 1, seq_length])
        choose = np.random.choice
    else:
        X_num = torch.rand([N, 1, seq_length],
                           generator=torch.Generator().manual_seed(seed))
        choose = np.random.RandomState(seed).choice

    X_mask = torch.zeros([N, 1, seq_length])
    Y = torch.zeros([N, 1])
    for i in range(N):
        # Two distinct positions per sample (replace=False).
        first, second = choose(seq_length, size=2, replace=False)
        X_mask[i, 0, first] = 1
        X_mask[i, 0, second] = 1
        Y[i, 0] = X_num[i, 0, first] + X_num[i, 0, second]

    X = torch.cat((X_num, X_mask), dim=1)
    # Plain tensors are returned; wrapping them in the deprecated
    # torch.autograd.Variable is no longer needed.
    return X, Y

In [59]:
# --- Problem setup -----------------------------------------------------------
input_channels = 2   # value channel + mask channel produced by data_generator
n_classes = 1
seq_length = 400
batch_size = 8
epochs = 10

# --- Network shape -----------------------------------------------------------
num_hidden = 10      # number of hidden units per layer
levels = 6           # number of stacked "Temporal Blocks" in the TCN
kernel_size = 7
dropout = 0.0
# Note: this is a very simple setting (all levels have the same number of channels).
channel_sizes = levels * [num_hidden]

# --- Data --------------------------------------------------------------------
X_train, Y_train = data_generator(50000, seq_length)
X_test, Y_test = data_generator(1000, seq_length)

# --- Model -------------------------------------------------------------------
model = TCN(
    input_channels,
    n_classes,
    channel_sizes,
    kernel_size=kernel_size,
    dropout=dropout,
)

In [61]:
# Inspect channel 0 (the random values) of the first training sample.
X_train[0][0]

tensor([3.3828e-01, 7.6006e-01, 9.1701e-01, 9.5113e-01, 2.8325e-01, 7.6695e-01,
        9.7632e-01, 4.1888e-01, 1.9247e-01, 7.1049e-01, 6.8221e-01, 9.9924e-02,
        2.2611e-01, 4.0765e-01, 3.8856e-01, 8.1870e-01, 8.1219e-01, 6.2296e-01,
        6.1935e-01, 6.6132e-02, 8.5097e-02, 2.9459e-01, 2.9862e-01, 2.1198e-01,
        5.9707e-01, 3.8256e-01, 9.1999e-01, 3.4878e-01, 6.3482e-01, 8.5936e-01,
        1.2417e-02, 1.0475e-01, 7.4702e-01, 9.7629e-01, 6.7929e-01, 6.9704e-01,
        3.5785e-01, 2.1712e-01, 3.3499e-01, 2.3734e-01, 5.8855e-01, 1.9085e-01,
        2.3255e-01, 7.0426e-01, 9.4318e-01, 7.6312e-01, 1.8672e-01, 5.1846e-02,
        1.3478e-01, 4.3276e-01, 3.3906e-01, 1.1481e-01, 8.3152e-01, 3.4553e-01,
        5.3452e-01, 5.2529e-01, 4.9701e-01, 2.1752e-01, 5.1216e-01, 1.8565e-01,
        2.5466e-01, 4.1591e-01, 8.1981e-01, 9.4530e-01, 4.6204e-01, 9.7778e-01,
        3.6878e-01, 2.7689e-01, 8.7917e-01, 3.0063e-01, 4.0216e-01, 2.3362e-01,
        3.0507e-01, 9.3912e-01, 5.0923e-

In [62]:
# Step through the training set one mini-batch at a time. Slicing past the end
# of a tensor simply stops at its last row, so the final (possibly shorter)
# batch needs no special case. After the loop, `x` and `y` hold the last batch.
for i in range(0, X_train.size(0), batch_size):
    x = X_train[i:i + batch_size]
    y = Y_train[i:i + batch_size]

In [67]:
# Shape of the mini-batch left in `x` by the batching loop.
x.size()

torch.Size([8, 2, 400])

In [66]:
# Channel 0 (the random values) of the sample at index 3 in the batch.
x[3][0]

tensor([1.4691e-01, 4.7932e-01, 3.4841e-01, 3.0986e-01, 7.0181e-01, 4.7097e-01,
        2.2430e-01, 3.1750e-01, 1.4830e-01, 8.2008e-01, 3.5697e-01, 5.8757e-01,
        6.6105e-01, 3.1613e-01, 9.1187e-01, 3.5923e-01, 3.5367e-01, 1.8791e-01,
        1.0985e-01, 2.8470e-01, 3.2126e-01, 9.9816e-01, 1.9188e-01, 1.9611e-01,
        4.4079e-01, 2.6091e-01, 3.4290e-01, 9.5968e-01, 1.5774e-01, 5.4944e-01,
        4.2977e-01, 1.9353e-01, 2.5661e-01, 9.4326e-01, 3.1973e-01, 5.3713e-01,
        2.1968e-01, 7.4206e-01, 9.4140e-01, 1.9690e-01, 3.8755e-01, 8.2674e-01,
        4.4132e-01, 6.9913e-01, 8.3288e-01, 9.9932e-01, 7.7595e-01, 4.4082e-01,
        5.0792e-02, 4.7113e-01, 9.5398e-01, 3.0008e-01, 9.9670e-01, 5.0500e-01,
        9.7430e-01, 3.6835e-01, 3.2493e-01, 9.2898e-01, 3.5834e-01, 9.5333e-01,
        1.2156e-01, 3.4994e-01, 3.1817e-01, 7.2535e-03, 7.5396e-01, 9.2259e-01,
        6.3666e-01, 5.3627e-01, 8.9480e-01, 2.9077e-01, 7.1576e-01, 8.9599e-01,
        9.9244e-01, 2.2213e-01, 9.0209e-

In [68]:
# Input x must be of size (batch, input_channels, seq_length),
# e.g. torch.Size([8, 2, 400]) for the batch inspected above.
model.eval()  # inference mode: the Dropout layers become no-ops
model(x)

tensor([[0.1103],
        [0.1104],
        [0.1103],
        [0.1114],
        [0.1098],
        [0.1111],
        [0.1105],
        [0.1111]], grad_fn=<AddmmBackward>)

In [23]:
# num_hidden: number of hidden units per layer
num_hidden = 10
# levels: number of stacked "Temporal Blocks" in the TCN
levels = 6

# Uniform-width alternative, kept for reference:
# channel_sizes = [num_hidden] * levels
# NOTE(review): the explicit list below has 5 entries, so neither `levels` (6)
# nor `num_hidden` is used by it - confirm which configuration is intended.
channel_sizes = [10, 8, 14, 12, 19]

In [24]:
# Show the per-level output widths currently configured.
channel_sizes

[10, 8, 14, 12, 19]

In [25]:
# Number of entries = number of TemporalBlock levels a TemporalConvNet would build.
len(channel_sizes)

5

In [33]:
# Preview the (in_channels, out_channels, dilation) triple that
# TemporalConvNet.__init__ derives for each level of `channel_sizes`.
# NOTE(review): the recorded output shows six levels of width 10, which does
# not match the five-entry list assigned earlier in this notebook - it looks
# like it came from a different kernel state; re-run after Restart & Run All.
num_inputs = 2
for i, out_channels in enumerate(channel_sizes):
    # The first level reads the raw input; later levels read the previous width.
    in_channels = channel_sizes[i - 1] if i else num_inputs
    dilation_size = 1 << i  # same value as 2 ** i
    print(i, in_channels, out_channels, dilation_size)

0 2 10 1
1 10 10 2
2 10 10 4
3 10 10 8
4 10 10 16
5 10 10 32
