In [170]:
from torch.nn import Conv1d
import torch

In [171]:
conv = Conv1d(1, 1, 3)

In [172]:
test_tensor = torch.tensor([[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]])

In [173]:
conv(test_tensor)

tensor([[1.1897, 1.6000, 2.0103, 2.4206, 2.8308, 3.2411, 3.6514, 4.0617]],
       grad_fn=<SqueezeBackward1>)

In [174]:
weight, bias = list(conv.parameters())[0][0], list(conv.parameters())[1][0]
weight, bias

(tensor([[-0.2716,  0.5025,  0.1794]], grad_fn=<SelectBackward0>),
 tensor(-0.0818, grad_fn=<SelectBackward0>))

In [175]:
filter_size = weight.shape[1]
num_elements = test_tensor.shape[1]

# Conv1D
for i in range(num_elements - filter_size + 1):
    data = test_tensor[:, i:i+filter_size]
    activation = data @ weight.T + bias
    print(data[:,1], data, activation)

tensor([2.]) tensor([[1., 2., 3.]]) tensor([[1.1897]], grad_fn=<AddBackward0>)
tensor([3.]) tensor([[2., 3., 4.]]) tensor([[1.6000]], grad_fn=<AddBackward0>)
tensor([4.]) tensor([[3., 4., 5.]]) tensor([[2.0103]], grad_fn=<AddBackward0>)
tensor([5.]) tensor([[4., 5., 6.]]) tensor([[2.4206]], grad_fn=<AddBackward0>)
tensor([6.]) tensor([[5., 6., 7.]]) tensor([[2.8308]], grad_fn=<AddBackward0>)
tensor([7.]) tensor([[6., 7., 8.]]) tensor([[3.2411]], grad_fn=<AddBackward0>)
tensor([8.]) tensor([[7., 8., 9.]]) tensor([[3.6514]], grad_fn=<AddBackward0>)
tensor([9.]) tensor([[ 8.,  9., 10.]]) tensor([[4.0617]], grad_fn=<AddBackward0>)


In [176]:
from torch.nn.functional import pad

In [177]:
padded_tensor = pad(test_tensor, (2,2))
padded_tensor

tensor([[ 0.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,  0.,  0.]])

In [178]:
filter_size = weight.shape[1]
num_elements = padded_tensor.shape[1]

# Implementing Casual Conv1D
for i in range(num_elements - filter_size + 1):
    data = padded_tensor[:, i:i+filter_size]
    activation = data @ weight.T + bias
    print(data[:,2][0], data, activation)

tensor(1.) tensor([[0., 0., 1.]]) tensor([[0.0976]], grad_fn=<AddBackward0>)
tensor(2.) tensor([[0., 1., 2.]]) tensor([[0.7795]], grad_fn=<AddBackward0>)
tensor(3.) tensor([[1., 2., 3.]]) tensor([[1.1897]], grad_fn=<AddBackward0>)
tensor(4.) tensor([[2., 3., 4.]]) tensor([[1.6000]], grad_fn=<AddBackward0>)
tensor(5.) tensor([[3., 4., 5.]]) tensor([[2.0103]], grad_fn=<AddBackward0>)
tensor(6.) tensor([[4., 5., 6.]]) tensor([[2.4206]], grad_fn=<AddBackward0>)
tensor(7.) tensor([[5., 6., 7.]]) tensor([[2.8308]], grad_fn=<AddBackward0>)
tensor(8.) tensor([[6., 7., 8.]]) tensor([[3.2411]], grad_fn=<AddBackward0>)
tensor(9.) tensor([[7., 8., 9.]]) tensor([[3.6514]], grad_fn=<AddBackward0>)
tensor(10.) tensor([[ 8.,  9., 10.]]) tensor([[4.0617]], grad_fn=<AddBackward0>)
tensor(0.) tensor([[ 9., 10.,  0.]]) tensor([[2.4986]], grad_fn=<AddBackward0>)
tensor(0.) tensor([[10.,  0.,  0.]]) tensor([[-2.7979]], grad_fn=<AddBackward0>)


In [179]:
# Last two elements shouldnt be used
for i in range(num_elements - filter_size -1):
    data = padded_tensor[:, i:i+filter_size]
    activation = data @ weight.T + bias
    print(data[:,2], data, activation)

tensor([1.]) tensor([[0., 0., 1.]]) tensor([[0.0976]], grad_fn=<AddBackward0>)
tensor([2.]) tensor([[0., 1., 2.]]) tensor([[0.7795]], grad_fn=<AddBackward0>)
tensor([3.]) tensor([[1., 2., 3.]]) tensor([[1.1897]], grad_fn=<AddBackward0>)
tensor([4.]) tensor([[2., 3., 4.]]) tensor([[1.6000]], grad_fn=<AddBackward0>)
tensor([5.]) tensor([[3., 4., 5.]]) tensor([[2.0103]], grad_fn=<AddBackward0>)
tensor([6.]) tensor([[4., 5., 6.]]) tensor([[2.4206]], grad_fn=<AddBackward0>)
tensor([7.]) tensor([[5., 6., 7.]]) tensor([[2.8308]], grad_fn=<AddBackward0>)
tensor([8.]) tensor([[6., 7., 8.]]) tensor([[3.2411]], grad_fn=<AddBackward0>)
tensor([9.]) tensor([[7., 8., 9.]]) tensor([[3.6514]], grad_fn=<AddBackward0>)
tensor([10.]) tensor([[ 8.,  9., 10.]]) tensor([[4.0617]], grad_fn=<AddBackward0>)


In [180]:
import torch.nn as nn

class CasualConv1D(nn.Conv1d):
    def __init__(self, in_channels, out_channels, kernel_size, 
                 bias=True, device=None, dtype=None):
        super(CasualConv1D, self).__init__(in_channels, out_channels, kernel_size, padding=kernel_size-1, 
                                           bias=bias, device=device, dtype=dtype)
    def forward(self, inputs):
        assert inputs.dim() == 3
        activations = super(CasualConv1D, self).forward(inputs)
        return activations[:,:,:activations.shape[-1]-self.kernel_size[0]]

In [181]:
casualconv = CasualConv1D(1, 1, 3)

In [182]:
casualconv(test_tensor.view(1,1,-1))

tensor([[[0.7186, 1.2596, 1.2612, 1.2628, 1.2644, 1.2660, 1.2675, 1.2691,
          1.2707]]], grad_fn=<SliceBackward0>)

In [183]:
weight, bias = list(casualconv.parameters())[0][0], list(casualconv.parameters())[1][0]

In [184]:
# It matches so the implmentation works as expected
padded_tensor = pad(test_tensor, (2,2))
for i in range(num_elements - filter_size -1):
    data = padded_tensor[:, i:i+filter_size]
    activation = data @ weight.T + bias
    print(data[:,2], data, activation)

tensor([1.]) tensor([[0., 0., 1.]]) tensor([[0.7186]], grad_fn=<AddBackward0>)
tensor([2.]) tensor([[0., 1., 2.]]) tensor([[1.2596]], grad_fn=<AddBackward0>)
tensor([3.]) tensor([[1., 2., 3.]]) tensor([[1.2612]], grad_fn=<AddBackward0>)
tensor([4.]) tensor([[2., 3., 4.]]) tensor([[1.2628]], grad_fn=<AddBackward0>)
tensor([5.]) tensor([[3., 4., 5.]]) tensor([[1.2644]], grad_fn=<AddBackward0>)
tensor([6.]) tensor([[4., 5., 6.]]) tensor([[1.2660]], grad_fn=<AddBackward0>)
tensor([7.]) tensor([[5., 6., 7.]]) tensor([[1.2675]], grad_fn=<AddBackward0>)
tensor([8.]) tensor([[6., 7., 8.]]) tensor([[1.2691]], grad_fn=<AddBackward0>)
tensor([9.]) tensor([[7., 8., 9.]]) tensor([[1.2707]], grad_fn=<AddBackward0>)
tensor([10.]) tensor([[ 8.,  9., 10.]]) tensor([[1.2723]], grad_fn=<AddBackward0>)
