In [1]:
import numpy as np
from utils.utils import (write_json, get_data_sub_paths,
                         load_total_counts, set_system_seed)
from utils.configs import BaseConf
from utils.data_processing import sequencify
from utils.preprocessing import split_data
from torch.autograd import Variable
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
conf = BaseConf()

data_sub_paths = [i for i in get_data_sub_paths() if i.startswith('Totals')]

In [3]:
# load data
df = load_total_counts(folder_name=data_sub_paths[0])

In [4]:
df

Unnamed: 0,Total,Theft,Battery,Criminal Damage,Narcotics,Assault,Burglary,Motor Vehicle Theft,Robbery
2014-01-01 00:00:00,68,24,17,13,3,3,3,4,1
2014-01-01 01:00:00,68,22,32,5,3,5,1,0,0
2014-01-01 02:00:00,55,8,28,7,0,4,2,1,5
2014-01-01 03:00:00,30,3,21,4,0,0,0,0,2
2014-01-01 04:00:00,36,3,23,2,0,3,2,2,1
...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00,26,7,2,8,2,3,0,3,1
2018-12-31 20:00:00,26,10,4,7,2,0,0,2,1
2018-12-31 21:00:00,28,4,5,12,1,0,0,3,3
2018-12-31 22:00:00,34,3,10,9,1,5,3,2,1


In [5]:
def calculate_out_length(input_length, kernel_size, padding=0, dilation=1, stride=1):
    """
    calculate_out_length calculates the out_length parameter needed for a PyTorch nn.Conv1d layer given the other
    parameters. This is mainly used to determine the output length given a layer architecture, than is need in the next layer as input_length.

    :param input_length: length of the input vector. Conv1d takes a (N,Cin,Lin) tensor and produces and (N,Cout,Lout) tensor.
    :param kernel_size: size of the window or filter applied over input
    :param padding:  controls the amount of padding applied to the input. It can be either a string {‘valid’, ‘same’} or a tuple of ints giving the amount of implicit padding applied on both sides.
    :param dilation: controls the spacing between the kernel points
    :param stride: controls the stride for the cross-correlation, a single number or a one-element tuple
    :return:
    """
    assert input_length > 0, "input_length <= 0"
    assert kernel_size > 0, "kernel_size <= 0"
    return int(np.floor(((input_length + 2 * padding - dilation * (kernel_size - 1) - 1) / stride) + 1))

In [6]:
calculate_out_length(
    input_length=1000,
    kernel_size=99,
    padding=49,
    dilation=1,
    stride=1,
)

1000

In [7]:
sequencify(np.stack([
    np.arange(1000) * 1,
    np.arange(1000) * 10,
]).T, 10).shape

(990, 10, 2)

In [11]:
from utils.data_processing import sequencify, batchify

seq_len = 10
data = sequencify(np.arange(0, 100), seq_len=seq_len)
data = batchify(np.arange(0, 100), batch_size=seq_len)


data

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [8]:
data = df.Total.values
# data = np.arange(10_0000, dtype=np.float32)
x = Variable(
    torch.FloatTensor(
        np.expand_dims(sequencify(data, seq_len=24 * 7 * 2 + 24)[:10], axis=1)
    )
)
x.size()

torch.Size([10, 1, 360])

In [6]:
conv1 = nn.Conv1d(1, 8, 24)  # summarize 24 hours using kernel_size=24
conv2 = nn.Conv1d(8, 16, 7)  # summarize 7 days using kernel_size=7
pool1 = nn.MaxPool1d(24)
pool2 = nn.MaxPool1d(7)
y = pool1(F.relu(conv1(x)))
y = pool2(F.relu(conv2(y)))
y.size()

NameError: name 'nn' is not defined

In [7]:
list(nn.Conv1d(2, 2, 4).parameters())

NameError: name 'nn' is not defined

In [8]:
import torch.nn as nn
import torch.nn.functional as F


class CNN1D(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 8, 24)  # summarize 24 hours using kernel_size=24
        self.conv2 = nn.Conv1d(8, 16, 7)  # summarize 7 days using kernel_size=7
        self.pool1 = nn.MaxPool1d(24)
        self.pool2 = nn.MaxPool1d(7)
        self.fc1 = nn.Linear(16, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [9]:
conv1 = nn.Conv1d(
    in_channels=1,  #: int,
    out_channels=5,  #: int,
    kernel_size=(5,),  #: Union[int, Tuple[int]],
    stride=(1,),  #: controls the stride for the cross-correlation,
    padding=0,  #: controls the amount of padding applied to the input
    dilation=(1,),  #: controls the spacing between the kernel points
    groups=1,  #: controls the connections between inputs and outputs
    bias=True,  #: bool = True,
    padding_mode='zeros',  #: str = 'zeros',
    #     device=, #=None,
    #     dtype=, #=None,
)

pool1 = nn.MaxPool1d(kernel_size=5)

conv1(x).size(), pool1(conv1(x)).size()

NameError: name 'x' is not defined

In [23]:
class TimeSeriesCNN(nn.Module):
    def __init__(self):
        super(TimeSeriesCNN, self).__init__()

        self.name = "TimeSeriesCNN"

        self.conv1 = nn.Conv1d(
            in_channels=1,
            out_channels=8,
            kernel_size=(24,),
        )  # summarize 24 hours using kernel_size=24
        self.conv2 = nn.Conv1d(
            in_channels=8,
            out_channels=16,
            kernel_size=(7,),
        )  # summarize 7 days using kernel_size=7
        self.pool1 = nn.MaxPool1d(kernel_size=24)
        self.pool2 = nn.MaxPool1d(7)
        self.fc1 = nn.Linear(16, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [24]:

max_seq_len = 24 * 7
# data = sequencify(df.Total.values,max_seq_len)
data = sequencify(np.arange(1000), max_seq_len)
train_data_fraction = 0.8

train_data, test_data = split_data(data, train_data_fraction)
train_data, val_data = split_data(train_data, train_data_fraction)
train_x, train_y = torch.tensor(train_data[:, :-1]), torch.tensor(train_data[:, -1])
test_x, test_y = torch.tensor(test_data[:, :-1]), torch.tensor(test_data[:, -1])
val_x, val_y = torch.tensor(val_data[:, :-1]), torch.tensor(val_data[:, -1])

In [25]:
model = TimeSeriesCNN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.07)
criterion = nn.functional.mse_loss

print(model)

TimeSeriesCNN(
  (conv1): Conv1d(1, 8, kernel_size=(24,), stride=(1,))
  (conv2): Conv1d(8, 16, kernel_size=(7,), stride=(1,))
  (pool1): MaxPool1d(kernel_size=24, stride=24, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool1d(kernel_size=7, stride=7, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
)


In [26]:
def train(epoch):
    model.train()
    tr_loss = 0

    x_train, y_train = Variable(train_x), Variable(train_y)
    x_val, y_val = Variable(val_x), Variable(val_y)

    optimizer.zero_grad()

    output_train = model(x_train)
    output_val = model(x_val)

    loss_train = criterion(output_train, y_train)
    loss_val = criterion(output_val, y_val)
    train_losses.append(loss_train)
    val_losses.append(loss_val)

    loss_train.backward()
    optimizer.step()
    tr_loss = loss_train.item()
    if epoch % 2 == 0:
        # printing the validation loss
        print('Epoch : ', epoch + 1, '\t', 'loss :', loss_val)

In [27]:
n_epochs = 3
train_losses = []
val_losses = []
for epoch in range(n_epochs):
    train(epoch)

RuntimeError: Expected 3-dimensional input for 3-dimensional weight [8, 1, 24], but got 2-dimensional input of size [532, 167] instead