# Dynamic Convolutional Neural Network for sequential data

In [None]:
import os
import sys
from pathlib import Path 

import numpy as np

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision.ops import sigmoid_focal_loss
import torch.nn.functional as F

from torchsummary import summary

# evaluation
import scikitplot as skplt
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve, average_precision_score, precision_recall_fscore_support

import matplotlib.pylab as plt

import logging
# Send log records to stdout at INFO level.
# NOTE(review): format='' is an empty string — presumably logging then falls
# back to its default message-only format; confirm that is what is wanted.
logging.basicConfig(stream=sys.stdout, format='',
                level=logging.INFO, datefmt=None)
# Named logger for this notebook.
logger = logging.getLogger('elliptic_scouting')

from IPython.display import display, Markdown, HTML, Image

# Add the parent directory to the import path so the project-local packages
# imported below can be resolved.
sys.path.append('../')
# NOTE(review): wildcard imports hide where names come from; presumably
# run_elliptic_preprocessing_pipeline (called further down) is provided by one
# of these two modules — confirm.
from utils import *
from evaluation.model_performance import *

# Notebook output: the installed PyTorch version.
torch.__version__


In [None]:
#model 
KTOP = 2  # smallest value get_dmaxp will ever return


def get_dmaxp(d, in_size, tot_L):
    """Return ``(tot_L - in_size) * d // tot_L``, but never less than ``KTOP``.

    NOTE(review): presumably the dynamic k-max pooling size of a DCNN layer;
    confirm the intended meaning of each argument against the callers.

    Args:
        d: factor multiplied with the ``tot_L - in_size`` difference.
        in_size: value subtracted from ``tot_L``.
        tot_L: total used in the difference and as the floor divisor.

    Returns:
        The scaled value when it exceeds ``KTOP``, otherwise ``KTOP``.
    """
    scaled = ((tot_L - in_size) * d) // tot_L
    return scaled if scaled > KTOP else KTOP

def conv1dOShape(l_in, k, s=2, p=0):
    """Compute the output length of a 1-D convolution (no dilation term).

    Args:
        l_in: input length.
        k: kernel size.
        s: stride (default 2).
        p: padding, counted twice in the formula (default 0).

    Returns:
        ``(l_in - k + 2*p) // s + 1``.
    """
    span = l_in - k + 2 * p
    whole_strides = span // s
    return whole_strides + 1


class Baseline(nn.Module):
    """Fully connected binary classifier: ``in_features -> 32 -> 8 -> 1``.

    Leaky-ReLU is applied after the two hidden layers.

    Args:
        in_features: size of the last dimension of the input tensor.
        focal_loss: when True, ``forward`` returns the raw output of the last
            linear layer (no sigmoid); when False, it returns the sigmoid of
            that output.
    """

    def __init__(self, in_features, focal_loss=False):
        super().__init__()
        self._in_features = in_features
        self.l1 = nn.Linear(self._in_features, 32)
        self.l2 = nn.Linear(32, 8)
        self.output = nn.Linear(8, 1)
        self._focal = focal_loss

    def forward(self, x):
        """Score ``x`` and return a 1-D tensor with one value per sample.

        Args:
            x: tensor whose last dimension has ``in_features`` entries, e.g.
                ``(batch, in_features)`` or a single ``(in_features,)`` row.

        Returns:
            Raw scores when the model was built with ``focal_loss=True``,
            otherwise sigmoid probabilities; shape ``(n_samples,)``.
        """
        x = F.leaky_relu(self.l1(x))
        x = F.leaky_relu(self.l2(x))
        # reshape(-1) instead of the former reshape(1): identical for a single
        # sample, but no longer crashes when the batch holds several samples.
        scores = self.output(x).reshape(-1)
        if self._focal:
            return scores
        return torch.sigmoid(scores)


class DCNN(nn.Module):
    """1-D convolutional binary classifier.

    Three stages of (stride-1 convolution, leaky-ReLU, max-pool by 2) are
    followed by two linear layers producing a single score.

    Args:
        in_channels: number of input channels of the first convolution.
        d: number of features per sample, i.e. the length of the sequence the
            convolutions slide over (the notebook passes
            ``X_train_df.shape[1]``). Used to size the first linear layer and,
            when ``wide`` is True, the first kernel.
        focal_loss: when True, ``forward`` returns the raw score; when False,
            it returns the sigmoid of the score.
        tot_l: stored on the instance; not used by this class.
        wide: when True the first kernel size is ``d + 3`` instead of 3.
    """

    # Width of the flattened feature vector for the original 166-feature
    # input; kept as a fallback when the width cannot be derived from ``d``.
    _LEGACY_FLAT_FEATURES = 57
    _POOL = 2

    def __init__(self, in_channels, d, focal_loss=True, tot_l=2, wide=False):
        super().__init__()
        self._in_channels = in_channels
        self._d = d
        self._k = self._d + 3 if wide else 3 # input size s^{m x d} d number of features
        self._tot_l = tot_l
        self._focal_loss = focal_loss

        self.conv1 = nn.Conv1d(self._in_channels, 5, self._k, stride=1, dtype=torch.float)
        self.conv2 = nn.Conv1d(5, 3, 3, stride=1)
        self.conv3 = nn.Conv1d(3, 3, 3, stride=1)
        # Derive the flattened width from d instead of hard-coding 57, which
        # only matched input lengths 166..173. If the sequence would shrink to
        # nothing (e.g. wide=True, where the kernel exceeds d) keep the legacy
        # width so construction behaves exactly as before.
        flat = self._flat_features(self._d, (self._k, 3, 3), 3, self._POOL)
        self.f1 = nn.Linear(flat or self._LEGACY_FLAT_FEATURES, 14)
        self.output = nn.Linear(14, 1)

    @staticmethod
    def _flat_features(seq_len, kernels, out_channels, pool=2):
        """Flattened size after each (stride-1 conv, max-pool) stage, or 0 if too short."""
        for kernel in kernels:
            seq_len = (seq_len - kernel + 1) // pool
            if seq_len < 1:
                return 0
        return int(out_channels * seq_len)

    def forward(self, x):
        """Score one sample.

        Args:
            x: tensor accepted by ``conv1``; the notebook feeds a
                ``(in_channels, d)`` tensor.

        Returns:
            A 1-element tensor: the raw score when built with
            ``focal_loss=True``, otherwise its sigmoid.
        """
        x = F.max_pool1d(F.leaky_relu(self.conv1(x)), self._POOL)
        x = F.max_pool1d(F.leaky_relu(self.conv2(x)), self._POOL)
        x = F.max_pool1d(F.leaky_relu(self.conv3(x)), self._POOL)
        # Flattening from dim 0 folds every dimension into one vector.
        # NOTE(review): this means a leading batch dimension would be merged
        # into the features — confirm single-sample input is the intent.
        x = torch.flatten(x, 0)
        # NOTE(review): no activation between f1 and output, unlike Baseline —
        # confirm this is deliberate.
        x = self.f1(x)
        if self._focal_loss:
            return self.output(x)
        else:
            return torch.sigmoid(self.output(x))
    

class EllipticDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        X: container indexed positionally through ``.iloc`` whose rows expose
            ``.values`` (e.g. a pandas DataFrame).
        y: container indexed positionally through ``.iloc`` (e.g. a pandas
            Series), holding one label per row of ``X``.
    """

    def __init__(self, X, y):
        self._X = X
        self._y = y

    def __len__(self):
        """Number of rows in the feature container."""
        return len(self._X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float32 tensors."""
        features = np.asarray(self._X.iloc[idx].values, dtype=np.float32)
        # Convert straight to float32. The former float16 round-trip was exact
        # for 0/1 labels but silently rounded any other label value.
        label = np.array(self._y.iloc[idx], dtype=np.float32)
        return torch.from_numpy(features), torch.from_numpy(label)


In [None]:
# Shape probe: push a random tensor through two conv + max-pool stages built
# with the same channel counts and kernel size as DCNN's first two stages.
# The 2-D input is presumably treated by Conv1d as unbatched
# (channels=1, length=166) — confirm against the PyTorch version in use.
t = torch.randn(1, 166)
t1 = nn.Conv1d(1, 5, 3)(t)
t2 = F.max_pool1d(t1, 2)
t3 = nn.Conv1d(5, 3, 3)(t2)
t4 = F.max_pool1d(t3, 2)
# Notebook output: the shape after every stage.
t1.shape, t2.shape, t3.shape, t4.shape

In [None]:
# Settings forwarded to the preprocessing pipeline.
# NOTE(review): presumably time steps up to last_train_time_step form the
# training split and the rest up to last_time_step the test split — confirm
# against run_elliptic_preprocessing_pipeline.
last_time_step = 49
last_train_time_step = 34
only_labeled = True

X_train_df, X_test_df, y_train, y_test = run_elliptic_preprocessing_pipeline(
    last_train_time_step=last_train_time_step,
    last_time_step=last_time_step,
    only_labeled=only_labeled,
)

In [None]:
# Notebook output: shapes of the train/test feature tables and label containers.
X_train_df.shape, y_train.shape, X_test_df.shape, y_test.shape

In [None]:
BATCH_SIZE = 1

# Wrap each split in an EllipticDataset and hand it straight to a DataLoader.
# Note that train_ds / test_ds end up naming the loaders, not the datasets.
train_ds = DataLoader(
    EllipticDataset(X_train_df, y_train),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_ds = DataLoader(
    EllipticDataset(X_test_df, y_test),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Inspect the first batch only: shape and dtype of the features, then of the labels.
for x, y in train_ds:
    for _batch_part in (x, y):
        print(_batch_part.shape)
        print(_batch_part.dtype)
    break

In [None]:
# Build the convolutional model; the number of feature columns is passed as d.
# NOTE(review): BATCH_SIZE is passed as DCNN's in_channels argument. With
# BATCH_SIZE = 1 that means one input channel — confirm this coupling is
# intended before raising the batch size.
net = DCNN(BATCH_SIZE, X_train_df.shape[1])
# Notebook output: the module summary.
net

In [None]:
# Fully connected reference model; focal_loss=True makes its forward pass
# return the raw (pre-sigmoid) output.
baseline = Baseline(X_test_df.shape[1], focal_loss=True)
# Notebook output: the module summary.
baseline

In [None]:
# conv1 = nn.Conv1d(1, 5, 3, stride=2)
# maxp1 = nn.MaxPool1d(get_dmaxp(d, conv1dOShape(d, 3), 2))
# conv2 = nn.Conv1d(5, 3, 3, stride=2)
# maxp2 = nn.MaxPool1d(get_dmaxp(d, conv1dOShape(conv1dOShape(d, 3), 3), 2))
# fl = nn.Flatten(0)
# ln = nn.Linear(3, 2)
# sm = nn.Softmax(dim=0)

# x1 = conv1(x)
# x2 = maxp1(x1)
# x3 = conv2(x2)
# x4 = maxp2(x3)
# x5 = fl(x4)
# x6 = ln(x5)
# out = sm(x6)
# out

In [None]:
# Smoke test: run the first training batch through the untrained net without
# tracking gradients and print the input, its shape/type, and the flattened output.
with torch.no_grad():
    for x, y in train_ds:
        print(x)
        print(x.shape)
        print(type(x))
        print(net(x).view(-1))
        # Only the first batch is needed.
        break

In [None]:
# EPOCHS = 5
# ce_loss = torch.nn.BCELoss()
# with torch.no_grad():
#  for i, data in enumerate(train_ds):
#     pred = baseline(data[0])
#     #print(ce_loss(pred, data[1]))
#     print(sigmoid_focal_loss(pred, data[1]))
#     if i == 3:
#        break

In [None]:
EPOCHS = 5
ce_loss = torch.nn.BCELoss()  # alternative loss for models that output probabilities
focal_loss = sigmoid_focal_loss
# optimizer = torch.optim.Adam(params=baseline.parameters())
optimizer = torch.optim.Adam(params=net.parameters())

for epoch in range(EPOCHS):
    net.train()
    epoch_loss = 0.0
    n_batches = 0
    for x, y in train_ds:
        #y_pred = baseline(x)
        y_pred = net(x)
        #loss = ce_loss(y_pred, y)
        # y_pred is the raw (pre-sigmoid) score; sigmoid_focal_loss applies
        # the sigmoid itself. reduction="mean" yields a scalar for any batch
        # size (the default "none" only backpropagates for a single element).
        loss = focal_loss(y_pred, y, reduction="mean")
        # calculate gradients of loss with respect model params
        optimizer.zero_grad()
        loss.backward()
        # update params
        optimizer.step()
        epoch_loss += loss.item()
        n_batches += 1
    # Report the mean over the epoch rather than the last sample's loss, and
    # stay well-defined when the loader is empty.
    mean_loss = epoch_loss / n_batches if n_batches else float('nan')
    print(f'epoch {epoch} loss {mean_loss}')

In [None]:
# Per-sample test loss, predicted probability and true label.
losses = []
preds = []
trues = []
# Switch to inference behaviour before scoring the test set. DCNN has no
# dropout/batch-norm layers today, so this is a safeguard for future changes.
net.eval()
with torch.no_grad():
    for x, y in test_ds:
        #pred = baseline(x)
        pred = net(x)
        # reduction="mean" gives a scalar so .item() works for any batch size.
        loss = focal_loss(pred, y, reduction="mean")
        trues.append(y.cpu().numpy())
        losses.append(loss.item())
        # The net returns raw scores; map them to probabilities for thresholding.
        preds.append(torch.sigmoid(pred).cpu().numpy())

In [None]:
# Stack the per-sample probabilities, threshold them at 0.5 and tabulate the
# resulting labels against the true ones.
preds = np.asarray(preds)
bin_preds = (preds > .5).astype(np.half)
cf_matrix = confusion_matrix(trues, bin_preds)
# Notebook output: the confusion matrix.
cf_matrix

In [None]:
def plot_confusion_matrix(y_true, y_pred, title=None, xtickslabels=None, ytickslabels=None):
    """Display precision/recall/F1/support and plot a normalized confusion matrix.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
        title: optional plot title forwarded to scikit-plot.
        xtickslabels: optional replacement labels for the x-axis ticks.
        ytickslabels: optional replacement labels for the y-axis ticks.
    """
    precision, recall, f1score, support = precision_recall_fscore_support(y_true, y_pred)
    summary_md = f'Precision {precision}, recall {recall}, f1score {f1score}, support {support}'
    display(Markdown(summary_md))

    axes = skplt.metrics.plot_confusion_matrix(
        y_true, y_pred, normalize=True, figsize=(10, 8), title=title,
    )

    # Override tick labels only for the axes the caller supplied labels for.
    tick_overrides = (
        (xtickslabels, axes.set_xticklabels),
        (ytickslabels, axes.set_yticklabels),
    )
    for labels, apply_labels in tick_overrides:
        if labels is not None:
            apply_labels(labels)

    plt.show()

In [None]:
# Show the metric summary and normalized confusion matrix for the thresholded test predictions.
plot_confusion_matrix(trues, bin_preds)