# Dataloader Creation

In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import gensim
import torch.nn.functional as F
from sklearn.metrics import f1_score, precision_score, recall_score

In [None]:
# Shared configuration read by the dataset class and the data loaders.
CONSTANTS = dict(
    # Name of the target column in the CSV.
    label='default payment next month',
    # CSV file handed to the dataset class.
    path='preprocessed_upsampled.csv',
    # Column-name prefixes of the per-step features; the step index
    # (1..length) is appended to build the actual column name.
    sequence_features=['PAY_', 'BILL_AMT', 'PAY_AMT', 'Avg_exp_', 'Client_', 'Closeness_'],
    # Columns that hold a single value per example.
    non_sequence_features=['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE'],
    # Number of steps available for every sequence feature.
    length=6,
    # Mini-batch size used by the data loaders.
    batch_size=256,
)

In [None]:
class Dataset_seq(Dataset):
  """Dataset over a preprocessed CSV that yields sequence and static features per row.

  Each item is a tuple ``(sequence, static, label)``:

  * ``sequence`` -- float tensor of shape
    ``(length, n_sequence_features * num_bins)``. Row 0 is built from the
    columns with suffix ``length`` and the last row from suffix ``1``; every
    value is one-hot encoded into ``num_bins`` classes.
  * ``static`` -- float tensor holding the ``non_sequence_features`` columns.
  * ``label`` -- int64 one-hot tensor of shape ``(2,)``.

  Column names and the sequence length come from the module-level ``CONSTANTS``.
  """

  def __init__(self, path, num_bins=10):
    """Load the CSV at `path`.

    Args:
      path: file passed to ``pandas.read_csv``.
      num_bins: number of one-hot classes per sequence value. Values are
        truncated to integers and must lie in ``[0, num_bins)``, otherwise
        ``one_hot`` raises.
    """
    self.data = pd.read_csv(path)
    self.label = CONSTANTS['label']
    self.features = list(self.data.columns)
    self.features.remove(self.label)
    self.num_bins = num_bins

  def __getitem__(self, index):
    """Return ``(sequence, static, label)`` tensors for row `index`."""
    ex = self.data.iloc[index]
    features = ex[self.features]

    non_sequential_features = torch.tensor(
        list(features[CONSTANTS['non_sequence_features']].values), dtype=torch.float)

    # One inner list per step, walking the column suffixes from `length` down to 1.
    steps = [
        [features[f'{base_feature}{i}'] for base_feature in CONSTANTS['sequence_features']]
        for i in range(CONSTANTS['length'], 0, -1)
    ]
    binned = torch.from_numpy(np.array(steps).astype(np.double)).type(torch.int64)

    # (length, n_features) -> (length, n_features, num_bins) -> (length, n_features * num_bins)
    all_features = torch.nn.functional.one_hot(binned, self.num_bins)
    all_features = all_features.view((binned.shape[0], -1)).type(torch.float)

    # one_hot already returns an int64 tensor; wrapping it in torch.tensor()
    # again would only copy it and trigger a UserWarning.
    labels = torch.nn.functional.one_hot(torch.tensor(ex[self.label], dtype=torch.int64), 2)

    return all_features, non_sequential_features, labels

  def __len__(self):
    """Number of rows in the loaded CSV."""
    return self.data.shape[0]

In [None]:
seq_dataset = Dataset_seq(CONSTANTS['path'])

# random_split requires the split sizes to add up to len(dataset), so derive
# them from the dataset instead of hard-coding both numbers. A 46728-row file
# still yields the previous 37425 / 9303 split; smaller files hold out at most
# one fifth of their rows.
val_size = min(9303, len(seq_dataset) // 5)
train_size = len(seq_dataset) - val_size
train_data, val_data = torch.utils.data.random_split(seq_dataset, [train_size, val_size])
train_loader = DataLoader(train_data, batch_size=CONSTANTS['batch_size'], shuffle=True)
val_loader = DataLoader(val_data, batch_size=CONSTANTS['batch_size'], shuffle=True)

# Train Functions

In [None]:
def metrics(y_true:torch.Tensor, y_pred:torch.Tensor, is_training=False) -> 'tuple[torch.Tensor, torch.Tensor, torch.Tensor]':
    '''Calculate precision, recall and F1 score for binary labels. Can work with gpu tensors

    The original implementation is written by Michal Haltuf on Kaggle.

    Parameters
    ----------
    y_true : torch.Tensor
        `ndim` == 1. Ground-truth labels (0 or 1).
    y_pred : torch.Tensor
        `ndim` == 1 (predicted labels, 0 or 1) or `ndim` == 2 (per-class
        scores; the arg-max over dim 1 is taken as the predicted label).
    is_training : bool
        Value assigned to ``requires_grad`` of the un-rounded F1 tensor.

    Returns
    -------
    tuple of torch.Tensor
        ``(precision, recall, f1)``. Each is a 0-dim tensor rounded to
        3 decimals, 0 <= val <= 1.

    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6

    '''
    assert y_true.ndim == 1
    assert y_pred.ndim == 1 or y_pred.ndim == 2

    if y_pred.ndim == 2:
        y_pred = y_pred.argmax(dim=1)

    # Confusion-matrix counts; true negatives are not needed for these metrics.
    tp = (y_true * y_pred).sum().to(torch.float32)
    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)

    # Keeps the divisions finite when a denominator would otherwise be zero.
    epsilon = 1e-7

    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)

    f1 = 2* (precision*recall) / (precision + recall + epsilon)
    f1.requires_grad = is_training

    return torch.round(precision, decimals=3), torch.round(recall, decimals=3), torch.round(f1, decimals=3)

In [None]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

def _run_epoch(model, loader, criterion, device, optimizer=None):
  """Run one pass over `loader`; train when `optimizer` is given, otherwise evaluate.

  Returns ``(mean_loss, accuracy, precision, recall, f1)`` computed only from
  the batches seen in this pass.
  """
  training = optimizer is not None
  model.train(training)

  correct = 0
  num_examples = 0
  running_loss = 0
  preds, labels = [], []

  with torch.set_grad_enabled(training):
    for seq, non_seq, target in loader:
      seq = seq.to(device)
      non_seq = non_seq.to(device)
      target = target.to(device)

      if training:
        optimizer.zero_grad()
      output = model(seq, non_seq)
      loss = criterion(output, target.float())
      if training:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

      # Targets arrive one-hot encoded; reduce both sides to class indices.
      _, pred = torch.max(output, 1)
      _, true = torch.max(target, 1)
      preds.extend(pred.tolist())
      labels.extend(true.tolist())
      correct += (pred == true).float().sum().cpu()
      num_examples += len(output)
      running_loss += loss.item()

  accuracy = correct / num_examples
  # metrics() takes the ground truth first and the predictions second.
  precision, recall, f1 = metrics(torch.tensor(labels), torch.tensor(preds))
  return running_loss / len(loader), accuracy, precision, recall, f1


def train(model, optimizer, lr_scheduler, criterion, train_loader, test_loader, epochs):
  """Train `model` and evaluate it on `test_loader` after every epoch.

  Args:
    model: module called as ``model(seq, non_seq)``. Its output is passed to
      `criterion` together with the one-hot target and arg-maxed for metrics.
    optimizer: optimizer stepped after every training batch; gradients are
      clipped to a norm of 1 before each step.
    lr_scheduler: stepped once per epoch after evaluation.
      ``ReduceLROnPlateau`` receives the mean evaluation loss of the epoch;
      every other scheduler is stepped without arguments.
    criterion: loss called as ``criterion(output, target.float())``.
    train_loader: yields ``(seq, non_seq, one_hot_target)`` batches.
    test_loader: same batch layout; used for evaluation only.
    epochs: number of passes over `train_loader`.

  Returns:
    ``(train_loss, test_loss, train_acc, test_acc, train_f1, test_f1)`` --
    six lists with one entry per epoch.
  """
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model = model.to(device)
  train_loss, test_loss = [], []
  train_acc, test_acc = [], []
  train_f1, test_f1 = [], []

  for epoch in range(epochs):
    loss, accuracy, precision, recall, f1 = _run_epoch(model, train_loader, criterion, device, optimizer)
    print (f'Epoch [{epoch+1}/{epochs}], Loss: {loss}, Accuracy:{accuracy} Precision: {precision} Recall: {recall} F1: {f1}')
    train_loss.append(loss)
    train_acc.append(accuracy)
    train_f1.append(f1)

    # Evaluation starts from empty prediction/label lists, so the test
    # metrics are not mixed with the training predictions of this epoch.
    loss, accuracy, precision, recall, f1 = _run_epoch(model, test_loader, criterion, device)

    # Only ReduceLROnPlateau takes a metric; other schedulers would read the
    # positional argument as an epoch number.
    if isinstance(lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
      lr_scheduler.step(loss)
    else:
      lr_scheduler.step()

    print (f'Test Loss: {loss}, Test Accuracy:{accuracy} Test Precision: {precision} Test Recall: {recall} Test F1: {f1}')
    test_loss.append(loss)
    test_acc.append(accuracy)
    test_f1.append(f1)

  return train_loss, test_loss, train_acc, test_acc, train_f1, test_f1

# TCN

In [None]:
import math
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.nn.utils import weight_norm
from torch.autograd import Variable

# Code adapted from: https://github.com/flrngel/TCN-with-attention

class Chomp1d(nn.Module):
  """Remove the last `chomp_size` positions along the final axis of a 3-D tensor.

  Args:
    chomp_size: number of trailing positions to drop; 0 leaves the input
      unchanged.
  """

  def __init__(self, chomp_size):
    super().__init__()
    self.chomp_size = chomp_size

  def forward(self, x):
    """Return a contiguous copy of `x` without its last `chomp_size` positions."""
    if self.chomp_size == 0:
      # x[:, :, :-0] is an empty slice, so nothing-to-chomp needs its own path.
      return x.contiguous()
    return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
  """Residual block made of two dilated 1-D convolutions.

  Each convolution is followed by a normalisation step, a `Chomp1d` that trims
  `padding` trailing positions, a LeakyReLU and dropout. The block returns
  ``relu(net(x) + res)``, where ``res`` is ``x`` itself or, when the channel
  counts differ, a 1x1 convolution of ``x``.

  Args:
    n_inputs: number of input channels.
    n_outputs: number of output channels of both convolutions.
    kernel_size: kernel size of both convolutions.
    stride: stride of both convolutions.
    dilation: dilation of both convolutions.
    padding: padding of both convolutions; the same number of trailing
      positions is chomped after each of them.
    dropout: dropout probability.
    bn: if true, apply ``nn.BatchNorm1d`` after each convolution; otherwise
      the convolutions are weight-normalised and no separate normalisation
      layer is used.
  """

  def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2, bn=False):
    super().__init__()

    self.conv1 = nn.Conv1d(n_inputs, n_outputs, kernel_size,
                           stride=stride, padding=padding, dilation=dilation)
    self.conv2 = nn.Conv1d(n_outputs, n_outputs, kernel_size,
                           stride=stride, padding=padding, dilation=dilation)
    self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None

    # Initialise the raw weights before weight normalisation re-parametrises
    # them, so the initial values are what the re-parametrisation starts from.
    self.init_weights()

    if bn:
      self.norm1 = nn.BatchNorm1d(n_outputs)
      self.norm2 = nn.BatchNorm1d(n_outputs)
    else:
      # weight_norm wraps a module (it is not a layer of its own), so it is
      # applied to the convolutions and the norm slots become pass-throughs.
      self.conv1 = weight_norm(self.conv1)
      self.conv2 = weight_norm(self.conv2)
      self.norm1 = nn.Identity()
      self.norm2 = nn.Identity()

    self.chomp1 = Chomp1d(padding)
    self.relu1 = nn.LeakyReLU()
    # NOTE(review): Dropout2d is fed 3-D activations here; recent PyTorch
    # releases warn about that -- confirm whether plain/1-D dropout is intended.
    self.dropout1 = nn.Dropout2d(dropout)

    self.chomp2 = Chomp1d(padding)
    self.relu2 = nn.LeakyReLU()
    self.dropout2 = nn.Dropout2d(dropout)

    self.net = nn.Sequential(self.conv1, self.norm1, self.chomp1, self.relu1, self.dropout1,
                             self.conv2, self.norm2, self.chomp2, self.relu2, self.dropout2)
    self.relu = nn.ReLU()

  def init_weights(self):
    """Xavier-initialise the convolution weights with gain sqrt(2).

    Called once from ``__init__`` before weight normalisation is applied.
    """
    nn.init.xavier_uniform_(self.conv1.weight, gain=np.sqrt(2))
    nn.init.xavier_uniform_(self.conv2.weight, gain=np.sqrt(2))

    if self.downsample is not None:
        nn.init.xavier_uniform_(self.downsample.weight, gain=np.sqrt(2))

  def forward(self, x):
    """Apply the two-convolution stack and add the (possibly projected) input."""
    net = self.net(x)
    res = x if self.downsample is None else self.downsample(x)
    return self.relu(net + res)


class AttentionBlock(nn.Module):
  """Single-head attention with a residual connection.

  Keys, queries and values are linear projections of the input's last axis.
  The block returns ``minibatch + read``, so `v_size` is expected to equal
  `dims` for the residual sum to line up.

  Args:
    dims: size of the input's last axis.
    k_size: output size of the key and query projections.
    v_size: output size of the value projection.
    seq_len: unused; kept for interface compatibility.
  """

  def __init__(self, dims, k_size, v_size, seq_len=None):
    super().__init__()
    self.key_layer = nn.Linear(dims, k_size)
    self.query_layer = nn.Linear(dims, k_size)
    self.value_layer = nn.Linear(dims, v_size)
    self.sqrt_k = math.sqrt(k_size)

  def forward(self, minibatch):
    """Attend over axis 1 of a 3-D `minibatch` and add the result to it."""
    keys = self.key_layer(minibatch)
    queries = self.query_layer(minibatch)
    values = self.value_layer(minibatch)
    logits = torch.bmm(queries, keys.transpose(2, 1))

    # Strict upper-triangular mask built on the logits' own device, so the
    # block runs on CPU as well as on GPU. It broadcasts over the batch axis.
    steps = logits.size(-1)
    mask = torch.triu(torch.ones(steps, steps, device=logits.device), diagonal=1).bool()

    # Out-of-place masked_fill keeps the masking visible to autograd.
    # NOTE(review): masked logits are set to 0 (as before), which does not
    # remove them from the softmax; -inf would -- confirm the intent.
    logits = logits.masked_fill(mask, 0.0)

    probs = F.softmax(logits, dim=1) / self.sqrt_k
    read = torch.bmm(probs, values)
    return minibatch + read

class TemporalConvNet(nn.Module):
  """Stack of `TemporalBlock`s with dilation doubling at every level.

  Args:
    num_inputs: channel count of the input.
    num_channels: output channel count of each level, in order.
    kernel_size: kernel size used by every block.
    dropout: dropout probability used by every block.
    max_length: size passed as dims, key size and value size to each
      `AttentionBlock`.
    attention: if truthy, an `AttentionBlock` follows every temporal block.
    bn: forwarded to every `TemporalBlock`.
  """

  def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2, max_length=60, attention=False, bn=False):
    super().__init__()
    layers = []
    in_channels = num_inputs
    for level, out_channels in enumerate(num_channels):
      dilation_size = 2 ** level
      layers.append(TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                  padding=(kernel_size-1) * dilation_size, dropout=dropout, bn=bn))
      if attention:
        layers.append(AttentionBlock(max_length, max_length, max_length))
      # The next level consumes what this level produced.
      in_channels = out_channels

    self.network = nn.Sequential(*layers)

  def forward(self, x):
    """Run `x` through all levels in order."""
    return self.network(x)

class TCNModel(nn.Module):
  """TCN over the sequence input combined with layer-normalised static features.

  The last position of the TCN output is concatenated with the normalised
  static features and mapped to class probabilities by a linear layer.

  Args:
    num_inputs: channel count of the sequence input.
    num_channels: output channels of each TCN level.
    kernel_size: TCN kernel size.
    dropout: dropout probability for the TCN and for its pooled output.
    attention: forwarded to `TemporalConvNet`.
    num_non_seq: number of static features per example.
    num_classes: number of output classes.
  """

  def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2, attention=False,
               num_non_seq=5, num_classes=2):
    super().__init__()
    self.tcn = TemporalConvNet(num_inputs, num_channels, kernel_size, dropout, attention=attention)
    self.norm = nn.LayerNorm(num_non_seq)
    self.dropout = nn.Dropout(dropout)

    # The decoder input is the last TCN level's channels plus the static
    # features (256 + 5 = 261 with the sizes used in this file).
    self.decoder = nn.Sequential(
        nn.Linear(num_channels[-1] + num_non_seq, num_classes),
    )

  def forward(self, seq, non_seq):
    """Return class probabilities of shape ``(batch, num_classes)``."""
    x = self.tcn(seq)
    # Keep only the last position along the final axis of the TCN output.
    x = self.dropout(x[:, :, -1])
    x = torch.cat([x, self.norm(non_seq)], dim=-1)
    x = self.decoder(x)

    # NOTE(review): this returns probabilities, while nn.CrossEntropyLoss
    # (used for training in this file) applies log-softmax itself -- confirm
    # whether raw logits should be returned instead.
    return F.softmax(x, dim=-1)

# Multimodal TCN with Attention Training


In [None]:
import gc

# Collect unreachable Python objects first, then ask PyTorch to release its
# cached but unused CUDA memory before the next model is built.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Configure and train the TCN with an attention block after every level.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = TCNModel(
    num_inputs=6,
    num_channels=[64, 64, 128, 128, 256, 256],
    kernel_size=3,
    dropout=0.5,
    attention=True,
)
model.to(device)

# Adam with weight decay; the learning rate is scaled by 0.8 per scheduler step.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    weight_decay=1e-4,
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)
criterion = nn.CrossEntropyLoss()
epochs = 15

train_loss, test_loss, train_acc, test_acc, train_f1, test_f1 = train(
    model, optimizer, lr_scheduler, criterion, train_loader, val_loader, epochs
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch train and test loss on a log-scaled y axis.
plt.semilogy(np.arange(len(train_loss)), train_loss)
plt.semilogy(np.arange(len(test_loss)), test_loss)
plt.legend(['Train', 'Test'])
plt.xlabel('Epoch')
plt.ylabel('Training loss')
plt.show()

In [None]:
# Per-epoch train and test accuracy on a log-scaled y axis.
plt.semilogy(np.arange(len(train_acc)), train_acc)
plt.semilogy(np.arange(len(test_acc)), test_acc)
plt.legend(['Train', 'Test'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Per-epoch train and test F1 on a log-scaled y axis. The x axis is sized from
# the F1 lists themselves rather than from the accuracy lists.
plt.semilogy(np.array(range(len(train_f1))), train_f1)
plt.semilogy(np.array(range(len(test_f1))), test_f1)
plt.legend(('Train', 'Test',))
plt.ylabel('F1')
plt.xlabel('Epoch')
plt.show()

# Multimodal TCN without Attention Training

In [None]:
# Configure and train a deeper TCN without attention blocks.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = TCNModel(
    num_inputs=6,
    num_channels=[32, 32, 64, 64, 128, 128, 256, 256],
    kernel_size=3,
    dropout=0.5,
    attention=False,
)
model.to(device)

# Adam with weight decay; the learning rate is halved when the monitored
# value stops improving.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    weight_decay=1e-4,
)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    threshold=1e-5,
    threshold_mode='abs',
)
criterion = nn.CrossEntropyLoss()
epochs = 15

train_loss, test_loss, train_acc, test_acc, train_f1, test_f1 = train(
    model, optimizer, lr_scheduler, criterion, train_loader, val_loader, epochs
)

In [None]:
# Per-epoch train and test loss on a log-scaled y axis.
plt.semilogy(np.arange(len(train_loss)), train_loss)
plt.semilogy(np.arange(len(test_loss)), test_loss)
plt.legend(['Train', 'Test'])
plt.xlabel('Epoch')
plt.ylabel('Training loss')
plt.show()

In [None]:
# Per-epoch train and test accuracy on a log-scaled y axis.
plt.semilogy(np.arange(len(train_acc)), train_acc)
plt.semilogy(np.arange(len(test_acc)), test_acc)
plt.legend(['Train', 'Test'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Per-epoch train and test F1 on a log-scaled y axis. The x axis is sized from
# the F1 lists themselves rather than from the accuracy lists.
plt.semilogy(np.array(range(len(train_f1))), train_f1)
plt.semilogy(np.array(range(len(test_f1))), test_f1)
plt.legend(('Train', 'Test',))
plt.ylabel('F1')
plt.xlabel('Epoch')
plt.show()