In [171]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, FunctionTransformer, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import roc_auc_score, classification_report, recall_score
import numpy as np
import pandas as pd
import pickle

In [3]:
with open('../data/dataset.pkl', 'rb') as f:
    features, labels = pickle.load(f)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [4]:
class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()

In [26]:
class TCN_block(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, dilation, padding, stride=1, dropout=0.2):
        super(TCN_block, self).__init__()
        self.conv = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp = Chomp1d(padding)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.init_weights()

    def init_weights(self):
        self.conv.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

In [90]:
class TCN_Model(nn.Module):
    def __init__(self, n_features, firs_layer_outputs, kernel_size, dilation):
        super(TCN_Model, self).__init__()
        padding = dilation * (kernel_size - 1)
        first_linear_input_size = n_features*firs_layer_outputs*5
        self.tcn1 = TCN_block(1, firs_layer_outputs, kernel_size, dilation, padding)
        self.tcn2 = TCN_block(firs_layer_outputs, firs_layer_outputs * 5, kernel_size, dilation, padding)
        self.flatten = nn.Flatten()
        self.dense1 = nn.Linear(first_linear_input_size, first_linear_input_size//15)
        self.dense2 = nn.Linear(first_linear_input_size//15, first_linear_input_size//30)
        self.dense3 = nn.Linear(first_linear_input_size//30, first_linear_input_size//60)
        self.dense4 = nn.Linear(first_linear_input_size//60, first_linear_input_size//(120*19))
        self.dense_layers = nn.Sequential(self.tcn1, self.tcn2, self.flatten,
                                          self.dense1, nn.ReLU(),
                                          self.dense2, nn.ReLU(),
                                          self.dense3, nn.ReLU(),
                                          self.dense4, nn.Sigmoid())
        self.init_weights()
        
    def init_weights(self):
        self.dense1.weight.data.normal_(0, 0.01)
        self.dense2.weight.data.normal_(0, 0.01)
        self.dense3.weight.data.normal_(0, 0.01)
        self.dense4.weight.data.normal_(0, 0.01)
    
    def forward(self, x):
        return self.dense_layers(x)

In [60]:
def outlier_handler(df):
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    IQR = q3-q1
    lwr_bound = q1-(1.5*IQR)
    upr_bound = q3+(1.5*IQR)
    df = np.where(df > upr_bound, df.median(), np.where(df < lwr_bound, df.median(), df))
    return df

numeric_transformer = Pipeline([
    ('Outlier_handler', FunctionTransformer(outlier_handler)),
    ('Imputer', SimpleImputer(strategy='median'))
])

categorical_transformer = Pipeline([
    ('Imputer', SimpleImputer(strategy='constant', fill_value='Missing')),
    ('Binary_encoder', OneHotEncoder(sparse=False, drop='if_binary', handle_unknown='ignore'))
    # ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1))
])

numeric_features = X_train.select_dtypes(['int64', 'float64']).columns
cat_cols = X_train.select_dtypes('category').columns
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, cat_cols)
])

proc = Pipeline([
    ("preprocessor", preprocessor),
    ("scaler", StandardScaler())
])

In [116]:
processed_data = proc.fit_transform(X_train, y_train)
data_shape = processed_data.shape
processed_data = processed_data.reshape([data_shape[0], 1, data_shape[1]])
processed_test = proc.transform(X_test)
test_shape = processed_test.shape
processed_test = processed_test.reshape([test_shape[0], 1, test_shape[1]])

batch_size = 40

training_set = []
test_set = []
for i in range(len(processed_data)):
   training_set.append([processed_data[i], y_train.to_numpy().reshape([data_shape[0], 1])[i]])

for i in range(len(processed_test)):
   test_set.append([processed_test[i], y_test.to_numpy().reshape([test_shape[0], 1])[i]])

training_loader = torch.utils.data.DataLoader(training_set, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)



In [138]:
firs_layer_outputs = 5
kernel_size = 2
dilation = 2

model = TCN_Model(data_shape[1], firs_layer_outputs, kernel_size, dilation)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [102]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    
    for i, data in enumerate(training_loader):
        # Every data instance is an input + label pair
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        
        outputs = model(inputs.to(torch.float32))
        
        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels.to(torch.float32))
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        if i % 1000 == 999:
            last_loss = running_loss / 1000 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            tb_x = epoch_index * len(training_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.

    return last_loss

In [139]:


timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
epoch_number = 0
for epoch in range(100):
    model = model.to(torch.float32)
    model.train(True)
    avg_loss = train_one_epoch(epoch_number, writer)
    epoch_number += 1

  batch 1000 loss: 0.5721479599177838
  batch 1000 loss: 0.5609330433309079
  batch 1000 loss: 0.5585950682163239
  batch 1000 loss: 0.5573384247720241
  batch 1000 loss: 0.5562880094051361
  batch 1000 loss: 0.5558862969577313
  batch 1000 loss: 0.5552192472815514
  batch 1000 loss: 0.5544380911588669
  batch 1000 loss: 0.5538688451051712
  batch 1000 loss: 0.5534031301438809
  batch 1000 loss: 0.5525525442361832
  batch 1000 loss: 0.5523538565337658
  batch 1000 loss: 0.5503150171041489
  batch 1000 loss: 0.5485590830445289
  batch 1000 loss: 0.545848892211914
  batch 1000 loss: 0.5422591486573219
  batch 1000 loss: 0.5383140710890293
  batch 1000 loss: 0.5327852047085762
  batch 1000 loss: 0.5277874648869038
  batch 1000 loss: 0.5213058760762215
  batch 1000 loss: 0.5151996994316578
  batch 1000 loss: 0.508344748467207
  batch 1000 loss: 0.5016878519952297
  batch 1000 loss: 0.4934824362695217
  batch 1000 loss: 0.4874349544048309
  batch 1000 loss: 0.4803324277698994
  batch 1000 l

In [163]:
running_loss = 0.
roc_auc = 0.
recall = 0.
model.train(False)
for i, test_data in enumerate(test_loader):
    inputs, labels = test_data
    outputs = model(inputs.to(torch.float32))
    loss = loss_fn(outputs, labels.to(torch.float32))
    running_loss += loss
    roc_auc += roc_auc_score(labels, outputs.detach().numpy())
    recall += recall_score(labels, outputs.detach().numpy().round())

tavg_loss = running_loss / (i + 1)
avg_roc_auc = roc_auc / (i + 1)
avg_recall = recall / (i + 1)
print('LOSS train {} valid {}'.format(avg_loss, tavg_loss))
print('ROC AUC score: {} Recall: {}'.format(avg_roc_auc, avg_recall))

LOSS train 0.30620070078223943 valid 1.841170310974121


In [165]:
avg_roc_auc = roc_auc / (i + 1)
avg_recall = recall / (i + 1)
print('ROC AUC score: {} Recall: {}'.format(avg_roc_auc, avg_recall))

ROC AUC score: 0.5476429120156832 Recall: 0.2298254891994832


In [170]:
roc_auc_list = [0.388671875, 0.72265625, 0.58984375]
recall_list = [0., 0., 0.2298254891994832]
cols = ['dil=1,ep=1', 'dil=2,ep=1', 'dil=2,ep=100']
rows= ['roc_auc', 'recall']
pd.DataFrame([roc_auc_list, recall_list], columns=cols, index=rows)

Unnamed: 0,"dil=1,ep=1","dil=2,ep=1","dil=2,ep=100"
roc_auc,0.388672,0.722656,0.589844
recall,0.0,0.0,0.229825
