## 1. Basic libraries

In [1]:
# Data Science Packages
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from  matplotlib import pyplot
import seaborn as sns
import warnings
from tqdm import tqdm, tqdm_notebook?

Object `tqdm_notebook` not found.


In [2]:
# Seaborn Style
sns.set(style='ticks')
sns.set_style({'font.family': 'Hiragino Maru Gothic Pro'})
sns.set_palette("cool")

# Pandas Style
pd.set_option("display.max_column", 9999)
pd.set_option("display.max_row", 9999)

# Ignore annoying warning 
warnings.filterwarnings('ignore')

In [3]:
%load_ext autoreload
%autoreload 2

In [5]:
from config import *

In [6]:
from preprocessing import *

## 2. Load and Preproess the data

In [None]:
#main()

## 3. Load the preprocess data into the DataLoader through Dataset

In [7]:
from datamodule import *

In [None]:
#main()

## 4. Train the AutoEncder model

In [8]:
from encoder import *
from decoder import *
from autoencoder import *

In [None]:
#main()

In [9]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
%reload_ext tensorboard

In [23]:
%tensorboard --logdir /Users/vikaschaturvedi/Documents/projects/predictive_care/experiment_gaps/models/lightning_logs/ --host localhost

In [11]:
cpetype='W724Ci'
path_tensorboard = '../models/lightning_logs/cpe_replacement_autoencoder/'+cpetype+'/version_2/checkpoints/'
! ls $path_tensorboard

epoch=59-step=3119.ckpt


## 5. Train the Classifier model

In [12]:
from classifier import *

Define the classifier model with the pretrained AutoEncoder model and loading preprocessed data into the Classifier for the Training

In [None]:
#main()

## 6. Evaluate the model

Here we are going to load the train model to evaluate the performance per class

In [30]:
cpetype='W724Ci'
path_tensorboard = '../models/lightning_logs/cpe_replacement_classifier/'+cpetype+'/version_11/checkpoints/'
! ls $path_tensorboard

epoch=59-step=3119.ckpt


### 6.1 Load the trained model

In this section we will load our model using the check point with best accuracy.

In [31]:
from classifier import ReplacementClassifier

classifier = ReplacementClassifier.load_from_checkpoint('../models/lightning_logs/cpe_replacement_classifier/W724Ci/version_11/checkpoints/epoch=59-step=3119.ckpt')
classifier.eval()

ReplacementClassifier(
  (accuracy): Accuracy()
  (F1): F1()
  (recall): Recall()
  (feature_extractor): ConvNetAutoEncoder(
    (encoder): ConvEncoder(
      (conv1): Conv1d(190, 380, kernel_size=(3,), stride=(1,), padding=(1,))
      (conv2): Conv1d(380, 760, kernel_size=(3,), stride=(1,), padding=(1,))
      (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (flatten): Flatten(start_dim=1, end_dim=-1)
      (batch_norm1): BatchNorm1d(380, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (batch_norm2): BatchNorm1d(760, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=5320, out_features=500, bias=True)
    )
    (decoder): ConvDecoder(
      (t_conv1): ConvTranspose1d(760, 380, kernel_size=(3,), stride=(2,))
      (t_conv2): ConvTranspose1d(380, 190, kernel_size=(2,), stride=(2,))
      (batch_norm1): BatchNorm1d(380, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tr

### 6.2 Training Results

Here we are going to define the path to the folder where the preprocessed data is located. We are going to select a huge batch_size in order to query the complete data set in just one iteration.

In [15]:
cpetype = 'W724Ci'
path_preprocessed = config['path']['root']+"/data/preprocessed/data_w"+str(config['dataset']['window'])+"_g"+str(config['dataset']['gap'])+"/"+cpetype
data_module = ReplacementDataModule(path_preprocessed, batch_size=10000, num_workers=0)

Lets setup the `data_module` in the stage `fit`.

In [19]:
import numpy as np
from sklearn.metrics import multilabel_confusion_matrix,confusion_matrix,precision_score,recall_score,f1_score,accuracy_score

def evaluation_metrics(y_true,y_pred):
    '''
    This function print the different evaluations metrics for different labels
    
    Args:
        y_true: True Label
        y_pred: Predicted Label
    
    '''

    labels = ["label_no_problem", "label_healthy"]

    conf_mat_dict={}
    precision={}
    recall={}
    f1={}
    accuracy={}

    for label_col in range(len(labels)):
        y_true_label = y_true[:, label_col]
        y_pred_label = y_pred[:, label_col]
        conf_mat_dict[labels[label_col]] = confusion_matrix(y_pred=y_pred_label, y_true=y_true_label)
        precision[labels[label_col]] = precision_score(y_pred=y_pred_label, y_true=y_true_label)
        recall[labels[label_col]] = recall_score(y_pred=y_pred_label, y_true=y_true_label)
        f1[labels[label_col]] = f1_score(y_pred=y_pred_label, y_true=y_true_label)
        accuracy[labels[label_col]] = accuracy_score(y_pred=y_pred_label, y_true=y_true_label)

    for label, matrix in conf_mat_dict.items():
        print("Confusion matrix for label {}:".format(label))
        print(matrix)
        print('\n')

    for label, score in precision.items():
        print("Precision for label {}:".format(label))
        print(score)
        print('\n')

    for label, score in recall.items():
        print("Recall for label {}:".format(label))
        print(score)
        print('\n')

    for label, score in f1.items():
        print("F1 score for label {}:".format(label))
        print(score)
        print('\n')
        
    for label, score in accuracy.items():
        print("Accuracy for label {}:".format(label))
        print(score)
        print('\n')

In [17]:
data_module.setup(stage='fit')
labels, matrix = next(iter(data_module.train_dataloader()))
print('Dim Input:',matrix.shape)
y_true = np.array(labels)
y_pred = np.array(classifier(matrix).detach().numpy()).round()
cm_train = multilabel_confusion_matrix(y_true, y_pred)

Dim Input: torch.Size([1655, 30, 190])


In [20]:
evaluation_metrics(y_true,y_pred)

Confusion matrix for label label_no_problem:
[[890 285]
 [ 50 430]]


Confusion matrix for label label_healthy:
[[744  39]
 [  6 866]]


Precision for label label_no_problem:
0.6013986013986014


Precision for label label_healthy:
0.9569060773480663


Recall for label label_no_problem:
0.8958333333333334


Recall for label label_healthy:
0.9931192660550459


F1 score for label label_no_problem:
0.7196652719665273


F1 score for label label_healthy:
0.9746764209341586


Accuracy for label label_no_problem:
0.797583081570997


Accuracy for label label_healthy:
0.972809667673716




### 6.3 Testing Results

In [21]:
import numpy as np
from sklearn.metrics import multilabel_confusion_matrix
data_module.setup(stage='test')
labels, matrix = next(iter(data_module.test_dataloader()))
print('Dim Input:',matrix.shape)
y_true = np.array(labels)
y_pred = np.array(classifier(matrix).detach().numpy()).round()

Dim Input: torch.Size([197, 30, 190])


In [22]:
#Dropout=0.25(before linear layer also), Adam optimizer with weight decay and single linear layer
evaluation_metrics(y_true,y_pred)

Confusion matrix for label label_no_problem:
[[102  36]
 [  4  55]]


Confusion matrix for label label_healthy:
[[93  5]
 [ 0 99]]


Precision for label label_no_problem:
0.6043956043956044


Precision for label label_healthy:
0.9519230769230769


Recall for label label_no_problem:
0.9322033898305084


Recall for label label_healthy:
1.0


F1 score for label label_no_problem:
0.7333333333333333


F1 score for label label_healthy:
0.9753694581280787


Accuracy for label label_no_problem:
0.7969543147208121


Accuracy for label label_healthy:
0.9746192893401016


