<a href="https://colab.research.google.com/github/Zain-mahfoud94/Python-Uni/blob/main/evidential_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%load_ext autoreload
%matplotlib inline
%autoreload 2

In [None]:
import sys, os
import multiprocessing

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output

sys.path.append(os.path.join(os.path.abspath('..'), 'src'))

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import RandomOverSampler

In [None]:
from data_split import return_datasets
from time_series_loader import TimeSeriesData

# Resource variables and PyTorch

In [None]:
import torch

In [None]:
# Use the GPU when PyTorch reports one as available; otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print('Training on: ', device)

Training on:  cuda


In [None]:
# Take the core count from the SLURM environment variable when it is set;
# otherwise fall back to the number of CPUs reported by multiprocessing.
SLURM_CPUS_PER_TASK = os.getenv("SLURM_CPUS_PER_TASK")

if SLURM_CPUS_PER_TASK is not None:
    n_cpu = int(SLURM_CPUS_PER_TASK)
else:
    n_cpu = multiprocessing.cpu_count()
    SLURM_CPUS_PER_TASK = f'{n_cpu}'

# Export the same count through the OpenMP / MKL / NumExpr thread-count variables.
for thread_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS"):
    os.environ[thread_var] = SLURM_CPUS_PER_TASK

print('Using ', SLURM_CPUS_PER_TASK, ' cpu cores')

torch.set_num_threads(int(SLURM_CPUS_PER_TASK))

Using  10  cpu cores


# Read Data

In [None]:
# The processed data is resolved relative to a directory six levels above the working directory.
datasets_root = os.path.abspath('../../../../../..')
data_path = os.path.join(datasets_root, 'datasets', 'DLL', 'aimee', 'processed')

In [None]:
# Selection parameters forwarded unchanged to return_datasets.
case_index_list = list(range(5))
multi_class = True
load_machine_idx = 0
DUT_idx = None

(X_train, y_train,
 X_test, y_test,
 classes) = return_datasets(
    data_path,
    case_index_list=case_index_list,
    multi_class=multi_class,
    load_machine_idx=load_machine_idx,
    DUT_idx=DUT_idx,
)

## Scaling

In [None]:
# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled_ = scaler.transform(X_train)
X_test_scaled_ = scaler.transform(X_test)

# Re-wrap the transformed values with the original column labels and index.
X_train_scaled = pd.DataFrame(X_train_scaled_, index=X_train.index, columns=X_train.columns.values)
X_test_scaled = pd.DataFrame(X_test_scaled_, index=X_test.index, columns=X_test.columns.values)

## DataLoader

In [None]:
from torch.utils.data import DataLoader, Dataset

# Oversampler handed to TimeSeriesData for class balancing. The fixed seed makes the
# resampled training set reproducible across kernel restarts.
oversampler = RandomOverSampler(random_state=0)

# Sliding-window size passed to TimeSeriesData as `sw`.
# NOTE(review): presumably a duration of 8 seconds - confirm against TimeSeriesData.
sw = '8s'

# `model_type` is passed to TimeSeriesData as `dim_format`; `batch_size` is used by the DataLoaders.
model_type = 'CNN'
batch_size = 100

### Create PyTorch Datasets

In [None]:
# Keyword arguments shared by every split.
window_kwargs = dict(sw=sw, dim_format=model_type)

# Training split with class balancing enabled.
train_data = TimeSeriesData(
    X_train_scaled,
    y_train,
    class_balance=True,
    sampling_method=oversampler,
    **window_kwargs,
)

# Same training split with class balancing disabled.
train_data_unbalanced = TimeSeriesData(
    X_train_scaled,
    y_train,
    class_balance=False,
    sampling_method=oversampler,
    **window_kwargs,
)

# Test split; class_balance and sampling_method are left at their defaults.
test_data = TimeSeriesData(X_test_scaled, y_test, **window_kwargs)

### Create DataLoader

In [None]:
pin_memory = False

# Settings common to all three loaders.
loader_kwargs = dict(batch_size=batch_size, num_workers=n_cpu, pin_memory=pin_memory)

# Only the balanced training loader is shuffled; the other two keep dataset order.
loader_train = DataLoader(train_data, shuffle=True, **loader_kwargs)
loader_train_unbalanced = DataLoader(train_data_unbalanced, shuffle=False, **loader_kwargs)
loader_test = DataLoader(test_data, shuffle=False, **loader_kwargs)

### Load the Model

In [None]:
sw = '8s'
model_type = 'CNN_EDL'
# The checkpoint is a fully pickled module. map_location remaps its tensors onto the
# device chosen for this session, so a checkpoint written on a GPU also loads on a
# CPU-only host.
# NOTE(review): unpickling can execute arbitrary code - only load checkpoints from a trusted source.
net = torch.load(f'../models/{model_type}_CL_sw_{sw}.pt', map_location=device)
net.eval()

ConvNet_EDL(
  (conv1): Conv1d(131, 1000, kernel_size=(11,), stride=(1,))
  (bn1): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.9, inplace=False)
  (conv2): Conv1d(1000, 500, kernel_size=(11,), stride=(1,))
  (bn2): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout2): Dropout(p=0.9, inplace=False)
  (fc1): Linear(in_features=390000, out_features=5, bias=True)
)

# Model

In [None]:
from models import ConvNet_CL
from models import ConvNet_EDL
# Read the first two dimensions of the first training sample; they are used as the
# network's input size and sequence length.
input_size, seq_length = (train_data[0][0].shape[axis] for axis in (0, 1))
print(f'Input size CNN: {input_size}')
print(f'sequence length CNN: {seq_length}')

In [None]:
# Build a fresh network, cast it to float and move it to the selected device.
net = ConvNet_EDL(input_size, classes, seq_length, kernel_size=11, dropout=.9).float()
net.to(device)

## Helper Functions

In [None]:
def KLDivergenceLoss(evidence, target):
    """Mean KL-divergence regulariser computed from evidence and one-hot targets.

    The Dirichlet parameters are ``alpha = evidence + 1``. Entries selected by
    ``target`` are reset to 1, giving ``alpha_tilde``, and the function returns the
    batch mean of ``KL(Dir(alpha_tilde) || Dir(1, ..., 1))``.

    Args:
        evidence: tensor of shape ``(batch, n_classes)``.
        target: one-hot tensor with the same shape as ``evidence``.

    Returns:
        A zero-dimensional tensor holding the mean loss over the batch.
    """
    num_classes = evidence.shape[-1]
    alpha = evidence + 1.
    # Keep alpha for non-target entries only; target entries become exactly 1.
    alpha_tilde = (1 - target) * alpha + target
    strength_tilde = alpha_tilde.sum(dim=-1)

    # Log-normaliser terms of the two Dirichlet distributions.
    log_norm = (torch.lgamma(strength_tilde)
                - torch.lgamma(alpha_tilde.new_tensor(float(num_classes)))
                - torch.lgamma(alpha_tilde).sum(dim=-1))

    # Expectation term: sum_k (alpha_tilde_k - 1) * (digamma(alpha_tilde_k) - digamma(strength_tilde)).
    digamma_gap = torch.digamma(alpha_tilde) - torch.digamma(strength_tilde).unsqueeze(1)
    expectation = ((alpha_tilde - 1) * digamma_gap).sum(dim=-1)

    return (log_norm + expectation).mean()

In [None]:
from labml_helpers.module import Module

class TrackStatistics(Module):
    """Pushes accuracy, uncertainty and top-probability statistics to the labml tracker."""

    def forward(self, evidence: torch.Tensor, target: torch.Tensor):
        """Record statistics for one batch.

        Args:
            evidence: tensor whose last dimension indexes the classes.
            target: tensor whose arg-max over the last dimension is the true class.
        """
        num_classes = evidence.shape[-1]
        # A sample is a hit when the class with most evidence equals the target class.
        hits = evidence.argmax(dim=-1).eq(target.argmax(dim=-1))
        tracker.add('accuracy.', hits.sum() / hits.shape[0])

        alpha = evidence + 1.
        strength = alpha.sum(dim=-1)
        # Largest expected class probability per sample.
        top_probability = (alpha / strength[:, None]).max(dim=-1).values
        uncertainty_mass = num_classes / strength

        # Uncertainty and top probability, split by correct / incorrect predictions.
        misses = ~hits
        tracker.add('u.succ.', uncertainty_mass.masked_select(hits))
        tracker.add('u.fail.', uncertainty_mass.masked_select(misses))
        tracker.add('prob.succ.', top_probability.masked_select(hits))
        tracker.add('prob.fail.', top_probability.masked_select(misses))

In [None]:
from labml_helpers.schedule import RelativePiecewise
def kl_div_coef(kl_div_coef_schedule, epochs, train_dataset_size):
    """Wrap a coefficient schedule in a ``RelativePiecewise`` object.

    Args:
        kl_div_coef_schedule: list of pairs passed through to ``RelativePiecewise``.
        epochs: number of training epochs.
        train_dataset_size: number of training samples per epoch.

    Returns:
        ``RelativePiecewise(kl_div_coef_schedule, epochs * train_dataset_size)``.
    """
    total_steps = epochs * train_dataset_size
    return RelativePiecewise(kl_div_coef_schedule, total_steps)
print(kl_div_coef([(0, 0.), (0.2, 0.01), (1, 1.)],1,200000))

## Loss Functions

#### 1- CrossEntropyBayesRisk

In [None]:
def CrossEntropyBayesRisk(evidence, target):
    """Mean of ``sum_k target_k * (digamma(S) - digamma(alpha_k))`` over the batch.

    ``alpha = evidence + 1`` and ``S`` is the sum of ``alpha`` over the class dimension.

    Args:
        evidence: tensor of shape ``(batch, n_classes)``.
        target: one-hot tensor with the same shape as ``evidence``.

    Returns:
        A zero-dimensional tensor holding the mean loss.
    """
    alpha = evidence + 1.
    strength = alpha.sum(dim=-1)
    digamma_gap = torch.digamma(strength).unsqueeze(1) - torch.digamma(alpha)
    per_sample = (target * digamma_gap).sum(dim=-1)
    return per_sample.mean()

#### 2- SquaredErrorBayesRisk

In [None]:
def SquaredErrorBayesRisk(evidence, target):
    """Mean over the batch of squared error plus variance of the expected probabilities.

    With ``alpha = evidence + 1``, ``S = sum_k alpha_k`` and ``p = alpha / S``, each
    sample contributes ``sum_k (target_k - p_k)^2 + p_k (1 - p_k) / (S + 1)``.

    Args:
        evidence: tensor of shape ``(batch, n_classes)``.
        target: one-hot tensor with the same shape as ``evidence``.

    Returns:
        A zero-dimensional tensor holding the mean loss.
    """
    alpha = evidence + 1.
    total = alpha.sum(dim=-1).unsqueeze(1)
    expected_p = alpha / total
    squared_error = (target - expected_p) ** 2
    variance = expected_p * (1 - expected_p) / (total + 1)
    return (squared_error + variance).sum(dim=-1).mean()

#### 3- MaximumLikelihoodLoss

In [None]:
def MaximumLikelihoodLoss(evidence, target):
    """Mean of ``sum_k target_k * (log S - log alpha_k)`` over the batch.

    ``alpha = evidence + 1`` and ``S`` is the sum of ``alpha`` over the class dimension.

    Args:
        evidence: tensor of shape ``(batch, n_classes)``.
        target: one-hot tensor with the same shape as ``evidence``.

    Returns:
        A zero-dimensional tensor holding the mean loss.
    """
    alpha = evidence + 1.
    strength = alpha.sum(dim=-1)
    log_gap = torch.log(strength).unsqueeze(1) - torch.log(alpha)
    return (target * log_gap).sum(dim=-1).mean()

# Model Train

In [None]:
# Adam over all network parameters with learning rate `lr` and a weight-decay term.
# NOTE(review): weight_decay=.4 is a very strong penalty - confirm it is intentional.
lr = 1e-4
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=.4)

In [None]:
import torch.nn.functional as F
from labml import tracker
import torch.nn as nn

# Initialization step
kl_div_coef_schedule = [(0, 0.), (0.2, 0.01), (1, 1.)]
stats = TrackStatistics()
tracker.set_scalar("loss.*", True)
tracker.set_scalar("accuracy.*", True)
tracker.set_histogram('u.*', True)
tracker.set_histogram('prob.*', False)
tracker.set_scalar('annealing_coef.*', False)
tracker.set_scalar('kl_div_loss.*', False)
epochs = 2
verb_it = 1000
num_classes = len(case_index_list)
# Number of epochs over which the KL weight ramps linearly from 0 to 1.
annealing_step = num_classes
net.train()

# Per-epoch means of the batch losses and batch accuracies.
loss_epoch = []
acc_epoch = []

losses = {"loss": [], "phase": [], "epoch": []}
accuracy = {"accuracy": [], "phase": [], "epoch": []}
evidences = {"evidence": [], "type": [], "epoch": []}

# Loop invariants: the one-hot lookup table and the evidence activation are built once.
eye = torch.eye(num_classes, dtype=torch.float, device=device)
outputs_to_evidence = nn.Softplus()

for epoch in range(epochs):

    loss_list = []
    acc_list = []

    # KL Divergence loss coefficient λ_t; it depends only on the epoch index.
    annealing_coef = torch.tensor(min(1.0, epoch / annealing_step),
                                  dtype=torch.float32, device=device)

    for batch_i, (X_batch, y_batch) in enumerate(loader_train):
        data = X_batch.to(device)
        y_batch = y_batch.long().view(-1).to(device)
        # One-hot coded targets
        target = eye[y_batch]
        # Advance the global step by the number of samples in this batch only.
        tracker.add_global_step(len(data))
        # Get model outputs
        outputs = net(data)
        # Get evidences e_t >= 0
        evidence = outputs_to_evidence(outputs)
        # Calculate loss
        loss = CrossEntropyBayesRisk(evidence, target)
        # Calculate KL Divergence regularization loss
        kl_div_loss = KLDivergenceLoss(evidence, target)
        tracker.add("loss.", loss)
        tracker.add("kl_div_loss.", kl_div_loss)
        tracker.add("annealing_coef.", annealing_coef)
        # Total loss
        loss = loss + annealing_coef * kl_div_loss
        # Track statistics
        stats(evidence, target)
        # Clear gradients first so stale values left by an interrupted run cannot leak in.
        optimizer.zero_grad()
        # Calculate gradients
        loss.backward()
        # Take optimizer step
        optimizer.step()
        # Save the tracked metrics
        tracker.save()
        loss_list.append(loss.item())
        acc = accuracy_score(y_batch.cpu(), outputs.detach().argmax(1).cpu())
        acc_list.append(acc)

    loss_epoch.append(np.mean(loss_list))
    acc_epoch.append(np.mean(acc_list))

In [None]:
# Plot the per-epoch mean loss and mean accuracy on one labelled axis.
fig, ax = plt.subplots()
ax.plot(loss_epoch, label='loss')
ax.plot(acc_epoch, label='accuracy')
ax.set_xlabel('epoch')
ax.set_title('Training loss and accuracy per epoch')
ax.legend()
plt.show()

# Evaluation

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from evaluation import plot_confusion_matrix

In [None]:
import torch.nn as nn

def model_evaluate(net, dataloader, device):
    """Run ``net`` over ``dataloader`` and collect outputs, labels, uncertainty and probabilities.

    Evidence is ``Softplus(output)`` and ``alpha = evidence + 1``. For each sample the
    uncertainty is ``n_classes / sum(alpha)`` and the class probabilities are
    ``alpha / sum(alpha)``, where ``n_classes`` is read from the network output width.

    Args:
        net: module mapping a batch to a ``(batch, n_classes)`` tensor.
        dataloader: iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(y_score, y_labels, y_uncertainty, y_prob)`` of NumPy arrays with shapes
        ``(N, n_classes)``, ``(N,)``, ``(N, 1)`` and ``(N, n_classes)``.

    Raises:
        ValueError: from ``np.vstack`` when ``dataloader`` yields no batches.
    """
    net.eval()
    outputs_to_evidence = nn.Softplus()
    list_out = []
    list_y = []
    list_uncertainty = []
    list_prob = []

    # No gradients are needed during evaluation.
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.view(-1).to(device)

            out = net(X_batch)

            evidence = outputs_to_evidence(out)
            alpha = evidence + 1
            strength = torch.sum(alpha, dim=1, keepdim=True)
            # Use the actual number of classes instead of a hard-coded 5.
            n_classes = out.shape[1]
            uncertainty = n_classes / strength
            prob = alpha / strength

            list_out.append(out.cpu().numpy())
            list_y.append(y_batch.cpu().numpy())
            list_uncertainty.append(uncertainty.cpu().numpy())
            list_prob.append(prob.cpu().numpy())

    y_score = np.vstack(list_out)
    y_labels = np.hstack(list_y)
    y_uncertainty = np.vstack(list_uncertainty)
    y_prob = np.vstack(list_prob)

    return y_score, y_labels, y_uncertainty, y_prob

In [None]:
import torch.nn as nn

def model_evaluate(net, dataloader, device):
    """Evaluate ``net`` on ``dataloader`` and return per-sample results as NumPy arrays.

    Each batch keeps its ``(batch, n_classes)`` layout, so the stacked results have one
    row per sample and batches of different sizes can be combined. Evidence is
    ``Softplus(output)``, the same activation used in the training loop;
    ``alpha = evidence + 1``, the uncertainty is ``n_classes / sum(alpha)`` and the
    probabilities are ``alpha / sum(alpha)``.

    Args:
        net: module mapping a batch to a ``(batch, n_classes)`` tensor.
        dataloader: iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(y_score, y_labels, y_uncertainty, y_prob)`` with shapes
        ``(N, n_classes)``, ``(N,)``, ``(N, 1)`` and ``(N, n_classes)``.

    Raises:
        ValueError: from ``np.vstack`` when ``dataloader`` yields no batches.
    """
    net.eval()
    # Must match the activation that produced the evidence during training.
    outputs_to_evidence = nn.Softplus()
    list_out = []
    list_y = []
    list_uncertainty = []
    list_prob = []

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.view(-1).to(device)
            out = net(X_batch)

            alpha = outputs_to_evidence(out) + 1
            strength = torch.sum(alpha, dim=1, keepdim=True)
            # The number of classes comes from the output width, not a hard-coded constant.
            uncertainty = out.shape[1] / strength
            prob = alpha / strength

            # Keep the 2-D layout; flattening here would merge samples and classes.
            list_out.append(out.cpu().numpy())
            list_y.append(y_batch.cpu().numpy())
            list_uncertainty.append(uncertainty.cpu().numpy())
            list_prob.append(prob.cpu().numpy())

    y_score = np.vstack(list_out)
    y_labels = np.hstack(list_y)
    y_uncertainty = np.vstack(list_uncertainty)
    y_prob = np.vstack(list_prob)

    return y_score, y_labels, y_uncertainty, y_prob

In [None]:
# Evaluate the network on the test loader and keep the four returned arrays.
y_score, y_labels, u_list, y_prob = model_evaluate(net, loader_test, device)



In [None]:
# Print the shapes of the evaluation arrays in the order: probabilities, scores, labels.
for collected in (y_prob, y_score, y_labels):
    print(np.shape(collected))

In [None]:
# The column with the highest score in each row is the predicted label (kept as a column vector).
y_predicted_labels = np.argmax(y_score, axis=1).reshape(-1, 1)
acc = accuracy_score(y_labels, y_predicted_labels)
print('Accuracy', acc)

In [None]:
# Text report of the per-class metrics, printed with five decimal places.
report = classification_report(y_labels, y_predicted_labels, digits=5)
print(report)

### Confusion Matrix

In [None]:
# Confusion matrix computed with normalize='true' and restricted to the labels in `classes`.
cm = confusion_matrix(y_labels, y_predicted_labels, labels=classes, normalize='true')
# Open the figure and switch the grid off before the project helper draws into it.
plt.figure(figsize=(5,4))
plt.grid(False)
plot_confusion_matrix(cm, classes=classes, title='CM')
plt.tight_layout()
plt.show()

# Save Model

In [None]:
# Create the output directory when it is missing; exist_ok avoids the check-then-create race.
os.makedirs('../models/', exist_ok=True)
model_type = "CNN_EDL_Full_batchId"
# Save the whole module object (not only its weights) with torch.save.
torch.save(net, f'../models/{model_type}_CL_sw_{sw}.pt')

# Plot predictions

In [None]:
# One column per class score, labelled 0..n_classes-1; the count is taken from y_score
# instead of being hard-coded to five classes.
pred_df = pd.DataFrame(y_score, columns=list(range(y_score.shape[1])), index=test_data.labels.index)

In [None]:
# The loop slices the uncertainties per time series, so they must be a DataFrame that
# shares the index of pred_df.
u_list_df = pd.DataFrame(np.asarray(u_list).reshape(-1, 1),
                         columns=['uncertainty'],
                         index=pred_df.index)

# One figure per time series: class scores, true labels and uncertainty on a shared axis.
for name, group in pred_df.groupby(level=y_test.index.names[:-1]):
    fig, ax = plt.subplots(figsize=(10,5))
    pred_df.loc[name, :].plot(ax=ax)
    y_test.loc[name, :].plot(ax=ax)
    u_list_df.loc[name, :].plot(ax=ax)
    plt.ylabel('class prob')
    plt.title(f'Time Series ID: {name}')
    plt.show()
    # Release the figure so memory does not grow with the number of series.
    plt.close(fig)