# MILES-GUESS Classification Example Notebook

John Schreck, David John Gagne, Charlie Becker, Gabrielle Gantos, Dhamma Kimpara, Thomas Martin

#### Objective

This notebook is meant to demonstrate a functional example of how to use the miles-guess repository for training an evidential classification model.

Steps to get this notebook running:
1) Follow package installation steps in the miles-guess [ReadMe](https://github.com/ai2es/miles-guess/tree/main).
2) Run the cells in this notebook in order.

In [42]:
import yaml
import copy
import torch
import numpy as np
import pandas as pd

from mlguess.keras.data import load_ptype_uq, preprocess_data
from mlguess.torch.models import CategoricalDNN

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

from mlguess.torch.class_losses import edl_mse_loss, edl_digamma_loss, edl_log_loss, relu_evidence

In [2]:
config = "../config/evidential_classifier_torch.yml"

In [3]:
with open(config) as cf:
    conf = yaml.load(cf, Loader=yaml.FullLoader)

In [4]:
input_features = []
for features in ["TEMP_C", "T_DEWPOINT_C", "UGRD_m/s", "VGRD_m/s"]:
    input_features += conf["data"][features]
output_features = conf["data"]["ptypes"]

# Load data
_conf = copy.deepcopy(conf)
_conf.update(conf["data"])
data = load_ptype_uq(_conf, data_split=0, verbose=1, drop_mixed=False)
# check if we should scale the input data by groups
scale_groups = [] if "scale_groups" not in conf["data"] else conf["data"]["scale_groups"]
groups = [list(conf["data"][g]) for g in scale_groups]
leftovers = list(
    set(input_features)
    - set([row for group in scale_groups for row in conf["data"][group]])
)
if len(leftovers):
    groups.append(leftovers)
# scale the data
scaled_data, scalers = preprocess_data(
    data,
    input_features,
    output_features,
    scaler_type=conf["data"]["scaler_type"],
    encoder_type="onehot",
    groups=[],
)

In [29]:
X_train = torch.FloatTensor(scaled_data["train_x"].values)
y_train = torch.LongTensor(np.argmax(scaled_data["train_y"], axis=1))

# Create dataset and dataloader
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

In [30]:
conf["model"]["softmax"] = True

In [31]:
mlp = CategoricalDNN(**conf["model"])

In [32]:
mlp

CategoricalDNN(
  (fcn): Sequential(
    (0): Linear(in_features=84, out_features=212, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.1167, inplace=False)
    (3): Linear(in_features=212, out_features=212, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.1167, inplace=False)
    (6): Linear(in_features=212, out_features=212, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.1167, inplace=False)
    (9): Linear(in_features=212, out_features=212, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.1167, inplace=False)
    (12): Linear(in_features=212, out_features=4, bias=True)
    (13): Softmax(dim=-1)
  )
)

In [33]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batches_per_epoch = 10
for epoch in range(num_epochs):
    mlp.train()
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0
    for k, (batch_X, batch_y) in enumerate(dataloader):
        # Forward pass
        outputs = mlp(batch_X)
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += batch_y.size(0)
        correct_predictions += (predicted == batch_y).sum().item()

        if (k + 1) == batches_per_epoch:
            break
    
    # Calculate epoch statistics
    avg_loss = total_loss / len(dataloader)
    accuracy = correct_predictions / total_predictions
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

Epoch [1/100], Loss: 0.0021, Accuracy: 0.6578
Epoch [2/100], Loss: 0.0019, Accuracy: 0.7133
Epoch [3/100], Loss: 0.0017, Accuracy: 0.8320
Epoch [4/100], Loss: 0.0017, Accuracy: 0.8141
Epoch [5/100], Loss: 0.0017, Accuracy: 0.8156
Epoch [6/100], Loss: 0.0017, Accuracy: 0.8445
Epoch [7/100], Loss: 0.0016, Accuracy: 0.8359
Epoch [8/100], Loss: 0.0016, Accuracy: 0.8469
Epoch [9/100], Loss: 0.0016, Accuracy: 0.8477
Epoch [10/100], Loss: 0.0016, Accuracy: 0.8453
Epoch [11/100], Loss: 0.0016, Accuracy: 0.8562
Epoch [12/100], Loss: 0.0016, Accuracy: 0.8414
Epoch [13/100], Loss: 0.0016, Accuracy: 0.8625
Epoch [14/100], Loss: 0.0016, Accuracy: 0.8602
Epoch [15/100], Loss: 0.0016, Accuracy: 0.8656
Epoch [16/100], Loss: 0.0016, Accuracy: 0.8539
Epoch [17/100], Loss: 0.0016, Accuracy: 0.8656
Epoch [18/100], Loss: 0.0016, Accuracy: 0.8609
Epoch [19/100], Loss: 0.0016, Accuracy: 0.8617
Epoch [20/100], Loss: 0.0016, Accuracy: 0.8523
Epoch [21/100], Loss: 0.0015, Accuracy: 0.8805
Epoch [22/100], Loss: 

### Use an evidential neural network 

In [60]:
conf["model"]["softmax"] = False
conf["model"]["lng"] = False

In [61]:
ev_mlp = CategoricalDNN(**conf["model"])

In [62]:
ev_mlp

CategoricalDNN(
  (fcn): Sequential(
    (0): Linear(in_features=84, out_features=212, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.1167, inplace=False)
    (3): Linear(in_features=212, out_features=212, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.1167, inplace=False)
    (6): Linear(in_features=212, out_features=212, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.1167, inplace=False)
    (9): Linear(in_features=212, out_features=212, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.1167, inplace=False)
    (12): Linear(in_features=212, out_features=4, bias=True)
  )
)

In [63]:
import torch
import torch.optim as optim

def one_hot_embedding(labels, num_classes=10):
    # Convert to One Hot Encoding
    y = torch.eye(num_classes)
    return y[labels]

criterion = edl_digamma_loss
optimizer = optim.Adam(ev_mlp.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batches_per_epoch = 10
num_classes = 4  # Assuming 4 classes based on your one_hot_embedding call

for epoch in range(num_epochs):
    ev_mlp.train()
    total_loss = 0
    total_acc = 0
    total_evidence = 0
    total_evidence_succ = 0
    total_evidence_fail = 0
    total_uncertainty = 0
    
    for k, (batch_X, batch_y) in enumerate(dataloader):
        # Forward pass
        outputs = ev_mlp(batch_X)
        batch_y_onehot = one_hot_embedding(batch_y, num_classes)
        
        loss = criterion(
            outputs, batch_y_onehot.float(), epoch, num_classes, 10, batch_y.device
        )
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        match = torch.eq(predicted, batch_y).float().view(-1, 1)
        acc = torch.mean(match)
        
        evidence = relu_evidence(outputs)
        alpha = evidence + 1
        u = num_classes / torch.sum(alpha, dim=1, keepdim=True)
        
        total_evidence_batch = torch.sum(evidence, 1, keepdim=True)
        mean_evidence = torch.mean(total_evidence_batch)
        mean_evidence_succ = torch.sum(total_evidence_batch * match) / (torch.sum(match) + 1e-20)
        mean_evidence_fail = torch.sum(total_evidence_batch * (1 - match)) / (torch.sum(1 - match) + 1e-20)
        
        total_acc += acc.item()
        total_evidence += mean_evidence.item()
        total_evidence_succ += mean_evidence_succ.item()
        total_evidence_fail += mean_evidence_fail.item()
        total_uncertainty += torch.mean(u).item()
        
        if (k + 1) == batches_per_epoch:
            break
    
    # Calculate epoch statistics
    avg_loss = total_loss / batches_per_epoch
    avg_acc = total_acc / batches_per_epoch
    avg_evidence = total_evidence / batches_per_epoch
    avg_evidence_succ = total_evidence_succ / batches_per_epoch
    avg_evidence_fail = total_evidence_fail / batches_per_epoch
    avg_uncertainty = total_uncertainty / batches_per_epoch
    
    print(f'Epoch [{epoch+1}/{num_epochs}]:')
    print(f'  Loss: {avg_loss:.4f}')
    print(f'  Accuracy: {avg_acc:.4f}')
    print(f'  Mean Evidence: {avg_evidence:.4f}')
    print(f'  Mean Evidence (Correct): {avg_evidence_succ:.4f}')
    print(f'  Mean Evidence (Incorrect): {avg_evidence_fail:.4f}')
    print(f'  Mean Uncertainty: {avg_uncertainty:.4f}')

print("Training completed!")

Epoch [1/100]:
  Loss: 1.4121
  Accuracy: 0.6539
  Mean Evidence: 1.1506
  Mean Evidence (Correct): 1.1339
  Mean Evidence (Incorrect): 1.1881
  Mean Uncertainty: 0.7936
Epoch [2/100]:
  Loss: 1.1648
  Accuracy: 0.6875
  Mean Evidence: 2.2726
  Mean Evidence (Correct): 2.3175
  Mean Evidence (Incorrect): 2.1707
  Mean Uncertainty: 0.6441
Epoch [3/100]:
  Loss: 1.0674
  Accuracy: 0.7195
  Mean Evidence: 3.0092
  Mean Evidence (Correct): 3.0867
  Mean Evidence (Incorrect): 2.7428
  Mean Uncertainty: 0.5794
Epoch [4/100]:
  Loss: 0.9974
  Accuracy: 0.8320
  Mean Evidence: 3.3631
  Mean Evidence (Correct): 3.4676
  Mean Evidence (Incorrect): 2.8431
  Mean Uncertainty: 0.5520
Epoch [5/100]:
  Loss: 0.9768
  Accuracy: 0.7969
  Mean Evidence: 3.5468
  Mean Evidence (Correct): 3.7274
  Mean Evidence (Incorrect): 2.8339
  Mean Uncertainty: 0.5397
Epoch [6/100]:
  Loss: 0.9526
  Accuracy: 0.8430
  Mean Evidence: 2.9893
  Mean Evidence (Correct): 3.1542
  Mean Evidence (Incorrect): 2.1033
  Mean 