# MILES-GUESS Classification Example Notebook (PyTorch)

John Schreck, David John Gagne, Charlie Becker, Gabrielle Gantos, Dhamma Kimpara, Thomas Martin

#### Objective

This notebook is meant to demonstrate a functional example of how to use the miles-guess repository for training an evidential classification model.

Steps to get this notebook running:
1) Follow package installation steps in the miles-guess [ReadMe](https://github.com/ai2es/miles-guess/tree/main).
2) Run the cells in this notebook in order.

In [1]:
import yaml
import copy
import torch
import numpy as np
import pandas as pd

from mlguess.keras.data import load_ptype_uq, preprocess_data
from mlguess.torch.models import CategoricalDNN
from mlguess.torch.metrics import MetricsCalculator

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from collections import defaultdict

from mlguess.torch.class_losses import edl_mse_loss, edl_digamma_loss, edl_log_loss, relu_evidence

### Load a config file

In [2]:
config = "../config/evidential_classifier_torch.yml"

In [3]:
with open(config) as cf:
    conf = yaml.load(cf, Loader=yaml.FullLoader)

### Load the training splits

In [4]:
input_features = []
for features in ["TEMP_C", "T_DEWPOINT_C", "UGRD_m/s", "VGRD_m/s"]:
    input_features += conf["data"][features]
output_features = conf["data"]["ptypes"]

# Load data
_conf = copy.deepcopy(conf)
_conf.update(conf["data"])
data = load_ptype_uq(_conf, data_split=0, verbose=1, drop_mixed=False)
# check if we should scale the input data by groups
scale_groups = [] if "scale_groups" not in conf["data"] else conf["data"]["scale_groups"]
groups = [list(conf["data"][g]) for g in scale_groups]
leftovers = list(
    set(input_features)
    - set([row for group in scale_groups for row in conf["data"][group]])
)
if len(leftovers):
    groups.append(leftovers)
# scale the data
scaled_data, scalers = preprocess_data(
    data,
    input_features,
    output_features,
    scaler_type=conf["data"]["scaler_type"],
    encoder_type="onehot",
    groups=[],
)

In [5]:
def one_hot_embedding(labels, num_classes=10):
    # Convert to One Hot Encoding
    y = torch.eye(num_classes)
    return y[labels]

### Convert the pandas dataframe into torch tensors, wrap in Dataset then Dataloader

In [6]:
X_train = torch.FloatTensor(scaled_data["train_x"].values)
y_train = torch.LongTensor(np.argmax(scaled_data["train_y"], axis=1))

batch_size = 1024

# Create dataset and dataloader
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

### First lets train a standard (non-evidential) classifier

In [7]:
conf["model"]["softmax"] = True

In [8]:
mlp = CategoricalDNN(**conf["model"])

In [9]:
mlp

CategoricalDNN(
  (fcn): Sequential(
    (0): Linear(in_features=84, out_features=212, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.1167, inplace=False)
    (3): Linear(in_features=212, out_features=212, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.1167, inplace=False)
    (6): Linear(in_features=212, out_features=212, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.1167, inplace=False)
    (9): Linear(in_features=212, out_features=212, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.1167, inplace=False)
    (12): Linear(in_features=212, out_features=4, bias=True)
    (13): Softmax(dim=-1)
  )
)

### Train the model

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp.parameters(), lr=0.001)
metrics = MetricsCalculator(use_uncertainty=False)

# Training loop
num_epochs = 100
batches_per_epoch = 10

results_dict = defaultdict(list)
for epoch in range(num_epochs):
    mlp.train()
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0
    for k, (batch_X, batch_y) in enumerate(dataloader):
        # Forward pass
        outputs = mlp(batch_X)
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += batch_y.size(0)
        correct_predictions += (predicted == batch_y).float().mean().item()

        metrics_dict = metrics(one_hot_embedding(batch_y, 4), outputs, split="train")
        for name, value in metrics_dict.items():
            results_dict[name].append(value.item())
    

        if (k + 1) == batches_per_epoch:
            break
    
    # Calculate epoch statistics
    avg_loss = total_loss / batches_per_epoch
    accuracy = correct_predictions / batches_per_epoch
    
    #print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

In [11]:
for key, val in results_dict.items():
    print(key, np.mean(val))

train_csi 0.25876237462243046
train_ave_acc 0.4687493490720337
train_prec 0.42272548775507235
train_recall 0.4687493490720337
train_f1 0.4416411424933151
train_auc 0.898654333499807


### Next lets train an evidential classifier

In [12]:
conf["model"]["softmax"] = False
conf["model"]["lng"] = False

In [13]:
ev_mlp = CategoricalDNN(**conf["model"])

In [14]:
ev_mlp

CategoricalDNN(
  (fcn): Sequential(
    (0): Linear(in_features=84, out_features=212, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.1167, inplace=False)
    (3): Linear(in_features=212, out_features=212, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.1167, inplace=False)
    (6): Linear(in_features=212, out_features=212, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.1167, inplace=False)
    (9): Linear(in_features=212, out_features=212, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.1167, inplace=False)
    (12): Linear(in_features=212, out_features=4, bias=True)
  )
)

### Note here there is no output activation
### The other main difference is the choice of loss, seen below

In [15]:
def one_hot_embedding(labels, num_classes=10):
    # Convert to One Hot Encoding
    y = torch.eye(num_classes)
    return y[labels]

criterion = edl_digamma_loss
optimizer = optim.Adam(ev_mlp.parameters(), lr=0.001)
metrics = MetricsCalculator(use_uncertainty=False)

# Training loop
num_epochs = 100
batches_per_epoch = 10
num_classes = 4  # Assuming 4 classes based on your one_hot_embedding call

for epoch in range(num_epochs):
    ev_mlp.train()
    total_loss = 0
    total_acc = 0
    total_evidence = 0
    total_evidence_succ = 0
    total_evidence_fail = 0
    total_uncertainty = 0
    
    for k, (batch_X, batch_y) in enumerate(dataloader):
        # Forward pass
        outputs = ev_mlp(batch_X)
        batch_y_onehot = one_hot_embedding(batch_y, num_classes)
        
        loss = criterion(
            outputs, batch_y_onehot.float(), epoch, num_classes, 10, batch_y.device
        )
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        match = torch.eq(predicted, batch_y).float().view(-1, 1)
        acc = torch.mean(match)
        
        evidence = relu_evidence(outputs)
        alpha = evidence + 1
        u = num_classes / torch.sum(alpha, dim=1, keepdim=True)
        
        total_evidence_batch = torch.sum(evidence, 1, keepdim=True)
        mean_evidence = torch.mean(total_evidence_batch)
        mean_evidence_succ = torch.sum(total_evidence_batch * match) / (torch.sum(match) + 1e-20)
        mean_evidence_fail = torch.sum(total_evidence_batch * (1 - match)) / (torch.sum(1 - match) + 1e-20)
        
        total_acc += acc.item()
        total_evidence += mean_evidence.item()
        total_evidence_succ += mean_evidence_succ.item()
        total_evidence_fail += mean_evidence_fail.item()
        total_uncertainty += torch.mean(u).item()

        metrics_dict = metrics(one_hot_embedding(batch_y, 4), outputs, split="train")
        for name, value in metrics_dict.items():
            results_dict[name].append(value.item())
        
        if (k + 1) == batches_per_epoch:
            break
    
    # Calculate epoch statistics
    avg_loss = total_loss / batches_per_epoch
    avg_acc = total_acc / batches_per_epoch
    avg_evidence = total_evidence / batches_per_epoch
    avg_evidence_succ = total_evidence_succ / batches_per_epoch
    avg_evidence_fail = total_evidence_fail / batches_per_epoch
    avg_uncertainty = total_uncertainty / batches_per_epoch
    
    # print(f'Epoch [{epoch+1}/{num_epochs}]:')
    # print(f'  Loss: {avg_loss:.4f}')
    # print(f'  Accuracy: {avg_acc:.4f}')
    # print(f'  Mean Evidence: {avg_evidence:.4f}')
    # print(f'  Mean Evidence (Correct): {avg_evidence_succ:.4f}')
    # print(f'  Mean Evidence (Incorrect): {avg_evidence_fail:.4f}')
    # print(f'  Mean Uncertainty: {avg_uncertainty:.4f}')

In [16]:
for key, val in results_dict.items():
    print(key, np.mean(val))

train_csi 0.25621067497624167
train_ave_acc 0.467960801452284
train_prec 0.4292476873831698
train_recall 0.467960801452284
train_f1 0.4421940509144526
train_auc 0.8909479463346074


### Thats it! 

### Questions? Email John Schreck (schreck@ucar.edu)