# 0. Import libraries

In [5]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.utils.data import random_split, DataLoader
from sklearn.metrics import precision_score, f1_score, confusion_matrix, auc, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import mlflow
from helper.earlyStop import EarlyStopping
from helper.model import CNNModel
from sklearn.preprocessing import label_binarize

# 1. Preprocessing to prepare data for training 

### 1.1. Dataset Path

In [6]:
# Path and transform
DATA_PATH = 'dataset/own_dataset/'
BATCH_SIZE = 64
RESULT_DIR = "results"
os.makedirs(RESULT_DIR, exist_ok=True)

### 1.2. Data Transformation

In [7]:
data_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),           # Convert to grayscale
  #  transforms.RandomRotation(15),                         # Rotate images by ±15 degrees
   # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Random translation
   # transforms.RandomPerspective(distortion_scale=0.2, p=0.5), # Perspective transformation
   # transforms.RandomHorizontalFlip(p=0.5),                # Random horizontal flip
    transforms.Resize((28, 28)),                           # Resize to 28x28
    transforms.ToTensor(),                                 # Convert to tensor
    transforms.Normalize((0.5,), (0.5,))                   # Normalize
])

### 1.3. Load and Split the Dataset 

In [8]:
dataset = datasets.ImageFolder(DATA_PATH, transform=data_transforms)

# Calculate sizes of each partition
total_size = len(dataset)
train_size = int(0.8 * total_size)
test_size = int(0.1 * total_size)
eval_size = total_size - train_size - test_size  # To cover rounding#

# Split
train_dataset, test_dataset, eval_dataset = random_split(
    dataset, [train_size, test_size, eval_size],
    generator=torch.Generator().manual_seed(42)  # for reproducibility
)

### 1.4. Create different Dataloaders

In [9]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
eval_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Get a batch from train_loader
dataiter = iter(train_loader)
images, labels = next(dataiter)
labels.unique()


tensor([0, 1, 2, 3, 4, 5, 6, 7])

### 1.5.  Consider all of the Labels

In [10]:
CATEGORIES = ['air', 'earth', 'energy', 'fire', 'light', 'power','time', 'water' ]

# 2. Training

### 2.1. Create model, loss function and optimizer

In [11]:
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

### 2.2. Set Configuration Requirements

In [12]:
num_epochs = 10
early_stopping = EarlyStopping(patience=5, min_delta=1e-4)

train_loss_list = []
train_acc_list = []
train_precision_list = []
train_f1_list = []
val_loss_list = []
val_acc_list = []
val_precision_list = []
val_f1_list = []

### 2.3. Set MLFlow and train the Model

In [13]:
MLFLOW_DIR = "mlrun"
mlflow.set_tracking_uri(f"file://{os.path.abspath(MLFLOW_DIR)}")

In [14]:
with mlflow.start_run(nested=True):
    # Initialize lists to store probabilities for ROC calculation
    all_train_probs = []
    all_val_probs = []
    
    for epoch in range(num_epochs):
        # === Training ===
        model.train()
        running_loss = 0.0
        total = 0
        correct = 0
        all_train_preds = []
        all_train_labels = []
        epoch_train_probs = []

        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_train_preds.extend(predicted.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())
            epoch_train_probs.extend(torch.softmax(outputs, dim=1).cpu().detach().numpy())

        all_train_probs.append(epoch_train_probs)
        
        epoch_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total
        train_precision = precision_score(all_train_labels, all_train_preds, average='macro', zero_division=0)
        train_f1 = f1_score(all_train_labels, all_train_preds, average='macro')

        train_loss_list.append(epoch_loss)
        train_acc_list.append(train_acc)
        train_precision_list.append(train_precision)
        train_f1_list.append(train_f1)

        mlflow.log_metric("train_loss", epoch_loss, step=epoch)
        mlflow.log_metric("train_accuracy", train_acc, step=epoch)
        mlflow.log_metric("train_precision", train_precision, step=epoch)
        mlflow.log_metric("train_f1_score", train_f1, step=epoch)

        # === Validation ===
        model.eval()
        val_running_loss = 0.0
        val_total = 0
        val_correct = 0
        val_preds = []
        val_labels = []
        val_probs = []

        with torch.no_grad():
            for images, labels in eval_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

                val_preds.extend(predicted.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())
                val_probs.extend(torch.softmax(outputs, dim=1).cpu().numpy())

        all_val_probs.append(val_probs)
        
        val_loss = val_running_loss / len(eval_loader)
        val_acc = 100 * val_correct / val_total
        val_precision = precision_score(val_labels, val_preds, average='macro', zero_division=0)
        val_f1 = f1_score(val_labels, val_preds, average='macro')

        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc)
        val_precision_list.append(val_precision)
        val_f1_list.append(val_f1)

        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_acc, step=epoch)
        mlflow.log_metric("val_precision", val_precision, step=epoch)
        mlflow.log_metric("val_f1_score", val_f1, step=epoch)

        print(f"Epoch [{epoch+1}/{num_epochs}] "
              f"Train Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} "
              f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}% "
              f"Val Precision: {val_precision:.4f} | Val F1: {val_f1:.4f}")

        # Early Stopping Check
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    # === Plot individual accuracy metrics ===
    def plot_single_metric(x_vals, y_vals, metric_name, color='blue', file_prefix=""):
        plt.figure()
        plt.plot(x_vals, y_vals, color=color)
        plt.xlabel("Epoch")
        plt.ylabel(metric_name)
        plt.title(f"{metric_name} per Epoch ({file_prefix})")
        plt.grid(True)
        path = os.path.join(RESULT_DIR, f"{file_prefix}_{metric_name.lower().replace(' ', '_')}.png")
        plt.savefig(path)
        mlflow.log_artifact(path)
        plt.close()

    # Plot individual accuracy curves
    plot_single_metric(range(1, len(train_acc_list)+1), train_acc_list, "Accuracy", 'blue', "train")
    plot_single_metric(range(1, len(val_acc_list)+1), val_acc_list, "Accuracy", 'orange', "val")

    # === Plot ROC curves ===
    def plot_roc_curve(true_labels, probs, classes, title, file_prefix):
        # Binarize the labels
        y_true = label_binarize(true_labels, classes=classes)
        n_classes = y_true.shape[1]
        
        # Compute ROC curve and ROC area for each class
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true[:, i], probs[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        
        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), probs.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        
        # Plot all ROC curves
        plt.figure(figsize=(8, 6))
        colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'orange', 'purple', 'brown']
        
        for i, color in zip(range(n_classes), colors):
            plt.plot(fpr[i], tpr[i], color=color, lw=2,
                     label='Class {0} (AUC = {1:0.2f})'
                     ''.format(CATEGORIES[i], roc_auc[i]))
        
        plt.plot(fpr["micro"], tpr["micro"],
                 label='micro-average (AUC = {0:0.2f})'
                 ''.format(roc_auc["micro"]),
                 color='deeppink', linestyle=':', linewidth=4)
        
        plt.plot([0, 1], [0, 1], 'k--', lw=2)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve - {title}')
        plt.legend(loc="lower right")
        
        path = os.path.join(RESULT_DIR, f"{file_prefix}_roc_curve.png")
        plt.savefig(path)
        mlflow.log_artifact(path)
        plt.close()
        
        # Log AUC metrics
        for i in range(n_classes):
            mlflow.log_metric(f"{file_prefix}_auc_class_{CATEGORIES[i]}", roc_auc[i])
        mlflow.log_metric(f"{file_prefix}_auc_micro_avg", roc_auc["micro"])

    # Get the probabilities from the last epoch for ROC curves
    last_train_probs = np.array(all_train_probs[-1])
    last_val_probs = np.array(all_val_probs[-1])
    
    # Plot ROC curves for training and validation
    plot_roc_curve(all_train_labels, last_train_probs, range(len(CATEGORIES)), "Training", "train")
    plot_roc_curve(val_labels, last_val_probs, range(len(CATEGORIES)), "Validation", "val")

    # === Save Best Model ===
    best_model_path = "cnn_model_best.pth"
    torch.save(early_stopping.best_model_state, best_model_path)
    mlflow.log_artifact(best_model_path)
    model.load_state_dict(early_stopping.best_model_state)

    # === ONNX Export ===
    onnx_model_path = "cnn_model.onnx"
    dummy_input = torch.randn(1, 1, 28, 28)
    torch.onnx.export(model, dummy_input, onnx_model_path,
                      input_names=['input'], output_names=['output'],
                      dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
                      opset_version=11)
    mlflow.log_artifact(onnx_model_path)

Epoch [1/10] Train Loss: 0.5866 | Val Loss: 0.0684 Train Acc: 83.52% | Val Acc: 98.50% Val Precision: 0.9851 | Val F1: 0.9841
Epoch [2/10] Train Loss: 0.0576 | Val Loss: 0.0203 Train Acc: 98.69% | Val Acc: 99.75% Val Precision: 0.9968 | Val F1: 0.9968
Epoch [3/10] Train Loss: 0.0327 | Val Loss: 0.0227 Train Acc: 99.28% | Val Acc: 99.25% Val Precision: 0.9922 | Val F1: 0.9929
Epoch [4/10] Train Loss: 0.0166 | Val Loss: 0.0053 Train Acc: 99.50% | Val Acc: 100.00% Val Precision: 1.0000 | Val F1: 1.0000
Epoch [5/10] Train Loss: 0.0083 | Val Loss: 0.0193 Train Acc: 99.84% | Val Acc: 99.25% Val Precision: 0.9916 | Val F1: 0.9921
Epoch [6/10] Train Loss: 0.0083 | Val Loss: 0.0049 Train Acc: 99.75% | Val Acc: 99.75% Val Precision: 0.9973 | Val F1: 0.9976
Epoch [7/10] Train Loss: 0.0015 | Val Loss: 0.0022 Train Acc: 100.00% | Val Acc: 100.00% Val Precision: 1.0000 | Val F1: 1.0000
Epoch [8/10] Train Loss: 0.0010 | Val Loss: 0.0023 Train Acc: 100.00% | Val Acc: 99.75% Val Precision: 0.9973 | Val

### 2.3. Evaluate the Testset

In [16]:
model.eval()
for test_loader, dataset_name in [(test_loader, "Dataset test"), (eval_loader, "Dataset evaluation")]:
    test_preds = []
    test_labels = []
    test_probs = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)

            test_preds.extend(predicted.cpu().numpy())
            test_labels.extend(labels.cpu().numpy())
            test_probs.extend(probs.cpu().numpy())

    test_labels_np = np.array(test_labels)
    test_preds_np = np.array(test_preds)
    test_probs_np = np.array(test_probs)

    cm = confusion_matrix(test_labels_np, test_preds_np)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap="Reds",
                xticklabels=CATEGORIES, yticklabels=CATEGORIES)
    plt.title(f"Confusion Matrix - {dataset_name}")
    plt.xlabel('Predicted')
    plt.ylabel('True')

    mlflow.log_figure(plt.gcf(), f"{dataset_name}_confusion_matrix.png")
    plt.close()

# 4. Show the Results in MLFlow

In [17]:
!mlflow ui --backend-store-uri mlrun


[2025-07-26 10:50:09 +0200] [109465] [INFO] Starting gunicorn 23.0.0
[2025-07-26 10:50:09 +0200] [109465] [INFO] Listening at: http://127.0.0.1:5000 (109465)
[2025-07-26 10:50:09 +0200] [109465] [INFO] Using worker: sync
[2025-07-26 10:50:09 +0200] [109477] [INFO] Booting worker with pid: 109477
[2025-07-26 10:50:09 +0200] [109478] [INFO] Booting worker with pid: 109478
[2025-07-26 10:50:09 +0200] [109479] [INFO] Booting worker with pid: 109479
[2025-07-26 10:50:09 +0200] [109480] [INFO] Booting worker with pid: 109480
[2025-07-26 10:52:07 +0200] [109465] [INFO] Handling signal: int
^C

Aborted!
[2025-07-26 10:52:07 +0200] [109478] [INFO] Worker exiting (pid: 109478)
[2025-07-26 10:52:07 +0200] [109480] [INFO] Worker exiting (pid: 109480)
[2025-07-26 10:52:07 +0200] [109479] [INFO] Worker exiting (pid: 109479)
[2025-07-26 10:52:07 +0200] [109477] [INFO] Worker exiting (pid: 109477)
