# Ensemble Predictions

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory
from pathlib import Path
cudnn.benchmark = True
plt.ion()   # interactive mode
from tqdm import tqdm
import torchvision.models as models
from collections import Counter
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix as sk_confusion_matrix
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import warnings
from sklearn.exceptions import UndefinedMetricWarning
#from pytorch_grad_cam import GradCAM
#from pytorch_grad_cam.utils.image import show_cam_on_image
#from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


In [None]:
# Preprocessing pipelines, keyed by the folder name of each split.
# Every split gets its own (identical) resize -> tensor -> ImageNet-normalise pipeline.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    for split in ('Train_sorted', 'Validation_sorted', 'Test_sorted')
}

# Only the training and validation folders are wrapped in datasets/loaders here.
data_dir = '/Users/inescocco/Desktop/ISIC2019'
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('Train_sorted', 'Validation_sorted')
}
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset, batch_size=32, shuffle=True, num_workers=0
    )
    for split, dataset in image_datasets.items()
}
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

# Class order comes from the training ImageFolder; it is reused as the label index everywhere below.
class_names = image_datasets['Train_sorted'].classes
print(class_names)


# Unweighted Hard Voting Ensemble Predictions

In [None]:
def get_validation_accuracy(model_path, val_dir, class_names, device):
    """Return the top-1 accuracy of one saved model over a class-per-folder image directory.

    Args:
        model_path: Path to a checkpoint that stores a whole pickled model
            (it is loaded directly, not as a state_dict).
        val_dir: Directory containing one sub-folder per entry of ``class_names``.
        class_names: Class folder names; an entry's position in this list is
            the label index the model is expected to predict.
        device: torch device on which inference is run.

    Returns:
        float: Fraction of correctly classified images in ``[0, 1]``, or
        ``0.0`` when no ``.jpg``/``.png`` image was found.
    """
    # NOTE: weights_only=False unpickles arbitrary objects; only load trusted checkpoints.
    model = torch.load(model_path, weights_only=False)
    model = model.to(device)
    model.eval()

    # The preprocessing pipeline is loop-invariant, so build it once.
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    correct = 0
    total = 0

    # Inference only: disabling autograd avoids building a graph for every image.
    with torch.no_grad():
        for class_idx, class_name in tqdm(enumerate(class_names), desc="Processing classes", total=len(class_names)):
            class_folder = os.path.join(val_dir, class_name)

            for filename in os.listdir(class_folder):
                if not filename.endswith(('.jpg', '.png')):
                    continue

                image_path = os.path.join(class_folder, filename)
                # Context manager closes the underlying file as soon as the pixels are copied.
                with Image.open(image_path) as handle:
                    img = handle.convert('RGB')

                # Add the batch dimension and move the tensor to the model's device.
                img_tensor = preprocess(img).unsqueeze(0).to(device)

                outputs = model(img_tensor)
                _, preds = torch.max(outputs, 1)

                total += 1
                if preds.item() == class_idx:
                    correct += 1

    # Guard against an empty directory instead of raising ZeroDivisionError.
    return correct / total if total else 0.0

def output_validation_accuracy(model_paths):
    """Evaluate every checkpoint in ``model_paths`` and return their accuracies in order.

    Uses the module-level ``val_dir``, ``class_names`` and ``device`` for each
    call to ``get_validation_accuracy``.
    """
    return [
        get_validation_accuracy(checkpoint, val_dir, class_names, device)
        for checkpoint in model_paths
    ]


In [None]:
def load_models(model_paths, device):
    """Load each checkpoint as a full model, move it to ``device`` and switch it to eval mode.

    Returns the loaded models as a list, in the same order as ``model_paths``.
    """
    loaded = []
    for checkpoint_path in model_paths:
        network = torch.load(checkpoint_path, weights_only=False).to(device)
        network.eval()  # inference mode for the layers that distinguish train/eval
        loaded.append(network)
    return loaded

def _resolve_hard_vote(model_preds, validation_accuracies):
    """Return the majority class index, breaking ties with the most accurate tied model."""
    vote_count = Counter(model_preds)
    top_votes = max(vote_count.values())
    leaders = {label for label, votes in vote_count.items() if votes == top_votes}
    if len(leaders) == 1:
        return next(iter(leaders))

    # Several classes share the top vote count: trust the model with the highest
    # validation accuracy among those that voted for one of the tied classes.
    tied_models = [i for i, pred in enumerate(model_preds) if pred in leaders]
    best_model = max(tied_models, key=lambda i: validation_accuracies[i])
    return model_preds[best_model]


def hard_ensemble(models, validation_accuracies, val_dir, class_names):
    """Classify every image under ``val_dir`` by unweighted majority vote.

    Each model casts one vote (its arg-max class). A strict majority wins; when
    the top vote count is shared, the prediction of the most accurate model
    among the tied ones is used. This works for any number of models, not only
    three.

    Relies on the module-level ``device`` for tensor placement.

    Args:
        models: Loaded models, already in eval mode and on ``device``.
        validation_accuracies: One accuracy per model, in the same order as
            ``models``; used only for tie-breaking.
        val_dir: Directory containing one sub-folder per entry of ``class_names``.
        class_names: Class folder names; list position is the label index.

    Returns:
        list: Tuples ``(image_path, model_pred_names, ensemble_pred_name, true_label_name)``.
    """
    # Loop-invariant preprocessing pipeline, built once.
    data_transforms = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    all_results = []

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        for class_idx, class_name in tqdm(enumerate(class_names), desc="Processing classes", total=len(class_names)):
            class_folder = os.path.join(val_dir, class_name)

            for filename in os.listdir(class_folder):
                if not filename.endswith(('.jpg', '.png')):
                    continue

                image_path = os.path.join(class_folder, filename)
                with Image.open(image_path) as handle:
                    img = handle.convert('RGB')

                # Add the batch dimension and move to the inference device.
                img_tensor = data_transforms(img).unsqueeze(0).to(device)

                # One arg-max class index per model.
                model_preds = []
                for model in models:
                    outputs = model(img_tensor)
                    _, preds = torch.max(outputs, 1)
                    model_preds.append(preds.item())

                final_pred = _resolve_hard_vote(model_preds, validation_accuracies)

                # The folder the image came from is its ground-truth class.
                model_preds_named = [class_names[pred] for pred in model_preds]
                all_results.append(
                    (image_path, model_preds_named, class_names[final_pred], class_names[class_idx])
                )

    return all_results


# Function to print predictions and results
def hard_print_predictions(results, class_names):
    """Print a readable summary of each hard-voting result.

    For every ``(image_file, model_preds, ensemble_pred, true_label)`` tuple the
    true label, each model's prediction and the ensemble prediction are shown
    as class name plus its index in ``class_names``.
    """
    for position, record in enumerate(results, start=1):
        image_file, model_preds, ensemble_pred, true_label = record
        print(f"Image {position}: {image_file}")

        # Ground truth as name and numeric index
        truth_idx = class_names.index(true_label)
        print(f"  True Label: {true_label}, {truth_idx}")

        # Every individual model's vote as (name, index)
        votes = [(name, class_names.index(name)) for name in model_preds]
        print(f"  Model Predictions: {votes}")

        # Final ensemble decision as name and numeric index
        ensemble_idx = class_names.index(ensemble_pred)
        print(f"  Ensemble Prediction: {ensemble_pred}, {ensemble_idx}\n")

def calculate_ensemble_accuracy(results, class_names):
    """
    Compute how often the ensemble prediction equals the true label.

    Args:
        results (list): Tuples of (image_file, model_preds, ensemble_pred, true_label),
            where the last two entries are class names.
        class_names (list): List of class names; both labels must appear in it.

    Returns:
        float: Percentage of matching predictions, or 0 when ``results`` is empty.
    """
    # Compare by index in class_names, one boolean per result
    matches = [
        class_names.index(ensemble_pred) == class_names.index(true_label)
        for _, _, ensemble_pred, true_label in results
    ]

    if not matches:
        return 0

    # True counts as 1, so the sum is the number of correct predictions
    return (sum(matches) / len(matches)) * 100


In [None]:
# Prefer the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Standalone copy of the first ensemble member, placed on the chosen device
best_model = torch.load("model_b7_2_epoch_7_3.pth", weights_only=False).to(device)

# Checkpoints that make up the ensemble
model_paths = [
    "model_b7_2_epoch_7_3.pth",
    "model_b6_epoch_15_3.pth",
    "model_b5_epoch_14.pth",
]

# Folder the ensembles are evaluated on (note: this is the Test_sorted split)
val_dir = '/Users/inescocco/Desktop/ISIC2019/Test_sorted'

# One accuracy per checkpoint — presumably in the same order as model_paths; confirm
validation_accuracies = [0.8008, 0.7434, 0.7424]

# NOTE: this rebinds the name `models`, hiding the torchvision `models` module imported above
models = load_models(model_paths, device)

In [None]:
# Run the unweighted hard-voting ensemble over the images under val_dir
# and print the per-image predictions
hard_results = hard_ensemble(models, validation_accuracies, val_dir, class_names)
hard_print_predictions(hard_results, class_names)

In [None]:
# Overall accuracy (in percent) of the unweighted hard-voting ensemble
ensemble_accuracy = calculate_ensemble_accuracy(hard_results, class_names)
print(f"Unweighted Hard Voting Ensemble Accuracy: {ensemble_accuracy:.2f}%")

# Weighted Hard Voting Ensemble Predictions

In [None]:
# Per-model accuracies used to derive voting weights (same values as
# validation_accuracies defined earlier in this file)
val_accuracies = [0.8008, 0.7434, 0.7424]
# CODE ADOPTED FROM: https://www.geeksforgeeks.org/how-to-normalize-an-array-in-numpy-in-python/

# explicit function to min-max normalize an array
def normalize(arr, t_min, t_max):
    """Linearly rescale ``arr`` so its minimum maps to ``t_min`` and its maximum to ``t_max``.

    Args:
        arr: Iterable of numbers.
        t_min: Lower bound of the target range.
        t_max: Upper bound of the target range.

    Returns:
        list: The rescaled values, in input order. An empty input gives ``[]``.
        When all inputs are equal there is no spread to rescale, so every
        element is mapped to ``t_max`` (equal inputs keep equal, non-zero
        weights when the target range is ``(0, 1)``).
    """
    values = list(arr)
    if not values:
        return []

    # Compute the extrema once instead of once per element.
    lowest = min(values)
    spread = max(values) - lowest
    target_spread = t_max - t_min

    # Degenerate case: avoid dividing by a zero spread.
    if spread == 0:
        return [t_max for _ in values]

    return [((value - lowest) * target_spread) / spread + t_min for value in values]

# Rescale the accuracies into the range 0..1: the lowest accuracy maps to 0
# and the highest to 1
array_1d = val_accuracies 
range_to_normalize = (0,1)
normalized_array_1d = normalize(array_1d, 
								range_to_normalize[0], 
								range_to_normalize[1])

# display original and normalized array
print("Original Array = ",array_1d)
print("Normalized Array = ",normalized_array_1d)


In [None]:
def hard_ensemble_weighted(models, val_accuracies, val_dir, class_names):
    """
    Classify every image under ``val_dir`` by accuracy-weighted hard voting.

    Each model votes for its arg-max class, and the vote counts as much as the
    model's weight. Weights are ``val_accuracies`` min-max rescaled to
    ``[0, 1]`` with ``normalize``. With that range the lowest-accuracy model
    gets weight 0 and therefore contributes no vote.

    Relies on the module-level ``device`` for tensor placement.

    Args:
        models: Loaded models, already in eval mode and on ``device``.
        val_accuracies: One validation accuracy per model, in the same order
            as ``models``.
        val_dir: Directory containing one sub-folder per entry of ``class_names``.
        class_names: List of class names; list position is the label index.

    Returns:
        list: List of tuples containing (image_file, model_preds, ensemble_pred, true_label).
    """
    # Derive the weights from the argument. The previous code read an undefined
    # global (`normalize_array_1d`) and raised NameError on the first call.
    weights = normalize(val_accuracies, 0, 1)

    data_transforms = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    all_results = []

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        for class_idx, class_name in tqdm(enumerate(class_names), desc="Processing classes", total=len(class_names)):
            class_folder = os.path.join(val_dir, class_name)

            for filename in os.listdir(class_folder):
                if not filename.endswith(('.jpg', '.png')):
                    continue

                image_path = os.path.join(class_folder, filename)
                with Image.open(image_path) as handle:
                    img = handle.convert('RGB')

                # Add the batch dimension and move to the inference device.
                img_tensor = data_transforms(img).unsqueeze(0).to(device)

                # One arg-max class index per model.
                model_preds = []
                for model in models:
                    outputs = model(img_tensor)
                    _, preds = torch.max(outputs, 1)
                    model_preds.append(preds.item())

                # Accumulate each model's weight on the class it voted for.
                weighted_votes = np.zeros(len(class_names))
                for weight, pred in zip(weights, model_preds):
                    weighted_votes[pred] += weight

                # np.argmax returns the first index on ties.
                final_pred = int(np.argmax(weighted_votes))

                # The folder the image came from is its ground-truth class.
                model_preds_named = [class_names[pred] for pred in model_preds]
                all_results.append(
                    (image_path, model_preds_named, class_names[final_pred], class_names[class_idx])
                )

    return all_results


In [None]:
# Run the accuracy-weighted hard-voting ensemble over the images under val_dir
# and print the per-image predictions
hard_results_weighted = hard_ensemble_weighted(models, validation_accuracies, val_dir, class_names)
hard_print_predictions(hard_results_weighted, class_names)

In [None]:
# Overall accuracy (in percent) of the weighted hard-voting ensemble
ensemble_accuracy_weights = calculate_ensemble_accuracy(hard_results_weighted, class_names)
print(f"Weighted Hard Voting Ensemble Accuracy: {ensemble_accuracy_weights:.2f}%")

# Unweighted Soft Voting Ensemble Predictions

In [None]:
def soft_ensemble(models, val_dir, class_names, device):
    """Classify every image under ``val_dir`` by averaging the models' raw outputs.

    For each image the outputs of all models are averaged element-wise and the
    class with the highest mean score wins. No softmax is applied here, so
    whether the averaged values are logits or probabilities depends on what
    the saved models emit.

    Args:
        models: Loaded models, already on ``device``.
        val_dir: Directory containing one sub-folder per entry of ``class_names``.
        class_names: Class folder names; list position is the label index.
        device: torch device on which inference is run.

    Returns:
        list: Tuples ``(image_path, avg_scores, ensemble_pred_name, true_label_name)``.
    """
    data_transforms = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Switching to eval mode is loop-invariant; do it once instead of per image.
    for model in models:
        model.eval()

    all_results = []

    with torch.no_grad():
        for class_idx, class_name in tqdm(enumerate(class_names), desc="Processing classes", total=len(class_names)):
            class_folder = os.path.join(val_dir, class_name)

            for filename in os.listdir(class_folder):
                # Accept the same extensions as the other ensemble functions so
                # all ensembles are scored on the same set of images.
                if not filename.endswith(('.jpg', '.png')):
                    continue

                image_path = os.path.join(class_folder, filename)
                with Image.open(image_path) as handle:
                    img = handle.convert('RGB')

                # Add the batch dimension and move to the inference device.
                img_tensor = data_transforms(img).unsqueeze(0).to(device)

                # Stack one output per model. Each output keeps the size-1 batch
                # axis added by unsqueeze(0) — presumably (1, num_classes), so
                # the stack is (num_models, 1, num_classes); confirm against the models.
                model_scores = np.array(
                    [model(img_tensor).cpu().numpy() for model in models]
                )

                # Mean over the model axis; the batch axis is kept in avg_scores.
                avg_scores = np.mean(model_scores, axis=0)

                # argmax works on the flattened array, so the size-1 batch axis
                # does not affect the returned class index.
                final_pred = np.argmax(avg_scores)

                # The folder the image came from is its ground-truth class.
                all_results.append(
                    (image_path, avg_scores, class_names[final_pred], class_names[class_idx])
                )

    return all_results


def soft_print_predictions(results, class_names):
    """Print each soft-voting result: true label, averaged scores and ensemble prediction.

    ``results`` holds ``(image_file, avg_scores, ensemble_pred_named, true_label_named)``
    tuples; labels are printed as class name followed by their index in ``class_names``.
    """
    divider = "-" * 50  # visual separator between images

    for record in results:
        image_file, avg_scores, ensemble_pred_named, true_label_named = record

        # Ground truth as name and numeric index
        truth_idx = class_names.index(true_label_named)

        print(f"Image: {image_file}")
        print(f"  True Label: {true_label_named}, {truth_idx}")
        print(f"Classes: {class_names}")
        print(f"Averages: {avg_scores}")

        # Ensemble decision as name and numeric index
        ensemble_idx = class_names.index(ensemble_pred_named)
        print(f"  Ensemble Prediction: {ensemble_pred_named}, {ensemble_idx}")
        print(divider)


In [None]:
# Run the unweighted soft-voting ensemble over the images under val_dir
# and print the per-image predictions
soft_results = soft_ensemble(models, val_dir, class_names, device)
soft_print_predictions(soft_results, class_names)

In [None]:
# Overall accuracy (in percent) of the unweighted soft-voting ensemble
ensemble_accuracy = calculate_ensemble_accuracy(soft_results, class_names)
print(f"Unweighted Soft Voting Ensemble Accuracy: {ensemble_accuracy:.2f}%")

# Weighted Soft Voting Ensemble Predictions

In [None]:
def soft_ensemble_weighted(models, val_dir, class_names, device, val_accuracies):
    """Classify every image under ``val_dir`` by an accuracy-weighted average of model outputs.

    Each model's raw output is weighted by its share of the summed validation
    accuracies (weights sum to 1); the class with the highest weighted mean
    score wins. No softmax is applied here, so whether the averaged values are
    logits or probabilities depends on what the saved models emit.

    Args:
        models: Loaded models, already on ``device``.
        val_dir: Directory containing one sub-folder per entry of ``class_names``.
        class_names: Class folder names; list position is the label index.
        device: torch device on which inference is run.
        val_accuracies: One validation accuracy per model, in the same order
            as ``models``.

    Returns:
        list: Tuples ``(image_path, avg_scores, ensemble_pred_name, true_label_name)``.

    Raises:
        ValueError: If ``val_accuracies`` and ``models`` differ in length.
    """
    # Fail fast on a mismatch instead of after the first image has been processed.
    if len(val_accuracies) != len(models):
        raise ValueError(
            f"Expected one validation accuracy per model, got "
            f"{len(val_accuracies)} accuracies for {len(models)} models"
        )

    data_transforms = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Normalize the accuracies to sum to 1; compute the total once.
    accuracy_total = sum(val_accuracies)
    model_weights = [accuracy / accuracy_total for accuracy in val_accuracies]

    # Switching to eval mode is loop-invariant; do it once instead of per image.
    for model in models:
        model.eval()

    all_results = []

    with torch.no_grad():
        for class_idx, class_name in tqdm(enumerate(class_names), total=len(class_names), desc="Processing Classes"):
            class_folder = os.path.join(val_dir, class_name)

            for filename in os.listdir(class_folder):
                if not filename.endswith(('.jpg', '.png')):
                    continue

                image_path = os.path.join(class_folder, filename)
                with Image.open(image_path) as handle:
                    img = handle.convert('RGB')

                # Add the batch dimension and move to the inference device.
                img_tensor = data_transforms(img).unsqueeze(0).to(device)

                # Stack one output per model. Each output keeps the size-1 batch
                # axis added by unsqueeze(0) — presumably (1, num_classes), so
                # the stack is (num_models, 1, num_classes); confirm against the models.
                model_scores = np.array(
                    [model(img_tensor).cpu().numpy() for model in models]
                )

                # Weighted mean over the model axis.
                avg_scores = np.average(model_scores, axis=0, weights=model_weights)

                # argmax works on the flattened array, so the size-1 batch axis
                # does not affect the returned class index.
                final_pred = np.argmax(avg_scores)

                # The folder the image came from is its ground-truth class.
                all_results.append(
                    (image_path, avg_scores, class_names[final_pred], class_names[class_idx])
                )

    return all_results


def soft_print_predictions(results, class_names):
    """Print each soft-voting result while showing a tqdm progress bar.

    This redefinition replaces the earlier ``soft_print_predictions`` in this
    file; the printed text is the same, only the progress bar is added.
    """
    progress = tqdm(results, desc="Printing Predictions")
    for image_file, avg_scores, ensemble_pred_named, true_label_named in progress:
        # Resolve the true label's numeric index before anything is printed
        label_position = class_names.index(true_label_named)

        # Image details
        print(f"Image: {image_file}")
        print(f"  True Label: {true_label_named}, {label_position}")
        print(f"Classes: {class_names}")
        print(f"Averages: {avg_scores}")

        # Ensemble decision (class name and numeric index), then a separator line
        prediction_position = class_names.index(ensemble_pred_named)
        print(f"  Ensemble Prediction: {ensemble_pred_named}, {prediction_position}")
        print("-" * 50)



In [None]:
# Run the accuracy-weighted soft-voting ensemble over the images under val_dir
# and print the per-image predictions
soft_results_weighted = soft_ensemble_weighted(models, val_dir, class_names, device, validation_accuracies)
soft_print_predictions(soft_results_weighted, class_names)

In [None]:
# Overall accuracy (in percent) of the weighted soft-voting ensemble
ensemble_accuracy = calculate_ensemble_accuracy(soft_results_weighted, class_names)
print(f"Weighted Soft Voting Ensemble Accuracy: {ensemble_accuracy:.2f}%")

## Comparing Ensembles

In [None]:
# Function to calculate and print evaluation metrics
def evaluate_ensemble(results, class_names):
    """Plot the confusion matrix and print the classification report and accuracy.

    Args:
        results: Iterable of ``(image_file, scores_or_preds, pred_label, true_label)``
            tuples whose last two entries are class names.
        class_names: List of class names; list position is the label index.

    Raises:
        ValueError: If a label in ``results`` is not in ``class_names``.
    """
    # Single pass so a generator input is not exhausted half-way.
    y_true = []
    y_pred = []
    for _, _, pred_label, true_label in results:
        y_true.append(class_names.index(true_label))
        y_pred.append(class_names.index(pred_label))

    # Pin the label set explicitly: without it sklearn infers the labels from
    # the data, and a class absent from both y_true and y_pred makes the
    # matrix/report disagree with the length of class_names.
    label_ids = list(range(len(class_names)))

    # Always 0 in practice: class_names.index() either returns a valid index or raises.
    missing_predictions = sum(1 for pred in y_pred if pred not in range(len(class_names)))

    # Confusion matrix
    cm_matrix = sk_confusion_matrix(y_true, y_pred, labels=label_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_matrix, display_labels=class_names)
    disp.plot(cmap=plt.cm.Blues)
    plt.title("Confusion Matrix")
    plt.show()

    # Suppress warnings for metrics that are undefined for classes without samples
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)

        report = classification_report(
            y_true, y_pred, labels=label_ids, target_names=class_names, zero_division=0
        )
        print("\nClassification Report:")
        print(report)

    # Overall accuracy
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    print(f"\nNumber of missing predictions: {missing_predictions}")


In [None]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix as sk_confusion_matrix, accuracy_score, ConfusionMatrixDisplay
from sklearn.exceptions import UndefinedMetricWarning

# Function to calculate and print evaluation metrics
def evaluate_ensemble(results, class_names):
    """Plot the confusion matrix and a classification-report heatmap, and print the metrics.

    This redefinition replaces the earlier ``evaluate_ensemble`` in this file;
    it renders the classification report as a DataFrame and heatmap instead of
    plain text.

    Args:
        results: Iterable of ``(image_file, scores_or_preds, pred_label, true_label)``
            tuples whose last two entries are class names.
        class_names: List of class names; list position is the label index.

    Raises:
        ValueError: If a label in ``results`` is not in ``class_names``.
    """
    # Single pass so a generator input is not exhausted half-way.
    y_true = []
    y_pred = []
    for _, _, pred_label, true_label in results:
        y_true.append(class_names.index(true_label))
        y_pred.append(class_names.index(pred_label))

    # Pin the label set explicitly: without it sklearn infers the labels from
    # the data, and a class absent from both y_true and y_pred makes the
    # matrix/report disagree with the length of class_names.
    label_ids = list(range(len(class_names)))

    # Always 0 in practice: class_names.index() either returns a valid index or raises.
    missing_predictions = sum(1 for pred in y_pred if pred not in range(len(class_names)))

    # Confusion matrix
    cm_matrix = sk_confusion_matrix(y_true, y_pred, labels=label_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_matrix, display_labels=class_names)
    disp.plot(cmap=plt.cm.Blues)
    plt.title("Confusion Matrix")
    plt.show()

    # Suppress warnings for metrics that are undefined for classes without samples
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)

        report = classification_report(
            y_true,
            y_pred,
            labels=label_ids,
            target_names=class_names,
            zero_division=0,
            output_dict=True,
        )

        # One row per class/summary entry, one column per metric
        report_df = pd.DataFrame(report).transpose()
        report_df = report_df.round(4)

        # Heatmap of every column except the last one, so that column's values
        # do not dominate the colour scale
        plt.figure(figsize=(10, 6))
        sns.heatmap(report_df.iloc[:, :-1].astype(float), annot=True, cmap='Blues', fmt='.4f', cbar=True, annot_kws={"size": 12})
        plt.title("Classification Report", fontsize=16)
        plt.tick_params(axis='both', labelsize=14)
        plt.show()

        print("\nDetailed Classification Report:")
        print(report_df)

    # Overall accuracy
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    print(f"\nNumber of missing predictions: {missing_predictions}")


### Unweighted Hard Voting Ensemble

In [None]:
# Confusion matrix, classification report and accuracy for the unweighted hard-voting ensemble
evaluate_ensemble(hard_results, class_names)

### Weighted Hard Voting Ensemble

In [None]:
# Confusion matrix, classification report and accuracy for the weighted hard-voting ensemble
evaluate_ensemble(hard_results_weighted, class_names)

### Unweighted Soft Voting Ensemble

In [None]:
# Confusion matrix, classification report and accuracy for the unweighted soft-voting ensemble
evaluate_ensemble(soft_results, class_names)

### Weighted Soft Voting Ensemble

In [None]:
# Confusion matrix, classification report and accuracy for the weighted soft-voting ensemble
evaluate_ensemble(soft_results_weighted, class_names)