## D7041E - Mini Project Group 9 - Elvira Forslund Widenroth & Jenny Sundström

In [73]:
import copy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
import itertools
from sklearn.metrics import precision_recall_fscore_support

In [74]:
# Function to define the neural network
class Multiclass(nn.Module):
    def __init__(self, input_size, hidden_neurons, hidden_layers, output_size):
        super().__init__()
        layers = []
        layers.append(nn.Linear(input_size, hidden_neurons))
        layers.append(nn.ReLU())
        for _ in range(hidden_layers - 1):
            layers.append(nn.Linear(hidden_neurons, hidden_neurons))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_neurons, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [75]:
# Function for training the model
def train_model(model, optimizer, X_train, y_train, num_epochs, batch_size, loss_fn):
    model.train()
    for epoch in range(num_epochs):
        indices = torch.randperm(len(X_train))
        X_train_shuffled, y_train_shuffled = X_train[indices], y_train[indices]

        for i in range(0, len(X_train), batch_size):
            X_batch = X_train_shuffled[i:i + batch_size]
            y_batch = y_train_shuffled[i:i + batch_size]

            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, torch.argmax(y_batch, 1))
            loss.backward()
            optimizer.step()

In [76]:
# Function to evaluate the model on a dataset and calculate accuracy
def evaluate_model(model, X, y):
    model.eval()
    y_pred = model(X)
    acc = (torch.argmax(y_pred, 1) == torch.argmax(y, 1)).float().mean().item()
    return acc

In [77]:
#Function to perform nested k-fold cross-validation
def nested_cross_validation(X, y, hyperparameters, input_size, output_size, outer_splits, inner_splits, seed):
    y_labels = np.argmax(y, axis=1)
    outer_kfold = StratifiedKFold(n_splits=outer_splits, shuffle=True, random_state=seed)
    outer_test_accuracies = []
    outer_test_f1_scores = []
    outer_test_precisions = []
    outer_test_recalls = []
    all_best_hyperparameters = [] 


    for outer_train_index, outer_test_index in outer_kfold.split(X,y_labels):
        X_outer_train, X_outer_test = X[outer_train_index], X[outer_test_index]
        y_outer_train, y_outer_test = y[outer_train_index], y[outer_test_index]

        y_labels_outer_train = np.argmax(y_outer_train, axis=1)

        best_acc = -np.inf
        best_weights = None
        best_hyperparameters = None

        inner_kfold = StratifiedKFold(n_splits=inner_splits, shuffle=True, random_state=seed)

        for hyperparam_set in hyperparameters:
            lr = hyperparam_set['learning_rate']
            hidden_neurons = hyperparam_set['hidden_neurons']
            hidden_layers = hyperparam_set['hidden_layers']
            num_epochs = hyperparam_set['num_epochs']
            loss_fn = hyperparam_set['loss_function']

            model = Multiclass(input_size=input_size, hidden_neurons=hidden_neurons, hidden_layers=hidden_layers, output_size=output_size)
            optimizer = optim.Adam(model.parameters(), lr=lr)

            for inner_train_index, inner_val_index in inner_kfold.split(X_outer_train, y_labels_outer_train):
                X_inner_train, X_inner_val = X_outer_train[inner_train_index], X_outer_train[inner_val_index]
                y_inner_train, y_inner_val = y_outer_train[inner_train_index], y_outer_train[inner_val_index]

                train_model(model, optimizer, X_inner_train, y_inner_train, num_epochs, batch_size, loss_fn)

                acc_val = evaluate_model(model, X_inner_val, y_inner_val)

                if acc_val > best_acc:
                    best_acc = acc_val
                    best_hyperparameters = hyperparam_set

        model = Multiclass(input_size=input_size, hidden_neurons=best_hyperparameters['hidden_neurons'], hidden_layers=best_hyperparameters['hidden_layers'], output_size=output_size)
        optimizer = optim.Adam(model.parameters(), lr=best_hyperparameters['learning_rate'])

        train_model(model, optimizer, X_outer_train, y_outer_train, best_hyperparameters['num_epochs'], batch_size, best_hyperparameters['loss_function'])

        acc_test = evaluate_model(model, X_outer_test, y_outer_test)
        f1, precision, recall, _ = precision_recall_fscore_support(y_outer_test.argmax(axis=1), model(X_outer_test).argmax(dim=1), average='weighted', zero_division=1)

        outer_test_accuracies.append(acc_test)
        outer_test_f1_scores.append(f1)
        outer_test_precisions.append(precision)
        outer_test_recalls.append(recall)
        all_best_hyperparameters.append(best_hyperparameters) 


        print(f"Outer Fold: Test Accuracy={acc_test * 100:.1f}%, F1 Score={f1:.4f}, Precision={precision:.4f}, Recall={recall:.4f} with Hyperparameters={best_hyperparameters}")

    return outer_test_accuracies, all_best_hyperparameters


In [78]:
#Function to Pre-Process the data
def preprocess_data(X, y, correlation_threshold=0.95):
    correlated_features = set()
    correlation_matrix = np.corrcoef(X, rowvar=False)
    num_features = correlation_matrix.shape[0]

    for i in range(num_features):
        for j in range(i + 1, num_features):
            if abs(correlation_matrix[i, j]) > correlation_threshold:
                correlated_features.add(j)

    X_no_correlation = np.delete(X, list(correlated_features), axis=1)
    selector = VarianceThreshold()
    X_no_zero_variance = selector.fit_transform(X_no_correlation)
    X_tensor = torch.tensor(X_no_zero_variance, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    return X_tensor, y_tensor


### Train and Evaluate the Model on a chosen Dataset

In [79]:
wine = load_wine()

data = pd.DataFrame(data=wine.data, columns=wine.feature_names)
data['target'] = wine.target

X = data.iloc[:, :-1].values
y = pd.get_dummies(data.iloc[:, -1]).values

X, y = preprocess_data(X, y)
batch_size = 30
input_size = 13
output_size = 3

### Method for Tuning Hyperparameters
In order to choose the Hyper Parameters, we started by reading about the values for the Neural Network. We then initialize the hyperparameters as:

**Lr** :Between 0-1

**Hidden Neurons**: Between output size & feature size (3-13, start with 8)

**Hidden Layers**: 1-2 for small number of features (start with 2)

**Number of Epochs**: Around 10

**Loss Function**: Cross Entropy

We then go through the hyper parameters one by one and tune them accordingly to achive higher accuracy. The performance of each test is documented by Accuracy, F1 score, Recall and Precision. However, the model uses Accuracy as the metric during training. 

The tests for Hyper Parameter Tuning is done by running 5 tests on the data, where a 3-time fold is applied (5x3 = 15 tests).

In [80]:
num_experiments = 5
outer_splits=3 #k-folds outer
inner_splits=3 #k-folds inner
seed = 42 #same seed used when tuning Hyper Parameters

### Varying Learning Rate

In [81]:
hyperparameters_to_tune = [
    {'learning_rate': 0.00001, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.0001, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.001, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.1, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.99, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},

]

all_best_hyperparameters = []

for _ in range(num_experiments):
    test_accuracies_manual_tuning, best_hyperparameters_each_fold = nested_cross_validation(X, y, hyperparameters_to_tune, input_size, output_size, outer_splits=outer_splits, inner_splits=inner_splits, seed=seed)
    all_best_hyperparameters.append(best_hyperparameters_each_fold)
    
learning_rate_votes = {}
for results in all_best_hyperparameters:
    for hyperparameters in results:
        learning_rate = hyperparameters['learning_rate']
        learning_rate_votes[learning_rate] = learning_rate_votes.get(learning_rate, 0) + 1
        
best_learning_rate = max(learning_rate_votes, key=learning_rate_votes.get)
average_test_accuracy = np.mean(test_accuracies_manual_tuning)
print()
print(f"Votes for Learning Rates: {learning_rate_votes}")
print(f"The Learning Rate with the most votes: {best_learning_rate}")
print(f"Average Test Accuracy Across Folds: {average_test_accuracy * 100:.1f}%")


Outer Fold: Test Accuracy=40.0%, F1 Score=0.7600, Precision=0.4000, Recall=0.2286 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=33.9%, F1 Score=0.7759, Precision=0.3390, Recall=0.1716 with Hyperparameters={'learning_rate': 0.001, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=62.7%, F1 Score=0.7285, Precision=0.6271, Recall=0.5288 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=91.7%, F1 Score=0.9241, Precision=0.9167, Recall=0.9171 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=71.2%, F1 Score=0.7233, Precision=0.7119, Recall=0.6629 with Hyperparameters={'le

**We ran this a couple of times to find that the learning rates that recieved the highest amount of votes for the best accuracy alternates between lr = 0.01 and lr = 0.001. We choose to continue the experiment and tuning of hyperparameters with lr = 0.01.**

### Varying number of Hidden Neurons

In [87]:
hyperparameters_to_tune = [
    {'learning_rate': 0.01, 'hidden_neurons': 1, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 3, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 5, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 10, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},

]
all_best_hyperparameters = []

for _ in range(num_experiments):
    test_accuracies_manual_tuning, best_hyperparameters_each_fold = nested_cross_validation(X, y, hyperparameters_to_tune, input_size, output_size, outer_splits=outer_splits, inner_splits=inner_splits, seed=seed)
    all_best_hyperparameters.append(best_hyperparameters_each_fold)
    
hidden_neurons_votes = {}
for results in all_best_hyperparameters:
    for hyperparameters in results:
        hidden_neurons = hyperparameters['hidden_neurons']
        hidden_neurons_votes[hidden_neurons] = hidden_neurons_votes.get(hidden_neurons, 0) + 1
        
best_hidden_neurons = max(hidden_neurons_votes, key=hidden_neurons_votes.get)
average_test_accuracy = np.mean(test_accuracies_manual_tuning)
print()
print(f"Votes for Hidden Neurons: {hidden_neurons_votes}")
print(f"The number of Hidden Neurons with the most votes: {best_hidden_neurons}")
print(f"Average Test Accuracy Across Folds: {average_test_accuracy * 100:.1f}%")

Outer Fold: Test Accuracy=40.0%, F1 Score=0.7600, Precision=0.4000, Recall=0.2286 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=86.4%, F1 Score=0.8890, Precision=0.8644, Recall=0.8605 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 8, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=40.7%, F1 Score=0.7587, Precision=0.4068, Recall=0.2352 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 5, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=61.7%, F1 Score=0.7293, Precision=0.6167, Recall=0.5259 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=91.5%, F1 Score=0.9162, Precision=0.9153, Recall=0.9153 with Hyperparameters={'l

**Like previously we ran this part several times to conclude that the amount of hidden neurons that got the highest vote most commonly was 8 or 13 hidden neurons. We choose to continue with 13 hidden neurons.**

### Varying numbers of Hidden Layers

In [83]:
hyperparameters_to_tune = [
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 3, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 4, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 5, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},

]
all_best_hyperparameters = []

for _ in range(num_experiments):
    test_accuracies_manual_tuning, best_hyperparameters_each_fold = nested_cross_validation(X, y, hyperparameters_to_tune, input_size, output_size, outer_splits=outer_splits, inner_splits=inner_splits, seed=seed)
    all_best_hyperparameters.append(best_hyperparameters_each_fold)
    
hidden_layers_votes = {}

for results in all_best_hyperparameters:
    for hyperparameters in results:
        hidden_layers = hyperparameters['hidden_layers']
        hidden_layers_votes[hidden_layers] = hidden_layers_votes.get(hidden_layers, 0) + 1
        
best_hidden_layers = max(hidden_layers_votes, key=hidden_layers_votes.get)
average_test_accuracy = np.mean(test_accuracies_manual_tuning)
print()
print(f"Votes for Number of Hidden Layers: {hidden_layers_votes}")
print(f"The Number of Hidden Layers with the most votes: {best_hidden_layers}")
print(f"Average Test Accuracy Across Folds: {average_test_accuracy * 100:.1f}%")

Outer Fold: Test Accuracy=86.7%, F1 Score=0.8747, Precision=0.8667, Recall=0.8665 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=91.5%, F1 Score=0.9153, Precision=0.9153, Recall=0.9149 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=74.6%, F1 Score=0.8069, Precision=0.7458, Recall=0.7545 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=68.3%, F1 Score=0.6905, Precision=0.6833, Recall=0.6840 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 2, 'num_epochs': 50, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=91.5%, F1 Score=0.9153, Precision=0.9153, Recall=0.9149 with Hyperparameters={

**After trying different amount of hidden layers and re-running the votes a couple of times, it seems the most commonly voted amount of layers was 1 and the experiment continues with this setting.**

### Varying number of Epochs

In [84]:
hyperparameters_to_tune = [
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 10, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 50, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 75, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 125, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 150, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 200, 'loss_function': nn.CrossEntropyLoss()},

]
all_best_hyperparameters = []

for _ in range(num_experiments):
    test_accuracies_manual_tuning, best_hyperparameters_each_fold = nested_cross_validation(X, y, hyperparameters_to_tune, input_size, output_size, outer_splits=outer_splits, inner_splits=inner_splits, seed=seed)
    all_best_hyperparameters.append(best_hyperparameters_each_fold)
    
num_epochs_votes = {}

for results in all_best_hyperparameters:
    for hyperparameters in results:
        num_epochs = hyperparameters['num_epochs']
        num_epochs_votes[num_epochs] = num_epochs_votes.get(num_epochs, 0) + 1
        
best_num_epochs = max(num_epochs_votes, key=num_epochs_votes.get)
average_test_accuracy = np.mean(test_accuracies_manual_tuning)
print()
print(f"Votes for Number of Epochs: {num_epochs_votes}")
print(f"The Number of Epochs with the most votes: {best_num_epochs}")
print(f"Average Test Accuracy Across Folds: {average_test_accuracy * 100:.1f}%")

Outer Fold: Test Accuracy=95.0%, F1 Score=0.9503, Precision=0.9500, Recall=0.9497 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 75, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=93.2%, F1 Score=0.9338, Precision=0.9322, Recall=0.9326 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=89.8%, F1 Score=0.9052, Precision=0.8983, Recall=0.8970 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 125, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=93.3%, F1 Score=0.9344, Precision=0.9333, Recall=0.9327 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 75, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=91.5%, F1 Score=0.9183, Precision=0.9153, Recall=0.9157 with Hyperparameters

**The best number of Epochs seems to vary between 50, 100 and 200 for different runs. We choose 100.**

### Varying Loss Function

In [85]:
hyperparameters_to_tune = [
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': nn.CrossEntropyLoss()},
    {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': nn.NLLLoss()},

]
all_best_hyperparameters = []
all_average_accuracies = []

for _ in range(num_experiments):
    test_accuracies_manual_tuning, best_hyperparameters_each_fold = nested_cross_validation(X, y, hyperparameters_to_tune, input_size, output_size, outer_splits=outer_splits, inner_splits=inner_splits, seed=seed)
    all_average_accuracies.append(test_accuracies_manual_tuning)
    all_best_hyperparameters.append(best_hyperparameters_each_fold)
    
loss_function_votes = {}

for results in all_best_hyperparameters:
    for hyperparameters in results:
        loss_function = hyperparameters['loss_function']
        loss_function_votes[loss_function] = loss_function_votes.get(loss_function, 0) + 1
        
best_loss_function = max(loss_function_votes, key=loss_function_votes.get)
average_test_accuracy = np.mean(all_average_accuracies)
print()
print(f"Votes for Loss Function: {loss_function_votes}")
print(f"The Loss Function with the most votes: {best_loss_function}")
print(f"Average Test Accuracy Across Folds: {average_test_accuracy * 100:.1f}%")

Outer Fold: Test Accuracy=95.0%, F1 Score=0.9523, Precision=0.9500, Recall=0.9499 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=88.1%, F1 Score=0.8923, Precision=0.8814, Recall=0.8804 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=94.9%, F1 Score=0.9548, Precision=0.9492, Recall=0.9494 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=96.7%, F1 Score=0.9676, Precision=0.9667, Recall=0.9667 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=66.1%, F1 Score=0.4956, Precision=0.6610, Recall=0.8370 with Hyperparamete

**Cross Entropy gives the best accuracy for most tests. This is chosen.**

### Test model on different Seeds
We now use the selected Hyper Parameters and run the Model with these and vary the seeds in order to report some statistics on the Model.

In [86]:
all_best_hyperparameters = []
all_test_accuracies = []

seed_list = [1, 42, 123, 987]

for seed in seed_list:
    hyperparameters_to_tune = [
        {'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': nn.CrossEntropyLoss()},
    ]

    test_accuracies_manual_tuning, best_hyperparameters_each_fold = nested_cross_validation(X, y, hyperparameters_to_tune, input_size, output_size, outer_splits=outer_splits, inner_splits=inner_splits, seed=seed)
    all_best_hyperparameters.append(best_hyperparameters_each_fold)
    all_test_accuracies.append(test_accuracies_manual_tuning)

    average_test_accuracy = np.mean(test_accuracies_manual_tuning)
    print()
    print(f"Seed {seed}: Average Test Accuracy Across Folds: {average_test_accuracy * 100:.1f}%")
    print()

overall_average_test_accuracy = np.mean([np.mean(acc) for acc in all_test_accuracies])
print(f"\nOverall Average Test Accuracy Across Seeds: {overall_average_test_accuracy * 100:.1f}%")


Outer Fold: Test Accuracy=98.3%, F1 Score=0.9840, Precision=0.9833, Recall=0.9832 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=89.8%, F1 Score=0.9002, Precision=0.8983, Recall=0.8975 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=89.8%, F1 Score=0.8989, Precision=0.8983, Recall=0.8981 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}

Seed 1: Average Test Accuracy Across Folds: 92.7%

Outer Fold: Test Accuracy=96.7%, F1 Score=0.9676, Precision=0.9667, Recall=0.9667 with Hyperparameters={'learning_rate': 0.01, 'hidden_neurons': 13, 'hidden_layers': 1, 'num_epochs': 100, 'loss_function': CrossEntropyLoss()}
Outer Fold: Test Accuracy=89.8%, F1 Score=0.9086

### Conclusion
This final test gives an accuracy of around 90% for the tuned model. 