### k_fold.ipynb
- Alessandro Trincone
- Mario Gabriele Carofano

> ...

In [None]:
import auxfunc
import constants
from artificial_neural_network import NeuralNetwork
from training_report import TrainingReport
import dataset_functions as df
import plot_functions as pf

import numpy as np
import pprint
import time
from datetime import datetime
import copy
import gc

...

In [None]:
def grid_search():
    """
        Placeholder for a grid-search routine (presumably over the network
        hyper-parameters, judging by the name). Not implemented yet.

        Parameters:
        -   none.

        Returns:
        -   None : the function currently performs no work.
    """

    return None

# end

...

In [None]:
def random_search():
    """
        Placeholder for a random-search routine (presumably over the network
        hyper-parameters, judging by the name). Not implemented yet.

        Parameters:
        -   none.

        Returns:
        -   None : the function currently performs no work.
    """

    return None

# end

...

In [None]:
def k_fold_cross_validation(
        out_directory : str,
        Xtrain : list[np.ndarray],
        Ytrain : list[np.ndarray],
        k : int = constants.DEFAULT_K_FOLD_VALUE
) -> tuple[float, float, float, float]:
    
    """
        Validation technique that holds out an independent part of the training set for the validation phase. It is used to select the hyper-parameters that give the lowest validation error when training the model.

        The training set is split into k folds; for each fold a new network (784 inputs, layers of 64 and 10 units) is trained on the other k-1 folds and validated on the held-out one. Two summary plots are then written through plot_functions and the mean / standard deviation of the validation metrics are returned.

        Parameters:
        -   out_directory : the output directory where the k-fold cross validation plots are saved.
        -   Xtrain : the matrix of examples to classify.
        -   Ytrain : the matrix of labels matching the examples (ground truth).
        -   k : integer number of folds the training set is split into; must be at least 2.

        Returns:
        -   err_mean : mean of the validation error values over all trained models.
        -   err_std : standard deviation of the validation error values over all trained models.
        -   acc_mean : mean of the validation accuracy percentages over all trained models.
        -   acc_std : standard deviation of the validation accuracy percentages over all trained models.

        Raises:
        -   ValueError : if k is smaller than 2, since no training partition would be left.
    """

    # With k == 1 the training partition would be empty (np.concatenate fails on an
    # empty list) and with k <= 0 no fold would run, yielding NaN statistics.
    # Fail early, before splitting the dataset or building any network.
    if k < 2:
        raise ValueError(f"k must be at least 2 for k-fold cross validation, got {k}")

    fold_reports = []

    Xfolds, Yfolds = df.split_dataset(Xtrain, Ytrain, k)

    for i in range(k):

        print(f"\nFold {i+1} di {k}")

        # A fresh network per fold, so that no weights leak from one fold to the next.
        net = NeuralNetwork(
            784, [64, 10],
            l_act_funs=[auxfunc.leaky_relu, auxfunc.identity],
            e_fun=auxfunc.cross_entropy_softmax
        )

        # Fold i is held out for validation; every other fold is merged into the training partition.
        training_fold = np.concatenate([fold for j, fold in enumerate(Xfolds) if j != i])
        training_labels = np.concatenate([fold for j, fold in enumerate(Yfolds) if j != i])
        validation_fold = Xfolds[i]
        validation_labels = Yfolds[i]

        net.train(
            training_fold, training_labels,
            validation_fold, validation_labels,
        )

        # Deep copy: the report must outlive the network, which is deleted below.
        fold_reports.append({
            "Fold"      : i+1,
            "Report"    : copy.deepcopy(net.training_report)
        })

        # Release the per-fold objects before the next fold to keep peak memory down.
        del net
        del training_fold, training_labels
        del validation_fold, validation_labels
        gc.collect()

        # In debug mode only the first fold is run.
        if constants.DEBUG_MODE:
            break

    # end for i

    # Plot a histogram with the accuracy percentages, mean and standard deviation over all folds.
    accs = [r['Report'].validation_accuracy for r in fold_reports]
    acc_mean = np.mean(accs)
    acc_std = np.std(accs)
    pf.plot_k_fold_accuracy_scores(out_directory, fold_reports, acc_mean, acc_std)

    # Plot a histogram with the error values, mean and standard deviation over all folds.
    errs = [r['Report'].validation_error for r in fold_reports]
    err_mean = np.mean(errs)
    err_std = np.std(errs)
    pf.plot_k_fold_error_scores(out_directory, fold_reports, err_mean, err_std)

    # NOTE: an earlier draft also tracked the best fold (lowest validation error),
    # retrained that network on the whole training set, saved it and returned it.
    # That path is disabled; only the aggregate statistics are returned.
    return err_mean, err_std, acc_mean, acc_std

# end

...

In [None]:
# Load the dataset through the project helper. The call yields six values, unpacked
# here as ids, examples and labels for the training set and for the test set
# (roles inferred from the names - confirm against dataset_functions.loadDataset).
idTrain, Xtrain, Ytrain, idTest, Xtest, Ytest = df.loadDataset(constants.COPPIE_TRAINING, constants.COPPIE_TEST)

...

In [None]:
# Output path for this run: base output directory + current timestamp + "/".
out_directory = constants.OUTPUT_DIRECTORY + datetime.now().strftime(constants.OUTPUT_DATE_TIME_FORMAT) + "/"
# Disabled: timed run of the k-fold cross validation.
# NOTE(review): k_fold_cross_validation returns (err_mean, err_std, acc_mean, acc_std),
# so the `net = ...` assignment below would not yield a network if re-enabled.
# print(f"\nK-fold cross-validation iniziato: {datetime.now().strftime(constants.PRINT_DATE_TIME_FORMAT)}")
# start_time = time.time()

# net = k_fold_cross_validation(out_directory, Xtrain, Ytrain)

# end_time = time.time()
# tot_time = end_time - start_time

# print(f"\nK-fold cross-validation completato: {datetime.now().strftime(constants.PRINT_DATE_TIME_FORMAT)}")
# print(f"Tempo trascorso: {tot_time:.3f} secondi")

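Se si vuole utilizzare una rete già addestrata, decommentare la chiamata a `NeuralNetwork.load_network_from_file` nella cella successiva. La rete salvata ha i seguenti parametri, iper-parametri e metriche di valutazione: <br>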
NeuralNetwork( <br>
&emsp;depth = 2, <br>
&emsp;input_size = 784, <br>
&emsp;network_layers = [<br>
&emsp;Layer( <br>
&emsp;&emsp;size = 64, <br>
&emsp;&emsp;act_fun = &lt;function leaky_relu at 0x10791c4c0&gt;, <br>
&emsp;&emsp;inputs_size = (12500, 784) <br>
&emsp;&emsp;weights_shape = (64, 784), <br>
&emsp;&emsp;biases_shape = (64, 1) <br>
&emsp;), <br>
&emsp;Layer( <br>
&emsp;&emsp;size = 10, <br>
&emsp;&emsp;act_fun = &lt;function identity at 0x107dda680&gt;, <br>
&emsp;&emsp;inputs_size = (12500, 64) <br>
&emsp;&emsp;weights_shape = (10, 64), <br>
&emsp;&emsp;biases_shape = (10, 1) <br>
&emsp;)], <br>
&emsp;err_fun = &lt;function cross_entropy_softmax at 0x107dda8c0&gt;, <br>
&emsp;training_report = TrainingReport( <br>
&emsp;&emsp;num_epochs = 100, <br>
&emsp;&emsp;elapsed_time = 184.053 secondi, <br>
&emsp;&emsp;training_error = 1.83840, <br>
&emsp;&emsp;training_accuracy = 79.22 %, <br>
&emsp;&emsp;validation_error = 0.00000 <br>
&emsp;&emsp;validation_accuracy = 0.00 % <br>
&emsp;) <br>
)

In [None]:
# Disabled alternative: load previously saved network parameters instead of training from scratch.
# net = NeuralNetwork.load_network_from_file("../output/2024-06-13_17-31/params.pkl")
# Build a network with 784 inputs and two layers of 64 and 10 units, using leaky ReLU
# and identity as layer activations and cross-entropy with softmax as the error function.
net = NeuralNetwork(
    784, [64, 10],
    l_act_funs=[auxfunc.leaky_relu, auxfunc.identity],
    e_fun=auxfunc.cross_entropy_softmax
)

# Train on the whole training set, without a validation set and with rprop disabled.
# The returned history is iterated below as a sequence of reports exposing
# training_error / training_accuracy (presumably one per epoch - confirm).
history_report = net.train(Xtrain, Ytrain, rprop=False)
# Plot the training error and accuracy curves under the "back-prop" label.
pf.plot_error(out_directory, "back-prop", [r.training_error for r in history_report])
pf.plot_accuracy(out_directory, "back-prop", [r.training_accuracy for r in history_report])

# Disabled: print the full description of the trained network.
# print(repr(net))

...

In [None]:
# Evaluate the trained network on the test set, passing the run's output directory
# (presumably where the test plots are written, given plot_mode - confirm against NeuralNetwork.test).
net.test(
    out_directory,
    idTest, Xtest, Ytest,
    plot_mode=constants.PlotTestingMode.ALL
)

# Disabled: prediction on the test examples without the ground-truth labels.
# net.predict(idTest, Xtest)