## Import libraries

In [None]:
import os
import glob
import pickle
from pathlib import Path, PureWindowsPath

import numpy as np
import pandas as pd 

import matplotlib.pyplot as plt 
import seaborn as sns 

## Config

In [None]:
class cfg:
    """Notebook-wide configuration constants."""

    # Identifier of this notebook.
    # Must be an assignment: the previous `NAME: "value"` form was only a type annotation,
    # so the attribute was never created and `cfg.THIS_NOTEBOOK_ID` raised AttributeError.
    THIS_NOTEBOOK_ID = "FR_t01e01nb04v1"

    #=================================================
    # Root directory (relative to the notebook location) under which the result files of the
    # training tracks are searched.
    ROOT_TRAINING_RSLTS = os.path.join('..', '..', '..',
                                       "results",
                                       "exp_track",
                                      )

## Utility functions

### flatten_dict()

In [None]:
def flatten_dict(d, parent_key='', sep='_'):
    """
    Description
    -----------
        Collapse a (possibly nested) dictionary into a single-level dictionary.
        Keys of inner dictionaries are prefixed with the key(s) of their parents, joined by `sep`.

    Parameters:
    -----------
        d: dictionary to flatten; values that are dictionaries are expanded recursively.
        parent_key: prefix put in front of every key of `d` (empty for the top-level call).
        sep: separator placed between a parent key and a child key.

    Return: 
    ------
        flat_dictionary: single-level dict; it can be used as one row of a dataframe.
    """

    flat = {}
    for key, value in d.items():
        # Top-level keys are kept untouched; nested keys get the parent prefix.
        full_key = key if not parent_key else f"{parent_key}{sep}{key}"

        if not isinstance(value, dict):
            flat[full_key] = value
            continue

        # Inner dictionary: flatten it with the current key as prefix (an empty dict adds nothing).
        flat.update(flatten_dict(value, full_key, sep=sep))

    return flat

### perf_last_epoch_metric_distr_info()

In [None]:
def perf_last_epoch_metric_distr_info(d_meta, 
                                      arch_name_wild_card="resnet", 
                                      pipe_names=['pipe001']): 
    
    """
    Description:
    -----------
        Select the rows of `d_meta` matching an architecture and a set of preprocessing pipelines,
            read the last-epoch test metrics of every selected fold from its pickled performance
            file, and summarise them across folds with `DataFrame.describe()`
            ('count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max').
            
    Parameters:
    -----------
        d_meta: dataframe with one row per performance file. Columns read here:
            'track_train_id', 'fold_id', 'path_perf_dct', 'architecture__arch_ID',
            'prepro_pipe__pipe_name', 'split_strategy__split_strategy_id',
            'split_strategy__shuffle_instances', 'training_step__training_strategy_id',
            'training_step__epochs'.
        arch_name_wild_card: pattern searched with `str.contains` in the lower-cased
            'architecture__arch_ID' to select the architecture of interest.
        pipe_names: list of preprocessing image pipeline id of interest (not modified here).

    Return: 
        d_m: dataframe organized to show, for each training track id and metric, the distribution
            of the last-epoch test performance across folds. Columns:
            'track_train_id', 'pre_pipe_id', 'arch_name', 'split_id', 'trn_strtgy_id', 'leakage',
            'n_epochs', 'metrics', 'fold_cnt', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'.
            An empty dataframe with these columns is returned when nothing matches the selection.

    """
    
    test_metrics = ['acc_tst', 'ba_tst', 'f1score_tst', 'prcsn_tst', 'rec_tst']
    fold_cols = ['track_train_id', 'split_id', 'trn_strtgy_id', 'arch_name', 'fold_id', 'path_perf_dct',
                 'prepro_pipe__pipe_name', 'leakage', 'training_step__epochs'] + test_metrics
    group_cols = ['track_train_id', 'prepro_pipe__pipe_name', 'arch_name', 'split_id', 'trn_strtgy_id',
                  'leakage', 'training_step__epochs']
    # Names given to the group keys in the output table (same order as `group_cols`).
    meta_cols = ['track_train_id', 'pre_pipe_id', 'arch_name', 'split_id', 'trn_strtgy_id', 'leakage', 'n_epochs']
    out_cols = meta_cols + ['metrics', 'fold_cnt', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']

    #==========================================
    # Selecting rows of interest.
    # na=False: rows without an architecture id are treated as "no match" so the mask stays boolean.
    c1 = d_meta['architecture__arch_ID'].str.lower().str.contains(arch_name_wild_card, na=False)
    c2 = d_meta['prepro_pipe__pipe_name'].isin(pipe_names)
    d_slct = d_meta[c1 & c2].reset_index(drop=True)

    #==========================================
    # Extracting performances, fold, leakage or not leakage
    track_slct_info = []
    for _, row in d_slct.iterrows():

        #- - - - - - - - - - - - - - - - - 
        # Extracting information related to performance per epoch during training process. 
        # perf_dct keys: 
        #     ['prcsn_trn', 'f1score_trn', 'rec_trn', 'ba_trn', 'acc_trn', 'loss_trn', 
        #      'prcsn_tst', 'f1score_tst', 'rec_tst', 'ba_tst', 'acc_tst', 'loss_tst', 
        #      'lr', 'track_train_id', 'fold_id', 'arch_id']
        # NOTE: pickle.load can execute arbitrary code; only read result files you trust.
        with open(row['path_perf_dct'], 'rb') as f:
            perf_dct = pickle.load(f)

        #- - - - - - - - - - - - - - - - - 
        fold_info = [
            row['track_train_id'], 
            row['split_strategy__split_strategy_id'],
            row['training_step__training_strategy_id'],
            row['architecture__arch_ID'],
            row['fold_id'], 
            row['path_perf_dct'], 
            row['prepro_pipe__pipe_name'],
            row['split_strategy__shuffle_instances'], #True:leakage, False: 'no_leakage'
            row["training_step__epochs"],
        ]
        for metric_name in test_metrics:
            fold_info.append(perf_dct[metric_name][-1]) # last epoch performance
        track_slct_info.append(fold_info)

    #===========================================================
    # Test performance of last epoch of each fold (one row per fold). 
    d_folds = pd.DataFrame(track_slct_info, columns=fold_cols)

    #===========================================================
    # Extracting summary (across fold) for same Track_ID
    metrics_dist_info = []
    for gr, data in d_folds.groupby(group_cols): 
        #___________________________________________________
        # Calculating test metrics performance distribution info (last training epoch)
        d = data[test_metrics].describe().T.reset_index().rename(columns={'index':'metrics'})

        #___________________________________________________
        # Adding meta info: one constant column per group key.
        for col, val in zip(meta_cols, gr):
            d[col] = val

        #___________________________________________________
        # Reorganization of table
        d = d.rename(columns={'count':'fold_cnt'})[out_cols]

        #___________________________________________________
        # Store
        metrics_dist_info.append(d)

    #_______________________________________________________
    # Nothing selected (or every group dropped): pd.concat([]) would raise
    # "No objects to concatenate", so return an empty table with the expected columns instead.
    if not metrics_dist_info:
        return pd.DataFrame(columns=out_cols)

    d_m = pd.concat(metrics_dist_info,  ignore_index=True)
    d_m = d_m.sort_values(by=['pre_pipe_id', 'arch_name', 'trn_strtgy_id',"track_train_id",'split_id', "leakage",], ignore_index=True)

    return d_m

## Extract performance during training epochs

In [None]:
#=====================================================================
# READ DATA
ROOT_DATA = cfg.ROOT_TRAINING_RSLTS

# glob returns matches in an arbitrary, filesystem-dependent order. Sorting makes the row order of
# `d_prf_meta` (and any positional access to it) reproducible across runs and machines.
model_epo_perf = sorted(glob.glob(os.path.join(ROOT_DATA, '*', 'perform_acrss_epo', "*", "performance_across_Epochs.pkl")))

#=====================================================================
# LOOP across each performance file
perf_meta = []
for p in model_epo_perf: 

    #_____________
    # Read file
    # NOTE: pickle.load can execute arbitrary code; only read result files you trust.
    with open(p, 'rb') as f:
        perf_dct = pickle.load(f)

    #_____________
    # Extract keys
    trnTrackID = perf_dct['track_train_id']
    foldID = perf_dct['fold_id'] # nomenclature: f"seed{random_state in Kfold fun}_fold(n of data splits)
    archID = perf_dct['arch_id']

    # One record per performance file, keeping the path so the file can be re-read later.
    perf_meta.append([trnTrackID, 
                      foldID, 
                      archID, 
                      p, 
                     ])

#=====================================================================
# Create metadatabase for performances during training epochs.
cols = ['track_train_id', 'fold_id', 'arch_id', 'path_perf_dct']
d_prf_meta = pd.DataFrame(perf_meta, columns=cols)
d_prf_meta.head()

In [None]:
# Open the performance file listed in row 0 of `d_prf_meta` to see which keys it stores.
pickle_fpath = d_prf_meta.loc[0, "path_perf_dct"]
with open(pickle_fpath, 'rb') as f:
    perf_dct = pickle.load(f)

perf_dct.keys()

**performance_across_Epochs.pkl:**
- For each epoch it calculates and stores performance metrics for training and test sets

## Extract Params for the whole ML pipeline

In [None]:
ROOT_DATA = cfg.ROOT_TRAINING_RSLTS

# glob returns matches in an arbitrary, filesystem-dependent order. Sorting keeps the row order of
# `d_ml_pipe_params` reproducible across runs and machines.
ml_pipe_params_paths = sorted(glob.glob(os.path.join(ROOT_DATA, '*', 'ML_pipe_params', "ML_pipe_all_params_*.pkl")))

#=========================================================================
# LOOP across each ML-pipeline parameters file
ml_parms_meta = []
for p in ml_pipe_params_paths:

    #________________________
    # Read file 
    # NOTE: pickle.load can execute arbitrary code; only read result files you trust.
    with open(p, 'rb') as f:
        ml_pipe_parms_dct = pickle.load(f)
    #________________________
    # Flat dictionary: nested keys are joined with '__' (e.g. 'training_step__TrackID')
    flat_dct = flatten_dict(ml_pipe_parms_dct, parent_key='', sep='__')

    #________________________
    # Store
    ml_parms_meta.append(flat_dct)

#----------------------------
# Building the frame from a list of dicts uses the union of all keys as columns: a key that is
# missing in some Training Track IDs (diff architecture, training, etc.) is filled with NaN there.
d_ml_pipe_params = pd.DataFrame(ml_parms_meta)
d_ml_pipe_params.info()

## Extract prediction (y_pred per instance) of final training epoch

In [None]:
split_rslts = "test_result.csv" # last training epoch of that training track

ROOT_DATA = cfg.ROOT_TRAINING_RSLTS

# glob returns matches in an arbitrary, filesystem-dependent order. The cells below read
# `ypred_rslts_paths[0]`, so the list is sorted to make that choice reproducible.
ypred_rslts_paths = sorted(glob.glob(os.path.join(ROOT_DATA, "*", "predictions", "*", split_rslts,)))
ypred_rslts_paths

In [None]:
# Load the first predictions file of `ypred_rslts_paths` and count the rows per `y_true` value.
tst_rslts = pd.read_csv(ypred_rslts_paths[0])
tst_rslts["y_true"].value_counts()

In [None]:
# Shape of the `y_true` column, i.e. the number of rows in the loaded predictions file.
tst_rslts["y_true"].shape

In [None]:
# Relative frequency of each `y_true` value (despite the name, these are proportions, not counts).
class_counts = tst_rslts["y_true"].value_counts(normalize=True)  # gives class proportions
class_counts

In [None]:
# Random baseline: sum of the squared class proportions of `y_true`, i.e. the expected accuracy of
# a classifier that draws its labels at random with the same class frequencies.
class_counts = tst_rslts["y_true"].value_counts(normalize=True)  # gives class proportions
expected_random_accuracy = class_counts.pow(2).sum()
expected_random_accuracy

## TRACK META - Global

**This DataFrame will contain all information related to:**
- Complete DL Pipelines
- path to pickle file that contains the metric performance calculated during training epochs

In [None]:
# Attach the flattened ML-pipeline parameters to every performance record.
# Left join: every row of `d_prf_meta` is kept, even when no parameter row matches its track id.
d_meta = pd.merge(
    d_prf_meta,
    d_ml_pipe_params,
    how="left",
    left_on="track_train_id",
    right_on="training_step__TrackID",
)
d_meta.info()

## VIZ Performances comparison

In [None]:
# Metric whose per-epoch values are collected for every row of `d_meta`.
metric = 'acc_tst'
#==================================
metric_perf = []
for r, data in d_meta.iterrows():

    #---------------------------------------
    # Load performance during training epochs.
    # The stored path is parsed as a Windows path and converted to the path flavour of the
    # current OS before opening.
    # Keys of perf_dct:
    #     ['prcsn_trn', 'f1score_trn', 'rec_trn', 'ba_trn', 'acc_trn', 'loss_trn',
    #      'prcsn_tst', 'f1score_tst', 'rec_tst', 'ba_tst', 'acc_tst', 'loss_tst',
    #      'lr', 'track_train_id', 'fold_id', 'arch_id']
    performance_during_training_path = Path(PureWindowsPath(data["path_perf_dct"]))
    with open(performance_during_training_path, 'rb') as f:
        perf_dct = pickle.load(f)

    #---------------------------------------
    # Extracting the selected metric (all epochs) together with the identifying meta info.
    record = (
        data["track_train_id"],
        data["prepro_pipe__pipe_name"],
        data["arch_id"],
        perf_dct[metric],
        data["split_strategy__shuffle_instances"],
    )
    metric_perf.append(record)

#---------------------------------------------------
# One row per performance file; `metric_vals` holds the whole per-epoch series of the metric.
cols = ["track_train_id", "prepro_pipe__pipe_name","architecture__arch_ID","metric_vals", "leak"]
d_metric_perf = pd.DataFrame(metric_perf, columns=cols)
d_metric_perf

In [None]:

from matplotlib.lines import Line2D

# Selection of the series to plot.
arch_name_wild_card = "alexnet"
pipe_names = ["pipe001", "pipe002"]

c1 = d_metric_perf['architecture__arch_ID'].str.lower().str.contains(arch_name_wild_card)
c2 = d_metric_perf['prepro_pipe__pipe_name'].isin(pipe_names)

# Lines only encode leakage (two greys), so several series share the same line colour.
# Each legend entry therefore combines the line colour with the colour of the last-epoch marker,
# which is what makes the individual series identifiable.
custom_legend_handles1 = []

fig, ax = plt.subplots(figsize=(16, 8))
for r, data in d_metric_perf[c1 & c2].sort_values(by=["leak"]).iterrows():

    #_______________________________________________________
    # Creating series label 
    leakage_exp = data["leak"]
    track = data["track_train_id"]

    leak = "Yes" if leakage_exp else "No "
    # Keep the second "_"-separated token of the track id; fall back to the full id when the id
    # has no "_" (indexing [1] unconditionally raised IndexError in that case).
    track_parts = track.split("_")
    track = track_parts[1] if len(track_parts) > 1 else track
    serie_label_name = f"leak: {leak} | TrackID: {track}"

    #_______________________________________________________
    serie_values = data["metric_vals"]

    #_______________________________________________________
    # Darker grey: experiment with leakage; lighter grey: without leakage.
    color_line = "#6C7A89" if leakage_exp else "#D5D8DC"
    ax.plot(serie_values, label=serie_label_name, color=color_line, zorder=3)

    # last epoch performance (marker colour comes from the default colour cycle)
    sc = ax.scatter(len(serie_values) - 1, serie_values[-1], zorder=3)
    marker_color = sc.get_facecolor()[0]

    #_______________________________________________________
    # Legend entry: line colour + marker colour of this series
    custom_legend_handles1.append(Line2D(
        [0], [0],
        color=color_line,
        marker='o',
        markerfacecolor=marker_color,
        markeredgecolor=marker_color,
        label=serie_label_name,
    ))

#-----------------------------------------------------------
# Random baseline computed earlier from the class proportions of y_true.
ax.axhline(expected_random_accuracy, color='firebrick', linestyle='--', linewidth=1)
# Annotate the line (x is a fixed position in data coordinates, just below the baseline)
ax.text(
    x=7.5, 
    y=expected_random_accuracy - 0.03, 
    s=f'Random Baseline ≈ {expected_random_accuracy:.2f}',
    color='firebrick',
    fontsize=10,
    fontweight='bold'
)

ax.set_title("Accuracy for Test set ")
ax.set_xlabel("Epoch (0-based index)")
ax.set_ylabel("Accuracy")
ax.legend(handles=custom_legend_handles1, loc='center left', bbox_to_anchor=(1, 0.5))
ax.grid(color='lightgray', linestyle="--", zorder=0)
ax.set_yticks(np.arange(0, 1.1, 0.1))
ax.set_ylim(0, 1.05)
plt.show()

# Tracking DL pipeline hyperparameters

## Split strategy | without track seed.

![image.png](attachment:d4b0224b-5f48-4872-8cc8-9c7d58d2a08b.png)

![image.png](attachment:860b13f7-75a6-4232-bfe1-bb6980e858dc.png)

## Training Strategy params

![image.png](attachment:ec8263c9-3291-41be-a3dd-393e79f67dbd.png)

![image.png](attachment:b30bbf03-64a2-4027-9104-5c2e0292f11c.png)

![image.png](attachment:2b77ff24-ced6-4039-9c41-f7b16a33a2b0.png)

![image.png](attachment:ad162d16-fde4-4af1-a504-e99279f07cb7.png)

# Report

## Vanilla CNN | Pipe001

In [None]:
# Across-fold distribution of last-epoch test metrics: "vanilla" architectures, pipe001.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="vanilla",
    pipe_names=['pipe001'],
)
d_m

## Vanilla CNN | Pipe002

In [None]:
# Across-fold distribution of last-epoch test metrics: "vanilla" architectures, pipe002.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="vanilla",
    pipe_names=['pipe002'],
)
d_m

## Vanilla CNN | Pipe003

In [None]:
# Across-fold distribution of last-epoch test metrics: "vanilla" architectures, pipe003.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="vanilla",
    pipe_names=['pipe003'],
)
d_m

## Vanilla CNN | Pipe004

In [None]:
# Across-fold distribution of last-epoch test metrics: "vanilla" architectures, pipe004.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="vanilla",
    pipe_names=['pipe004'],
)
d_m

## RESNET | PIPE001

In [None]:
# Across-fold distribution of last-epoch test metrics: "resnet" architectures, pipe001.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="resnet",
    pipe_names=['pipe001'],
)
d_m

**TrainTrack 004 and 003**
- A clear example showing that a poorly configured algorithm can perform worse than simpler algorithms (e.g., vanilla 2D CNN), even when those simpler models suffer from data leakage.


**TrainTrack 005 and 006:**

Only Training step strategy was updated. 
- Pipe001 outputs images with raw pixel values. Even though ResNet includes batch normalization layers, small batch sizes are likely to cause instability.
- Larger batch sizes and more training epochs were used to improve stability, with the expectation of achieving over 90% accuracy with leakage, and close to 70% without leakage.

## RESNET | PIPE002

In [None]:
# Across-fold distribution of last-epoch test metrics: "resnet" architectures, pipe002.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="resnet",
    pipe_names=['pipe002'],
)
d_m

## RESNET | PIPE003

In [None]:
# Across-fold distribution of last-epoch test metrics: "resnet" architectures, pipe003.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="resnet",
    pipe_names=['pipe003'],
)
d_m

## RESNET | PIPE004

In [None]:
# Across-fold distribution of last-epoch test metrics: "resnet" architectures, pipe004.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="resnet",
    pipe_names=['pipe004'],
)
d_m

## ALEXNET | PIPE001

In [None]:
# Across-fold distribution of last-epoch test metrics: "alexnet" architectures, pipe001.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="alexnet",
    pipe_names=['pipe001'],
)
d_m

**AlexNet**
- Uses a larger kernel size in the first 2D CNN layers than Vanilla2DCNN and RESNET18. Therefore, it is initially very sensitive to the scale of the inputs (images) and of the gradients.
- Has no mechanism to normalize internal activations, so it's vulnerable to high variance in gradients
    - AlexNet’s early layers can produce very large activations, especially with unnormalized inputs (pixel values 0–255)
- With small batches, gradient estimates are noisier: each mini-batch gives a slightly different direction for optimization.
- So, a smaller learning rate keeps the activations under control and a larger batch gives less noisy gradient estimates, which helps reach stability faster; to be sure, also increase the number of training epochs.

In [None]:
# Across-fold distribution of last-epoch test metrics: "alexnet" architectures, pipe004.
d_m = perf_last_epoch_metric_distr_info(
    d_meta,
    arch_name_wild_card="alexnet",
    pipe_names=['pipe004'],
)
d_m