In [2]:
import sys
import os
import time
import logging
import argparse
import numpy as np
from datetime import datetime
from joblib import Parallel, delayed

# Make the project packages imported below (`estimators`, `utils`) resolvable by
# putting the parent of the current working directory on the import path.
# This only works when the notebook is launched from a direct subdirectory of
# the project root; adjust the path otherwise.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Custom imports from your project
from estimators.statistical_descriptor import Nagler_WS
from utils.dataset_management import parse_pipeline
from utils.dataset_load import shuffle_data, DatasetLoader
from utils.fold_management import FoldManagement
from utils.label_management import LabelManagement
from utils.balance_management import BalanceManagement
from utils.figures import *
from utils.files_management import *


In [7]:
# Fixed-seed legacy NumPy generator so that any draw made from `rng` is
# repeatable across kernel restarts; printed to confirm the bit generator.
rng = np.random.RandomState(442)
print(rng)

RandomState(MT19937)


In [13]:
# Parse the pipeline's YAML parameter file into plain Python objects and echo
# the result as the cell output.
# NOTE(review): the path is absolute and machine-specific, and `yaml` is not
# imported explicitly in the import cell — presumably it arrives through one of
# the star imports; confirm.
with open(
    '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/parameter/config_param.yml'
) as file:
    tmp = yaml.safe_load(file)

tmp

{'fixed_args': {'options': {'--data_path': '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected_V2.h5',
   '--fold_method': 'combinationFold',
   '--labeling_method': 'crocus',
   '--balancing_method': 'undersample',
   '--request': '(date.dt.month == 3 and date.dt.day == 1) and ((elevation > 1000) and (elevation < 2000))',
   '--shuffle_data': True,
   '--balance_data': False,
   '--import_list': ['from sklearn.svm import SVC',
    'from sklearn.neighbors import KNeighborsClassifier',
    'from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier',
    'from sklearn.linear_model import LogisticRegression',
    'from sklearn.neural_network import MLPClassifier',
    'from estimators.statistical_descriptor import *',
    'from estimators.band_transform import *'],
   '--pipeline': [[['KNN_direct'],
     ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
     ['BandTransformer', {'bands':

In [37]:
# Pull the two entries of interest out of the parsed configuration: the list of
# import statements and the pipeline definitions. The generator expression keeps
# the lookup in one place without leaving a helper name in the notebook namespace.
import_list, pipeline = (
    tmp["fixed_args"]["options"][option_name]
    for option_name in ("--import_list", "--pipeline")
)
pipeline

[[['KNN_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['KNeighborsClassifier', {'n_neighbors': 50}]],
 [['RandomForest_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['RandomForestClassifier', {'n_estimators': 200}, {'criterion': 'entropy'}]],
 [['MLP_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['MLPClassifier', {'alpha': 0.01}]],
 [['SVMrbf_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['SVC', {'kernel': 'rbf'}, {'probability': True}]],
 [['LogisticR_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTrans

In [3]:
# Display the metrics stored for the AdaBoost_direct model of run_4 / group_0.
# The output below is a list of per-fold dictionaries (scores, confusion
# matrix, y_true / y_pred, timings).
# NOTE(review): `load_h5` is presumably provided by one of the star imports
# (utils.files_management?) — confirm.
load_h5(
    "/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_4/group_0/models/AdaBoost_direct/metrics.h5"
)

[{'f1_score_macro': 76.6,
  'f1_score_weighted': 77.19,
  'f1_score_multiclass': array([80.52, 72.68]),
  'accuracy_score': 77.25999999999999,
  'precision_score_macro': 76.79,
  'recall_score_macro': 76.46,
  'roc_auc_score': 76.46,
  'log_loss': 68.69,
  'kappa_score': 53.21,
  'confusion_matrix':          0        1
  0  81.7468  18.2532
  1  28.8181  71.1819,
  'y_true': array(['0', '0', '0', ..., '0', '1', '0'], dtype='<U21'),
  'y_pred': array([[0.51141205, 0.48858795],
         [0.50226193, 0.49773807],
         [0.49896965, 0.50103035],
         ...,
         [0.50431218, 0.49568782],
         [0.49374719, 0.50625281],
         [0.50304307, 0.49695693]]),
  'fold': 0,
  'training_time': 4.288111209869385,
  'prediction_time': 0.418013334274292},
 {'f1_score_macro': 85.68,
  'f1_score_weighted': 85.97,
  'f1_score_multiclass': array([88.03, 83.32]),
  'accuracy_score': 86.06,
  'precision_score_macro': 86.32,
  'recall_score_macro': 85.34,
  'roc_auc_score': 85.34,
  'log_loss':

In [4]:
# Display the metrics stored for the AdaBoost_direct model of run_4 / group_1.
# NOTE(review): the fold-0 scores shown below are identical to the ones
# displayed for group_0, while y_true / y_pred and the timings differ —
# confirm that this is expected and not the same scores written to both groups.
load_h5(
    "/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_4/group_1/models/AdaBoost_direct/metrics.h5"
)

[{'f1_score_macro': 76.6,
  'f1_score_weighted': 77.19,
  'f1_score_multiclass': array([80.52, 72.68]),
  'accuracy_score': 77.25999999999999,
  'precision_score_macro': 76.79,
  'recall_score_macro': 76.46,
  'roc_auc_score': 76.46,
  'log_loss': 68.69,
  'kappa_score': 53.21,
  'confusion_matrix':          0        1
  0  81.7468  18.2532
  1  28.8181  71.1819,
  'y_true': array(['1', '0', '0', ..., '0', '0', '1'], dtype='<U21'),
  'y_pred': array([[0.49795099, 0.50204901],
         [0.50434319, 0.49565681],
         [0.50164015, 0.49835985],
         ...,
         [0.50368661, 0.49631339],
         [0.50323073, 0.49676927],
         [0.500365  , 0.499635  ]]),
  'fold': 0,
  'training_time': 10.859147071838379,
  'prediction_time': 2.210312843322754},
 {'f1_score_macro': 85.68,
  'f1_score_weighted': 85.97,
  'f1_score_multiclass': array([88.03, 83.32]),
  'accuracy_score': 86.06,
  'precision_score_macro': 86.32,
  'recall_score_macro': 85.34,
  'roc_auc_score': 85.34,
  'log_loss'