In [1]:
import sys
import os
import time
import logging
import argparse
import numpy as np
from datetime import datetime
# from joblib import Parallel, delayed
import h5py
# Set the parent directory. This assumes your notebook is in the project root or a subdirectory.
# Adjust the path as necessary.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(parent_dir)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Custom imports from your project
from estimators.statistical_descriptor import Nagler_WS
from utils.dataset_management import parse_pipeline
from utils.dataset_load import shuffle_data, DatasetLoader
from utils.fold_management import FoldManagement
from utils.label_management import LabelManagement
from utils.balance_management import BalanceManagement
from utils.figures import *
from utils.files_management import *


In [None]:
open_pkl("")

In [21]:
folder_key = open_pkl("/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2/group_0/results/fold_key.pkl")
folder_key

{0: {'train': array(['ARAVIS', 'BEAUFORTAIN', 'BELLEDONNE', 'CHARTEUSE',
         'GRANDES-ROUSSES', 'HTE-MAURIE', 'HTE-TARENT', 'MAURIENNE',
         'MONT-BLANC', 'THABOR', 'VANOISE', 'VERCORS'], dtype='<U15'),
  'test': array(['BAUGES'], dtype='<U15')},
 1: {'train': array(['ARAVIS', 'BAUGES', 'BEAUFORTAIN', 'BELLEDONNE', 'CHARTEUSE',
         'GRANDES-ROUSSES', 'HTE-MAURIE', 'HTE-TARENT', 'MAURIENNE',
         'MONT-BLANC', 'THABOR', 'VANOISE'], dtype='<U15'),
  'test': array(['VERCORS'], dtype='<U15')},
 2: {'train': array(['ARAVIS', 'BAUGES', 'BEAUFORTAIN', 'BELLEDONNE', 'CHARTEUSE',
         'GRANDES-ROUSSES', 'HTE-MAURIE', 'HTE-TARENT', 'MONT-BLANC',
         'THABOR', 'VANOISE', 'VERCORS'], dtype='<U15'),
  'test': array(['MAURIENNE'], dtype='<U15')},
 3: {'train': array(['ARAVIS', 'BAUGES', 'BEAUFORTAIN', 'CHARTEUSE', 'GRANDES-ROUSSES',
         'HTE-MAURIE', 'HTE-TARENT', 'MAURIENNE', 'MONT-BLANC', 'THABOR',
         'VANOISE', 'VERCORS'], dtype='<U15'),
  'test': array(['BE

In [82]:
int(39/13)

3

In [84]:
names_to_check = ['ARAVIS', 'BAUGES', 'VERCORS']

# Collect keys where any name is missing
fold_key = []
for key, value in folder_key.items():
    train_set = value['train']
    if not all(name in train_set for name in names_to_check):
        fold_key.append(key)

print(fold_key)

[0, 1, 8]


In [92]:
storage_path = "/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_1"
folders = []
if os.path.isdir(os.path.join(storage_path, 'group_0')):
    all_items = os.listdir(storage_path)
    for item in all_items:
        item_path = os.path.join(storage_path, item)
        if os.path.isfile(item_path) and item.endswith('.yaml'):
            yaml_file_path = item_path
        elif item.endswith('results'):
            continue
        else:
            folders.append(""+item_path)
else:
    yaml_file_path = storage_path + "/info.yaml"
    folders = [storage_path]

methods_param = load_yaml(yaml_file_path)
fold_key = open_pkl(folders[0]+"/results/fold_key.pkl")

metrics = {}

for idx, folder in enumerate(sorted(folders)):
    models = [methods_param["groups_of_parameters"][idx]["--pipeline"][i][0][0] for i in range(len(methods_param["groups_of_parameters"][idx]["--pipeline"]))] 
    for model in models:
        try:
            if model not in metrics:
                metrics[model] = []
            metric = open_pkl(folder+"/models/"+model+"/metrics.pkl")
            print(metric)
            metrics[model] = metrics[model] + metric
        except Exception as e:
            continue

# check_and_create_directory(storage_path+"/results_final")
# log_results, _ = init_logger(os.path.join(storage_path, "results_final"), "results")

results_dir_figures = os.path.join(storage_path, "results_final/plots/")

metrics_to_plot = ["f1_score_macro", "f1_score_weighted", "f1_score_multiclass", "kappa_score", "training_time", "prediction_time"]

# plot_boxplots(metrics, metrics_to_plot=metrics_to_plot, save_dir=results_dir_figures, fold_key=fold_key, labels_massives=(methods_param["groups_of_parameters"][0]["--fold_method"]=="mFold"))
# plot_roc_curves(metrics, save_dir=results_dir_figures)

# log_results = report_metric_from_log(log_results, metrics, methods_param["groups_of_parameters"][0]["--metrics_to_report"])

[{'f1_score_macro': 76.97, 'f1_score_weighted': 77.61, 'f1_score_multiclass': array([81.26, 72.68]), 'accuracy_score': 77.77, 'precision_score_macro': 77.45, 'recall_score_macro': 76.7, 'roc_auc_score': 76.7, 'log_loss': 68.75, 'kappa_score': 53.99, 'confusion_matrix':          0        1
0  83.8077  16.1923
1  30.4117  69.5883, 'y_true': array(['1', '0', '1', ..., '0', '0', '0'], dtype='<U21'), 'y_pred': array([[0.2 , 0.8 ],
       [0.82, 0.18],
       [0.16, 0.84],
       ...,
       [1.  , 0.  ],
       [0.72, 0.28],
       [0.82, 0.18]]), 'fold': 0, 'training_time': 1.4034764766693115, 'prediction_time': 3.2676234245300293}, {'f1_score_macro': 86.68, 'f1_score_weighted': 86.95, 'f1_score_multiclass': array([88.85, 84.51]), 'accuracy_score': 87.03, 'precision_score_macro': 87.29, 'recall_score_macro': 86.33999999999999, 'roc_auc_score': 86.33999999999999, 'log_loss': 32.66, 'kappa_score': 73.4, 'confusion_matrix':          0        1
0  91.9028   8.0972
1  19.2208  80.7792, 'y_true'

In [97]:
[i["group"] = 42 for i in metrics['AdaBoost_direct']]

SyntaxError: cannot assign to subscript here. Maybe you meant '==' instead of '='? (1386318715.py, line 1)

In [101]:
ten = np.random.rand(100, 100, 9)
ten[:,:,[1,3,5,4]].shape

(100, 100, 4)

In [87]:
[metrics['KNN_direct'][i]["accuracy_score"] for i in range(len(metrics["KNN_direct"])) if metrics['KNN_direct'][i]["fold"]== 0]

[77.77, 77.77]

In [64]:
i_with_highest_accuracy = max((i for i in range(len(metrics["KNN_direct"])) if metrics['KNN_direct'][i]["fold"] == 0),
                              key=lambda i: metrics['KNN_direct'][i]["accuracy_score"])
i_with_highest_accuracy

0

In [49]:
metrics['KNN_direct'][12]["accuracy_score"]

77.42

In [55]:
x= {'commands': ["apptainer run --bind /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2:/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2 --bind /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline:/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/evaluation/container_ML.sif python evaluation/learning_models.py --data_path /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected_V2.h5 --fold_method mFold --labeling_method crocus --balancing_method undersample --request (date.dt.month == 3 and date.dt.day == 1) and ((elevation > 1000) and (elevation < 2000)) --shuffle_data True --balance_data False --import_list from sklearn.svm import SVC --import_list from sklearn.neighbors import KNeighborsClassifier --import_list from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier --import_list from sklearn.linear_model import LogisticRegression --import_list from sklearn.neural_network import MLPClassifier --import_list from estimators.statistical_descriptor import * --import_list from estimators.band_transform import * --pipeline [['KNN_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['KNeighborsClassifier', {'n_neighbors': 50}]] --pipeline [['RandomForest_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['RandomForestClassifier', {'n_estimators': 200}, {'criterion': 'entropy'}]] --pipeline [['MLP_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['MLPClassifier', {'alpha': 0.01}]] --pipeline [['LogisticR_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['LogisticRegression']] --pipeline [['AdaBoost_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['AdaBoostClassifier', {'n_estimators': 200}]] --metrics_to_report f1_score_macro --metrics_to_report f1_score_weighted --metrics_to_report f1_score_multiclass --metrics_to_report accuracy_score --metrics_to_report precision_score_macro --metrics_to_report recall_score_macro --metrics_to_report roc_auc_score --metrics_to_report log_loss --metrics_to_report kappa_score --metrics_to_report confusion_matrix --seed 100150 --storage_path /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2/group_0", "apptainer run --bind /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2:/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2 --bind /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline:/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/evaluation/container_ML.sif python evaluation/learning_models.py --data_path /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected_V2.h5 --fold_method mFold --labeling_method crocus --balancing_method undersample --request (date.dt.month == 3 and date.dt.day == 1) and ((elevation > 1000) and (elevation < 2000)) --shuffle_data True --balance_data False --import_list from sklearn.svm import SVC --import_list from sklearn.neighbors import KNeighborsClassifier --import_list from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier --import_list from sklearn.linear_model import LogisticRegression --import_list from sklearn.neural_network import MLPClassifier --import_list from estimators.statistical_descriptor import * --import_list from estimators.band_transform import * --pipeline [['KNN_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['KNeighborsClassifier', {'n_neighbors': 50}]] --pipeline [['RandomForest_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['RandomForestClassifier', {'n_estimators': 200}, {'criterion': 'entropy'}]] --pipeline [['MLP_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['MLPClassifier', {'alpha': 0.01}]] --pipeline [['LogisticR_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['LogisticRegression']] --pipeline [['AdaBoost_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['AdaBoostClassifier', {'n_estimators': 200}]] --metrics_to_report f1_score_macro --metrics_to_report f1_score_weighted --metrics_to_report f1_score_multiclass --metrics_to_report accuracy_score --metrics_to_report precision_score_macro --metrics_to_report recall_score_macro --metrics_to_report roc_auc_score --metrics_to_report log_loss --metrics_to_report kappa_score --metrics_to_report confusion_matrix --seed 100150 --storage_path /home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_2/group_1"], 'commit_sha': 'e462090903e5ba5be23158dfd45c1de5fbac6dba', 'end_time': [(2024, 7, 8, 11, 15, 31, 176876), (2024, 7, 8, 11, 18, 53, 842600)], 'executable': 'evaluation/learning_models.py', 'executable_command': 'python', 'experiment_id': 1, 'groups_of_parameters': [{'--balance_data': False, '--balancing_method': 'undersample', '--data_path': '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected_V2.h5', '--fold_method': 'mFold', '--import_list': ['from sklearn.svm import SVC', 'from sklearn.neighbors import KNeighborsClassifier', 'from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier', 'from sklearn.linear_model import LogisticRegression', 'from sklearn.neural_network import MLPClassifier', 'from estimators.statistical_descriptor import *', 'from estimators.band_transform import *'], '--labeling_method': 'crocus', '--metrics_to_report': ['f1_score_macro', 'f1_score_weighted', 'f1_score_multiclass', 'accuracy_score', 'precision_score_macro', 'recall_score_macro', 'roc_auc_score', 'log_loss', 'kappa_score', 'confusion_matrix'], '--pipeline': [[['KNN_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['KNeighborsClassifier', {'n_neighbors': 50}]], [['RandomForest_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['RandomForestClassifier', {'n_estimators': 200}, {'criterion': 'entropy'}]], [['MLP_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['MLPClassifier', {'alpha': 0.01}]], [['LogisticR_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['LogisticRegression']], [['AdaBoost_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['AdaBoostClassifier', {'n_estimators': 200}]]], '--request': '(date.dt.month == 3 and date.dt.day == 1) and ((elevation > 1000) and (elevation < 2000))', '--seed': 100150, '--shuffle_data': True}, {'--balance_data': False, '--balancing_method': 'undersample', '--data_path': '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected_V2.h5', '--fold_method': 'mFold', '--import_list': ['from sklearn.svm import SVC', 'from sklearn.neighbors import KNeighborsClassifier', 'from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier', 'from sklearn.linear_model import LogisticRegression', 'from sklearn.neural_network import MLPClassifier', 'from estimators.statistical_descriptor import *', 'from estimators.band_transform import *'], '--labeling_method': 'crocus', '--metrics_to_report': ['f1_score_macro', 'f1_score_weighted', 'f1_score_multiclass', 'accuracy_score', 'precision_score_macro', 'recall_score_macro', 'roc_auc_score', 'log_loss', 'kappa_score', 'confusion_matrix'], '--pipeline': [[['KNN_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['KNeighborsClassifier', {'n_neighbors': 50}]], [['RandomForest_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['RandomForestClassifier', {'n_estimators': 200}, {'criterion': 'entropy'}]], [['MLP_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['MLPClassifier', {'alpha': 0.01}]], [['LogisticR_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['LogisticRegression']], [['AdaBoost_direct'], ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}], ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}], ['Hist_SAR'], ['AdaBoostClassifier', {'n_estimators': 200}]]], '--request': '(date.dt.month == 3 and date.dt.day == 1) and ((elevation > 1000) and (elevation < 2000))', '--seed': 100150, '--shuffle_data': True}], 'main_pid': 13834, 'pids': ['13848', '14670'], 'repertories': ['results/pipeline/run_2/group_0', 'results/pipeline/run_2/group_1'], 'run_id': 2, 'start_time': [(2024, 7, 8, 11, 12, 18, 147963), (2024, 7, 8, 11, 15, 31, 223690)], 'status': ['finished', 'finished'], 'storage_path': 'results/pipeline/run_2', 'working_directory': '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline'}

In [59]:
for idx, folder in enumerate(sorted(folders)):
        models = [x["groups_of_parameters"][idx]["--pipeline"][i][0][0] for i in range(len(x["groups_of_parameters"][idx]["--pipeline"]))] 
        for model in models:
            try:
                if model not in metrics:
                    metrics[model] = []
                metrics[model] = metrics[model] + open_pkl(folder+"/models/"+model+"/metrics.h5")
            except Exception as e:
                continue

In [60]:
metrics

{'KNN_direct': [{'f1_score_macro': 85.48,
   'f1_score_weighted': 86.86,
   'f1_score_multiclass': array([90.07, 80.89]),
   'accuracy_score': 86.92999999999999,
   'precision_score_macro': 85.92,
   'recall_score_macro': 85.1,
   'roc_auc_score': 85.1,
   'log_loss': 30.320000000000004,
   'kappa_score': 70.97,
   'confusion_matrix':          0        1
   0  91.2029   8.7971
   1  21.0000  79.0000,
   'y_true': array(['0', '0', '0', '0', '0', '0', '1', '0', '1', '1', '1', '1', '0',
          '0', '0', '0', '1', '1', '1', '0', '0', '1', '0', '0', '0', '0',
          '0', '1', '0', '1', '0', '0', '1', '0', '0', '1', '0', '0', '0',
          '1', '1', '1', '0', '0', '0', '0', '1', '1', '0', '0', '0', '1',
          '1', '0', '0', '1', '0', '0', '1', '0', '0', '1', '0', '1', '0',
          '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '1',
          '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
          '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '