In [1]:
import sys
import os
import time
import logging
import argparse
import numpy as np
from datetime import datetime
from joblib import Parallel, delayed

# Make the project packages importable. The directory added is the parent of
# the current working directory, so this assumes the notebook is run from a
# folder located directly under the project root. Adjust the path as necessary.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Custom imports from your project
from estimators.statistical_descriptor import Nagler_WS
from utils.dataset_management import parse_pipeline
from utils.dataset_load import shuffle_data, DatasetLoader
from utils.fold_management import FoldManagement
from utils.label_management import LabelManagement
from utils.balance_management import BalanceManagement
from utils.figures import *
from utils.files_management import *


In [2]:
# Fixed-seed generator (legacy NumPy RandomState API).
SEED = 442
rng = np.random.RandomState(seed=SEED)
print(rng)

RandomState(MT19937)


In [3]:
# Parse the YAML parameter file; the parsed content is kept in `tmp`, which
# the following cell reads.
config_path = '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/parameter/config_param.yml'
with open(config_path, mode='r') as config_file:
    tmp = yaml.safe_load(config_file)

tmp

{'fixed_args': {'options': {'--data_path': '/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected_V2.h5',
   '--fold_method': 'combinationFold',
   '--labeling_method': 'crocus',
   '--balancing_method': 'undersample',
   '--request': '(date.dt.month == 3 and date.dt.day == 1) and ((elevation > 1000) and (elevation < 2000))',
   '--shuffle_data': True,
   '--balance_data': False,
   '--import_list': ['from sklearn.svm import SVC',
    'from sklearn.neighbors import KNeighborsClassifier',
    'from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier',
    'from sklearn.linear_model import LogisticRegression',
    'from sklearn.neural_network import MLPClassifier',
    'from estimators.statistical_descriptor import *',
    'from estimators.band_transform import *'],
   '--pipeline': [[['KNN_direct'],
     ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
     ['BandTransformer', {'bands':

In [4]:
# Both entries live under fixed_args -> options in the loaded configuration.
fixed_options = tmp["fixed_args"]["options"]
import_list = fixed_options["--import_list"]
pipeline = fixed_options["--pipeline"]
pipeline

[[['KNN_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['KNeighborsClassifier', {'n_neighbors': 50}]],
 [['RandomForest_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['RandomForestClassifier', {'n_estimators': 200}, {'criterion': 'entropy'}]],
 [['MLP_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['MLPClassifier', {'alpha': 0.01}]],
 [['LogisticR_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1, 2, 3]}, {'transformations': []}],
  ['Hist_SAR'],
  ['LogisticRegression']],
 [['AdaBoost_direct'],
  ['BandSelector', {'bands': [0, 1, 2, 3, 4, 5, 6, 7, 8]}],
  ['BandTransformer', {'bands': [0, 1,

In [60]:
import json
def dump_h5(data, file_path):
    """Write ``data`` to ``file_path + '.h5'`` under the single key ``"data"``.

    How the value is stored depends on its type:

    * ``np.ndarray``: an HDF5 dataset named ``"data"``;
    * ``list``, ``tuple``, ``dict``, ``bool`` or ``None``: a JSON string in
      the root attribute ``"data"`` (a tuple is therefore read back as a
      list);
    * anything else: ``str(data)`` in the root attribute ``"data"``.

    Parameters
    ----------
    data : object
        Value to persist.
    file_path : str
        Destination path *without* extension; ``'.h5'`` is always appended.
        The file is opened in ``'w'`` mode, so existing content is replaced.

    Raises
    ------
    TypeError
        If a JSON-encoded container holds an object that is neither
        JSON-serialisable nor a NumPy array/scalar.
    """
    def _numpy_to_builtin(obj):
        # json.dumps calls this for objects it cannot encode itself; NumPy
        # arrays and scalars are converted to plain Python equivalents.
        if isinstance(obj, (np.ndarray, np.generic)):
            return obj.tolist()
        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        )

    with h5py.File(file_path + '.h5', 'w') as f:
        if isinstance(data, np.ndarray):
            f.create_dataset("data", data=data)
        elif data is None or isinstance(data, (bool, list, tuple, dict)):
            # str(True) / str(None) / str((1, 2)) are not valid JSON, so a
            # JSON-decoding reader would hand them back as plain strings;
            # encode them as JSON instead.
            f.attrs["data"] = json.dumps(data, default=_numpy_to_builtin)
        else:
            f.attrs["data"] = str(data)

    
def load_h5(file_path):
    """Return the value stored under the key ``"data"`` of an HDF5 file.

    The root attribute ``"data"`` is looked up first and, when present, takes
    precedence over a dataset of the same name. An attribute value is decoded
    as JSON when possible and returned unchanged otherwise. A dataset is
    returned as a NumPy array. Only the ``"data"`` entry is read; other
    datasets and attributes in the file are ignored.

    Parameters
    ----------
    file_path : str
        Full path of the file to read (including its extension).

    Returns
    -------
    object
        The decoded attribute value or the dataset content as ``np.ndarray``.

    Raises
    ------
    KeyError
        If the file has neither an attribute nor a dataset named ``"data"``.
    """
    with h5py.File(file_path, 'r') as f:
        if "data" in f.attrs:
            raw_value = f.attrs["data"]
            try:
                return json.loads(raw_value)
            except (TypeError, ValueError):
                # ValueError covers json.JSONDecodeError (text that is not
                # JSON); TypeError covers attribute values that are not
                # str/bytes at all. Either way, hand back the stored value.
                return raw_value
        if "data" in f:
            return np.array(f["data"])
    raise KeyError(f"no 'data' dataset or attribute found in {file_path!r}")

In [65]:
# Example payload mixing several value types.
# NOTE(review): this dict is not what gets written below; the dump_h5 call
# receives a separate small array.
data_to_save = {
    'list_data': [1, 2, 3],
    'dict_data': {'a': 1, 'b': 2},
    'pipeline': 1,  # Replace with your scikit-learn pipeline object
    'variable': 42,
}

# Save to HDF5
sample_array = np.array([1, 2, 3])
dump_h5(sample_array, './data')

In [66]:
# Read the "data" entry back from ./data.h5 and display it.
h5_path = './data.h5'
loaded_data = load_h5(h5_path)
loaded_data

array([1, 2, 3])

In [57]:
# ast.literal_eval only accepts Python literals (strings, numbers, tuples,
# lists, dicts, ...). The text below is made of constructor calls, so it is
# rejected with a ValueError. The error is caught and printed so that this
# cell documents the limitation without stopping a full run of the notebook.
t = "Pipeline(memory='.cache',steps=[('BandSelector',BandSelector(bands=[0, 1, 2, 3, 4, 5, 6, 7, 8])),('BandTransformer',BandTransformer(bands=[0, 1, 2, 3], transformations=[])),('Hist_SAR', Hist_SAR()),('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=50))],verbose=True)"
try:
    parsed_pipeline = ast.literal_eval(t)
except ValueError as exc:
    parsed_pipeline = None
    print(f"{type(exc).__name__}: {exc}")
parsed_pipeline

ValueError: malformed node or string on line 1: <ast.Call object at 0x7acee77b5750>

In [8]:
# Display the content of fold_key.pkl saved for run_3 / group_0.
fold_key_path = "/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_3/group_0/results/fold_key.pkl"
open_pkl(fold_key_path)

{0: {'train': array(['BAUGES', 'BAUGES', 'BAUGES', ..., 'VERCORS', 'VERCORS', 'VERCORS'],
        dtype='<U15'),
  'test': array(['ARAVIS', 'ARAVIS', 'ARAVIS', ..., 'THABOR', 'THABOR', 'THABOR'],
        dtype='<U15')},
 1: {'train': array(['ARAVIS', 'ARAVIS', 'ARAVIS', ..., 'VERCORS', 'VERCORS', 'VERCORS'],
        dtype='<U15'),
  'test': array(['BAUGES', 'BAUGES', 'BAUGES', ..., 'BELLEDONNE', 'BELLEDONNE',
         'BELLEDONNE'], dtype='<U15')},
 2: {'train': array(['ARAVIS', 'ARAVIS', 'ARAVIS', ..., 'VERCORS', 'VERCORS', 'VERCORS'],
        dtype='<U15'),
  'test': array(['GRANDES-ROUSSES', 'GRANDES-ROUSSES', 'GRANDES-ROUSSES', ...,
         'MAURIENNE', 'MAURIENNE', 'MAURIENNE'], dtype='<U15')},
 3: {'train': array(['ARAVIS', 'ARAVIS', 'ARAVIS', ..., 'VANOISE', 'VANOISE', 'VANOISE'],
        dtype='<U15'),
  'test': array(['BELLEDONNE', 'BELLEDONNE', 'BELLEDONNE', ..., 'VERCORS',
         'VERCORS', 'VERCORS'], dtype='<U15')},
 4: {'train': array(['BAUGES', 'BAUGES', 'BAUGES', ...

In [32]:
import joblib

# Load and display the object stored in KNN_direct_fold1.joblib
# (run_3 / group_0).
knn_model_path = "/home/listic/Bureau/cortes_stage/ML-WetSnowSAR_pipeline_stage/pipeline/results/pipeline/run_3/group_0/models/KNN_direct/KNN_direct_fold1.joblib"
joblib.load(knn_model_path)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [10]:
# NumPy version used by the running kernel.
numpy_version = np.__version__
numpy_version

'1.26.3'