In [2]:
import sys, os, time
import numpy as np

parent_dir = os.path.abspath("..")
sys.path.append(parent_dir)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from estimators.statistical_descriptor import Nagler_WS
# from plot.figure_roc import ROC_plot
from utils.dataset_management import load_train, load_test, parser_pipeline, BFold
from utils.dataset_load import  save_h5_II, load_data_h5, load_info_h5, shuffle_data, Dataset_loader
from utils.files_management import (
    load_yaml,
    dump_pkl,
    init_logger,
    open_param_set_dir,
    report_prediction,
    report_metric_from_log,
    write_report,
)


In [3]:

#def Nagler_estimation(data_path):
def Nagler_estimation(X_trainU, y_train, X_test, y_test, label_encoder):
    """Collect ``Nagler_WS`` "wet" scores over the train and test splits.

    Two ``Nagler_WS`` objects are built, one with ``bands=6`` (results stored
    under ``"Nagler_VV"``) and one with ``bands=7`` (stored under
    ``"Nagler_VH"``). Nothing is fitted here: only ``predict_proba`` is
    called on each object.
    NOTE(review): presumably band 6 / band 7 are the VV / VH channels the
    result names refer to -- confirm against ``Nagler_WS``.

    Parameters
    ----------
    X_trainU, X_test
        Inputs passed unchanged to ``Nagler_WS.predict_proba``.
    y_train, y_test
        Labels of the two splits; concatenated in (train, test) order.
    label_encoder
        Object whose ``transform(["wet"])[0]`` gives the column of
        ``predict_proba`` to keep.

    Returns
    -------
    dict
        ``{"Nagler_VV": {...}, "Nagler_VH": {...}}`` where each inner dict
        holds ``"y_true"`` (train labels followed by test labels) and
        ``"y_est"`` (train scores followed by test scores).
    """
    wet_column = label_encoder.transform(["wet"])[0]

    results = {}
    for pipeline_name, band in (("Nagler_VV", 6), ("Nagler_VH", 7)):
        detector = Nagler_WS(bands=band)

        # Score the test split first, then the train split.
        test_scores = detector.predict_proba(X_test)[:, wet_column]
        train_scores = detector.predict_proba(X_trainU)[:, wet_column]

        # Stored order is train followed by test, for scores and labels alike.
        scores = np.concatenate([train_scores, test_scores])
        truth = np.concatenate([y_train, y_test])

        results[pipeline_name] = {"y_true": truth, "y_est": scores}

    return results


In [5]:
param_path = "../parameter/config_pipeline.yml"
pipeline_param = load_yaml(param_path)

match pipeline_param["type"]:
    case "local":
        local_param_path = "../parameter/config_data_local.yml"
        data_param = load_yaml(local_param_path)
    case "global":
        global_param_path = "pipeline/parameter/config_data_global.yml"
        data_param = load_yaml(global_param_path)
    case _:
        f"no such type : {pipeline_param["type"]}"

try:
    data_path = "../data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected.h5"
    out_dir = pipeline_param["out_dir"]
    seed = pipeline_param["seed"]
    BANDS_MAX = pipeline_param["BANDS_MAX"]
except KeyError as e:
    print("KeyError: %s undefine" % e)

start_line = 0

# Descriptor columns requested from the dataset file; they are the columns
# of the `infos` table displayed at the end of this cell.
descriptor_columns = [
    "date",
    "massif",
    "aquisition",
    "aquisition2",
    "elevation",
    "slope",
    "orientation",
    "tmin",
    "hsnow",
    "tel",
]

# Open the dataset without shuffling.
dtst_ld = Dataset_loader(data_path, shuffle=False, descrp=descriptor_columns)

# Last expression of the cell: show the descriptor table.
dtst_ld.infos

Unnamed: 0,date,massif,aquisition,aquisition2,elevation,slope,orientation,tmin,hsnow,tel
0,2020-12-25,ARAVIS,ASC,ASC,900.0,20.0,45.0,-2.896717,0.166419,3.488328
1,2020-12-25,ARAVIS,ASC,ASC,900.0,20.0,45.0,-2.896717,0.166419,3.488328
2,2020-12-25,ARAVIS,ASC,ASC,1200.0,45.0,225.0,-4.667171,0.024536,0.336520
3,2020-12-25,ARAVIS,ASC,ASC,1200.0,45.0,135.0,-5.906706,0.156946,2.868350
4,2020-12-25,ARAVIS,ASC,ASC,1200.0,45.0,135.0,-5.906706,0.156946,2.868350
...,...,...,...,...,...,...,...,...,...,...
1288436,2021-01-05,HTE-MAURIE,DSC,DSC,3000.0,45.0,180.0,-20.930857,0.518277,0.000000
1288437,2021-01-05,HTE-MAURIE,DSC,DSC,3000.0,20.0,45.0,-21.431910,0.980208,0.000000
1288438,2021-01-05,HTE-MAURIE,DSC,DSC,2400.0,45.0,45.0,-18.423183,0.614442,0.000000
1288439,2021-01-05,HTE-MAURIE,DSC,DSC,2400.0,20.0,0.0,-18.984406,0.452196,0.000000


In [9]:
# Plain-text preview of the descriptor table: its first rows and column names.
print(dtst_ld.infos.head())

        date  massif aquisition aquisition2  elevation  slope  orientation  \
0 2020-12-25  ARAVIS        ASC         ASC      900.0   20.0         45.0   
1 2020-12-25  ARAVIS        ASC         ASC      900.0   20.0         45.0   
2 2020-12-25  ARAVIS        ASC         ASC     1200.0   45.0        225.0   
3 2020-12-25  ARAVIS        ASC         ASC     1200.0   45.0        135.0   
4 2020-12-25  ARAVIS        ASC         ASC     1200.0   45.0        135.0   

       tmin     hsnow       tel  
0 -2.896717  0.166419  3.488328  
1 -2.896717  0.166419  3.488328  
2 -4.667171  0.024536  0.336520  
3 -5.906706  0.156946  2.868350  
4 -5.906706  0.156946  2.868350  


In [6]:
# Example requests. Each one is a boolean expression over the descriptor
# columns loaded by Dataset_loader (date, massif, aquisition, aquisition2,
# elevation, slope, orientation, tmin, hsnow, tel).
# NOTE(review): the syntax looks like pandas `DataFrame.query` -- confirm in
# `Dataset_loader.request_data`.
# The backslash continuations sit inside the string literals, so the leading
# spaces of the continued lines are part of each request string.
rq1 = "massif == 'VERCORS' and \
        ((date.dt.month == 3 and date.dt.day== 1) or \
        (elevation > 3000 and hsnow < 0.25))"

# Uses the real column names `elevation` and `orientation`; the earlier
# `elev` / `theta` are not among the loaded descriptor columns.
rq2 = "massif == 'ARAVIS' & aquisition == 'ASC' & \
        elevation == 900.0 & slope == 20 & orientation == 45 "

rq3 = 'massif == "ARAVIS"'

# Only rq1 is executed in this cell; rq2 and rq3 are kept as examples.
x, y = dtst_ld.request_data(rq1)

Request: massif == 'VERCORS' and         ((date.dt.month == 3 and date.dt.day== 1) or         (elevation > 3000 and hsnow < 0.25)) with 197 samples
(197, 15, 15, 9)


In [10]:
# Inspect `y`, the second object returned by `dtst_ld.request_data(...)`.
y

{'metadata': array([['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
        ['20210301', 'VERCORS', 'ASC', 'ASC'],
 

In [11]:
# Inspect the first sample of `x`, the first object returned by
# `dtst_ld.request_data(...)`.
x[0]

array([[[9.68645215e-02, 1.38681130e-02, 1.43170208e-01, ...,
         1.38833725e+00, 7.85041809e-01, 5.65454662e-01],
        [1.00032806e-01, 1.01889456e-02, 1.01856038e-01, ...,
         3.14524794e+00, 9.67372417e-01, 3.07566345e-01],
        [6.88781366e-02, 7.95030780e-03, 1.15425713e-01, ...,
         2.12536001e+00, 1.21904612e+00, 5.73571622e-01],
        ...,
        [3.18068154e-02, 3.30183953e-02, 1.03809178e+00, ...,
         1.12347293e+00, 2.12978101e+00, 1.89571190e+00],
        [1.63510814e-02, 2.19222270e-02, 1.34072030e+00, ...,
         4.29202259e-01, 1.20986307e+00, 2.81886435e+00],
        [1.27079170e-02, 1.72232054e-02, 1.35531306e+00, ...,
         1.03041601e+00, 1.14154851e+00, 1.10785210e+00]],

       [[8.96717906e-02, 4.63698581e-02, 5.17106414e-01, ...,
         1.35334337e+00, 1.85402286e+00, 1.36995745e+00],
        [7.61217922e-02, 1.23284692e-02, 1.61957160e-01, ...,
         1.66736674e+00, 6.80547953e-01, 4.08157319e-01],
        [5.15821874e-02, 

In [30]:
# NOTE(review): the recorded run of this cell stopped with
#   ValueError: Found input variables with inconsistent numbers of samples: [1288441, 3]
# The `y` displayed earlier in the notebook is a dict (it has a 'metadata'
# entry), so presumably the 3 is its number of keys rather than a sample
# count. TODO: pass a per-sample label array here instead of the dict, and
# confirm which entry of `y` holds the labels.
X_trainU, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)

# Fit the label encoder on the training labels only and reuse it for the
# test labels. Nagler_estimation calls `label_encoder.transform(["wet"])`,
# so "wet" must be among the fitted classes.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

y_nagler = Nagler_estimation(X_trainU, y_train_encoded, X_test, y_test_encoded, label_encoder)
print(y_nagler)




ValueError: Found input variables with inconsistent numbers of samples: [1288441, 3]