In [15]:
import sys, os, time
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Make the parent directory importable BEFORE any project-local import, so the
# cell also works on a freshly restarted kernel. Appending (not inserting)
# keeps the existing module resolution order unchanged.
# NOTE(review): assumes the `utils` and `estimators` packages live in the
# parent directory of this notebook -- confirm.
parent_dir = os.path.abspath("..")
if parent_dir not in sys.path:  # avoid stacking duplicates when the cell is re-run
    sys.path.append(parent_dir)

from utils.fold_management import fold_management
from utils.label_management import label_management
from estimators.statistical_descriptor import Nagler_WS
# from plot.figure_roc import ROC_plot
from utils.dataset_management import load_train, load_test, parser_pipeline, BFold
from utils.dataset_load import  save_h5_II, load_data_h5, load_info_h5, shuffle_data, Dataset_loader
from utils.files_management import (
    load_yaml,
    dump_pkl,
    init_logger,
    open_param_set_dir,
    report_prediction,
    report_metric_from_log,
    write_report,
)


In [2]:

#def Nagler_estimation(data_path):
def Nagler_estimation(X_trainU, y_train, X_test, y_test, label_encoder):
    """Score the train and test samples with the two Nagler estimators.

    One ``Nagler_WS`` estimator is built per pipeline name, with ``bands=6``
    for "Nagler_VV" and ``bands=7`` for "Nagler_VH" (presumably the indices of
    the VV / VH channels -- TODO confirm against ``Nagler_WS``).  The
    estimators are used as constructed: no ``fit`` call is made here.

    Parameters
    ----------
    X_trainU, X_test
        Samples forwarded unchanged to ``Nagler_WS.predict_proba``.
    y_train, y_test
        Labels matching ``X_trainU`` and ``X_test``; they are concatenated
        (train first) to form ``y_true``.
    label_encoder
        Object whose ``transform(["wet"])[0]`` gives the column of
        ``predict_proba`` holding the probability of the "wet" class.

    Returns
    -------
    dict
        ``{pipeline_name: {"y_true": ..., "y_est": ...}}`` where both arrays
        are ordered train samples first, then test samples.
    """
    pos_class = label_encoder.transform(["wet"])[0]

    # Pipeline name -> value passed as ``bands`` to ``Nagler_WS``.
    band_by_pipeline = {"Nagler_VV": 6, "Nagler_VH": 7}

    y_est_save = {}
    for name_pip, band in band_by_pipeline.items():
        estimator = Nagler_WS(bands=band)
        prob_test = estimator.predict_proba(X_test)[:, pos_class]
        prob_train = estimator.predict_proba(X_trainU)[:, pos_class]
        y_est_save[name_pip] = {
            # A fresh array per pipeline so entries never share a buffer.
            "y_true": np.concatenate([y_train, y_test]),
            "y_est": np.concatenate([prob_train, prob_test]),
        }

    return y_est_save


In [3]:
param_path = "../parameter/config_pipeline.yml"
pipeline_param = load_yaml(param_path)

match pipeline_param["type"]:
    case "local":
        local_param_path = "../parameter/config_data_local.yml"
        data_param = load_yaml(local_param_path)
    case "global":
        global_param_path = "pipeline/parameter/config_data_global.yml"
        data_param = load_yaml(global_param_path)
    case _:
        f"no such type : {pipeline_param["type"]}"

try:
    data_path = "../data/dataset/dataset_AD_08200821_14Mas3Top3Phy_W15_corrected.h5"
    out_dir = pipeline_param["out_dir"]
    seed = pipeline_param["seed"]
    BANDS_MAX = pipeline_param["BANDS_MAX"]
except KeyError as e:
    print("KeyError: %s undefine" % e)

start_line = 0

# y_nagler = Nagler_estimation(data_path)
# print(y_nagler)

dtst_ld = Dataset_loader(
    data_path,
    shuffle=False,
)
dtst_ld.infos

Unnamed: 0,date,massif,aquisition,aquisition2,elevation,slope,orientation,tmin,tel,hsnow
0,2020-12-25,ARAVIS,ASC,ASC,900.0,20.0,45.0,-2.896717,0.166419,3.488328
1,2020-12-25,ARAVIS,ASC,ASC,900.0,20.0,45.0,-2.896717,0.166419,3.488328
2,2020-12-25,ARAVIS,ASC,ASC,1200.0,45.0,225.0,-4.667171,0.024536,0.336520
3,2020-12-25,ARAVIS,ASC,ASC,1200.0,45.0,135.0,-5.906706,0.156946,2.868350
4,2020-12-25,ARAVIS,ASC,ASC,1200.0,45.0,135.0,-5.906706,0.156946,2.868350
...,...,...,...,...,...,...,...,...,...,...
1288436,2021-01-05,HTE-MAURIE,DSC,DSC,3000.0,45.0,180.0,-20.930857,0.518277,0.000000
1288437,2021-01-05,HTE-MAURIE,DSC,DSC,3000.0,20.0,45.0,-21.431910,0.980208,0.000000
1288438,2021-01-05,HTE-MAURIE,DSC,DSC,2400.0,45.0,45.0,-18.423183,0.614442,0.000000
1288439,2021-01-05,HTE-MAURIE,DSC,DSC,2400.0,20.0,0.0,-18.984406,0.452196,0.000000


In [4]:
# Rows of the info table whose `tmin` is above 0 and whose `hsnow` is above 40.
dtst_ld.infos.loc[lambda info: (info['tmin'] > 0) & (info['hsnow'] > 40)]

Unnamed: 0,date,massif,aquisition,aquisition2,elevation,slope,orientation,tmin,tel,hsnow
5632,2021-06-11,BELLEDONNE,ASC,ASC,2400.0,45.0,0.0,0.01,1.764194,45.176708
5639,2021-06-11,BELLEDONNE,ASC,ASC,2400.0,45.0,45.0,0.01,2.185814,59.180344
5640,2021-06-11,BELLEDONNE,ASC,ASC,2400.0,45.0,0.0,0.01,1.764194,45.176708
5648,2021-06-11,BELLEDONNE,ASC,ASC,2400.0,45.0,0.0,0.01,1.764194,45.176708
5650,2021-06-11,BELLEDONNE,ASC,ASC,2400.0,45.0,0.0,0.01,1.764194,45.176708
...,...,...,...,...,...,...,...,...,...,...
1283710,2021-05-29,GRANDES-ROUSSES,DSC,DSC,2400.0,45.0,45.0,0.01,1.595417,47.645008
1283719,2021-05-29,GRANDES-ROUSSES,DSC,DSC,2400.0,45.0,45.0,0.01,1.595417,47.645008
1283738,2021-05-29,GRANDES-ROUSSES,DSC,DSC,2400.0,45.0,45.0,0.01,1.595417,47.645008
1283759,2021-05-29,GRANDES-ROUSSES,DSC,DSC,2400.0,45.0,45.0,0.01,1.595417,47.645008


In [20]:
# Example of request
# The request is a boolean expression over the info-table columns
# (here `date` and `elevation`).
# NOTE(review): the syntax looks like a pandas query string -- confirm in
# Dataset_loader.request_data.
rq1 = "(date.dt.month == 3 and date.dt.day== 1) and (elevation > 2000)"

# `x` and `y` are reused by the following cells (`y["metadata"]`,
# `lab_man.transform(y)`, `fold.split(x, y)`).
x, y = dtst_ld.request_data(rq1)

Request: (date.dt.month == 3 and date.dt.day== 1) and (elevation > 2000) with 9097 samples
(9097, 15, 15, 9)


In [21]:
# Distinct values found in column 1 of the request metadata
# (the massif names, according to the recorded output).
np.unique(y["metadata"][:,1])

array(['ARAVIS', 'BAUGES', 'BEAUFORTAIN', 'BELLEDONNE', 'CHARTEUSE',
       'GRANDES-ROUSSES', 'HTE-MAURIE', 'HTE-TARENT', 'MAURIENNE',
       'MONT-BLANC', 'THABOR', 'VANOISE'], dtype='<U15')

In [22]:
# Fold splitter used to iterate over train/test index pairs.
fold = fold_management(
    methode="mFold",
    shuffle=False,
    random_state=42,
    train_aprox_size=0.80,
)

# Label manager and the targets it derives from the requested samples.
lab_man = label_management(methode="crocus")
targets = lab_man.transform(y)

In [23]:
# Distinct target values and how many samples carry each of them.
np.unique(targets, return_counts=True)

(array([0, 1]), array([8130,  967]))

In [27]:
# For every fold, report the train/test indices and the class distribution
# of the targets on each side of the split.
for i, (train_index, test_index) in enumerate(fold.split(x, y)):
    print(
        f"Fold {i}:",
        f"  Train: index={train_index}",
        f"  Test:  index={test_index}",
        f"  distribution class train: {np.unique(targets[train_index], return_counts=True)}",
        f"  distribution class test: {np.unique(targets[test_index], return_counts=True)}",
        sep="\n",
    )

{'BELLEDONNE': {'count': 553, 'indices': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213

In [25]:
# Number of requested samples per massif name (column 1 of the metadata).
massives_count = {}

for index, name in enumerate(y['metadata'][:, 1]):
    if name in massives_count:
        massives_count[name]['count'] += 1
    else:
        # First occurrence: 'indices' is created empty and is not filled here.
        massives_count[name] = {'count': 1, 'indices': []}

massives_count

{'BELLEDONNE': {'count': 553, 'indices': []},
 'MONT-BLANC': {'count': 290, 'indices': []},
 'HTE-MAURIE': {'count': 850, 'indices': []},
 'BAUGES': {'count': 10, 'indices': []},
 'HTE-TARENT': {'count': 1141, 'indices': []},
 'MAURIENNE': {'count': 757, 'indices': []},
 'VANOISE': {'count': 2670, 'indices': []},
 'BEAUFORTAIN': {'count': 751, 'indices': []},
 'GRANDES-ROUSSES': {'count': 1304, 'indices': []},
 'ARAVIS': {'count': 51, 'indices': []},
 'CHARTEUSE': {'count': 1, 'indices': []},
 'THABOR': {'count': 719, 'indices': []}}

In [11]:
# Number of requested samples per massif name (column 1 of the metadata),
# together with the positions of those samples in the request.
massives_count = {}

for index, name in enumerate(y['metadata'][:, 1]):
    if name in massives_count:
        massives_count[name]['count'] += 1
        massives_count[name]['indices'].append(index)
    else:
        # First occurrence of this name: start its record with this sample.
        massives_count[name] = {'count': 1, 'indices': [index]}

massives_count

{'BELLEDONNE': {'count': 71,
  'indices': [0,
   1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   41,
   42,
   43,
   44,
   45,
   46,
   47,
   48,
   49,
   50,
   51,
   52,
   53,
   54,
   55,
   56,
   57,
   58,
   59,
   60,
   61,
   62,
   63,
   64,
   65,
   66,
   67,
   68,
   69,
   70]},
 'MONT-BLANC': {'count': 148,
  'indices': [71,
   72,
   73,
   74,
   75,
   76,
   77,
   78,
   79,
   80,
   81,
   82,
   83,
   84,
   85,
   86,
   87,
   88,
   89,
   90,
   91,
   92,
   93,
   94,
   95,
   96,
   97,
   98,
   99,
   100,
   101,
   102,
   103,
   104,
   105,
   106,
   107,
   108,
   109,
   110,
   111,
   112,
   113,
   114,
   115,
   116,
   117,
   118,
   119,
   120,
   121,
   122,
   123,
   124,
   125,
   126,
   127,
   

In [13]:
# Build the "crocus" label manager and display the labels it derives
# from the requested samples.
lab_man = label_management(methode="crocus")
lab_man.transform(y)

array([0, 0, 0, ..., 0, 0, 0])

In [14]:
# Print every transformed label on its own line.
# NOTE(review): this emits one output line per sample, which floods the
# notebook; `np.unique(lab_man.transform(y), return_counts=True)` gives a
# compact view of the same information.
for i in lab_man.transform(y):
    print(i)

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
