In [None]:
### imports
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
import pickle
import matplotlib as mpl
import time
import pyreadr
# import shap
# import glob
# import seaborn as sns
# import math
# import os

from pathlib import Path

# scaling and train test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler

# # creating a model
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Activation, Dropout
# from tensorflow.keras.optimizers import Adam, SGD
# from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# from tensorflow.keras import regularizers
# from tensorflow.keras import layers, models
# from tensorflow.keras.metrics import Precision, Recall, Accuracy, BinaryAccuracy, CategoricalAccuracy, FalsePositives, FalseNegatives

# evaluation on test data
# from sklearn.metrics import mean_squared_error,mean_absolute_error,explained_variance_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import make_scorer, f1_score, roc_curve
from sklearn.model_selection import StratifiedKFold

from joblib import dump, load

import neural_networks as nn
import importlib

pd.set_option('display.max.columns', 150)
pd.set_option('display.max.rows', 150)


In [None]:
df_red = pyreadr.read_r('F:/PROJEKTE/FARM/Daten/Datenanalyse/ML/data/spatial_features_all_compounds/preprocessed_red_features_sentinel.RDS')[None] # reduced features
df_all = pyreadr.read_r('F:/PROJEKTE/FARM/Daten/Datenanalyse/ML/data/spatial_features_all_compounds/preprocessed_all_features_sentinel.RDS')[None] # all features

In [25]:
# different data for TFA with class boundaries 0.1 and 1 ug/L:
# df_red = pyreadr.read_r('F:/PROJEKTE/FARM/Daten/Datenanalyse/ML/data/spatial_features_all_compounds/preprocessed_red_features_sentinel_TFA.RDS')[None]
# df_all = pyreadr.read_r('F:/PROJEKTE/FARM/Daten/Datenanalyse/ML/data/spatial_features_all_compounds/preprocessed_all_features_sentinel_TFA.RDS')[None]

In [None]:
# correct encoding
for col in df_red.select_dtypes(['object', 'category']).columns:
    df_red[col] = df_red[col].str.replace('Ã¤', 'ä')
    df_red[col] = df_red[col].str.replace('Ã¼', 'ü')
    df_red[col] = df_red[col].str.replace('Ã¶', 'ö')
    df_red[col] = df_red[col].str.replace('ÃŸ', 'ß')

for col in df_all.select_dtypes(['object', 'category']).columns:
    df_all[col] = df_all[col].str.replace('Ã¤', 'ä')
    df_all[col] = df_all[col].str.replace('Ã¼', 'ü')
    df_all[col] = df_all[col].str.replace('Ã¶', 'ö')
    df_all[col] = df_all[col].str.replace('ÃŸ', 'ß')

In [29]:
### filter messnetz for model for paper
# df_all = df_all[df_all.messnetz=='Landesmessnetz']
# df_red = df_red[df_red.messnetz=='Landesmessnetz']

In [None]:
### drop rows with missing conc_group

df_red = df_red[~df_red.conc_group.isna()]
df_red.conc_group = df_red.conc_group.astype('int')

df_all = df_all[~df_all.conc_group.isna()]
df_all.conc_group = df_all.conc_group.astype('int')

In [33]:
### select number of classes
n_classes = 2
if n_classes == 2:
    dfr = df_red.copy()
    dfr.loc[(dfr.lawa_name!='trifluoressigsaeure') & (dfr.conc_group==3), 'conc_group'] = 2
    dfr.loc[(dfr.lawa_name=='trifluoressigsaeure') & (dfr.conc_group==2), 'conc_group'] = 1
    dfr.loc[(dfr.lawa_name=='trifluoressigsaeure') & (dfr.conc_group==3), 'conc_group'] = 2
elif n_classes == 3:
    dfr = df_red.copy()

if n_classes == 2:
    dfa = df_all.copy()
    dfa.loc[(dfa.lawa_name!='trifluoressigsaeure') & (dfa.conc_group==3), 'conc_group'] = 2
    dfa.loc[(dfa.lawa_name=='trifluoressigsaeure') & (dfa.conc_group==2), 'conc_group'] = 1
    dfa.loc[(dfa.lawa_name=='trifluoressigsaeure') & (dfa.conc_group==3), 'conc_group'] = 2
elif n_classes == 3:
    dfa = df_all.copy()

In [None]:
### select prioritary compounds
prio_compounds = ['desphenyl-chloridazon', 'metazachlor esa', 'metazachlorsäure', 'methyl-desphenylchloridazon', 'metolachlor esa', 'metolachlor-ca', 's-metolachlor-metabolit noa 413173', 'trifluoressigsaeure']

In [None]:
# define crops (sentinel ids) for which the compound / parent of the metabolites is applied
relevant_sids = {
    'desphenyl-chloridazon': [14, 16], 
    'metazachlor esa': [12, 16, 20], 
    'metazachlorsäure': [12, 16, 20],
    'methyl-desphenylchloridazon': [14, 16], 
    'metolachlor esa': [7, 9, 10], 
    'metolachlor-ca': [7, 9, 10], 
    's-metolachlor-metabolit noa 413173': [7, 9, 10], 
    'trifluoressigsaeure': [1, 2, 3, 4, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
}

In [None]:
### feature selection
# feature selection A: process-oriented features (=reduced features)
fs_a = ['filter_ok_unter_gok', 'sand_depth_weighted_mean', 'corg_gehalt_depth_weighted_mean', 'Makroporen_rounded', 'gwn_mean', 'HA', 'kf_bez'] 
# + relevant areas

# feature selection B: # not used anymore
fs_b = fs_a + ['GR_NR', 'airtemp_mean_mean_Decade_2010_2019', 'mean_precip_summer', 'mean_precip_winter', 'precip_ratio_as_om', 'elevation_elevation_1KMmd', 'slope_slope_1KMmd', 'GA', 'GC', 'DSD_1', 'DSD_2', 'LP_1', 'LP_2', 'SD_1', 'SD_2', 'schluff_depth_weighted_mean', 'ton_depth_weighted_mean', 'swr_mean'] 
# + all areas

# feature selection C: # (=all features)
fs_c = fs_b + ['mean_nitrat', 'mean_ammonium', 'mean_magnesium', 'mean_sulfat', 'mean_chlorid', 'mean_kalium', 'mean_natrium', 'arable_land_new']

feature_selections = ['A', 'C']

In [None]:
### set parameters for nested cross-validation
folder_name = '18_2classesMTOcorrection' # output folder

# number of folds
n_inner_folds = 5
n_outer_folds = 5

# hyperparameter space (example)
batch_size = [512, 1024]
max_epochs = [500, 1000]
hyperparameter = dict(batch_size=batch_size, max_epochs=max_epochs)

# calculate number of runs
n_runs = (n_inner_folds * n_outer_folds * len(batch_size) * len(max_epochs) + n_outer_folds) * len(feature_selections) * len(prio_compounds)
print('number of model runs:', n_runs)

number of model runs: 630


In [None]:
### loop over compounds and feature selections
run_count = 1

for compound_name in prio_compounds[:]:
    print('\n', '*'*50, '\n\n', compound_name.upper(), '\n')
    
    for feature_selection in feature_selections:
        print('feature_selection:', feature_selection)
        # output folder
        output_folder = f'F:/PROJEKTE/FARM/Ephraim_Erkens/bgr-grwv/develoments_BGR/EE/output/nestedCV/{folder_name}/{compound_name}/feature_selection_{feature_selection}/'
        for subfolder in ['data', 'models', 'fig']:
            directory = f'{output_folder}{subfolder}/'
            Path(directory).mkdir(parents=True, exist_ok=True)
        Path(f'{directory}pdp/').mkdir(parents=True, exist_ok=True)
        
        # select data (compound and features)
        if feature_selection=='A':
            relevant_areas = [f'area_sentinel_id_{sid}' for sid in relevant_sids[compound_name]]
            print('relevant areas:', relevant_areas)
            selected_columns = fs_a + relevant_areas + ['conc_group']
            data = dfr.loc[dfr.lawa_name==compound_name, selected_columns]    
            dff = dfr.copy()
        if feature_selection=='C':
            relevant_areas = [f'area_sentinel_id_{sid}' for sid in relevant_sids[compound_name]]
            print('relevant areas:', relevant_areas)
            selected_columns = fs_c + relevant_areas + ['conc_group']
            data = dfa.loc[dfa.lawa_name==compound_name, selected_columns]
            dff = dfa.copy()

        # replace categorical strings by True/False for each category
        dummy_columns = data.select_dtypes(['object', 'category']).columns
        data = pd.get_dummies(data, columns=dummy_columns)
        # Remove rows with any NaN values
        data = data.dropna()
        
        # determine features and labels
        y = data.pop('conc_group')
        y = y.reset_index(drop=True) # labels
        X = data.copy() # features
        feature_names = X.columns.to_list()
        # np.save(f'{path}data/{compound_name}/feature_names_{compound_name}.npy', np.array(feature_names))
        
        # binarize y
        label_binarizer = LabelBinarizer()
        one_hot_encoded_y = label_binarizer.fit_transform(y)
        
        # save feature names
        feature_names = X.columns.to_list()
        # np.save(f'{output_folder}data/feature_names.npy', np.array(feature_names))

        
        ### perform nested cross-validation
        # importlib.reload(nn) # only needed if nn was modified
        # performance data frame
        performance = pd.DataFrame(columns=['compound_name', 'feature_selection', 'test_set', 'avg_time_per_fold', 'batch_size', 'max_epochs', 'outer_fold_index', 'f1_untuned', 'threshold', 'f1_tuned', 'f1_tuning_list']) 
        performance.f1_tuning_list = performance.f1_tuning_list.astype('object')
        t_start = time.time()

        print('shape of feature matrix:', X.shape)
        
        # save feature names
        np.save(f'{output_folder}data/feature_names.npy', X.columns.values)
        pd.DataFrame(feature_names).T.to_csv(f'{output_folder}data/feature_names.csv', index=False)
        
        # split data into outer folds
        outer_kfold = StratifiedKFold(n_outer_folds, shuffle=True, random_state=0)
        for outer_fold_index, (outer_train_index, outer_test_index) in enumerate(outer_kfold.split(X, y)):
            print(time.asctime(time.localtime(time.time())), f'{run_count}/{n_runs}', 'outer fold', outer_fold_index)
            X_outer_train = X.iloc[outer_train_index]
            X_outer_test = X.iloc[outer_test_index]
            y_outer_train = one_hot_encoded_y[outer_train_index]
            y_outer_test = one_hot_encoded_y[outer_test_index]
            mid_outer_train = dff.loc[data.index, 'messstellen_id'].reset_index().loc[outer_train_index, 'messstellen_id'] # messstellen_id
            mid_outer_test = dff.loc[data.index, 'messstellen_id'].reset_index().loc[outer_test_index, 'messstellen_id'] # messstellen_id
        
            # iterate over hyperparameter combinations (grid search)
            for batch_size in hyperparameter['batch_size'][:]:
                for max_epochs in hyperparameter['max_epochs'][:]:
                    print(time.asctime(time.localtime(time.time())), f'{run_count}/{n_runs}', '  batch size', batch_size)
                    print(time.asctime(time.localtime(time.time())), f'{run_count}/{n_runs}', '  max_epochs', max_epochs)
        
                    inner_test_proba = pd.DataFrame(columns=['inner_fold_index', 'true_conc_group', 'probability'], dtype='float') # dataframe to save prediction probabilities for threshold optimization after iterating over all inner folds
                    t1_inner_fold = time.time()
                    # split data into inner folds
                    inner_kfold = StratifiedKFold(n_inner_folds, shuffle=True, random_state=1)
                    for inner_fold_index, (inner_train_index, inner_test_index) in enumerate(inner_kfold.split(X_outer_train, y_outer_train)):
                        print(time.asctime(time.localtime(time.time())), f'{run_count}/{n_runs}', '    inner fold', inner_fold_index, end=' ' if inner_fold_index+1==n_inner_folds else '\n')
                        
                        # binarize y
                        label_binarizer = LabelBinarizer()
                        y_outer_train_1hot = label_binarizer.fit_transform(y_outer_train)
                        
                        X_inner_train = X_outer_train.iloc[inner_train_index]
                        X_inner_test = X_outer_train.iloc[inner_test_index]
                        y_inner_train = y_outer_train[inner_train_index]
                        y_inner_test = y_outer_train[inner_test_index]
                        mid_inner_test = mid_outer_train.reset_index().loc[inner_test_index, 'messstellen_id']
        
                        # split train data into train and validation data
                        X_inner_train, X_inner_val, y_inner_train, y_inner_val = train_test_split(X_inner_train, y_inner_train, test_size=0.2, random_state=2, shuffle=True, stratify=y_inner_train)
        
                        # Standardize features
                        scaler = StandardScaler()
                        X_inner_train = scaler.fit_transform(X_inner_train)
                        X_inner_val = scaler.fit_transform(X_inner_val)
                        X_inner_test = scaler.transform(X_inner_test)
                        # save scaler 
                        dump(scaler, f'{output_folder}data/StdScaler_innerFold{inner_fold_index}_outerFold{outer_fold_index}.bin')

                        # save data
                        for arr, filename in zip([X_inner_train, y_inner_train, X_inner_val, y_inner_val, X_inner_test, y_inner_test], ['X_inner_train', 'y_inner_train', 'X_inner_val', 'y_inner_val', 'X_inner_test', 'y_inner_test']):
                            np.save(f'{output_folder}data/{filename}_nCV_innerFold{inner_fold_index}_outerFold{outer_fold_index}.npy', arr)
                            pd.DataFrame(arr).to_csv(f'{output_folder}data/{filename}_nCV_innerFold{inner_fold_index}_outerFold{outer_fold_index}.csv', index=True)
        

                        ## train model using hyperparameter selection
                        # initialize classifier
                        fnn = nn.FNNClassifier(input_shape=(X_inner_train.shape[1],), num_classes=n_classes, compound_name=compound_name, class_names=np.arange(n_classes)+1)
                        # build model
                        fnn.build_model()
                        # train model
                        fnn.train(
                            X_inner_train,
                            y_inner_train,
                            X_inner_val,
                            y_inner_val,
                            batch_size=batch_size,
                            max_epochs=max_epochs
                        )
                        # save model
                        fnn.model.save(f'{output_folder}models/fnn_nCV_innerFold{inner_fold_index}_outerFold{outer_fold_index}.keras')
                        # np.save(f'{output_folder}models/cm_nCV_innerFold{inner_fold_index}_outerFold{outer_fold_index}.npy', fnn.confusion)
                        
                        # make predictions
                        proba = fnn.model.predict(X_inner_test, verbose=0)
                        inner_fold_proba = pd.DataFrame(columns=['messstellen_id', 'inner_fold_index', 'true_conc_group', 'probability', 'pred_conc_group_untuned', 'pred_conc_group_tuned'])
                        inner_fold_proba.true_conc_group = y_inner_test.reshape(-1)
                        inner_fold_proba.probability = proba.reshape(-1)
                        inner_fold_proba.inner_fold_index = inner_fold_index
                        inner_fold_proba.pred_conc_group_untuned = (inner_fold_proba.probability>=.5).astype('int')
                        inner_fold_proba.messstellen_id = mid_inner_test
                        # add prediction probabilities to data frame
                        inner_test_proba = pd.concat([inner_test_proba if not inner_test_proba.empty else None, inner_fold_proba], ignore_index=True)
                        run_count += 1
        
                    # calculate f1-score before threshold tuning
                    f1_untuned = f1_score(inner_test_proba.true_conc_group.values, inner_test_proba.pred_conc_group_untuned.values, pos_label=1, average='macro')
                    
                    ## optimize threshold
                    thresholds = np.arange(0, 1.01, 0.01)
                    f1_scores_tuning = []
                    for t in thresholds:
                        y_pred_t = (inner_test_proba.probability>=t).astype('int')
                        f1_t = f1_score(inner_test_proba.true_conc_group.values, y_pred_t, pos_label=1, average='macro')
                        f1_scores_tuning.append(f1_t)
                    # find highest f1-score after threshold tuning
                    i_maxf1 = np.argmax(f1_scores_tuning)
                    threshold_tuned = thresholds[i_maxf1]
                    f1_tuned = f1_scores_tuning[i_maxf1]
                    print(f'f1_tuned: {f1_tuned:.3f}')
                    inner_test_proba.pred_conc_group_tuned = (inner_test_proba.probability>=threshold_tuned).astype('int')
                    # safe predictions
                    inner_test_proba.to_csv(f'{output_folder}data/predictions_innerFoldsCombined_outerFold{outer_fold_index}.csv', index=False)
                    
                    cm_tuned = confusion_matrix(inner_test_proba.true_conc_group, inner_test_proba.pred_conc_group_tuned)
                    # safe confusion matrix of tuned predictions
                    pd.DataFrame(cm_tuned, columns=['1_pred', '2_pred'], index=['1_true', '2_true']).to_csv(f'{output_folder}data/cm_stacked_innerFoldsCombined_outerFold{outer_fold_index}.csv', index=True)
        
                    # time per inner fold
                    t2_inner_fold = time.time()
                    t_inner_fold = (t2_inner_fold-t1_inner_fold)/n_inner_folds
            
                    # save performance measures (f1-score etc.)
                    fold_performance = dict(compound_name=compound_name, feature_selection=feature_selection, test_set='inner_combined', batch_size=batch_size, max_epochs=max_epochs, outer_fold_index=outer_fold_index, f1_untuned=f1_untuned, threshold_tuned=threshold_tuned, f1_tuned=f1_tuned, f1_tuning_list=None, avg_time_per_fold=t_inner_fold)
                    fold_performance = pd.DataFrame(fold_performance, index=[0])
                    performance = pd.concat([performance if not performance.empty else None, fold_performance], ignore_index=True)
                    performance.f1_tuning_list = performance.f1_tuning_list.astype('object')
                    performance.at[performance.index[-1], 'f1_tuning_list'] = f1_scores_tuning
        
            t1_outer_fold = time.time()
                        
            # compare performance measure and get best hyperparameter combination and corresponding threshold
            best_batch_size = performance.loc[performance[performance.outer_fold_index==outer_fold_index].f1_tuned.idxmax(), 'batch_size']
            best_max_epochs = performance.loc[performance[performance.outer_fold_index==outer_fold_index].f1_tuned.idxmax(), 'max_epochs']
            best_threshold = performance.loc[performance[performance.outer_fold_index==outer_fold_index].f1_tuned.idxmax(), 'threshold_tuned']
        
            # split train data into train and validation data
            X_outer_train, X_outer_val, y_outer_train, y_outer_val = train_test_split(X_outer_train, y_outer_train, test_size=0.2, random_state=42, shuffle=True, stratify=y_outer_train)
        
            # Standardize features
            scaler = StandardScaler()
            X_outer_train = scaler.fit_transform(X_outer_train)
            X_outer_val = scaler.transform(X_outer_val)
            X_outer_test = scaler.transform(X_outer_test)
            # save scaler 
            dump(scaler, f'{output_folder}data/StdScaler_outerFold{outer_fold_index}.bin')

            # save data
            for arr, filename in zip([X_outer_train, y_outer_train, X_outer_val, y_outer_val, X_outer_test, y_outer_test], ['X_outer_train', 'y_outer_train', 'X_outer_val', 'y_outer_val', 'X_outer_test', 'y_outer_test']):
                np.save(f'{output_folder}data/{filename}_nCV_outerFold{outer_fold_index}.npy', arr)
                pd.DataFrame(arr).to_csv(f'{output_folder}data/{filename}_nCV_outerFold{outer_fold_index}.csv', index=True)
     
            print(time.asctime(time.localtime(time.time())), f'{run_count}/{n_runs}', '  outer fold', outer_fold_index, 'with tuned HP', end=' ')
            # train outer fold with best hyperparameter combination
            fnn = nn.FNNClassifier(input_shape=(X_inner_train.shape[1],), num_classes=n_classes, compound_name=compound_name, class_names=np.arange(n_classes)+1)
            fnn.build_model()
            fnn.train(
                X_outer_train,
                y_outer_train,
                X_outer_val,
                y_outer_val,
                batch_size=best_batch_size,
                max_epochs=best_max_epochs
            )
            # evaluate performance
            fnn.evaluate(
                X_outer_test,
                y_outer_test,
                # threshold=best_threshold
            )
            # safe model
            fnn.model.save(f'{output_folder}models/fnn_nCV_outerFold{outer_fold_index}.keras')
            # np.save(f'{output_folder}models/cm_nCV_outerFold{outer_fold_index}.npy', fnn.confusion)
            
            
            proba = fnn.model.predict(X_outer_test, verbose=0)
            y_predicted = np.array([1 if x >= 0.5 else 0 for x in proba])
            f1_untuned = f1_score(y_outer_test, y_predicted, pos_label=1, average='macro')
            y_predicted_bestThresh = np.array([1 if x >= best_threshold else 0 for x in proba])
            f1_tuned = f1_score(y_outer_test, y_predicted_bestThresh, pos_label=1, average='macro')
            print(f'f1_tuned: {f1_tuned:.3f}')

            # safe predictions
            outer_test_proba = pd.DataFrame(columns=['messstellen_id', 'outer_fold_index', 'true_conc_group', 'probability', 'pred_conc_group_untuned', 'pred_conc_group_tuned', 'used_best_threshold', 'pred_conc_group_tuned_new', 'tuned_threshold_new_unused'])
            outer_test_proba.outer_fold_index = outer_fold_index
            outer_test_proba.true_conc_group = y_outer_test.reshape(-1)
            outer_test_proba.probability = proba
            outer_test_proba.pred_conc_group_untuned = y_predicted
            outer_test_proba.pred_conc_group_tuned = y_predicted_bestThresh
            outer_test_proba.used_best_threshold = best_threshold
            outer_test_proba.messstellen_id = mid_outer_test
                    
            cm_tuned = confusion_matrix(inner_test_proba.true_conc_group, inner_test_proba.pred_conc_group_tuned)
            # safe confusion matrix of tuned predictions
            pd.DataFrame(cm_tuned, columns=['1_pred', '2_pred'], index=['1_true', '2_true']).to_csv(f'{output_folder}data/cm_stacked_outerFold_outerFold{outer_fold_index}.csv', index=True)
        
            # new threshold tuning (just for comparison)
            f1_scores_tuning = []
            for t in thresholds:
                y_pred_t = np.array([1 if x >= t else 0 for x in proba])
                f1_t = f1_score(y_outer_test, y_pred_t, pos_label=1, average='macro')
                f1_scores_tuning.append(f1_t)
            i_max_f1 = np.argmax(f1_scores_tuning)
            tuned_threshold_new_unused = thresholds[i_max_f1]
            y_pred_new = np.array([1 if x >= tuned_threshold_new_unused else 0 for x in proba])

            outer_test_proba.pred_conc_group_tuned_new = y_pred_new
            outer_test_proba.tuned_threshold_new_unused = tuned_threshold_new_unused
            outer_test_proba.to_csv(f'{output_folder}data/predictions_outerFold_outerFold{outer_fold_index}.csv', index=False)
        
            t2_outer_fold = time.time()
            t_outer_fold = t2_outer_fold-t1_outer_fold
            
            # save performance measures (f1-score etc.)
            fold_performance = dict(compound_name=compound_name, feature_selection=feature_selection, test_set='outer', batch_size=best_batch_size, max_epochs=best_max_epochs, outer_fold_index=outer_fold_index, f1_untuned=f1_untuned, threshold_tuned=best_threshold, f1_tuned=f1_tuned, f1_tuning_list=None, avg_time_per_fold=t_outer_fold) 
            fold_performance = pd.DataFrame(fold_performance, index=[0])
            performance = pd.concat([performance if not performance.empty else None, fold_performance], ignore_index=True)
            performance.f1_tuning_list = performance.f1_tuning_list.astype('object')
            performance.at[performance.index[-1], 'f1_tuning_list'] = f1_scores_tuning
        
            run_count += 1
        
        t_end = time.time()
        
        # safe performance data frame
        performance.to_csv(f'{output_folder}performances.csv', index=False)
        
        # print(f'total time: {(t_end-t_start)/n_runs} seconds')
        # print(f'average time per model run: {(t_end-t_start)/n_runs:.0f} seconds')


 ************************************************** 

 METOLACHLOR ESA 

feature_selection: A
relevant areas: ['area_sentinel_id_7', 'area_sentinel_id_9', 'area_sentinel_id_10']
shape of feature matrix: (8757, 17)
Thu Dec 12 08:57:24 2024 1/630 outer fold 0
Thu Dec 12 08:57:24 2024 1/630   batch size 512
Thu Dec 12 08:57:24 2024 1/630   max_epochs 500
Thu Dec 12 08:57:24 2024 1/630     inner fold 0
Thu Dec 12 08:58:23 2024 2/630     inner fold 1
Thu Dec 12 08:58:42 2024 3/630     inner fold 2
Thu Dec 12 08:59:40 2024 4/630     inner fold 3
Thu Dec 12 09:00:04 2024 5/630     inner fold 4 f1_tuned: 0.711
Thu Dec 12 09:00:30 2024 6/630   batch size 512
Thu Dec 12 09:00:30 2024 6/630   max_epochs 1000
Thu Dec 12 09:00:30 2024 6/630     inner fold 0
Thu Dec 12 09:01:38 2024 7/630     inner fold 1
Thu Dec 12 09:02:00 2024 8/630     inner fold 2
Thu Dec 12 09:03:58 2024 9/630     inner fold 3
Thu Dec 12 09:04:21 2024 10/630     inner fold 4 f1_tuned: 0.712
Thu Dec 12 09:04:46 2024 11/630   b