In [1]:
import pandas as pd
import numpy as np
import random
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import matplotlib
import warnings
import pickle
import geopandas as gpd
import os

# matplotlib.style.use('ggplot')
# plt.rcParams['axes.facecolor']='w'
warnings.filterwarnings('ignore')

import time

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.utils import class_weight
from sklearn.model_selection import KFold


In [None]:
# import sys
# import argparse
# parser = argparse.ArgumentParser(
#                     prog='Test Multiple Model',
#                     description='For Classification & Hurdle tasks, test the model performace',
#                     epilog='')
# parser.add_argument('-sp', '--species')      # option that takes a value
# parser.add_argument('-year','--year')
# parser.add_argument('-size', '--sample_size')  # on/off flag
# parser.add_argument('-o','--output_path', default='./Model_Performance/')
# args = parser.parse_args()
# print(args)

# sp = args.species
# year = int(args.year)
# SAMPLE_SIZE = int(args.sample_size)
# OUTPUT_DIR = os.path.join(args.output_path, sp)

# if not os.path.exists(OUTPUT_DIR):
#     os.makedirs(OUTPUT_DIR)

In [2]:
sp = 'Mallard'
year = 2020
SAMPLE_SIZE = 1000
OUTPUT_DIR = f'./Model_Performance/{sp}'

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)


In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
from BirdSTEM.utils.plot_gif import make_sample_gif, make_sample_gif_hexagon


# load training data


In [5]:
checklist_data = pd.read_csv(f'./BirdSTEM/dataset/test_data/checklist_data/checklist_data_filtered_{year}.csv')

### mallard 2020
with open(f'./BirdSTEM/dataset/test_data/sp_data/{sp}/{sp}_{year}.pkl','rb') as f:
    sp_data = pickle.load(f)
    
checklist_data = checklist_data.merge(sp_data, on='sampling_event_identifier', how='left')
checklist_data['count'] = checklist_data['count'].fillna(0)


# Train test split

In [6]:
from sklearn.model_selection import train_test_split
from BirdSTEM.dataset.get_test_x_names import get_test_x_names

x_names = get_test_x_names()
X = checklist_data[['sampling_event_identifier','longitude','latitude'] + x_names]
y = checklist_data['count'].values

_, X, _, y = train_test_split(X, y, test_size=SAMPLE_SIZE, stratify=np.where(y>0, 1, 0))
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3,
                                                    stratify = np.where(y>0, 1, 0), shuffle=True)




# Test model

## Task1: Classification (modeling occurrence)

### First, without AdaSTEM wrapper

In [7]:
from BirdSTEM.model.AdaSTEM import AdaSTEM, AdaSTEMClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from elapid import MaxentModel


In [8]:

cls_model_set_dict = {
    'LogisticRegression':LogisticRegression(random_state=42),
    'SGDClassifier':SGDClassifier(random_state=42),
    'GaussianNB':GaussianNB(),
    'KNeighborsClassifier':KNeighborsClassifier(),
    'DecisionTreeClassifier':DecisionTreeClassifier(random_state=42),
    # 'SVC_linear':SVC(kernel='linear',random_state=42),
    'SVC_rbf':SVC(kernel='rbf',random_state=42),
    'RandomForestClassifier':RandomForestClassifier(random_state=42),
    'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(),
    'MLPClassifier':MLPClassifier(random_state=42),
    'MaxEnt':MaxentModel(transform='cloglog', beta_multiplier=2.0),
    'XGBClassifier':XGBClassifier(tree_method='hist',random_state=42, n_jobs=1),
    'ComplementNB':ComplementNB(),
}


In [9]:
K=5

cls_metric_df_list = []
for item in list(cls_model_set_dict.keys())[::-1]:
    model_name = item
    model = cls_model_set_dict[model_name]
    
    kf = KFold(n_splits=K, shuffle=True, random_state=42).split(X, y)
    for kf_count, (train_index, test_index) in tqdm(enumerate(kf), desc=f'{model_name}', total=K):
        
        try:
            X_train = X.iloc[train_index].replace(-1,np.nan)
            imputer = SimpleImputer().fit(X_train[x_names])
            X_train[x_names] = imputer.transform(X_train[x_names])
            scaler = MinMaxScaler().fit(X_train[x_names])
            X_train[x_names] = scaler.transform(X_train[x_names])
            
            y_train = np.where(y[train_index]>0, 1, 0)
            
            X_test = X.iloc[test_index].replace(-1,np.nan)
            X_test[x_names] = imputer.transform(X_test[x_names])
            X_test[x_names] = scaler.transform(X_test[x_names])
            y_test = np.where(y[test_index]>0, 1, 0)
            
            
            sample_weights = class_weight.compute_sample_weight(class_weight='balanced',y=y_train)
            
            a = time.time()

            try:
                start_time = time.time()
                model.fit(X_train[x_names], y_train, sample_weight=sample_weights)
                finish_time = time.time()
                training_time = finish_time - start_time
            except:
                start_time = time.time()
                model.fit(X_train[x_names], y_train)
                finish_time = time.time()
                training_time = finish_time - start_time
                
            start_time = time.time()
            y_pred = model.predict(X_test[x_names])
            finish_time = time.time()
            predicting_time = finish_time - start_time
            
            y_pred = np.where(y_pred<0, 0, y_pred)
            metric_df = AdaSTEM.eval_STEM_res('classification', y_test, y_pred)
            
            metric_df['model'] = model_name
            metric_df['task_type'] = 'classification'
            metric_df['iter'] = kf_count
            metric_df['sp'] = sp
            metric_df['sample_size'] = SAMPLE_SIZE
            metric_df['training_time'] = training_time
            metric_df['predicting_time'] = predicting_time
            
            cls_metric_df_list.append(metric_df)
            
            print(metric_df)
            
        except Exception as e:
            print(e)
            continue



    

ComplementNB:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5844674287788061, 'kappa': 0.09768125065575495, 'f1': 0.3174603174603175, 'precision': 0.21505376344086022, 'recall': 0.6060606060606061, 'average_precision': 0.19533561420658196, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'ComplementNB', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.004089832305908203, 'predicting_time': 0.002395153045654297}
{'AUC': 0.6283641916396259, 'kappa': 0.13001449975833723, 'f1': 0.338235294117647, 'precision': 0.21904761904761905, 'recall': 0.7419354838709677, 'average_precision': 0.20251920122887865, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'ComplementNB', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.0030660629272460938, 'predicting_time': 0.002087831497192383}
{'AUC': 0.5386499727817093, 'kappa':

XGBClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5215024496461621, 'kappa': 0.05870696061431746, 'f1': 0.13636363636363635, 'precision': 0.2727272727272727, 'recall': 0.09090909090909091, 'average_precision': 0.17479338842975206, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'XGBClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.1596369743347168, 'predicting_time': 0.004869937896728516}
{'AUC': 0.5566902080549723, 'kappa': 0.12137310993052719, 'f1': 0.2456140350877193, 'precision': 0.2692307692307692, 'recall': 0.22580645161290322, 'average_precision': 0.1807940446650124, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'XGBClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.16182827949523926, 'predicting_time': 0.004032135009765625}
{'AUC': 0.5910905461803666, 'kappa': 

MaxEnt:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5897296316457993, 'kappa': 0.12697393760431375, 'f1': 0.32, 'precision': 0.23880597014925373, 'recall': 0.48484848484848486, 'average_precision': 0.20078471279963817, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'MaxEnt', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 1.035444974899292, 'predicting_time': 0.01596212387084961}
{'AUC': 0.5660431380034358, 'kappa': 0.0888090349075975, 'f1': 0.2828282828282828, 'precision': 0.20588235294117646, 'recall': 0.45161290322580644, 'average_precision': 0.17797912713472486, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'MaxEnt', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 1.047292947769165, 'predicting_time': 0.01797199249267578}
{'AUC': 0.5382870622391581, 'kappa': 0.054649054649054674, 'f1': 0.26

MLPClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5370168753402287, 'kappa': 0.08312958435207818, 'f1': 0.2105263157894737, 'precision': 0.25, 'recall': 0.18181818181818182, 'average_precision': 0.18045454545454548, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'MLPClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 2.462960958480835, 'predicting_time': 0.005415916442871094}
{'AUC': 0.5438060698606604, 'kappa': 0.11894273127753308, 'f1': 0.19047619047619047, 'precision': 0.36363636363636365, 'recall': 0.12903225806451613, 'average_precision': 0.18192082111436952, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'MLPClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 2.9744131565093994, 'predicting_time': 0.0026140213012695312}
{'AUC': 0.5334784975503538, 'kappa': 0.09790395330

QuadraticDiscriminantAnalysis:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5866448920341136, 'kappa': 0.06652734238941138, 'f1': 0.32323232323232326, 'precision': 0.19393939393939394, 'recall': 0.9696969696969697, 'average_precision': 0.19306244260789715, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.17801904678344727, 'predicting_time': 0.011209964752197266}
{'AUC': 0.5422790608894827, 'kappa': 0.03345163482594582, 'f1': 0.2808988764044944, 'precision': 0.17006802721088435, 'recall': 0.8064516129032258, 'average_precision': 0.16715163484748738, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.017436981201171875, 'predicting_time': 0.0038869380950927734}
{

RandomForestClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5121575031754672, 'kappa': 0.03902154921374501, 'f1': 0.05714285714285715, 'precision': 0.5, 'recall': 0.030303030303030304, 'average_precision': 0.17515151515151517, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'RandomForestClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.23572683334350586, 'predicting_time': 0.007096052169799805}
{'AUC': 0.4954189730864669, 'kappa': -0.013143483023001057, 'f1': 0.05128205128205128, 'precision': 0.125, 'recall': 0.03225806451612903, 'average_precision': 0.15403225806451612, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'RandomForestClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.16563677787780762, 'predicting_time': 0.006845235824584961}
{'AUC': 0.5303030303030303, 'kappa': 0.09

SVC_rbf:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6566866267465069, 'kappa': 0.1978916007791911, 'f1': 0.38596491228070173, 'precision': 0.2716049382716049, 'recall': 0.6666666666666666, 'average_precision': 0.23606995884773663, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'SVC_rbf', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.0267636775970459, 'predicting_time': 0.016335248947143555}
{'AUC': 0.6757014697461348, 'kappa': 0.19922086354290658, 'f1': 0.3833333333333334, 'precision': 0.25842696629213485, 'recall': 0.7419354838709677, 'average_precision': 0.23173613628126136, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'SVC_rbf', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.027545928955078125, 'predicting_time': 0.01764702796936035}
{'AUC': 0.6291961531482491, 'kappa': 0.17528311176

DecisionTreeClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6159499183451279, 'kappa': 0.22911437791323053, 'f1': 0.35820895522388063, 'precision': 0.35294117647058826, 'recall': 0.36363636363636365, 'average_precision': 0.23334224598930484, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'DecisionTreeClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.020498991012573242, 'predicting_time': 0.002701997756958008}
{'AUC': 0.6047909906470701, 'kappa': 0.1830610203401134, 'f1': 0.3287671232876712, 'precision': 0.2857142857142857, 'recall': 0.3870967741935484, 'average_precision': 0.20559907834101382, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'DecisionTreeClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.017735004425048828, 'predicting_time': 0.002502918243408203}
{'AUC': 0.600616

KNeighborsClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5306659408455815, 'kappa': 0.07975460122699396, 'f1': 0.1702127659574468, 'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'average_precision': 0.17963203463203464, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'KNeighborsClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.002353191375732422, 'predicting_time': 0.02277088165283203}
{'AUC': 0.5158427180759687, 'kappa': 0.04082636497786518, 'f1': 0.13333333333333333, 'precision': 0.21428571428571427, 'recall': 0.0967741935483871, 'average_precision': 0.16073732718894007, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'KNeighborsClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.002552032470703125, 'predicting_time': 0.01469111442565918}
{'AUC': 0.54563600072

GaussianNB:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5567047722736346, 'kappa': 0.04159733777038266, 'f1': 0.3076923076923077, 'precision': 0.18285714285714286, 'recall': 0.9696969696969697, 'average_precision': 0.18231601731601732, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'GaussianNB', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.0041637420654296875, 'predicting_time': 0.0022220611572265625}
{'AUC': 0.5564993319335751, 'kappa': 0.040294037571467545, 'f1': 0.29145728643216084, 'precision': 0.17261904761904762, 'recall': 0.9354838709677419, 'average_precision': 0.17148233486943165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'GaussianNB', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.0038051605224609375, 'predicting_time': 0.0022258758544921875}
{'AUC': 0.5077118490292143, 'kappa

SGDClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6785519869352206, 'kappa': 0.19166342033502148, 'f1': 0.39416058394160586, 'precision': 0.25961538461538464, 'recall': 0.8181818181818182, 'average_precision': 0.24241258741258745, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'SGDClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.006205081939697266, 'predicting_time': 0.0032320022583007812}
{'AUC': 0.566711204428326, 'kappa': 0.0469159003959998, 'f1': 0.29702970297029696, 'precision': 0.17543859649122806, 'recall': 0.967741935483871, 'average_precision': 0.1747792869269949, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'SGDClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.017700672149658203, 'predicting_time': 0.0029799938201904297}
{'AUC': 0.5489929232444203, 'kappa

LogisticRegression:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6750136091453457, 'kappa': 0.21130463358527762, 'f1': 0.4, 'precision': 0.27586206896551724, 'recall': 0.7272727272727273, 'average_precision': 0.24562695924764888, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'LogisticRegression', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.020888090133666992, 'predicting_time': 0.002315998077392578}
{'AUC': 0.6888719221225424, 'kappa': 0.21100330525642386, 'f1': 0.3934426229508197, 'precision': 0.26373626373626374, 'recall': 0.7741935483870968, 'average_precision': 0.23918291386033322, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'LogisticRegression', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.032016754150390625, 'predicting_time': 0.0022771358489990234}
{'AUC': 0.5927236436218474, 'kappa': 0

In [10]:
cls_metric_df = pd.DataFrame(cls_metric_df_list)
cls_metric_df.to_csv(os.path.join(OUTPUT_DIR, f'cls_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv'),
                                        index=False)


### Then, with AdaSTEM wrapper

In [11]:

K=5

Ada_cls_metric_df_list = []
for item in list(cls_model_set_dict.keys())[::-1]:
    model_name = item
    
    kf = KFold(n_splits=K, shuffle=True, random_state=42).split(X, y)
    for kf_count, (train_index, test_index) in tqdm(enumerate(kf), desc=f'AdaSTEM + {model_name}', total=K):
        
        try:
            X_train = X.iloc[train_index].replace(-1,np.nan)
            
            new_x_names = list(set(x_names) - set(['DOY']))
            
            imputer = SimpleImputer().fit(X_train[new_x_names])
            X_train[new_x_names] = imputer.transform(X_train[new_x_names])
            scaler = MinMaxScaler().fit(X_train[new_x_names])
            X_train[new_x_names] = scaler.transform(X_train[new_x_names])
            
            y_train = np.where(y[train_index]>0, 1, 0)
            X_test = X.iloc[test_index].replace(-1,np.nan)
            X_test[new_x_names] = imputer.transform(X_test[new_x_names])
            X_test[new_x_names] = scaler.transform(X_test[new_x_names])
            y_test = np.where(y[test_index]>0, 1, 0)
            
            model = AdaSTEMClassifier(base_model=cls_model_set_dict[model_name], 
                                        sample_weights_for_classifier=True,
                                        ensemble_fold=5,
                                        min_ensemble_require=3,
                                        grid_len_lon_upper_threshold=25,
                                        grid_len_lon_lower_threshold=5,
                                        grid_len_lat_upper_threshold=25,
                                        grid_len_lat_lower_threshold=5,
                                        points_lower_threshold=50,
                                        stixel_training_size_threshold=50,
                                        temporal_start = 1,
                                        temporal_end = 367, 
                                        temporal_step = 30.5, 
                                        temporal_bin_interval = 30.5,
                                        save_tmp=False, 
                                        save_gridding_plot=False)
            
            try:
                start_time = time.time()
                model.fit(X_train[new_x_names + ['DOY','longitude', 'latitude']], y_train)
                finish_time = time.time()
                training_time = finish_time - start_time
            except:
                start_time = time.time()
                model.set_params(**{'sample_weights_for_classifier':False})
                model.fit(X_train[new_x_names + ['DOY','longitude', 'latitude']], y_train)
                finish_time = time.time()
                training_time = finish_time - start_time
                
            start_time = time.time()
            y_pred = model.predict(X_test[new_x_names + ['DOY','longitude', 'latitude']])
            finish_time = time.time()
            predicting_time = finish_time - start_time
            
            y_pred = np.where(y_pred<0, 0, y_pred)
            metric_df = AdaSTEM.eval_STEM_res('classification', y_test, y_pred)
            
            metric_df['model'] = 'AdaSTEM_' + model_name
            metric_df['task_type'] = 'classification'
            metric_df['iter'] = kf_count
            metric_df['sp'] = sp
            metric_df['sample_size'] = SAMPLE_SIZE
            metric_df['training_time'] = training_time
            metric_df['predicting_time'] = predicting_time
        
            Ada_cls_metric_df_list.append(metric_df)
            print(metric_df)
            
        except Exception as e:
            print(e)
            continue



    

AdaSTEM + ComplementNB:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.70it/s]
training: 100%|██████████| 1231/1231 [00:00<00:00, 9645.45it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_ComplementNB', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3953828811645508, 'predicting_time': 0.720020055770874}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.96it/s]
training: 100%|██████████| 1170/1170 [00:00<00:00, 9625.34it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_ComplementNB', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3399977684020996, 'predicting_time': 0.7334439754486084}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.47it/s]
training: 100%|██████████| 1223/1223 [00:00<00:00, 9434.69it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_ComplementNB', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4179251194000244, 'predicting_time': 0.7439708709716797}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 24.55it/s]
training: 100%|██████████| 1220/1220 [00:00<00:00, 9559.19it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_ComplementNB', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.37309789657592773, 'predicting_time': 0.7677369117736816}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.91it/s]
training: 100%|██████████| 1220/1220 [00:00<00:00, 9287.23it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_ComplementNB', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3518059253692627, 'predicting_time': 0.79168701171875}


AdaSTEM + XGBClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.01it/s]
training: 100%|██████████| 1235/1235 [00:00<00:00, 8905.13it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_XGBClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4044680595397949, 'predicting_time': 0.7440550327301025}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.44it/s]
training: 100%|██████████| 1196/1196 [00:00<00:00, 9577.97it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_XGBClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34852004051208496, 'predicting_time': 0.7439398765563965}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.86it/s]
training: 100%|██████████| 1151/1151 [00:00<00:00, 9277.69it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_XGBClassifier', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.40432286262512207, 'predicting_time': 0.7159640789031982}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 24.52it/s]
training: 100%|██████████| 1227/1227 [00:00<00:00, 9010.47it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_XGBClassifier', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3902711868286133, 'predicting_time': 0.7511563301086426}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.56it/s]
training: 100%|██████████| 1246/1246 [00:00<00:00, 9124.07it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_XGBClassifier', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3775019645690918, 'predicting_time': 0.7524428367614746}


AdaSTEM + MaxEnt:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.07it/s]
training: 100%|██████████| 1228/1228 [00:00<00:00, 8642.84it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MaxEnt', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3871312141418457, 'predicting_time': 0.798738956451416}


Generating Ensemble:  40%|████      | 2/5 [00:00<00:00, 17.37it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.37it/s]
training: 100%|██████████| 1152/1152 [00:00<00:00, 9105.20it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MaxEnt', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.37087202072143555, 'predicting_time': 0.7103831768035889}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.30it/s]
training: 100%|██████████| 1221/1221 [00:00<00:00, 9376.07it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MaxEnt', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4224119186401367, 'predicting_time': 0.7341132164001465}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 23.80it/s]
training: 100%|██████████| 1197/1197 [00:00<00:00, 9432.56it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MaxEnt', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3785362243652344, 'predicting_time': 0.7348892688751221}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.00it/s]
training: 100%|██████████| 1210/1210 [00:00<00:00, 9715.45it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MaxEnt', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3923330307006836, 'predicting_time': 0.7117419242858887}


AdaSTEM + MLPClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.41it/s]
training: 100%|██████████| 1240/1240 [00:00<00:00, 9644.40it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MLPClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34571003913879395, 'predicting_time': 0.7510709762573242}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.79it/s]
training: 100%|██████████| 1182/1182 [00:00<00:00, 9780.81it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MLPClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34702587127685547, 'predicting_time': 0.7190248966217041}


Generating Ensemble:  80%|████████  | 4/5 [00:00<00:00, 17.96it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.74it/s]
training: 100%|██████████| 1250/1250 [00:00<00:00, 9875.57it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MLPClassifier', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34479427337646484, 'predicting_time': 0.7314629554748535}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.24it/s]
training: 100%|██████████| 1214/1214 [00:00<00:00, 9703.01it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MLPClassifier', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34229111671447754, 'predicting_time': 0.7030420303344727}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 29.35it/s]
training: 100%|██████████| 1240/1240 [00:00<00:00, 9782.87it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_MLPClassifier', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.38658714294433594, 'predicting_time': 0.7398800849914551}


AdaSTEM + QuadraticDiscriminantAnalysis:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.14it/s]
training: 100%|██████████| 1183/1183 [00:00<00:00, 9196.93it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34559082984924316, 'predicting_time': 0.7141501903533936}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.43it/s]
training: 100%|██████████| 1180/1180 [00:00<00:00, 8774.26it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35723400115966797, 'predicting_time': 0.6939578056335449}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 19.92it/s]
training: 100%|██████████| 1143/1143 [00:00<00:00, 9534.39it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4152870178222656, 'predicting_time': 0.6862289905548096}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.34it/s]
training: 100%|██████████| 1232/1232 [00:00<00:00, 9618.08it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3534870147705078, 'predicting_time': 0.7314209938049316}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.26it/s]
training: 100%|██████████| 1208/1208 [00:00<00:00, 9457.09it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_QuadraticDiscriminantAnalysis', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.36132001876831055, 'predicting_time': 0.7208240032196045}


AdaSTEM + RandomForestClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble:  80%|████████  | 4/5 [00:00<00:00, 17.06it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.78it/s]
training: 100%|██████████| 1195/1195 [00:00<00:00, 9144.14it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_RandomForestClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35151100158691406, 'predicting_time': 0.7249720096588135}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.55it/s]
training: 100%|██████████| 1161/1161 [00:00<00:00, 8831.86it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_RandomForestClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.36213207244873047, 'predicting_time': 0.7138481140136719}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.21it/s]
training: 100%|██████████| 1168/1168 [00:00<00:00, 9566.95it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_RandomForestClassifier', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3994443416595459, 'predicting_time': 0.7126262187957764}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.52it/s]
training: 100%|██████████| 1202/1202 [00:00<00:00, 9596.03it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_RandomForestClassifier', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35016512870788574, 'predicting_time': 0.7354559898376465}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.86it/s]
training: 100%|██████████| 1244/1244 [00:00<00:00, 9478.08it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_RandomForestClassifier', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.40203213691711426, 'predicting_time': 0.7360851764678955}


AdaSTEM + SVC_rbf:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.50it/s]
training: 100%|██████████| 1220/1220 [00:00<00:00, 9343.23it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SVC_rbf', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.37074995040893555, 'predicting_time': 0.7690229415893555}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.76it/s]
training: 100%|██████████| 1209/1209 [00:00<00:00, 9684.08it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SVC_rbf', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.39814233779907227, 'predicting_time': 0.7390668392181396}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 17.44it/s]
training: 100%|██████████| 1256/1256 [00:00<00:00, 9268.35it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SVC_rbf', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.46923303604125977, 'predicting_time': 0.7618248462677002}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.19it/s]
training: 100%|██████████| 1240/1240 [00:00<00:00, 9394.57it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SVC_rbf', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3584129810333252, 'predicting_time': 0.7705631256103516}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.63it/s]
training: 100%|██████████| 1188/1188 [00:00<00:00, 9137.48it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SVC_rbf', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3973579406738281, 'predicting_time': 0.7764317989349365}


AdaSTEM + DecisionTreeClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.97it/s]
training: 100%|██████████| 1212/1212 [00:00<00:00, 9268.74it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_DecisionTreeClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4123530387878418, 'predicting_time': 0.7467050552368164}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.20it/s]
training: 100%|██████████| 1167/1167 [00:00<00:00, 9453.53it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_DecisionTreeClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34935903549194336, 'predicting_time': 0.7179009914398193}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.92it/s]
training: 100%|██████████| 1191/1191 [00:00<00:00, 9446.92it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_DecisionTreeClassifier', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3985252380371094, 'predicting_time': 0.7365777492523193}


Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.48it/s]
training: 100%|██████████| 1284/1284 [00:00<00:00, 6848.95it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_DecisionTreeClassifier', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4221160411834717, 'predicting_time': 0.8117110729217529}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.27it/s]
training: 100%|██████████| 1246/1246 [00:00<00:00, 8901.71it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_DecisionTreeClassifier', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3820497989654541, 'predicting_time': 0.7443130016326904}


AdaSTEM + KNeighborsClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 19.83it/s]
training: 100%|██████████| 1205/1205 [00:00<00:00, 9352.17it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_KNeighborsClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.42682409286499023, 'predicting_time': 0.7568790912628174}


Generating Ensemble:  80%|████████  | 4/5 [00:00<00:00, 20.51it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.25it/s]
training: 100%|██████████| 1216/1216 [00:00<00:00, 9432.74it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_KNeighborsClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3665587902069092, 'predicting_time': 0.7804107666015625}


Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble:  80%|████████  | 4/5 [00:00<00:00, 22.90it/s]


zero-size array to reduction operation maximum which has no identity


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.78it/s]
training: 100%|██████████| 1197/1197 [00:00<00:00, 9087.27it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_KNeighborsClassifier', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3708369731903076, 'predicting_time': 0.7389659881591797}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 19.95it/s]
training: 100%|██████████| 1209/1209 [00:00<00:00, 8988.99it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_KNeighborsClassifier', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.42641592025756836, 'predicting_time': 0.7114191055297852}


AdaSTEM + GaussianNB:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.18it/s]
training: 100%|██████████| 1198/1198 [00:00<00:00, 9367.00it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_GaussianNB', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3773651123046875, 'predicting_time': 0.6975350379943848}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.20it/s]
training: 100%|██████████| 1134/1134 [00:00<00:00, 9486.69it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_GaussianNB', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.33600401878356934, 'predicting_time': 0.7067949771881104}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 19.75it/s]
training: 100%|██████████| 1278/1278 [00:00<00:00, 9094.35it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_GaussianNB', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.43790388107299805, 'predicting_time': 0.8675720691680908}


Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.42it/s]
training: 100%|██████████| 1183/1183 [00:00<00:00, 7559.35it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_GaussianNB', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.425616979598999, 'predicting_time': 0.7566490173339844}


Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 16.95it/s]
training: 100%|██████████| 1150/1150 [00:00<00:00, 8958.71it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_GaussianNB', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.47662973403930664, 'predicting_time': 0.7099237442016602}


AdaSTEM + SGDClassifier:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.15it/s]
training: 100%|██████████| 1192/1192 [00:00<00:00, 9494.17it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SGDClassifier', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35045623779296875, 'predicting_time': 0.761681079864502}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.93it/s]
training: 100%|██████████| 1187/1187 [00:00<00:00, 9370.89it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SGDClassifier', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.36292386054992676, 'predicting_time': 0.7569379806518555}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.55it/s]
training: 100%|██████████| 1152/1152 [00:00<00:00, 9265.43it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SGDClassifier', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4103519916534424, 'predicting_time': 0.6788530349731445}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 25.97it/s]
training: 100%|██████████| 1276/1276 [00:00<00:00, 9613.28it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SGDClassifier', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.36809706687927246, 'predicting_time': 0.7397670745849609}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.66it/s]
training: 100%|██████████| 1247/1247 [00:00<00:00, 9226.85it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_SGDClassifier', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.41705799102783203, 'predicting_time': 0.733971118927002}


AdaSTEM + LogisticRegression:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble:  20%|██        | 1/5 [00:00<00:00, 13.63it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.58it/s]
training: 100%|██████████| 1196/1196 [00:00<00:00, 9209.79it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_LogisticRegression', 'task_type': 'classification', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35836315155029297, 'predicting_time': 0.6936187744140625}


Generating Ensemble:  40%|████      | 2/5 [00:00<00:00, 16.21it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.54it/s]
training: 100%|██████████| 1171/1171 [00:00<00:00, 9372.42it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_LogisticRegression', 'task_type': 'classification', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3970770835876465, 'predicting_time': 0.6957738399505615}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.75it/s]
training: 100%|██████████| 1225/1225 [00:00<00:00, 9639.89it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_LogisticRegression', 'task_type': 'classification', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34155988693237305, 'predicting_time': 0.7393488883972168}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.38it/s]
training: 100%|██████████| 1206/1206 [00:00<00:00, 9892.07it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_LogisticRegression', 'task_type': 'classification', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3452491760253906, 'predicting_time': 0.7037067413330078}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.74it/s]
training: 100%|██████████| 1245/1245 [00:00<00:00, 9639.07it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': nan, 'MAE': nan, 'MSE': nan, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_LogisticRegression', 'task_type': 'classification', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.39054298400878906, 'predicting_time': 0.7296769618988037}


In [12]:
Ada_cls_metric_df = pd.DataFrame(Ada_cls_metric_df_list)
Ada_cls_metric_df.to_csv(os.path.join(OUTPUT_DIR, f'Ada_cls_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv'),
                                        index=False)


## Task2: Regression (Hurdle)

### First, without AdaSTEM wrapper

In [13]:
from BirdSTEM.model.AdaSTEM import AdaSTEM, AdaSTEMRegressor, AdaSTEMHurdle
from BirdSTEM.model.Hurdle import Hurdle
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Lasso, LinearRegression, BayesianRidge, SGDRegressor
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBClassifier, XGBRegressor


In [14]:
reg_model_set_dict = {
    'Hurdle_SGDClassifier_SGDRegressor':Hurdle(classifier=SGDClassifier(random_state=42), 
                                               regressor=SGDRegressor(random_state=42)),
    'Hurdle_Logistic_Linear':Hurdle(classifier=LogisticRegression(random_state=42), regressor=LinearRegression()),
    'Hurdle_SVC_SVR':Hurdle(classifier=SVC(kernel='rbf'), regressor=SVR(kernel='rbf')),
    'Hurdle_DecisionTreeClassifier_DecisionTreeRegressor':Hurdle(classifier=DecisionTreeClassifier(random_state=42),
                                                regressor=DecisionTreeRegressor(random_state=42)),
    'Hurdle_RandomforestClassifier_RandomforestRegressor':Hurdle(classifier=RandomForestClassifier(random_state=42),
                                                regressor=RandomForestRegressor(random_state=42)),
    'Hurdle_MLPClassifier_MLPRegressor': Hurdle(classifier=MLPClassifier(random_state=42),
                                                regressor=MLPRegressor(random_state=42)),
    'Hurdle_XGBClassifier_XGBregressor': Hurdle(classifier=XGBClassifier(tree_method='hist',n_jobs=1), 
                    regressor=XGBRegressor(tree_method='hist',n_jobs=1))
}

In [15]:
K=5

reg_metric_df_list = []
for item in list(reg_model_set_dict.keys())[::-1]:
    model_name = item
    model = reg_model_set_dict[model_name]
    
    kf = KFold(n_splits=K, shuffle=True, random_state=42).split(X, y)
    for kf_count, (train_index, test_index) in tqdm(enumerate(kf), desc=f'{model_name}', total=K):
        X_train = X.iloc[train_index].replace(-1,np.nan)
        imputer = SimpleImputer().fit(X_train[x_names])
        X_train[x_names] = imputer.transform(X_train[x_names])
        scaler = MinMaxScaler().fit(X_train[x_names])
        X_train[x_names] = scaler.transform(X_train[x_names])
        
        y_train = np.where(y[train_index]>0, 1, 0)
        X_test = X.iloc[test_index].replace(-1,np.nan)
        X_test[x_names] = imputer.transform(X_test[x_names])
        X_test[x_names] = scaler.transform(X_test[x_names])
        y_test = np.where(y[test_index]>0, 1, 0)
        
        sample_weights = class_weight.compute_sample_weight(class_weight='balanced',y=np.where(y_train>0,1,0))
        
        
        try:
            start_time = time.time()
            model.fit(X_train[x_names], y_train, sample_weight=sample_weights)
            finish_time = time.time()
            training_time = finish_time - start_time
        except:
            start_time = time.time()
            model.fit(X_train[x_names], y_train)
            finish_time = time.time()
            training_time = finish_time - start_time
            
        start_time = time.time()
        y_pred = model.predict(X_test[x_names])
        y_pred = np.where(y_pred<0, 0, y_pred)
        finish_time = time.time()
        predicting_time = finish_time - start_time
        
        metric_df = AdaSTEM.eval_STEM_res('hurdle', y_test, np.array(y_pred).flatten())
        metric_df['model'] = model_name
        metric_df['task_type'] = 'hurdle'
        metric_df['iter'] = kf_count
        metric_df['sp'] = sp
        metric_df['sample_size'] = SAMPLE_SIZE
        metric_df['training_time'] = training_time
        metric_df['predicting_time'] = predicting_time
        
        reg_metric_df_list.append(metric_df)
        print(metric_df)


    

Hurdle_XGBClassifier_XGBregressor:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5215024496461621, 'kappa': 0.05870696061431746, 'f1': 0.13636363636363635, 'precision': 0.2727272727272727, 'recall': 0.09090909090909091, 'average_precision': 0.17479338842975206, 'Spearman_r': 0.07001738646266024, 'Pearson_r': 0.07001738646266048, 'R2': -0.3790600270849471, 'MAE': 0.1899999985098839, 'MSE': 0.18999999523162864, 'poisson_deviance_explained': -1.0524139392974612, 'model': 'Hurdle_XGBClassifier_XGBregressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.12179875373840332, 'predicting_time': 0.005148887634277344}
{'AUC': 0.5566902080549723, 'kappa': 0.12137310993052719, 'f1': 0.2456140350877193, 'precision': 0.2692307692307692, 'recall': 0.22580645161290322, 'average_precision': 0.1807940446650124, 'Spearman_r': 0.1220115509320933, 'Pearson_r': 0.12201155093209345, 'R2': -0.6415345575500515, 'MAE': 0.2149999964237213, 'MSE': 0.21499998867511796, 'poisson_deviance_explained': -1.068521343709548, 'model': 'Hurdle_XGBC

Hurdle_MLPClassifier_MLPRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5370168753402287, 'kappa': 0.08312958435207818, 'f1': 0.2105263157894737, 'precision': 0.25, 'recall': 0.18181818181818182, 'average_precision': 0.18045454545454548, 'Spearman_r': 0.07275893398882982, 'Pearson_r': 0.07611319322640758, 'R2': -0.6485258530139979, 'MAE': 0.22789166247862927, 'MSE': 0.2271256493990036, 'poisson_deviance_explained': -1.1857145328122525, 'model': 'Hurdle_MLPClassifier_MLPRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 2.663719892501831, 'predicting_time': 0.005728006362915039}
{'AUC': 0.5438060698606604, 'kappa': 0.11894273127753308, 'f1': 0.19047619047619047, 'precision': 0.36363636363636365, 'recall': 0.12903225806451613, 'average_precision': 0.18192082111436952, 'Spearman_r': 0.1432451850855742, 'Pearson_r': 0.14467289887478968, 'R2': -0.2891652070826616, 'MAE': 0.1699103551111154, 'MSE': 0.16884841299765158, 'poisson_deviance_explained': -0.6992782798593224, 'model': 'Hurdle_MLPClassifier_MLP

Hurdle_RandomforestClassifier_RandomforestRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.5121575031754672, 'kappa': 0.03902154921374501, 'f1': 0.05714285714285715, 'precision': 0.5, 'recall': 0.030303030303030304, 'average_precision': 0.17515151515151517, 'Spearman_r': 0.09070724987208367, 'Pearson_r': 0.09070724987208345, 'R2': -0.19760479041916135, 'MAE': 0.165, 'MSE': 0.165, 'poisson_deviance_explained': -0.4426950408889634, 'model': 'Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.24950194358825684, 'predicting_time': 0.01049494743347168}
{'AUC': 0.4954189730864669, 'kappa': -0.013143483023001057, 'f1': 0.05128205128205128, 'precision': 0.125, 'recall': 0.03225806451612903, 'average_precision': 0.15403225806451612, 'Spearman_r': -0.0169208209640023, 'Pearson_r': -0.01692082096400254, 'R2': -0.412483298339378, 'MAE': 0.185, 'MSE': 0.185, 'poisson_deviance_explained': -2.3662884287409147, 'model': 'Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': '

Hurdle_DecisionTreeClassifier_DecisionTreeRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6159499183451279, 'kappa': 0.22911437791323053, 'f1': 0.35820895522388063, 'precision': 0.35294117647058826, 'recall': 0.36363636363636365, 'average_precision': 0.23334224598930484, 'Spearman_r': 0.2291512052259617, 'Pearson_r': 0.22915120522596222, 'R2': -0.5605153329704224, 'MAE': 0.215, 'MSE': 0.215, 'poisson_deviance_explained': -0.7603596065508249, 'model': 'Hurdle_DecisionTreeClassifier_DecisionTreeRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.018878936767578125, 'predicting_time': 0.0034258365631103516}
{'AUC': 0.6047909906470701, 'kappa': 0.1830610203401134, 'f1': 0.3287671232876712, 'precision': 0.2857142857142857, 'recall': 0.3870967741935484, 'average_precision': 0.20559907834101382, 'Spearman_r': 0.18621941252002078, 'Pearson_r': 0.18621941252002122, 'R2': -0.87058598969269, 'MAE': 0.245, 'MSE': 0.245, 'poisson_deviance_explained': -0.9955890003698202, 'model': 'Hurdle_DecisionTreeClassifier_DecisionTreeRegr

Hurdle_SVC_SVR:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6566866267465069, 'kappa': 0.1978916007791911, 'f1': 0.38596491228070173, 'precision': 0.2716049382716049, 'recall': 0.6666666666666666, 'average_precision': 0.23606995884773663, 'Spearman_r': 0.23695241640037012, 'Pearson_r': 0.2369524164003698, 'R2': -1.5403737978588272, 'MAE': 0.35, 'MSE': 0.35, 'poisson_deviance_explained': -1.0575451845646606, 'model': 'Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.06715130805969238, 'predicting_time': 0.0372159481048584}
{'AUC': 0.6757014697461348, 'kappa': 0.19922086354290658, 'f1': 0.3833333333333334, 'precision': 0.25842696629213485, 'recall': 0.7419354838709677, 'average_precision': 0.23173613628126136, 'Spearman_r': 0.25590165614489674, 'Pearson_r': 0.2559016561448975, 'R2': -1.824966596678756, 'MAE': 0.37, 'MSE': 0.37, 'poisson_deviance_explained': -1.1206679648006124, 'model': 'Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'tr

Hurdle_Logistic_Linear:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6750136091453457, 'kappa': 0.21130463358527762, 'f1': 0.4, 'precision': 0.27586206896551724, 'recall': 0.7272727272727273, 'average_precision': 0.24562695924764888, 'Spearman_r': 0.26207052220492566, 'Pearson_r': 0.26207052220492627, 'R2': -1.6129559063690793, 'MAE': 0.36, 'MSE': 0.36, 'poisson_deviance_explained': -1.0382740686071994, 'model': 'Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.5157120227813721, 'predicting_time': 0.02067875862121582}
{'AUC': 0.6888719221225424, 'kappa': 0.21100330525642386, 'f1': 0.3934426229508197, 'precision': 0.26373626373626374, 'recall': 0.7741935483870968, 'average_precision': 0.23918291386033322, 'Spearman_r': 0.27452863446707226, 'Pearson_r': 0.274528634467073, 'R2': -1.824966596678756, 'MAE': 0.37, 'MSE': 0.37, 'poisson_deviance_explained': -1.094578914678094, 'model': 'Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 't

Hurdle_SGDClassifier_SGDRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

{'AUC': 0.6785519869352206, 'kappa': 0.19166342033502148, 'f1': 0.39416058394160586, 'precision': 0.25961538461538464, 'recall': 0.8181818181818182, 'average_precision': 0.24241258741258745, 'Spearman_r': 0.2663251094697319, 'Pearson_r': 0.2684835157064531, 'R2': -2.109564812123868, 'MAE': 0.4281144706537539, 'MSE': 0.428420291990366, 'poisson_deviance_explained': -1.1447135965306656, 'model': 'Hurdle_SGDClassifier_SGDRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.0177462100982666, 'predicting_time': 0.013032197952270508}
{'AUC': 0.566711204428326, 'kappa': 0.0469159003959998, 'f1': 0.29702970297029696, 'precision': 0.17543859649122806, 'recall': 0.967741935483871, 'average_precision': 0.1747792869269949, 'Spearman_r': -0.06363098131705121, 'Pearson_r': 0.10058117411042188, 'R2': -4.471377724261052, 'MAE': 0.7189944774368067, 'MSE': 0.7166136974350912, 'poisson_deviance_explained': -1.7058023170438, 'model': 'Hurdle_SGDClassifier_

In [16]:
reg_metric_df = pd.DataFrame(reg_metric_df_list)
reg_metric_df.to_csv(os.path.join(OUTPUT_DIR, f'hurdle_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv'),
                                        index=False)


### Then, with AdaSTEM wrapper

In [17]:
K=5

reg_metric_df_list = []
for item in list(reg_model_set_dict.keys())[::-1]:
    model_name = item
    model = reg_model_set_dict[model_name]

    kf = KFold(n_splits=K, shuffle=True, random_state=42).split(X, y)
    for kf_count, (train_index, test_index) in tqdm(enumerate(kf), desc=f'AdaSTEM + {model_name}', total=K):
        
        try:
            X_train = X.iloc[train_index].replace(-1,np.nan)
            
            new_x_names = list(set(x_names) - set(['DOY']))

            imputer = SimpleImputer().fit(X_train[new_x_names])
            X_train[new_x_names] = imputer.transform(X_train[new_x_names])
            scaler = MinMaxScaler().fit(X_train[new_x_names])
            X_train[new_x_names] = scaler.transform(X_train[new_x_names])

            y_train = y[train_index]
            X_test = X.iloc[test_index].replace(-1,np.nan)
            X_test[new_x_names] = imputer.transform(X_test[new_x_names])
            X_test[new_x_names] = scaler.transform(X_test[new_x_names])
            y_test = y[test_index]

            model = AdaSTEMClassifier(base_model=reg_model_set_dict[model_name], 
                                            sample_weights_for_classifier=True,
                                            ensemble_fold=5,
                                            min_ensemble_require=3,
                                            grid_len_lon_upper_threshold=25,
                                            grid_len_lon_lower_threshold=5,
                                            grid_len_lat_upper_threshold=25,
                                            grid_len_lat_lower_threshold=5,
                                            points_lower_threshold=50,
                                            stixel_training_size_threshold=50,
                                            temporal_start = 1,
                                            temporal_end = 367, 
                                            temporal_step = 30.5, 
                                            temporal_bin_interval = 30.5,
                                            save_tmp=False, 
                                            save_gridding_plot=False)
            
            try:
                start_time = time.time()
                model.fit(X_train[new_x_names + ['DOY','longitude', 'latitude']], y_train)
                finish_time = time.time()
                training_time = finish_time - start_time
                
            except:
                start_time = time.time()
                model.set_params(**{'sample_weights_for_classifier':False})
                model.fit(X_train[new_x_names + ['DOY','longitude', 'latitude']], y_train)
                finish_time = time.time()
                training_time = finish_time - start_time
                
            start_time = time.time()
            y_pred = model.predict(X_test[new_x_names + ['DOY','longitude', 'latitude']])
            y_pred = np.where(y_pred<0, 0, y_pred)
            finish_time = time.time()
            predicting_time_time = finish_time - start_time
                
            metric_df = AdaSTEM.eval_STEM_res('hurdle', y_test, np.array(y_pred).flatten())
            
            metric_df['model'] = 'AdaSTEM_' + model_name
            metric_df['task_type'] = 'hurdle'
            metric_df['iter'] = kf_count
            metric_df['sp'] = sp
            metric_df['sample_size'] = SAMPLE_SIZE
            metric_df['training_time'] = training_time
            metric_df['predicting_time'] = predicting_time
            
            reg_metric_df_list.append(metric_df)
            print(metric_df)
            
        except Exception as e:
            print(e)
            continue


    

AdaSTEM + Hurdle_XGBClassifier_XGBregressor:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 13.60it/s]
training: 100%|██████████| 1191/1191 [00:00<00:00, 8819.93it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_XGBClassifier_XGBregressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.5594649314880371, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.31it/s]
training: 100%|██████████| 1177/1177 [00:00<00:00, 9457.29it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_XGBClassifier_XGBregressor', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3567509651184082, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.23it/s]
training: 100%|██████████| 1194/1194 [00:00<00:00, 7063.43it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_XGBClassifier_XGBregressor', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3876960277557373, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.47it/s]
training: 100%|██████████| 1245/1245 [00:00<00:00, 9405.51it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_XGBClassifier_XGBregressor', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35794496536254883, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.64it/s]
training: 100%|██████████| 1173/1173 [00:00<00:00, 9797.45it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_XGBClassifier_XGBregressor', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.33987975120544434, 'predicting_time': 0.017216205596923828}


AdaSTEM + Hurdle_MLPClassifier_MLPRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble:  20%|██        | 1/5 [00:00<00:00,  7.95it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 23.89it/s]
training: 100%|██████████| 1202/1202 [00:00<00:00, 9521.26it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_MLPClassifier_MLPRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3807551860809326, 'predicting_time': 0.017216205596923828}


Generating Ensemble:  20%|██        | 1/5 [00:00<00:00, 15.69it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 29.44it/s]
training: 100%|██████████| 1106/1106 [00:00<00:00, 9715.99it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_MLPClassifier_MLPRegressor', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.32257509231567383, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.02it/s]
training: 100%|██████████| 1154/1154 [00:00<00:00, 9463.97it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_MLPClassifier_MLPRegressor', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3931388854980469, 'predicting_time': 0.017216205596923828}


Generating Ensemble:  20%|██        | 1/5 [00:00<00:00, 14.73it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.17it/s]
training: 100%|██████████| 1246/1246 [00:00<00:00, 9632.34it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_MLPClassifier_MLPRegressor', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3574380874633789, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.96it/s]
training: 100%|██████████| 1221/1221 [00:00<00:00, 9743.52it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_MLPClassifier_MLPRegressor', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3942139148712158, 'predicting_time': 0.017216205596923828}


AdaSTEM + Hurdle_RandomforestClassifier_RandomforestRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.90it/s]
training: 100%|██████████| 1235/1235 [00:00<00:00, 9686.51it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35517120361328125, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.98it/s]
training: 100%|██████████| 1198/1198 [00:00<00:00, 9547.87it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.33792877197265625, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.63it/s]
training: 100%|██████████| 1212/1212 [00:00<00:00, 9712.52it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3976879119873047, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.86it/s]
training: 100%|██████████| 1222/1222 [00:00<00:00, 9928.17it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34912919998168945, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.32it/s]
training: 100%|██████████| 1209/1209 [00:00<00:00, 8999.15it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_RandomforestClassifier_RandomforestRegressor', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.42404699325561523, 'predicting_time': 0.017216205596923828}


AdaSTEM + Hurdle_DecisionTreeClassifier_DecisionTreeRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 24.27it/s]
training: 100%|██████████| 1248/1248 [00:00<00:00, 9861.87it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_DecisionTreeClassifier_DecisionTreeRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.37648892402648926, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.29it/s]
training: 100%|██████████| 1169/1169 [00:00<00:00, 8884.66it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_DecisionTreeClassifier_DecisionTreeRegressor', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.36539387702941895, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 20.38it/s]
training: 100%|██████████| 1225/1225 [00:00<00:00, 9602.13it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_DecisionTreeClassifier_DecisionTreeRegressor', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4183921813964844, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 29.09it/s]
training: 100%|██████████| 1184/1184 [00:00<00:00, 9994.44it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_DecisionTreeClassifier_DecisionTreeRegressor', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3329920768737793, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.70it/s]
training: 100%|██████████| 1206/1206 [00:00<00:00, 9555.50it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_DecisionTreeClassifier_DecisionTreeRegressor', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35375499725341797, 'predicting_time': 0.017216205596923828}


AdaSTEM + Hurdle_SVC_SVR:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.06it/s]
training: 100%|██████████| 1196/1196 [00:00<00:00, 9382.97it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35434985160827637, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.86it/s]
training: 100%|██████████| 1222/1222 [00:00<00:00, 9166.06it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4001450538635254, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.54it/s]
training: 100%|██████████| 1202/1202 [00:00<00:00, 9543.77it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.406296968460083, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.16it/s]
training: 100%|██████████| 1191/1191 [00:00<00:00, 9726.41it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3403739929199219, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.43it/s]
training: 100%|██████████| 1180/1180 [00:00<00:00, 9659.94it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SVC_SVR', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3476238250732422, 'predicting_time': 0.017216205596923828}


AdaSTEM + Hurdle_Logistic_Linear:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 18.50it/s]
training: 100%|██████████| 1213/1213 [00:00<00:00, 9639.30it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.439436674118042, 'predicting_time': 0.017216205596923828}


Generating Ensemble:  80%|████████  | 4/5 [00:00<00:00, 19.82it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.16it/s]
training: 100%|██████████| 1166/1166 [00:00<00:00, 9461.21it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.35648202896118164, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 22.28it/s]
training: 100%|██████████| 1172/1172 [00:00<00:00, 9673.79it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.38605213165283203, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.68it/s]
training: 100%|██████████| 1207/1207 [00:00<00:00, 9860.07it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3379628658294678, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.95it/s]
training: 100%|██████████| 1207/1207 [00:00<00:00, 10106.09it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_Logistic_Linear', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.3334231376647949, 'predicting_time': 0.017216205596923828}


AdaSTEM + Hurdle_SGDClassifier_SGDRegressor:   0%|          | 0/5 [00:00<?, ?it/s]

Generating Ensemble:   0%|          | 0/5 [00:00<?, ?it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 28.14it/s]
training: 100%|██████████| 1200/1200 [00:00<00:00, 9848.73it/s] 


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.03706800603393301, 'MAE': 2.025, 'MSE': 114.725, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SGDClassifier_SGDRegressor', 'task_type': 'hurdle', 'iter': 0, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34221601486206055, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 27.43it/s]
training: 100%|██████████| 1168/1168 [00:00<00:00, 9569.77it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06496439224344774, 'MAE': 1.705, 'MSE': 47.655, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SGDClassifier_SGDRegressor', 'task_type': 'hurdle', 'iter': 1, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.34859704971313477, 'predicting_time': 0.017216205596923828}


Generating Ensemble:  20%|██        | 1/5 [00:00<00:00, 13.41it/s]
Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 21.10it/s]
training: 100%|██████████| 1214/1214 [00:00<00:00, 9432.07it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.165, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.023765411003576542, 'MAE': 6.64, 'MSE': 1899.29, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SGDClassifier_SGDRegressor', 'task_type': 'hurdle', 'iter': 2, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.40903663635253906, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 26.56it/s]
training: 100%|██████████| 1136/1136 [00:00<00:00, 9444.87it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.155, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.02458410258839927, 'MAE': 4.105, 'MSE': 702.295, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SGDClassifier_SGDRegressor', 'task_type': 'hurdle', 'iter': 3, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.349153995513916, 'predicting_time': 0.017216205596923828}


Generating Ensemble: 100%|██████████| 5/5 [00:00<00:00, 18.06it/s]
training: 100%|██████████| 1243/1243 [00:00<00:00, 9330.50it/s]


{'AUC': 0.5, 'kappa': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0, 'average_precision': 0.205, 'Spearman_r': nan, 'Pearson_r': nan, 'R2': -0.06920289560797221, 'MAE': 4.23, 'MSE': 276.45, 'poisson_deviance_explained': nan, 'model': 'AdaSTEM_Hurdle_SGDClassifier_SGDRegressor', 'task_type': 'hurdle', 'iter': 4, 'sp': 'Mallard', 'sample_size': 1000, 'training_time': 0.4523739814758301, 'predicting_time': 0.017216205596923828}


In [18]:
reg_metric_df = pd.DataFrame(reg_metric_df_list)
reg_metric_df.to_csv(os.path.join(OUTPUT_DIR, f'Ada_hurdle_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv'),
                                        index=False)


In [19]:
all_metrics = pd.concat([
    pd.read_csv(os.path.join(OUTPUT_DIR, f'cls_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv')),
    pd.read_csv(os.path.join(OUTPUT_DIR, f'Ada_cls_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv')),
    pd.read_csv(os.path.join(OUTPUT_DIR, f'hurdle_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv')),
    pd.read_csv(os.path.join(OUTPUT_DIR, f'Ada_hurdle_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv')),
], axis=0)


In [20]:
all_metrics.to_csv(os.path.join(OUTPUT_DIR, f'ALL_metric_df_SIZE_{SAMPLE_SIZE}_SP_{sp}_year_{year}.csv'))

