In [1]:
#ngboost and modelling libraries
from ngboost import NGBClassifier
from ngboost.distns import Bernoulli
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import roc_auc_score, roc_curve, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from collections import Counter

#data manipulation libraries
import pandas as pd
import numpy as np

from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

#ignore warnings
import warnings
warnings.filterwarnings('ignore')

#import functions and constants
from utils import (register_imputation, preprocess_df, 
                    scaling_values_df, clean_outliers, get_sample_weights, 
                  process_unit_cost, cost_score, generate_y_pred_with_custom_threshold, 
                  calculate_cost_score_with_learning, calculate_learning_cost, process_learning_unit_cost,)
from plot_utils import (correlation_heatmap, visualize_permutation_feature_importances, 
                        plot_target_balance, plot_trace_line, plot_scatter_matrix, 
                        plot_feature_importances, visualize_roc_curve, color_negative_red, )

from constants import cols, cols_with_missing_indicators

In [2]:
SEED=2020

## READING DATAFRAME

In [3]:
df = pd.read_csv("./data/cs-training.csv", usecols =cols)

In [4]:
df.head()

Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
0,1,0.766127,45,2,0.802982,9120.0,13,0,6,0,2.0
1,0,0.957151,40,0,0.121876,2600.0,4,0,0,0,1.0
2,0,0.65818,38,1,0.085113,3042.0,2,1,0,0,0.0
3,0,0.23381,30,0,0.03605,3300.0,5,0,0,0,0.0
4,0,0.907239,49,1,0.024926,63588.0,7,0,1,0,0.0


In [5]:
len(df)

150000

## FEATURE ENGINEERING

In [6]:

def preprocess_extra(df):
    """
    Extra cleanup process of some outliers.
    """
    df.loc[
        (df["RevolvingUtilizationOfUnsecuredLines"] > 1),
        "RevolvingUtilizationOfUnsecuredLines",
    ] = 0
    df.loc[(df["DebtRatio"] > 10), "DebtRatio"] = 0
    df.loc[(df["age"] < 18), "age"] = 0

In [7]:
from sklearn.neighbors import LocalOutlierFactor
def clean_outliers(df, flag_filter=False):
    """ 
    Register imputations, identify outliers with LOF and clean them. Also it process an extra function to clean some outliers.

    Args:
        - df (DataFrame Object): dataframe to be processed
        - flag_filter (boolean): Flag that indicates if the process requres an extra cleanup of outliers.

    Returns dataframe without outliers
    """
    #df = preprocess_df(df.copy())
    df = register_imputation(df.copy())
    local_outlier_factor = LocalOutlierFactor(contamination=0.1)
    is_outlier = local_outlier_factor.fit_predict(df[cols[1:]]) == -1
    data_outlier_excluded = df.loc[~is_outlier, :]
    if flag_filter:
        preprocess_extra(data_outlier_excluded)
    return data_outlier_excluded

In [8]:
df = clean_outliers(df.copy(), True)

In [9]:
cols_f = [c for c in df.columns if 'dummy' not in c.lower()]

df_cleaned=df.copy()[cols_f].reset_index(drop=True)

In [10]:
del df
df = df_cleaned.copy()
df

Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
0,1,0.766127,45,2,0.802982,9120.0,13,0,6,0,2.0
1,0,0.957151,40,0,0.121876,2600.0,4,0,0,0,1.0
2,0,0.658180,38,1,0.085113,3042.0,2,1,0,0,0.0
3,0,0.233810,30,0,0.036050,3300.0,5,0,0,0,0.0
4,0,0.907239,49,1,0.024926,63588.0,7,0,1,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
134995,0,0.040674,74,0,0.225131,2100.0,4,0,1,0,0.0
134996,0,0.299745,44,0,0.716562,5584.0,4,0,1,0,2.0
134997,0,0.246044,58,0,0.000000,0.0,18,0,1,0,0.0
134998,0,0.000000,30,0,0.000000,5716.0,4,0,0,0,0.0


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135000 entries, 0 to 134999
Data columns (total 11 columns):
SeriousDlqin2yrs                        135000 non-null int64
RevolvingUtilizationOfUnsecuredLines    135000 non-null float64
age                                     135000 non-null int64
NumberOfTime30-59DaysPastDueNotWorse    135000 non-null int64
DebtRatio                               135000 non-null float64
MonthlyIncome                           135000 non-null float64
NumberOfOpenCreditLinesAndLoans         135000 non-null int64
NumberOfTimes90DaysLate                 135000 non-null int64
NumberRealEstateLoansOrLines            135000 non-null int64
NumberOfTime60-89DaysPastDueNotWorse    135000 non-null int64
NumberOfDependents                      135000 non-null float64
dtypes: float64(4), int64(7)
memory usage: 11.3 MB


In [12]:
print(df.isnull().sum())

SeriousDlqin2yrs                        0
RevolvingUtilizationOfUnsecuredLines    0
age                                     0
NumberOfTime30-59DaysPastDueNotWorse    0
DebtRatio                               0
MonthlyIncome                           0
NumberOfOpenCreditLinesAndLoans         0
NumberOfTimes90DaysLate                 0
NumberRealEstateLoansOrLines            0
NumberOfTime60-89DaysPastDueNotWorse    0
NumberOfDependents                      0
dtype: int64


In [13]:
df.describe()

Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
count,135000.0,135000.0,135000.0,135000.0,135000.0,135000.0,135000.0,135000.0,135000.0,135000.0,135000.0
mean,0.060726,0.294336,52.143319,0.280696,0.328392,5089.181052,8.098289,0.134,0.996363,0.11717,0.724215
std,0.238828,0.335301,14.336519,2.515978,0.67352,5747.339847,4.526645,2.472258,1.045003,2.459597,1.084068
min,0.0,0.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.024932,41.0,0.0,0.003842,1251.75,5.0,0.0,0.0,0.0,0.0
50%,0.0,0.132987,52.0,0.0,0.214178,4250.0,7.0,0.0,1.0,0.0,0.0
75%,0.0,0.499763,62.0,0.0,0.412272,7200.0,11.0,0.0,2.0,0.0,1.0
max,1.0,1.0,99.0,98.0,10.0,151855.0,57.0,98.0,32.0,98.0,10.0


## NGBOOST MODEL

In [14]:
import random
random.seed(2019)
list_of_ids_train=random.sample(range(0,len(df)), int(len(df) * 0.2))

In [15]:
df['NumberRealEstateLoansOrLines'] = df['NumberRealEstateLoansOrLines'].astype(float)
df['NumberOfOpenCreditLinesAndLoans'] = df['NumberOfOpenCreditLinesAndLoans'].astype(float)

In [16]:
for idx in list_of_ids_train:
    for col in ['MonthlyIncome', 'NumberRealEstateLoansOrLines', 'NumberOfOpenCreditLinesAndLoans']: #en el segundo lugar estaba dummy_MonthlyIncome
        df.at[idx, col] = np.nan

In [17]:
def register_imputation(df):
    """
    Register imputations of certain df
    Args:
        - df (DataFrame): Dataframe to be computed
    Return df with filled values and booleans that indicate if each row was changed
    """
    for c in cols[1:]:
        # df[f"{c}_amputado"] = df[f"{c}"].fillna(0)
        df[f"{c}_imputed"] = df[f"{c}"].fillna(0)
        df[f"dummy_{c}"] = (df[f"{c}"] != df[f"{c}_imputed"]).astype(int)
        del df[f"{c}"]
        df.columns = df.columns.str.replace(f"{c}_imputed", f"{c}")
    return df


In [18]:
df = register_imputation(df.copy())

In [20]:
df.isnull().any().any()

False

In [20]:
X = df.drop(columns=['SeriousDlqin2yrs']).values
y = df.SeriousDlqin2yrs.values

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
df

Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,dummy_RevolvingUtilizationOfUnsecuredLines,age,dummy_age,NumberOfTime30-59DaysPastDueNotWorse,dummy_NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,dummy_DebtRatio,MonthlyIncome,...,NumberOfOpenCreditLinesAndLoans,dummy_NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,dummy_NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,dummy_NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,dummy_NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents,dummy_NumberOfDependents
0,1,0.766127,0,45,0,2,0,0.802982,0,9120.0,...,13.0,0,0,0,6.0,0,0,0,2.0,0
1,0,0.957151,0,40,0,0,0,0.121876,0,2600.0,...,4.0,0,0,0,0.0,0,0,0,1.0,0
2,0,0.658180,0,38,0,1,0,0.085113,0,3042.0,...,2.0,0,1,0,0.0,0,0,0,0.0,0
3,0,0.233810,0,30,0,0,0,0.036050,0,3300.0,...,5.0,0,0,0,0.0,0,0,0,0.0,0
4,0,0.907239,0,49,0,1,0,0.024926,0,63588.0,...,7.0,0,0,0,1.0,0,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134995,0,0.040674,0,74,0,0,0,0.225131,0,0.0,...,0.0,1,0,0,0.0,1,0,0,0.0,0
134996,0,0.299745,0,44,0,0,0,0.716562,0,5584.0,...,4.0,0,0,0,1.0,0,0,0,2.0,0
134997,0,0.246044,0,58,0,0,0,0.000000,0,0.0,...,18.0,0,0,0,1.0,0,0,0,0.0,0
134998,0,0.000000,0,30,0,0,0,0.000000,0,5716.0,...,4.0,0,0,0,0.0,0,0,0,0.0,0


In [23]:
nm_common =SMOTE(random_state=2019)
X_train_resampled, y_train_resampled = nm_common.fit_sample(X_train, y_train)
print("Resampled dataset shape {}".format(Counter(y_train_resampled)))

Resampled dataset shape Counter({0: 101492, 1: 101492})


In [24]:
#base0 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4)
from utils import check_counts
base1 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4, random_state = SEED)
base2 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=8, random_state = SEED)
base3 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=10, random_state = SEED)
base4 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=12, random_state = SEED)

In [26]:
#FIX MEEEEEEEEEEE: REFACTOR PLZZ !!! second model learning_cost
df_collector = pd.DataFrame(columns=  ["hyperparams", "estimators", 
                                       "learning_rate", "max_depth", "threshold", "count_zero", "count_one",
                                       "cost", 
                                       "train_auc", "test_auc"])
df_collector['hyperparams'] = df_collector['hyperparams'].astype('object')
k = 1131
for estimator in [150, 200, 250,300,350]:
    for lr in [0.01]:
        for baset in [base1,base2,base3,base4]:
            ngb_clf = NGBClassifier(Dist=Bernoulli,
                                      verbose=True, Base=baset, n_estimators = estimator, random_state=SEED,
                                    learning_rate = lr, verbose_eval = 0
                                     )
            print(ngb_clf)
            ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))
            #en el primer iteraba el threashold desde 0.1 a 0.9 obviamente cuanto mas menor me daba peor.. entonces itero
            #solamente d esde 0.70 a 0.95
            for threshold in list(np.arange(0.20, 0.35, 0.05)):
                count_zero, count_one = check_counts(ngb_clf, X_test, threshold)
                if (count_zero < 3000):
                    pass
                else:
                    if True:
                        df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
                        df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, threshold)
                        df_aux['real'] = list(y_test)
                        df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
                        cost = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)
                        if True:
                            df_collector.ix[str(k),'hyperparams'] = ngb_clf
                            df_collector.ix[str(k),'estimators'] = estimator
                            df_collector.ix[str(k),'learning_rate'] = lr
                            df_collector.ix[str(k),'max_depth'] = baset.max_depth
                            threshold = round(threshold,2)
                            print("k: "+str(k))
                            print(threshold)
                            print("cost "+str(cost))
                            print("th "+str(threshold))
                            df_collector.ix[str(k),'threshold'] = threshold
                            count_zero, count_one = check_counts(ngb_clf, X_test, threshold)
                            df_collector.ix[str(k),'count_zero'] = count_zero
                            df_collector.ix[str(k),'count_one'] = count_one
                            df_collector.ix[str(k),'cost'] = cost
                            # predict probabilities
                            train_probs = ngb_clf.predict_proba(X_train)
                            test_probs = ngb_clf.predict_proba(X_test)
                            # keep probabilities for the positive outcome only
                            train_probs = train_probs[:, 1]
                            test_probs = test_probs[:, 1]
                            # calculate scores
                            train_auc = roc_auc_score(y_train, train_probs)
                            test_auc = roc_auc_score(y_test, test_probs)
                            df_collector.ix[str(k),'train_auc'] = train_auc
                            df_collector.ix[str(k),'test_auc'] = test_auc
                            k+=1
                            print("sumando k")
                            print("---------------------------------")
            del ngb_clf
df_collector['diff_auc'] = df_collector['train_auc'] - df_collector['test_auc']

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=4,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=2020, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=150,
              natural_gradient=True,
              random_state=RandomState(MT1

count_zero 7656
count_one 19344
k: 1143
0.2
cost 2021797.9199999913
th 0.2
sumando k
---------------------------------
count_zero 8924
count_one 18076
k: 1144
0.25
cost 2130654.1399999936
th 0.25
sumando k
---------------------------------
count_zero 10105
count_one 16895
k: 1145
0.3
cost 2351377.619999994
th 0.3
sumando k
---------------------------------
NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=12,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=8,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=2020, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=300,
              natural_gradient=True,
              random_state=RandomState(MT1

k: 1172
0.3
cost 2390016.859999995
th 0.3
sumando k
---------------------------------
NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=12,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=2020, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimato

In [29]:
df_results = df_collector.reset_index(drop=True)

In [31]:
df_results.sort_values("cost").to_csv("resultados_primera_parte_segundo_exp_a_rellenar.csv", index=False)

In [24]:
from plot_utils import visualize_roc_curve
visualize_roc_curve(ngb_clf, X_train, y_train, X_test, y_test)

TRAIN: ROC AUC=0.837
TEST: ROC AUC=0.815


In [25]:
df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, 0.2)
df_aux['real'] = list(y_test)
df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)

count_zero 5108
count_one 21892


1858635.87999999

In [26]:
ngb_clf = NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=8,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=2019, splitter='best'),
             col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=350,
              natural_gradient=True, tol=0.0001,
              verbose=True, verbose_eval=0)
ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=8,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=2019, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=350,
              natural_gradient=True,
              random_state=RandomState(MT1

In [None]:
from plot_utils import visualize_roc_curve
visualize_roc_curve(ngb_clf, X_train, y_train, X_test, y_test)

In [None]:
df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, 0.2)
df_aux['real'] = list(y_test)
df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)

In [203]:
ngb_clf = NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=8,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
             col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=100,
              natural_gradient=True, tol=0.0001,
              verbose=True, verbose_eval=0)
ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=8,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=100,
              natural_gradient=True,
              random_state=RandomState(MT1

In [204]:
from plot_utils import visualize_roc_curve
visualize_roc_curve(ngb_clf, df_train.values, y_train, df_test.values, y_test)

TRAIN: ROC AUC=0.835
TEST: ROC AUC=0.814


In [205]:
df_aux = pd.DataFrame(df_test.values, columns=cols_with_missing_indicators)
df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, df_test.values, 0.25)
df_aux['real'] = list(y_test)
df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)

count_zero 4921
count_one 22079


1917134.7799999865

In [206]:
#FIX MEEEEEEEEEEE: REFACTOR PLZZ !!!
df_collector = pd.DataFrame(columns=  ["threshold", "cost", "train_auc", "test_auc"])
k = 0
for threshold in list(np.arange(0.05, 1, 0.05)):
    threshold = round(threshold,2)
    print("k: "+str(k))
    df_aux = pd.DataFrame(df_test.values, columns=cols_with_missing_indicators)
    df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, threshold)
    df_aux['real'] = list(y_test)
    df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
    print(threshold)
    print("cost "+str(cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)))
    print("th "+str(threshold))
    df_collector.ix[str(k),'threshold'] = threshold
    df_collector.ix[str(k),'cost'] = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)
    k+=1
    print("sumando k")
    print("---------------------------------")


k: 0
count_zero 6
count_one 26994
0.05
cost 1975990.1999999885
th 0.05
sumando k
---------------------------------
k: 1
count_zero 494
count_one 26506
0.1
cost 1934855.0799999884
th 0.1
sumando k
---------------------------------
k: 2
count_zero 1914
count_one 25086
0.15
cost 1907305.519999986
th 0.15
sumando k
---------------------------------
k: 3
count_zero 3424
count_one 23576
0.2
cost 1909677.5399999868
th 0.2
sumando k
---------------------------------
k: 4
count_zero 4941
count_one 22059
0.25
cost 1909137.7399999858
th 0.25
sumando k
---------------------------------
k: 5
count_zero 6172
count_one 20828
0.3
cost 1983154.459999987
th 0.3
sumando k
---------------------------------
k: 6
count_zero 7476
count_one 19524
0.35
cost 2026049.3999999883
th 0.35
sumando k
---------------------------------
k: 7
count_zero 8894
count_one 18106
0.4
cost 2112190.67999999
th 0.4
sumando k
---------------------------------
k: 8
count_zero 9862
count_one 17138
0.45
cost 2141857.899999995
th 0.45

In [209]:

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
def plot_trace_line_cost(df):
    """
    Plot lines with 'age' column as x axis and custom column (to compute sum/avg).
    
    Args:
        - df (pd.DataFrame): DataFrame to be represented
    """

    # Create and style traces
    trace0 = go.Scatter(
        x=df.threshold,
        y=df.cost,
        name="Responsible (target=0)",
        line=dict(color="rgb(86,157, 242)", width=4),
    )
    data = [trace0]

    # Edit the layout
    layout = dict(
        title="Evolution of cost according to thresholds",
        xaxis=dict(title="threshold"),
        yaxis=dict(title="cost")
    )

    fig = dict(data=data, layout=layout)
    iplot(fig)

In [210]:
plot_trace_line_cost(df_collector)

In [74]:
lista_probas = []
idx_p = 1
for p in list(ngb_clf.predict_proba(X_test)):
    lista_probas.append(p[idx_p])

In [76]:
len(lista_probas)

43310

In [142]:
pd.DataFrame(X_test)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.524218,71.0,0.0,0.489049,4565.0,5.0,0.0,2.0,0.0,0.0
1,0.015158,67.0,0.0,0.003222,9000.0,6.0,0.0,0.0,0.0,1.0
2,0.004222,42.0,0.0,0.380055,11250.0,6.0,0.0,2.0,0.0,2.0
3,0.950403,45.0,0.0,0.468553,10000.0,8.0,0.0,3.0,0.0,2.0
4,0.031566,66.0,0.0,0.000000,0.0,9.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
26995,0.546383,30.0,0.0,0.829894,4167.0,6.0,0.0,2.0,0.0,2.0
26996,0.014533,57.0,0.0,0.008213,2556.0,3.0,0.0,0.0,0.0,0.0
26997,0.206702,41.0,0.0,0.307175,8333.0,6.0,0.0,1.0,0.0,2.0
26998,0.054332,40.0,0.0,0.248834,,,0.0,,0.0,2.0


In [211]:
base0 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4)
base1 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=6)
base2 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=8)
base3 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=12)

In [None]:
def generate_y_pred_with_custom_threshold(model, x_data, threshold):
    """
    Generates new y_predictions according to a threshold.
    
    Args:
        - model (NGBoost model): NGBoost model that was trained.
        - x_data (np.ndarray): Data on which we predict probabilities
        - threshold (float): Float value to determine 1 or 0 for new predictions
    
    Returns updated y_predictions
    """
    y_predictions = model.predict_proba(x_data)
    y_pred = []
    count_zero = 0
    count_one = 0
    for i in range(len(list(y_predictions))):
        if y_predictions[i][1] > threshold:
            y_pred.append(0)
            count_zero += 1
        else:
            y_pred.append(1)
            count_one += 1
    print("count_zero " + str(count_zero))
    print("count_one " + str(count_one))
    return y_pred

In [None]:
PRIMER EXPERIMENTO
THRESHOLD ERA PARA Y = 0 Y DIO UN VALOR DE 0.75 (P (X | Y) = 0)
NO TOMAMOS PROBAS PREDICHAS

In [None]:
SEGUNDO EXPERIMENTO
THRESHOLD ERA PARA Y = 0 Y DIO VALORES 0.8, 0.85
PROBAS PREDICHAS POR NGBOOST PARA Y = 1

In [None]:
SEGUNDO EXPERIMENTO
THRESHOLD Y = 0 O Y = 1??? POR QUE DEBERIA DAR MAYOR SI TOMO Y = 1, SI EN EL ANTEIROR EXPERIMENTO TOME Y=0.
RTA = Y = 1 (DELINQUIR, NO ASIGNO) E ITERO VALORES 0.7 A 0.9

In [None]:
THRESHOLD Y = 1 || 0.7 Y 0.9

In [None]:
#THRESHOLD: CONSIDERO YHAT = 0. Y TOMO VALORES DESDE 0.7 A 0.9. -- PROBAS POR THRESHOLD (CADA ROW, ME DA EL VALOR DE Y)
#PROBAS PREDICHAS POR NGBOOST PARA Y = 1. (ESTO ENTRA EN LA FORMULA DE UNA, PARAMETRO)

In [212]:
from utils import check_counts
count_zero, count_one = check_counts(ngb_clf, X_test, threshold)

In [213]:
count_zero

6

In [214]:
count_one

26994

In [217]:
#FIX MEEEEEEEEEEE: REFACTOR PLZZ !!!
df_collector = pd.DataFrame(columns=  ["alpha","hyperparams", "estimators", 
                                       "learning_rate", "max_depth", "threshold", "count_zero", "count_one",
                                       "cost", 
                                       "learning_cost",
                                       "train_auc", "test_auc"])
df_collector['hyperparams'] = df_collector['hyperparams'].astype('object')
k = 0
for estimator in [50, 75, 100, 120, 150, 200]:
    for lr in [0.01]:
        for baset in [base0,base1,base2,base3]:
            ngb_clf = NGBClassifier(Dist=Bernoulli,
                                      verbose=True, Base=baset, n_estimators = estimator, 
                                    learning_rate = lr, verbose_eval = 0
                                     )
            print(ngb_clf)
            ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))
            index_th = 0
            #en el primer iteraba el threashold desde 0.1 a 0.9 obviamente cuanto mas menor me daba peor.. entonces itero
            #solamente d esde 0.70 a 0.95
            for kix in range(0,50):
                alpha = random.randint(10000, 180000)
                for threshold in list(np.arange(0.15, 0.4, 0.05)):
                    #alpha = random.randint(10000, 700000)
                    df_collector.ix[str(k),'alpha'] = alpha
                    df_collector.ix[str(k),'hyperparams'] = ngb_clf
                    df_collector.ix[str(k),'estimators'] = estimator
                    df_collector.ix[str(k),'learning_rate'] = lr
                    df_collector.ix[str(k),'max_depth'] = baset
                    threshold = round(threshold,2)
                    print("k: "+str(k))
                    df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
                    df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, threshold)
                    df_aux['proba_predicted'] = [proba[1] for proba in ngb_clf.predict_proba(X_test)]
                    df_aux['real'] = list(y_test)
                    df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
                    print(threshold)
                    print("cost "+str(cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)))
                    print("th "+str(threshold))
                    df_collector.ix[str(k),'threshold'] = threshold
                    count_zero, count_one = check_counts(ngb_clf, X_test, threshold)
                    df_collector.ix[str(k),'count_zero'] = count_zero
                    df_collector.ix[str(k),'count_one'] = count_one
                    df_collector.ix[str(k),'cost'] = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real) #predicted tomo threshold (p(x|y=0))
                    df_collector.ix[str(k),'learning_cost'] = calculate_learning_cost(df_aux.predicted,df_aux.proba_predicted, alpha) #predicted tomo threshold (p(x|y=0)), probas predichas por el modelo para y = 1 y alpha
                    # predict probabilities
                    train_probs = ngb_clf.predict_proba(X_train)
                    test_probs = ngb_clf.predict_proba(X_test)
                    # keep probabilities for the positive outcome only
                    train_probs = train_probs[:, 1]
                    test_probs = test_probs[:, 1]
                    # calculate scores
                    train_auc = roc_auc_score(y_train, train_probs)
                    test_auc = roc_auc_score(y_test, test_probs)
                    df_collector.ix[str(k),'train_auc'] = train_auc
                    df_collector.ix[str(k),'test_auc'] = test_auc
                    k+=1
                    print("sumando k")
                    print("---------------------------------")
            del ngb_clf
df_collector['final_cost'] = df_collector['cost'] - df_collector['learning_cost']

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=4,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=50,
              natural_gradient=True,
              random_state=RandomState(MT19

0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 63
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 64
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 65
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 66
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 67
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 68
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 69
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 70
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
-------------------------------

0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 134
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 135
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 136
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 137
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 138
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 139
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 140
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 141
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
-------------------------

0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 205
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 206
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 207
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 208
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 209
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 210
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 211
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 212
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
-----------------------

sumando k
---------------------------------
k: 266
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 267
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 268
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 269
count_zero 2383
count_one 24617
0.35
cost 1859867.5799999908
th 0.35
sumando k
---------------------------------
k: 270
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 271
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 272
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 273
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 274
count_zero 238

sumando k
---------------------------------
k: 336
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 337
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 338
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 339
count_zero 2383
count_one 24617
0.35
cost 1859867.5799999908
th 0.35
sumando k
---------------------------------
k: 340
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 341
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 342
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 343
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 344
count_zero 238

sumando k
---------------------------------
k: 406
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 407
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 408
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 409
count_zero 2383
count_one 24617
0.35
cost 1859867.5799999908
th 0.35
sumando k
---------------------------------
k: 410
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 411
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 412
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 413
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 414
count_zero 238

sumando k
---------------------------------
k: 476
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 477
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 478
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 479
count_zero 2383
count_one 24617
0.35
cost 1859867.5799999908
th 0.35
sumando k
---------------------------------
k: 480
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 481
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 482
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 483
count_zero 332
count_one 26668
0.3
cost 1967078.1799999904
th 0.3
sumando k
---------------------------------
k: 484
count_zero 238

0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 537
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 538
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 539
count_zero 5620
count_one 21380
0.35
cost 1856278.2199999914
th 0.35
sumando k
---------------------------------
k: 540
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 541
count_zero 812
count_one 26188
0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 542
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 543
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 544
count_zero 5620
count_one 21380
0.35
cost 1856278.2199999914
th 0.35
sumando k
-------

count_zero 812
count_one 26188
0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 607
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 608
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 609
count_zero 5620
count_one 21380
0.35
cost 1856278.2199999914
th 0.35
sumando k
---------------------------------
k: 610
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 611
count_zero 812
count_one 26188
0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 612
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 613
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 614
count_zero 5620
count_one 21380
0.35
cost 1856278.21999

sumando k
---------------------------------
k: 676
count_zero 812
count_one 26188
0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 677
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 678
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 679
count_zero 5620
count_one 21380
0.35
cost 1856278.2199999914
th 0.35
sumando k
---------------------------------
k: 680
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 681
count_zero 812
count_one 26188
0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 682
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 683
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 684
coun

0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 746
count_zero 812
count_one 26188
0.2
cost 1939660.179999992
th 0.2
sumando k
---------------------------------
k: 747
count_zero 2500
count_one 24500
0.25
cost 1905728.0199999898
th 0.25
sumando k
---------------------------------
k: 748
count_zero 4231
count_one 22769
0.3
cost 1887112.4799999914
th 0.3
sumando k
---------------------------------
k: 749
count_zero 5620
count_one 21380
0.35
cost 1856278.2199999914
th 0.35
sumando k
---------------------------------
NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=12,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                

0.15
cost 2162160.1999999923
th 0.15
sumando k
---------------------------------
k: 806
count_zero 5433
count_one 21567
0.2
cost 2254673.7599999914
th 0.2
sumando k
---------------------------------
k: 807
count_zero 7508
count_one 19492
0.25
cost 2355417.739999995
th 0.25
sumando k
---------------------------------
k: 808
count_zero 9350
count_one 17650
0.3
cost 2617899.719999994
th 0.3
sumando k
---------------------------------
k: 809
count_zero 10988
count_one 16012
0.35
cost 2983891.9999999967
th 0.35
sumando k
---------------------------------
k: 810
count_zero 3463
count_one 23537
0.15
cost 2162160.1999999923
th 0.15
sumando k
---------------------------------
k: 811
count_zero 5433
count_one 21567
0.2
cost 2254673.7599999914
th 0.2
sumando k
---------------------------------
k: 812
count_zero 7508
count_one 19492
0.25
cost 2355417.739999995
th 0.25
sumando k
---------------------------------
k: 813
count_zero 9350
count_one 17650
0.3
cost 2617899.719999994
th 0.3
sumando k
----

0.35
cost 2983891.9999999967
th 0.35
sumando k
---------------------------------
k: 875
count_zero 3463
count_one 23537
0.15
cost 2162160.1999999923
th 0.15
sumando k
---------------------------------
k: 876
count_zero 5433
count_one 21567
0.2
cost 2254673.7599999914
th 0.2
sumando k
---------------------------------
k: 877
count_zero 7508
count_one 19492
0.25
cost 2355417.739999995
th 0.25
sumando k
---------------------------------
k: 878
count_zero 9350
count_one 17650
0.3
cost 2617899.719999994
th 0.3
sumando k
---------------------------------
k: 879
count_zero 10988
count_one 16012
0.35
cost 2983891.9999999967
th 0.35
sumando k
---------------------------------
k: 880
count_zero 3463
count_one 23537
0.15
cost 2162160.1999999923
th 0.15
sumando k
---------------------------------
k: 881
count_zero 5433
count_one 21567
0.2
cost 2254673.7599999914
th 0.2
sumando k
---------------------------------
k: 882
count_zero 7508
count_one 19492
0.25
cost 2355417.739999995
th 0.25
sumando k
-

0.3
cost 2617899.719999994
th 0.3
sumando k
---------------------------------
k: 944
count_zero 10988
count_one 16012
0.35
cost 2983891.9999999967
th 0.35
sumando k
---------------------------------
k: 945
count_zero 3463
count_one 23537
0.15
cost 2162160.1999999923
th 0.15
sumando k
---------------------------------
k: 946
count_zero 5433
count_one 21567
0.2
cost 2254673.7599999914
th 0.2
sumando k
---------------------------------
k: 947
count_zero 7508
count_one 19492
0.25
cost 2355417.739999995
th 0.25
sumando k
---------------------------------
k: 948
count_zero 9350
count_one 17650
0.3
cost 2617899.719999994
th 0.3
sumando k
---------------------------------
k: 949
count_zero 10988
count_one 16012
0.35
cost 2983891.9999999967
th 0.35
sumando k
---------------------------------
k: 950
count_zero 3463
count_one 23537
0.15
cost 2162160.1999999923
th 0.15
sumando k
---------------------------------
k: 951
count_zero 5433
count_one 21567
0.2
cost 2254673.7599999914
th 0.2
sumando k
--

0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1004
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1005
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1006
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 1007
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 1008
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1009
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1010
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1011
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
-----------------

0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1074
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1075
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1076
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 1077
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 1078
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1079
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1080
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1081
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
-----------------

0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1144
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1145
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1146
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 1147
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 1148
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1149
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1150
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1151
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
-----------------

0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1214
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1215
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1216
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 1217
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 1218
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 1219
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 1220
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1221
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
-----------------

count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1275
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1276
count_zero 7
count_one 26993
0.2
cost 1982169.6999999906
th 0.2
sumando k
---------------------------------
k: 1277
count_zero 625
count_one 26375
0.25
cost 1915569.2199999914
th 0.25
sumando k
---------------------------------
k: 1278
count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1279
count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1280
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1281
count_zero 7
count_one 26993
0.2
cost 1982169.6999999906
th 0.2
sumando k
---------------------------------
k: 1282
count_zero 625
count_one 26375
0.25
cost 1915569.21

count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1344
count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1345
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1346
count_zero 7
count_one 26993
0.2
cost 1982169.6999999906
th 0.2
sumando k
---------------------------------
k: 1347
count_zero 625
count_one 26375
0.25
cost 1915569.2199999914
th 0.25
sumando k
---------------------------------
k: 1348
count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1349
count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1350
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1351
count_zero 7
count_one 26993
0.2
cost 1982169.69

count_zero 625
count_one 26375
0.25
cost 1915569.2199999914
th 0.25
sumando k
---------------------------------
k: 1413
count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1414
count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1415
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1416
count_zero 7
count_one 26993
0.2
cost 1982169.6999999906
th 0.2
sumando k
---------------------------------
k: 1417
count_zero 625
count_one 26375
0.25
cost 1915569.2199999914
th 0.25
sumando k
---------------------------------
k: 1418
count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1419
count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1420
count_zero 0
count_one 27000
0.15
cost 1983300

count_zero 7
count_one 26993
0.2
cost 1982169.6999999906
th 0.2
sumando k
---------------------------------
k: 1482
count_zero 625
count_one 26375
0.25
cost 1915569.2199999914
th 0.25
sumando k
---------------------------------
k: 1483
count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1484
count_zero 3692
count_one 23308
0.35
cost 1890624.5999999908
th 0.35
sumando k
---------------------------------
k: 1485
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 1486
count_zero 7
count_one 26993
0.2
cost 1982169.6999999906
th 0.2
sumando k
---------------------------------
k: 1487
count_zero 625
count_one 26375
0.25
cost 1915569.2199999914
th 0.25
sumando k
---------------------------------
k: 1488
count_zero 2141
count_one 24859
0.3
cost 1861539.9999999912
th 0.3
sumando k
---------------------------------
k: 1489
count_zero 3692
count_one 23308
0.35
cost 1890624.5

sumando k
---------------------------------
k: 1541
count_zero 2600
count_one 24400
0.2
cost 1865791.2199999914
th 0.2
sumando k
---------------------------------
k: 1542
count_zero 4273
count_one 22727
0.25
cost 1842901.2799999905
th 0.25
sumando k
---------------------------------
k: 1543
count_zero 5593
count_one 21407
0.3
cost 1832616.5399999884
th 0.3
sumando k
---------------------------------
k: 1544
count_zero 6914
count_one 20086
0.35
cost 2013672.4999999905
th 0.35
sumando k
---------------------------------
k: 1545
count_zero 1214
count_one 25786
0.15
cost 1908677.8199999921
th 0.15
sumando k
---------------------------------
k: 1546
count_zero 2600
count_one 24400
0.2
cost 1865791.2199999914
th 0.2
sumando k
---------------------------------
k: 1547
count_zero 4273
count_one 22727
0.25
cost 1842901.2799999905
th 0.25
sumando k
---------------------------------
k: 1548
count_zero 5593
count_one 21407
0.3
cost 1832616.5399999884
th 0.3
sumando k
------------------------------

count_zero 6914
count_one 20086
0.35
cost 2013672.4999999905
th 0.35
sumando k
---------------------------------
k: 1610
count_zero 1214
count_one 25786
0.15
cost 1908677.8199999921
th 0.15
sumando k
---------------------------------
k: 1611
count_zero 2600
count_one 24400
0.2
cost 1865791.2199999914
th 0.2
sumando k
---------------------------------
k: 1612
count_zero 4273
count_one 22727
0.25
cost 1842901.2799999905
th 0.25
sumando k
---------------------------------
k: 1613
count_zero 5593
count_one 21407
0.3
cost 1832616.5399999884
th 0.3
sumando k
---------------------------------
k: 1614
count_zero 6914
count_one 20086
0.35
cost 2013672.4999999905
th 0.35
sumando k
---------------------------------
k: 1615
count_zero 1214
count_one 25786
0.15
cost 1908677.8199999921
th 0.15
sumando k
---------------------------------
k: 1616
count_zero 2600
count_one 24400
0.2
cost 1865791.2199999914
th 0.2
sumando k
---------------------------------
k: 1617
count_zero 4273
count_one 22727
0.25
c

0.25
cost 1842901.2799999905
th 0.25
sumando k
---------------------------------
k: 1678
count_zero 5593
count_one 21407
0.3
cost 1832616.5399999884
th 0.3
sumando k
---------------------------------
k: 1679
count_zero 6914
count_one 20086
0.35
cost 2013672.4999999905
th 0.35
sumando k
---------------------------------
k: 1680
count_zero 1214
count_one 25786
0.15
cost 1908677.8199999921
th 0.15
sumando k
---------------------------------
k: 1681
count_zero 2600
count_one 24400
0.2
cost 1865791.2199999914
th 0.2
sumando k
---------------------------------
k: 1682
count_zero 4273
count_one 22727
0.25
cost 1842901.2799999905
th 0.25
sumando k
---------------------------------
k: 1683
count_zero 5593
count_one 21407
0.3
cost 1832616.5399999884
th 0.3
sumando k
---------------------------------
k: 1684
count_zero 6914
count_one 20086
0.35
cost 2013672.4999999905
th 0.35
sumando k
---------------------------------
k: 1685
count_zero 1214
count_one 25786
0.15
cost 1908677.8199999921
th 0.15
s

sumando k
---------------------------------
k: 1746
count_zero 2600
count_one 24400
0.2
cost 1865791.2199999914
th 0.2
sumando k
---------------------------------
k: 1747
count_zero 4273
count_one 22727
0.25
cost 1842901.2799999905
th 0.25
sumando k
---------------------------------
k: 1748
count_zero 5593
count_one 21407
0.3
cost 1832616.5399999884
th 0.3
sumando k
---------------------------------
k: 1749
count_zero 6914
count_one 20086
0.35
cost 2013672.4999999905
th 0.35
sumando k
---------------------------------
NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=12,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
 

count_zero 6397
count_one 20603
0.15
cost 2134998.5199999884
th 0.15
sumando k
---------------------------------
k: 1806
count_zero 8311
count_one 18689
0.2
cost 2335023.759999994
th 0.2
sumando k
---------------------------------
k: 1807
count_zero 9871
count_one 17129
0.25
cost 2682558.479999994
th 0.25
sumando k
---------------------------------
k: 1808
count_zero 11272
count_one 15728
0.3
cost 2949114.199999998
th 0.3
sumando k
---------------------------------
k: 1809
count_zero 12579
count_one 14421
0.35
cost 3208000.419999995
th 0.35
sumando k
---------------------------------
k: 1810
count_zero 6397
count_one 20603
0.15
cost 2134998.5199999884
th 0.15
sumando k
---------------------------------
k: 1811
count_zero 8311
count_one 18689
0.2
cost 2335023.759999994
th 0.2
sumando k
---------------------------------
k: 1812
count_zero 9871
count_one 17129
0.25
cost 2682558.479999994
th 0.25
sumando k
---------------------------------
k: 1813
count_zero 11272
count_one 15728
0.3
cost 

sumando k
---------------------------------
k: 1874
count_zero 12579
count_one 14421
0.35
cost 3208000.419999995
th 0.35
sumando k
---------------------------------
k: 1875
count_zero 6397
count_one 20603
0.15
cost 2134998.5199999884
th 0.15
sumando k
---------------------------------
k: 1876
count_zero 8311
count_one 18689
0.2
cost 2335023.759999994
th 0.2
sumando k
---------------------------------
k: 1877
count_zero 9871
count_one 17129
0.25
cost 2682558.479999994
th 0.25
sumando k
---------------------------------
k: 1878
count_zero 11272
count_one 15728
0.3
cost 2949114.199999998
th 0.3
sumando k
---------------------------------
k: 1879
count_zero 12579
count_one 14421
0.35
cost 3208000.419999995
th 0.35
sumando k
---------------------------------
k: 1880
count_zero 6397
count_one 20603
0.15
cost 2134998.5199999884
th 0.15
sumando k
---------------------------------
k: 1881
count_zero 8311
count_one 18689
0.2
cost 2335023.759999994
th 0.2
sumando k
-------------------------------

count_zero 9871
count_one 17129
0.25
cost 2682558.479999994
th 0.25
sumando k
---------------------------------
k: 1943
count_zero 11272
count_one 15728
0.3
cost 2949114.199999998
th 0.3
sumando k
---------------------------------
k: 1944
count_zero 12579
count_one 14421
0.35
cost 3208000.419999995
th 0.35
sumando k
---------------------------------
k: 1945
count_zero 6397
count_one 20603
0.15
cost 2134998.5199999884
th 0.15
sumando k
---------------------------------
k: 1946
count_zero 8311
count_one 18689
0.2
cost 2335023.759999994
th 0.2
sumando k
---------------------------------
k: 1947
count_zero 9871
count_one 17129
0.25
cost 2682558.479999994
th 0.25
sumando k
---------------------------------
k: 1948
count_zero 11272
count_one 15728
0.3
cost 2949114.199999998
th 0.3
sumando k
---------------------------------
k: 1949
count_zero 12579
count_one 14421
0.35
cost 3208000.419999995
th 0.35
sumando k
---------------------------------
k: 1950
count_zero 6397
count_one 20603
0.15
cost

sumando k
---------------------------------
k: 2002
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2003
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2004
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2005
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 2006
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 2007
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2008
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2009
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2010
count_zero

sumando k
---------------------------------
k: 2072
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2073
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2074
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2075
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 2076
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 2077
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2078
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2079
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2080
count_zero

sumando k
---------------------------------
k: 2142
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2143
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2144
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2145
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 2146
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 2147
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2148
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2149
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2150
count_zero

sumando k
---------------------------------
k: 2212
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2213
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2214
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2215
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 2216
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 2217
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 2218
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 2219
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 2220
count_zero

count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2273
count_zero 2872
count_one 24128
0.3
cost 1891192.4999999907
th 0.3
sumando k
---------------------------------
k: 2274
count_zero 4428
count_one 22572
0.35
cost 1881536.339999992
th 0.35
sumando k
---------------------------------
k: 2275
count_zero 3
count_one 26997
0.15
cost 1982735.5199999907
th 0.15
sumando k
---------------------------------
k: 2276
count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2277
count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2278
count_zero 2872
count_one 24128
0.3
cost 1891192.4999999907
th 0.3
sumando k
---------------------------------
k: 2279
count_zero 4428
count_one 22572
0.35
cost 1881536.339999992
th 0.35
sumando k
---------------------------------
k: 2280
count_zero 3
count_one 26997
0.15
cost 19827

count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2342
count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2343
count_zero 2872
count_one 24128
0.3
cost 1891192.4999999907
th 0.3
sumando k
---------------------------------
k: 2344
count_zero 4428
count_one 22572
0.35
cost 1881536.339999992
th 0.35
sumando k
---------------------------------
k: 2345
count_zero 3
count_one 26997
0.15
cost 1982735.5199999907
th 0.15
sumando k
---------------------------------
k: 2346
count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2347
count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2348
count_zero 2872
count_one 24128
0.3
cost 1891192.4999999907
th 0.3
sumando k
---------------------------------
k: 2349
count_zero 4428
count_one 22572
0.35
cost 1881

count_zero 3
count_one 26997
0.15
cost 1982735.5199999907
th 0.15
sumando k
---------------------------------
k: 2411
count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2412
count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2413
count_zero 2872
count_one 24128
0.3
cost 1891192.4999999907
th 0.3
sumando k
---------------------------------
k: 2414
count_zero 4428
count_one 22572
0.35
cost 1881536.339999992
th 0.35
sumando k
---------------------------------
k: 2415
count_zero 3
count_one 26997
0.15
cost 1982735.5199999907
th 0.15
sumando k
---------------------------------
k: 2416
count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2417
count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2418
count_zero 2872
count_one 24128
0.3
cost 189119

count_zero 4428
count_one 22572
0.35
cost 1881536.339999992
th 0.35
sumando k
---------------------------------
k: 2480
count_zero 3
count_one 26997
0.15
cost 1982735.5199999907
th 0.15
sumando k
---------------------------------
k: 2481
count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2482
count_zero 1602
count_one 25398
0.25
cost 1908487.0799999912
th 0.25
sumando k
---------------------------------
k: 2483
count_zero 2872
count_one 24128
0.3
cost 1891192.4999999907
th 0.3
sumando k
---------------------------------
k: 2484
count_zero 4428
count_one 22572
0.35
cost 1881536.339999992
th 0.35
sumando k
---------------------------------
k: 2485
count_zero 3
count_one 26997
0.15
cost 1982735.5199999907
th 0.15
sumando k
---------------------------------
k: 2486
count_zero 395
count_one 26605
0.2
cost 1931177.9799999925
th 0.2
sumando k
---------------------------------
k: 2487
count_zero 1602
count_one 25398
0.25
cost 190848

sumando k
---------------------------------
k: 2539
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
k: 2540
count_zero 1930
count_one 25070
0.15
cost 1880706.6599999904
th 0.15
sumando k
---------------------------------
k: 2541
count_zero 3440
count_one 23560
0.2
cost 1851614.8799999892
th 0.2
sumando k
---------------------------------
k: 2542
count_zero 4945
count_one 22055
0.25
cost 1824529.6999999883
th 0.25
sumando k
---------------------------------
k: 2543
count_zero 6184
count_one 20816
0.3
cost 1879855.8399999905
th 0.3
sumando k
---------------------------------
k: 2544
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
k: 2545
count_zero 1930
count_one 25070
0.15
cost 1880706.6599999904
th 0.15
sumando k
---------------------------------
k: 2546
count_zero 3440
count_one 23560
0.2
cost 1851614.8799999892
th 0.2
sumando k
----------------------------

count_zero 4945
count_one 22055
0.25
cost 1824529.6999999883
th 0.25
sumando k
---------------------------------
k: 2608
count_zero 6184
count_one 20816
0.3
cost 1879855.8399999905
th 0.3
sumando k
---------------------------------
k: 2609
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
k: 2610
count_zero 1930
count_one 25070
0.15
cost 1880706.6599999904
th 0.15
sumando k
---------------------------------
k: 2611
count_zero 3440
count_one 23560
0.2
cost 1851614.8799999892
th 0.2
sumando k
---------------------------------
k: 2612
count_zero 4945
count_one 22055
0.25
cost 1824529.6999999883
th 0.25
sumando k
---------------------------------
k: 2613
count_zero 6184
count_one 20816
0.3
cost 1879855.8399999905
th 0.3
sumando k
---------------------------------
k: 2614
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
k: 2615
count_zero 1930
count_one 25070
0.15
c

0.15
cost 1880706.6599999904
th 0.15
sumando k
---------------------------------
k: 2676
count_zero 3440
count_one 23560
0.2
cost 1851614.8799999892
th 0.2
sumando k
---------------------------------
k: 2677
count_zero 4945
count_one 22055
0.25
cost 1824529.6999999883
th 0.25
sumando k
---------------------------------
k: 2678
count_zero 6184
count_one 20816
0.3
cost 1879855.8399999905
th 0.3
sumando k
---------------------------------
k: 2679
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
k: 2680
count_zero 1930
count_one 25070
0.15
cost 1880706.6599999904
th 0.15
sumando k
---------------------------------
k: 2681
count_zero 3440
count_one 23560
0.2
cost 1851614.8799999892
th 0.2
sumando k
---------------------------------
k: 2682
count_zero 4945
count_one 22055
0.25
cost 1824529.6999999883
th 0.25
sumando k
---------------------------------
k: 2683
count_zero 6184
count_one 20816
0.3
cost 1879855.8399999905
th 0.3
sum

sumando k
---------------------------------
k: 2744
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
k: 2745
count_zero 1930
count_one 25070
0.15
cost 1880706.6599999904
th 0.15
sumando k
---------------------------------
k: 2746
count_zero 3440
count_one 23560
0.2
cost 1851614.8799999892
th 0.2
sumando k
---------------------------------
k: 2747
count_zero 4945
count_one 22055
0.25
cost 1824529.6999999883
th 0.25
sumando k
---------------------------------
k: 2748
count_zero 6184
count_one 20816
0.3
cost 1879855.8399999905
th 0.3
sumando k
---------------------------------
k: 2749
count_zero 7454
count_one 19546
0.35
cost 1946772.4199999922
th 0.35
sumando k
---------------------------------
NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=12,
                                         max_features=None, max_leaf_nodes=None,
                

count_zero 12247
count_one 14753
0.3
cost 3031278.8599999975
th 0.3
sumando k
---------------------------------
k: 2804
count_zero 13414
count_one 13586
0.35
cost 3317981.9999999944
th 0.35
sumando k
---------------------------------
k: 2805
count_zero 8009
count_one 18991
0.15
cost 2286461.819999993
th 0.15
sumando k
---------------------------------
k: 2806
count_zero 9643
count_one 17357
0.2
cost 2389318.119999998
th 0.2
sumando k
---------------------------------
k: 2807
count_zero 10971
count_one 16029
0.25
cost 2785206.9999999977
th 0.25
sumando k
---------------------------------
k: 2808
count_zero 12247
count_one 14753
0.3
cost 3031278.8599999975
th 0.3
sumando k
---------------------------------
k: 2809
count_zero 13414
count_one 13586
0.35
cost 3317981.9999999944
th 0.35
sumando k
---------------------------------
k: 2810
count_zero 8009
count_one 18991
0.15
cost 2286461.819999993
th 0.15
sumando k
---------------------------------
k: 2811
count_zero 9643
count_one 17357
0.2


0.2
cost 2389318.119999998
th 0.2
sumando k
---------------------------------
k: 2872
count_zero 10971
count_one 16029
0.25
cost 2785206.9999999977
th 0.25
sumando k
---------------------------------
k: 2873
count_zero 12247
count_one 14753
0.3
cost 3031278.8599999975
th 0.3
sumando k
---------------------------------
k: 2874
count_zero 13414
count_one 13586
0.35
cost 3317981.9999999944
th 0.35
sumando k
---------------------------------
k: 2875
count_zero 8009
count_one 18991
0.15
cost 2286461.819999993
th 0.15
sumando k
---------------------------------
k: 2876
count_zero 9643
count_one 17357
0.2
cost 2389318.119999998
th 0.2
sumando k
---------------------------------
k: 2877
count_zero 10971
count_one 16029
0.25
cost 2785206.9999999977
th 0.25
sumando k
---------------------------------
k: 2878
count_zero 12247
count_one 14753
0.3
cost 3031278.8599999975
th 0.3
sumando k
---------------------------------
k: 2879
count_zero 13414
count_one 13586
0.35
cost 3317981.9999999944
th 0.35


sumando k
---------------------------------
k: 2940
count_zero 8009
count_one 18991
0.15
cost 2286461.819999993
th 0.15
sumando k
---------------------------------
k: 2941
count_zero 9643
count_one 17357
0.2
cost 2389318.119999998
th 0.2
sumando k
---------------------------------
k: 2942
count_zero 10971
count_one 16029
0.25
cost 2785206.9999999977
th 0.25
sumando k
---------------------------------
k: 2943
count_zero 12247
count_one 14753
0.3
cost 3031278.8599999975
th 0.3
sumando k
---------------------------------
k: 2944
count_zero 13414
count_one 13586
0.35
cost 3317981.9999999944
th 0.35
sumando k
---------------------------------
k: 2945
count_zero 8009
count_one 18991
0.15
cost 2286461.819999993
th 0.15
sumando k
---------------------------------
k: 2946
count_zero 9643
count_one 17357
0.2
cost 2389318.119999998
th 0.2
sumando k
---------------------------------
k: 2947
count_zero 10971
count_one 16029
0.25
cost 2785206.9999999977
th 0.25
sumando k
----------------------------

k: 3000
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3001
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3002
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3003
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 3004
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 3005
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3006
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3007
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3008
count_zero 0
count_one 27000
0.3
cost 1983300.37999999

count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3071
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3072
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3073
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 3074
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 3075
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3076
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3077
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3078
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.

count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3141
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3142
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3143
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 3144
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 3145
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3146
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3147
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3148
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.

count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3211
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3212
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3213
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 3214
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 3215
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 3216
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 3217
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 3218
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.

sumando k
---------------------------------
k: 3271
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
---------------------------------
k: 3272
count_zero 1972
count_one 25028
0.25
cost 1903410.559999992
th 0.25
sumando k
---------------------------------
k: 3273
count_zero 3277
count_one 23723
0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 3274
count_zero 4599
count_one 22401
0.35
cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3275
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k
---------------------------------
k: 3276
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
---------------------------------
k: 3277
count_zero 1972
count_one 25028
0.25
cost 1903410.559999992
th 0.25
sumando k
---------------------------------
k: 3278
count_zero 3277
count_one 23723
0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 

cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3340
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k
---------------------------------
k: 3341
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
---------------------------------
k: 3342
count_zero 1972
count_one 25028
0.25
cost 1903410.559999992
th 0.25
sumando k
---------------------------------
k: 3343
count_zero 3277
count_one 23723
0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 3344
count_zero 4599
count_one 22401
0.35
cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3345
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k
---------------------------------
k: 3346
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
---------------------------------
k: 3347
count_zero 1972
count_one 25028
0.25
cost 1903410.559999992
th 0.25
sumando k
------

0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 3409
count_zero 4599
count_one 22401
0.35
cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3410
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k
---------------------------------
k: 3411
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
---------------------------------
k: 3412
count_zero 1972
count_one 25028
0.25
cost 1903410.559999992
th 0.25
sumando k
---------------------------------
k: 3413
count_zero 3277
count_one 23723
0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 3414
count_zero 4599
count_one 22401
0.35
cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3415
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k
---------------------------------
k: 3416
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
--

0.25
cost 1903410.559999992
th 0.25
sumando k
---------------------------------
k: 3478
count_zero 3277
count_one 23723
0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 3479
count_zero 4599
count_one 22401
0.35
cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3480
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k
---------------------------------
k: 3481
count_zero 678
count_one 26322
0.2
cost 1947011.7999999924
th 0.2
sumando k
---------------------------------
k: 3482
count_zero 1972
count_one 25028
0.25
cost 1903410.559999992
th 0.25
sumando k
---------------------------------
k: 3483
count_zero 3277
count_one 23723
0.3
cost 1875668.8799999908
th 0.3
sumando k
---------------------------------
k: 3484
count_zero 4599
count_one 22401
0.35
cost 1873940.519999991
th 0.35
sumando k
---------------------------------
k: 3485
count_zero 83
count_one 26917
0.15
cost 1960961.0199999914
th 0.15
sumando k


count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3538
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
sumando k
---------------------------------
k: 3539
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
k: 3540
count_zero 2249
count_one 24751
0.15
cost 1866332.439999989
th 0.15
sumando k
---------------------------------
k: 3541
count_zero 3846
count_one 23154
0.2
cost 1844585.0199999888
th 0.2
sumando k
---------------------------------
k: 3542
count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3543
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
sumando k
---------------------------------
k: 3544
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
k: 3545
count_zero 2249
count_one 24751
0.15
co

0.15
cost 1866332.439999989
th 0.15
sumando k
---------------------------------
k: 3606
count_zero 3846
count_one 23154
0.2
cost 1844585.0199999888
th 0.2
sumando k
---------------------------------
k: 3607
count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3608
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
sumando k
---------------------------------
k: 3609
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
k: 3610
count_zero 2249
count_one 24751
0.15
cost 1866332.439999989
th 0.15
sumando k
---------------------------------
k: 3611
count_zero 3846
count_one 23154
0.2
cost 1844585.0199999888
th 0.2
sumando k
---------------------------------
k: 3612
count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3613
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
suman

sumando k
---------------------------------
k: 3674
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
k: 3675
count_zero 2249
count_one 24751
0.15
cost 1866332.439999989
th 0.15
sumando k
---------------------------------
k: 3676
count_zero 3846
count_one 23154
0.2
cost 1844585.0199999888
th 0.2
sumando k
---------------------------------
k: 3677
count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3678
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
sumando k
---------------------------------
k: 3679
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
k: 3680
count_zero 2249
count_one 24751
0.15
cost 1866332.439999989
th 0.15
sumando k
---------------------------------
k: 3681
count_zero 3846
count_one 23154
0.2
cost 1844585.0199999888
th 0.2
sumando k
------------------------------

count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3743
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
sumando k
---------------------------------
k: 3744
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
k: 3745
count_zero 2249
count_one 24751
0.15
cost 1866332.439999989
th 0.15
sumando k
---------------------------------
k: 3746
count_zero 3846
count_one 23154
0.2
cost 1844585.0199999888
th 0.2
sumando k
---------------------------------
k: 3747
count_zero 5299
count_one 21701
0.25
cost 1827288.2199999886
th 0.25
sumando k
---------------------------------
k: 3748
count_zero 6482
count_one 20518
0.3
cost 1872842.0399999912
th 0.3
sumando k
---------------------------------
k: 3749
count_zero 7783
count_one 19217
0.35
cost 1959668.5199999907
th 0.35
sumando k
---------------------------------
NGBClassifier(Base=DecisionTreeRegressor(ccp_al

count_zero 10119
count_one 16881
0.2
cost 2680322.8799999985
th 0.2
sumando k
---------------------------------
k: 3802
count_zero 11376
count_one 15624
0.25
cost 2898787.3600000003
th 0.25
sumando k
---------------------------------
k: 3803
count_zero 12580
count_one 14420
0.3
cost 3205672.759999999
th 0.3
sumando k
---------------------------------
k: 3804
count_zero 13750
count_one 13250
0.35
cost 3462989.9599999944
th 0.35
sumando k
---------------------------------
k: 3805
count_zero 8596
count_one 18404
0.15
cost 2316587.019999993
th 0.15
sumando k
---------------------------------
k: 3806
count_zero 10119
count_one 16881
0.2
cost 2680322.8799999985
th 0.2
sumando k
---------------------------------
k: 3807
count_zero 11376
count_one 15624
0.25
cost 2898787.3600000003
th 0.25
sumando k
---------------------------------
k: 3808
count_zero 12580
count_one 14420
0.3
cost 3205672.759999999
th 0.3
sumando k
---------------------------------
k: 3809
count_zero 13750
count_one 13250
0.3

count_zero 13750
count_one 13250
0.35
cost 3462989.9599999944
th 0.35
sumando k
---------------------------------
k: 3870
count_zero 8596
count_one 18404
0.15
cost 2316587.019999993
th 0.15
sumando k
---------------------------------
k: 3871
count_zero 10119
count_one 16881
0.2
cost 2680322.8799999985
th 0.2
sumando k
---------------------------------
k: 3872
count_zero 11376
count_one 15624
0.25
cost 2898787.3600000003
th 0.25
sumando k
---------------------------------
k: 3873
count_zero 12580
count_one 14420
0.3
cost 3205672.759999999
th 0.3
sumando k
---------------------------------
k: 3874
count_zero 13750
count_one 13250
0.35
cost 3462989.9599999944
th 0.35
sumando k
---------------------------------
k: 3875
count_zero 8596
count_one 18404
0.15
cost 2316587.019999993
th 0.15
sumando k
---------------------------------
k: 3876
count_zero 10119
count_one 16881
0.2
cost 2680322.8799999985
th 0.2
sumando k
---------------------------------
k: 3877
count_zero 11376
count_one 15624
0.

count_zero 11376
count_one 15624
0.25
cost 2898787.3600000003
th 0.25
sumando k
---------------------------------
k: 3938
count_zero 12580
count_one 14420
0.3
cost 3205672.759999999
th 0.3
sumando k
---------------------------------
k: 3939
count_zero 13750
count_one 13250
0.35
cost 3462989.9599999944
th 0.35
sumando k
---------------------------------
k: 3940
count_zero 8596
count_one 18404
0.15
cost 2316587.019999993
th 0.15
sumando k
---------------------------------
k: 3941
count_zero 10119
count_one 16881
0.2
cost 2680322.8799999985
th 0.2
sumando k
---------------------------------
k: 3942
count_zero 11376
count_one 15624
0.25
cost 2898787.3600000003
th 0.25
sumando k
---------------------------------
k: 3943
count_zero 12580
count_one 14420
0.3
cost 3205672.759999999
th 0.3
sumando k
---------------------------------
k: 3944
count_zero 13750
count_one 13250
0.35
cost 3462989.9599999944
th 0.35
sumando k
---------------------------------
k: 3945
count_zero 8596
count_one 18404
0.

k: 4000
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4001
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4002
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4003
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 4004
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 4005
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4006
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4007
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4008
count_zero 0
count_one 27000
0.3
cost 1983300.37999999

count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4071
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4072
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4073
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 4074
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 4075
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4076
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4077
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4078
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.

count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4141
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4142
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4143
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 4144
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 4145
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4146
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4147
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4148
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.

count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4211
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4212
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4213
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 4214
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 4215
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 4216
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 4217
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 4218
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.

cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4271
count_zero 925
count_one 26075
0.2
cost 1917878.4399999909
th 0.2
sumando k
---------------------------------
k: 4272
count_zero 2215
count_one 24785
0.25
cost 1883166.799999991
th 0.25
sumando k
---------------------------------
k: 4273
count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
---------------------------------
k: 4274
count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4275
count_zero 196
count_one 26804
0.15
cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4276
count_zero 925
count_one 26075
0.2
cost 1917878.4399999909
th 0.2
sumando k
---------------------------------
k: 4277
count_zero 2215
count_one 24785
0.25
cost 1883166.799999991
th 0.25
sumando k
---------------------------------
k: 4278
count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
-----

count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4340
count_zero 196
count_one 26804
0.15
cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4341
count_zero 925
count_one 26075
0.2
cost 1917878.4399999909
th 0.2
sumando k
---------------------------------
k: 4342
count_zero 2215
count_one 24785
0.25
cost 1883166.799999991
th 0.25
sumando k
---------------------------------
k: 4343
count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
---------------------------------
k: 4344
count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4345
count_zero 196
count_one 26804
0.15
cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4346
count_zero 925
count_one 26075
0.2
cost 1917878.4399999909
th 0.2
sumando k
---------------------------------
k: 4347
count_zero 2215
count_one 24785
0.25
cost 188

count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
---------------------------------
k: 4409
count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4410
count_zero 196
count_one 26804
0.15
cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4411
count_zero 925
count_one 26075
0.2
cost 1917878.4399999909
th 0.2
sumando k
---------------------------------
k: 4412
count_zero 2215
count_one 24785
0.25
cost 1883166.799999991
th 0.25
sumando k
---------------------------------
k: 4413
count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
---------------------------------
k: 4414
count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4415
count_zero 196
count_one 26804
0.15
cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4416
count_zero 925
count_one 26075
0.2
cost 1917

count_zero 2215
count_one 24785
0.25
cost 1883166.799999991
th 0.25
sumando k
---------------------------------
k: 4478
count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
---------------------------------
k: 4479
count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4480
count_zero 196
count_one 26804
0.15
cost 1960561.359999992
th 0.15
sumando k
---------------------------------
k: 4481
count_zero 925
count_one 26075
0.2
cost 1917878.4399999909
th 0.2
sumando k
---------------------------------
k: 4482
count_zero 2215
count_one 24785
0.25
cost 1883166.799999991
th 0.25
sumando k
---------------------------------
k: 4483
count_zero 3549
count_one 23451
0.3
cost 1891720.2799999919
th 0.3
sumando k
---------------------------------
k: 4484
count_zero 5079
count_one 21921
0.35
cost 1898771.0199999933
th 0.35
sumando k
---------------------------------
k: 4485
count_zero 196
count_one 26804
0.15
cost 19

sumando k
---------------------------------
k: 4537
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4538
count_zero 6903
count_one 20097
0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4539
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
---------------------------------
k: 4540
count_zero 2653
count_one 24347
0.15
cost 1856867.5799999896
th 0.15
sumando k
---------------------------------
k: 4541
count_zero 4327
count_one 22673
0.2
cost 1822764.0399999886
th 0.2
sumando k
---------------------------------
k: 4542
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4543
count_zero 6903
count_one 20097
0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4544
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
------------------------------

count_zero 2653
count_one 24347
0.15
cost 1856867.5799999896
th 0.15
sumando k
---------------------------------
k: 4606
count_zero 4327
count_one 22673
0.2
cost 1822764.0399999886
th 0.2
sumando k
---------------------------------
k: 4607
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4608
count_zero 6903
count_one 20097
0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4609
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
---------------------------------
k: 4610
count_zero 2653
count_one 24347
0.15
cost 1856867.5799999896
th 0.15
sumando k
---------------------------------
k: 4611
count_zero 4327
count_one 22673
0.2
cost 1822764.0399999886
th 0.2
sumando k
---------------------------------
k: 4612
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4613
count_zero 6903
count_one 20097
0.3
cos

0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4674
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
---------------------------------
k: 4675
count_zero 2653
count_one 24347
0.15
cost 1856867.5799999896
th 0.15
sumando k
---------------------------------
k: 4676
count_zero 4327
count_one 22673
0.2
cost 1822764.0399999886
th 0.2
sumando k
---------------------------------
k: 4677
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4678
count_zero 6903
count_one 20097
0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4679
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
---------------------------------
k: 4680
count_zero 2653
count_one 24347
0.15
cost 1856867.5799999896
th 0.15
sumando k
---------------------------------
k: 4681
count_zero 4327
count_one 22673
0.2
cost 1822764.0399999886
th 0.2
suman

sumando k
---------------------------------
k: 4742
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4743
count_zero 6903
count_one 20097
0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4744
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
---------------------------------
k: 4745
count_zero 2653
count_one 24347
0.15
cost 1856867.5799999896
th 0.15
sumando k
---------------------------------
k: 4746
count_zero 4327
count_one 22673
0.2
cost 1822764.0399999886
th 0.2
sumando k
---------------------------------
k: 4747
count_zero 5697
count_one 21303
0.25
cost 1846507.0599999889
th 0.25
sumando k
---------------------------------
k: 4748
count_zero 6903
count_one 20097
0.3
cost 1870281.8399999905
th 0.3
sumando k
---------------------------------
k: 4749
count_zero 8246
count_one 18754
0.35
cost 2065253.039999991
th 0.35
sumando k
------------------------------

count_zero 10612
count_one 16388
0.2
cost 2750091.599999995
th 0.2
sumando k
---------------------------------
k: 4802
count_zero 11826
count_one 15174
0.25
cost 2965800.859999997
th 0.25
sumando k
---------------------------------
k: 4803
count_zero 12915
count_one 14085
0.3
cost 3140626.979999998
th 0.3
sumando k
---------------------------------
k: 4804
count_zero 13991
count_one 13009
0.35
cost 3484458.939999997
th 0.35
sumando k
---------------------------------
k: 4805
count_zero 9172
count_one 17828
0.15
cost 2429274.779999999
th 0.15
sumando k
---------------------------------
k: 4806
count_zero 10612
count_one 16388
0.2
cost 2750091.599999995
th 0.2
sumando k
---------------------------------
k: 4807
count_zero 11826
count_one 15174
0.25
cost 2965800.859999997
th 0.25
sumando k
---------------------------------
k: 4808
count_zero 12915
count_one 14085
0.3
cost 3140626.979999998
th 0.3
sumando k
---------------------------------
k: 4809
count_zero 13991
count_one 13009
0.35
cos

cost 3484458.939999997
th 0.35
sumando k
---------------------------------
k: 4870
count_zero 9172
count_one 17828
0.15
cost 2429274.779999999
th 0.15
sumando k
---------------------------------
k: 4871
count_zero 10612
count_one 16388
0.2
cost 2750091.599999995
th 0.2
sumando k
---------------------------------
k: 4872
count_zero 11826
count_one 15174
0.25
cost 2965800.859999997
th 0.25
sumando k
---------------------------------
k: 4873
count_zero 12915
count_one 14085
0.3
cost 3140626.979999998
th 0.3
sumando k
---------------------------------
k: 4874
count_zero 13991
count_one 13009
0.35
cost 3484458.939999997
th 0.35
sumando k
---------------------------------
k: 4875
count_zero 9172
count_one 17828
0.15
cost 2429274.779999999
th 0.15
sumando k
---------------------------------
k: 4876
count_zero 10612
count_one 16388
0.2
cost 2750091.599999995
th 0.2
sumando k
---------------------------------
k: 4877
count_zero 11826
count_one 15174
0.25
cost 2965800.859999997
th 0.25
sumando k

count_zero 12915
count_one 14085
0.3
cost 3140626.979999998
th 0.3
sumando k
---------------------------------
k: 4939
count_zero 13991
count_one 13009
0.35
cost 3484458.939999997
th 0.35
sumando k
---------------------------------
k: 4940
count_zero 9172
count_one 17828
0.15
cost 2429274.779999999
th 0.15
sumando k
---------------------------------
k: 4941
count_zero 10612
count_one 16388
0.2
cost 2750091.599999995
th 0.2
sumando k
---------------------------------
k: 4942
count_zero 11826
count_one 15174
0.25
cost 2965800.859999997
th 0.25
sumando k
---------------------------------
k: 4943
count_zero 12915
count_one 14085
0.3
cost 3140626.979999998
th 0.3
sumando k
---------------------------------
k: 4944
count_zero 13991
count_one 13009
0.35
cost 3484458.939999997
th 0.35
sumando k
---------------------------------
k: 4945
count_zero 9172
count_one 17828
0.15
cost 2429274.779999999
th 0.15
sumando k
---------------------------------
k: 4946
count_zero 10612
count_one 16388
0.2
cos

k: 5000
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 5001
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 5002
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 5003
count_zero 0
count_one 27000
0.3
cost 1983300.3799999908
th 0.3
sumando k
---------------------------------
k: 5004
count_zero 0
count_one 27000
0.35
cost 1983300.3799999908
th 0.35
sumando k
---------------------------------
k: 5005
count_zero 0
count_one 27000
0.15
cost 1983300.3799999908
th 0.15
sumando k
---------------------------------
k: 5006
count_zero 0
count_one 27000
0.2
cost 1983300.3799999908
th 0.2
sumando k
---------------------------------
k: 5007
count_zero 0
count_one 27000
0.25
cost 1983300.3799999908
th 0.25
sumando k
---------------------------------
k: 5008
count_zero 0
count_one 27000
0.3
cost 1983300.37999999

KeyboardInterrupt: 

In [218]:
df_collector['final_cost'] = df_collector['cost'] - df_collector['learning_cost']

In [220]:
df_collector['diff_auc'] = df_collector['train_auc'] - df_collector['test_auc']

In [226]:
df_collector['max_depth'] = [md.max_depth for md in df_collector['max_depth']]

In [231]:
del df_collector['hyperparams']

In [232]:
df_collector.sort_values("final_cost", ascending=False).head(20)

Unnamed: 0,alpha,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
0,37676,50,0.01,4,0.15,24158,2842,1983300.0,0,0.826083,0.824467,1983300.0,0.0016167
2061,38910,100,0.01,4,0.2,14278,12722,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2091,149725,100,0.01,4,0.2,14278,12722,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2090,149725,100,0.01,4,0.15,17122,9878,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2089,11957,100,0.01,4,0.35,9979,17021,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2088,11957,100,0.01,4,0.3,11514,15486,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2087,11957,100,0.01,4,0.25,13034,13966,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2086,11957,100,0.01,4,0.2,14278,12722,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2085,11957,100,0.01,4,0.15,17122,9878,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799
2084,77037,100,0.01,4,0.35,9979,17021,1983300.0,0,0.827736,0.826728,1983300.0,0.00100799


In [3]:
import pandas as pd
df_collector = pd.read_csv("results_2exp_1rstpart.csv")

In [147]:
df_collector['final_cost'] = df_collector['cost'] - df_collector['learning_cost']

In [157]:
df_collector.to_csv("raw_models_second_experiment.csv", index=False)

In [160]:
df_analysis = df_collector[(df_collector['final_cost'] >0) & (df_collector['learning_cost'] >0) & (df_collector['final_cost'] >800000)].sort_values("final_cost").reset_index(drop=True)

In [161]:
df_analysis

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost
0,322247,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023660.0,1216010.0,0.960457,0.730892,807648.0
1,317847,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023660.0,1199400.0,0.960457,0.730892,824252.0
2,158285,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,1972520.0,1135980.0,0.83275,0.812756,836535.0
3,71990,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.8,1980290.0,1130220.0,0.828108,0.822177,850066.0
4,16394,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.7,1988080.0,1130800.0,0.826748,0.821231,857274.0
5,185450,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987260.0,1128430.0,0.829214,0.823661,858829.0
6,304167,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023660.0,1147780.0,0.960457,0.730892,875873.0
7,300535,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023660.0,1134080.0,0.960457,0.730892,889579.0
8,22964,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1966390.0,1076400.0,0.829275,0.823502,889985.0
9,149811,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,1972520.0,1075170.0,0.83275,0.812756,897351.0


In [162]:
df_analysis.to_csv("data_processed_models.csv", index=False)

In [None]:
df_analysis

In [93]:
df_collector['final_cost'] = df_collector['cost'] - df_collector['learning_cost']

In [107]:
df_collector[df_collector['final_cost'] >0].query("alpha==999725")

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost
3272,999725,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.8,2471670.0,0,0.826416,0.824224,2471670.0


In [112]:
df_collector[df_collector['final_cost'] >0].describe()

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost
count,837,837,837,837.0,837,837.0,837.0,837.0,837.0,837.0,837.0
unique,835,8,1,3.0,4,7.0,12.0,226.0,8.0,8.0,226.0
top,386018,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2471674.1,0.0,0.826416,0.824224,2471674.1
freq,2,401,837,711.0,520,316.0,612.0,612.0,401.0,401.0,612.0


In [102]:
df_collector[df_collector['final_cost'] >0].min().alpha

10079.0

In [86]:
df_collector.sort_values("cost").iloc[3051]

alpha                                                              596272
hyperparams             NGBClassifier(Base=DecisionTreeRegressor(ccp_a...
estimators                                                             90
learning_rate                                                         0.1
max_depth               DecisionTreeRegressor(ccp_alpha=0.0, criterion...
threshold                                                            0.75
cost                                                          5.70545e+06
cost_common                                                    7.6209e+06
learning_cost                                                 4.20081e+08
learning_cost_common                                          9.54968e+08
train_auc                                                         0.94028
test_auc                                                         0.762959
Name: 1952, dtype: object

In [88]:
df_collector.sort_values("cost").iloc[3051].cost - df_collector.sort_values("cost").iloc[3051].learning_cost

-414375074.8010962

In [20]:
df_analysis = pd.read_csv("data_processed_models.csv")

In [22]:
df_analysis['diff_auc'] = df_analysis['train_auc'] - df_analysis['test_auc']

In [23]:
df_analysis

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
0,322247,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023656.5,1216008.0,0.960457,0.730892,807648.0,0.229565
1,317847,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023656.5,1199405.0,0.960457,0.730892,824251.5,0.229565
2,158285,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,1972517.68,1135983.0,0.83275,0.812756,836534.8,0.019994
3,71990,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.8,1980286.78,1130221.0,0.828108,0.822177,850066.0,0.005931
4,16394,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.7,1988075.1,1130801.0,0.826748,0.821231,857273.8,0.005517
5,185450,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,1128435.0,0.829214,0.823661,858829.2,0.005552
6,304167,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023656.5,1147783.0,0.960457,0.730892,875873.4,0.229565
7,300535,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,2023656.5,1134078.0,0.960457,0.730892,889578.9,0.229565
8,22964,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1966387.0,1076402.0,0.829275,0.823502,889984.8,0.005773
9,149811,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.9,1972517.68,1075167.0,0.83275,0.812756,897351.2,0.019994


In [24]:
df_analysis.sort_values("diff_auc")

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
10,15193,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.7,1988075.1,1047961.0,0.826748,0.821231,940114.6,0.005517
4,16394,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.7,1988075.1,1130801.0,0.826748,0.821231,857273.8,0.005517
36,83183,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,506155.7,0.829214,0.823661,1481108.0,0.005552
29,109646,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,667178.9,0.829214,0.823661,1320085.0,0.005552
5,185450,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,1128435.0,0.829214,0.823661,858829.2,0.005552
31,108128,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,657942.1,0.829214,0.823661,1329322.0,0.005552
48,25348,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,154238.7,0.829214,0.823661,1833025.0,0.005552
47,31000,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,188630.2,0.829214,0.823661,1798634.0,0.005552
12,164985,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,1003908.0,0.829214,0.823661,983355.6,0.005552
25,23406,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,120,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1980368.94,757157.6,0.829289,0.823735,1223211.0,0.005554


In [76]:
df_analysis = pd.read_csv("top30_models.csv")

In [71]:
df_analysis = df_analysis.sort_values("diff_auc").head(30)

In [72]:
df_analysis[df_analysis['threshold'] >= 0.8].sort_values("diff_auc").head(28)

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
36,83183,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,506155.7,0.829214,0.823661,1481108.0,0.005552
29,109646,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,667178.9,0.829214,0.823661,1320085.0,0.005552
5,185450,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,1128435.0,0.829214,0.823661,858829.2,0.005552
31,108128,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,657942.1,0.829214,0.823661,1329322.0,0.005552
48,25348,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,154238.7,0.829214,0.823661,1833025.0,0.005552
47,31000,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,188630.2,0.829214,0.823661,1798634.0,0.005552
12,164985,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,,0.85,1987263.76,1003908.0,0.829214,0.823661,983355.6,0.005552
25,23406,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,120,0.01,,0.85,1980368.94,757157.6,0.829289,0.823735,1223211.0,0.005554
11,31829,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,120,0.01,,0.85,1980368.94,1029632.0,0.829289,0.823735,950736.8,0.005554
37,14789,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,120,0.01,,0.85,1980368.94,478407.4,0.829289,0.823735,1501962.0,0.005554


In [None]:
estimators: 75 a 200
learning_rate 0.01
threshold 0.85

In [67]:
df_analysis['max_depth'] = [DecisionTreeRegressor(cc).max_depth for cc in list(df_analysis['max_depth'])]

In [84]:
df_analysis.hyperparams.iloc[0]

"NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,\n                                         criterion='friedman_mse', max_depth=6,\n                                         max_features=None, max_leaf_nodes=None,\n                                         min_impurity_decrease=0.0,\n                                         min_impurity_split=None,\n                                         min_samples_leaf=1,\n                                         min_samples_split=2,\n                                         min_weight_fraction_leaf=0.0,\n                                         presort='deprecated',\n                                         random_state=None, splitter='best'),\n              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,\n              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,\n              learning_rate=0.01, minibatch_frac=1.0, n_estimators=50,\n              natural_gradient=True,\n              random_state=R

In [106]:
df_analysis[(df_analysis['threshold'] == 0.8) |(df_analysis['threshold'] == 0.85)].sort_values(['final_cost']).alpha.unique()

array([ 71990, 185450,  22964,  31829, 164985,  56190,  54744,  23406,
        42876, 109646, 108128,  83183,  14789,  11679,  31000,  25348])

In [107]:
base0 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4)
base1 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=6)

In [26]:
def check_counts(model, x_data, threshold):
    y_predictions = model.predict_proba(x_data)
    y_pred = []
    count_zero = 0
    count_one = 0
    for i in range(len(list(y_predictions))):
        if y_predictions[i][0] > threshold:
            y_pred.append(0)
            count_zero += 1
        else:
            y_pred.append(1)
            count_one += 1
    return count_zero, count_one

In [114]:
#FIX MEEEEEEEEEEE: REFACTOR PLZZ !!!
df_collector = pd.DataFrame(columns=  ["alpha","hyperparams", "estimators", 
                                       "learning_rate", "max_depth", "threshold", 
                                       "cost", 
                                       "learning_cost",
                                       "train_auc", "test_auc"])
df_collector['hyperparams'] = df_collector['hyperparams'].astype('object')
k = 0
for estimator in [75, 100, 125, 150, 175, 200]:
    for lr in [0.01]:
        for baset in [base0,base1]:
            ngb_clf = NGBClassifier(Dist=Bernoulli,
                                      verbose=True, Base=baset, n_estimators = estimator, 
                                    learning_rate = lr, verbose_eval = 0
                                     )
            print(ngb_clf)
            ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))
            index_th = 0
            #en el primer iteraba el threashold desde 0.1 a 0.9 obviamente cuanto mas menor me daba peor.. entonces itero
            #solamente d esde 0.70 a 0.95
            for threshold in list(np.arange(0.75, 0.95, 0.025)):
                count_zero, count_one = check_counts(ngb_clf, df_test.values, threshold)
                if count_zero == 0:
                    pass
                else:
                    for kix in range(0,50):
                        alpha = random.randint(20000, 140000)
                        #alpha = random.randint(10000, 700000)
                        df_collector.ix[str(k),'alpha'] = alpha
                        df_collector.ix[str(k),'hyperparams'] = ngb_clf
                        df_collector.ix[str(k),'estimators'] = estimator
                        df_collector.ix[str(k),'learning_rate'] = lr
                        df_collector.ix[str(k),'max_depth'] = baset
                        threshold = round(threshold,2)
                        print("k: "+str(k))
                        df_aux = pd.DataFrame(df_test.values, columns=cols_with_missing_indicators)
                        df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, df_test.values, threshold)
                        df_aux['proba_predicted'] = [proba[1] for proba in ngb_clf.predict_proba(df_test.values)]
                        df_aux['real'] = list(y_test)
                        df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
                        print(threshold)
                        print("cost "+str(cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)))
                        print("th "+str(threshold))
                        df_collector.ix[str(k),'threshold'] = threshold
                        df_collector.ix[str(k),'cost'] = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)
                        df_collector.ix[str(k),'learning_cost'] = calculate_learning_cost(df_aux.predicted,df_aux.proba_predicted, alpha)
                        # predict probabilities
                        train_probs = ngb_clf.predict_proba(df_train.values)
                        test_probs = ngb_clf.predict_proba(df_test.values)
                        # keep probabilities for the positive outcome only
                        train_probs = train_probs[:, 1]
                        test_probs = test_probs[:, 1]
                        # calculate scores
                        train_auc = roc_auc_score(y_train, train_probs)
                        test_auc = roc_auc_score(y_test, test_probs)
                        df_collector.ix[str(k),'train_auc'] = train_auc
                        df_collector.ix[str(k),'test_auc'] = test_auc
                        k+=1
                        print("sumando k")
                        print("---------------------------------")
            del ngb_clf
df_collector['final_cost'] = df_collector['cost'] - df_collector['learning_cost']

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=4,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=75,
              natural_gradient=True,
              random_state=RandomState(MT19

0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 52
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 53
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 54
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 55
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 56
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 57
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 58
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
---------------------------------
k: 59
count_zero 84
count_one 26916
0.78
cost 1964467.519999986
th 0.78
sumando k
--------------------------

0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 104
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 105
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 106
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 107
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 108
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 109
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 110
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
---------------------------------
k: 111
count_zero 993
count_one 26007
0.75
cost 1875663.8199999873
th 0.75
sumando k
-

0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 173
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 174
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 175
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 176
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 177
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 178
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 179
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
---------------------------------
k: 180
count_zero 372
count_one 26628
0.78
cost 1930539.4199999874
th 0.78
sumando k
-

0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 243
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 244
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 245
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 246
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 247
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 248
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 249
count_zero 236
count_one 26764
0.8
cost 1954609.739999987
th 0.8
sumando k
---------------------------------
k: 250
count_zero 42
count_one 26958
0.82
cost 1971846.7999999858
th 0.82
sumando k
--------------------------

k: 300
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 301
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 302
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 303
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 304
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 305
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 306
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 307
count_zero 1196
count_one 25804
0.75
cost 1855184.5399999875
th 0.75
sumando k
---------------------------------
k: 308
count_zero 1196
count_one 25804
0

sumando k
---------------------------------
k: 369
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 370
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 371
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 372
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 373
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 374
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 375
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 376
count_zero 719
count_one 26281
0.78
cost 1898492.6399999873
th 0.78
sumando k
---------------------------------
k: 3

0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 439
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 440
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 441
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 442
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 443
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 444
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 445
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
---------------------------------
k: 446
count_zero 302
count_one 26698
0.8
cost 1939455.5599999866
th 0.8
sumando k
-------------------

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=6,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=150,
              natural_gradient=True,
              random_state=RandomState(MT1

sumando k
---------------------------------
k: 560
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 561
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 562
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 563
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 564
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 565
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 566
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 567
count_zero 875
count_one 26125
0.78
cost 1878947.119999987
th 0.78
sumando k
---------------------------------
k: 568
count

0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 630
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 631
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 632
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 633
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 634
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 635
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 636
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
---------------------------------
k: 637
count_zero 505
count_one 26495
0.8
cost 1930314.3999999883
th 0.8
sumando k
-------------------

sumando k
---------------------------------
k: 699
count_zero 289
count_one 26711
0.82
cost 1945330.2799999865
th 0.82
sumando k
---------------------------------
k: 700
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 701
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 702
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 703
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 704
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 705
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 706
count_zero 1
count_one 26999
0.85
cost 1978697.7599999863
th 0.85
sumando k
---------------------------------
k: 707
count_zero 

0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 751
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 752
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 753
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 754
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 755
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 756
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 757
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sumando k
---------------------------------
k: 758
count_zero 1887
count_one 25113
0.75
cost 1851464.7799999868
th 0.75
sum

sumando k
---------------------------------
k: 819
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 820
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 821
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 822
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 823
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 824
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 825
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
---------------------------------
k: 826
count_zero 1067
count_one 25933
0.78
cost 1865002.2999999868
th 0.78
sumando k
------------------------------

0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 889
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 890
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 891
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 892
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 893
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 894
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 895
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
---------------------------------
k: 896
count_zero 740
count_one 26260
0.8
cost 1896594.899999987
th 0.8
sumando k
----------------------------

sumando k
---------------------------------
k: 958
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 959
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 960
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 961
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 962
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 963
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 964
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 965
count_zero 88
count_one 26912
0.85
cost 1970740.9199999855
th 0.85
sumando k
---------------------------------
k: 966
count

sumando k
---------------------------------
k: 1009
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1010
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1011
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1012
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1013
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1014
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1015
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
---------------------------------
k: 1016
count_zero 2078
count_one 24922
0.75
cost 1853400.9199999867
th 0.75
sumando k
----------------------

cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1077
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1078
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1079
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1080
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1081
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1082
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1083
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78
sumando k
---------------------------------
k: 1084
count_zero 1180
count_one 25820
0.78
cost 1874679.9999999881
th 0.78


0.8
cost 1882106.9599999865
th 0.8
sumando k
---------------------------------
k: 1146
count_zero 837
count_one 26163
0.8
cost 1882106.9599999865
th 0.8
sumando k
---------------------------------
k: 1147
count_zero 837
count_one 26163
0.8
cost 1882106.9599999865
th 0.8
sumando k
---------------------------------
k: 1148
count_zero 837
count_one 26163
0.8
cost 1882106.9599999865
th 0.8
sumando k
---------------------------------
k: 1149
count_zero 837
count_one 26163
0.8
cost 1882106.9599999865
th 0.8
sumando k
---------------------------------
k: 1150
count_zero 544
count_one 26456
0.82
cost 1918524.9799999886
th 0.82
sumando k
---------------------------------
k: 1151
count_zero 544
count_one 26456
0.82
cost 1918524.9799999886
th 0.82
sumando k
---------------------------------
k: 1152
count_zero 544
count_one 26456
0.82
cost 1918524.9799999886
th 0.82
sumando k
---------------------------------
k: 1153
count_zero 544
count_one 26456
0.82
cost 1918524.9799999886
th 0.82
sumando k
---

count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1215
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1216
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1217
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1218
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1219
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1220
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1221
count_zero 196
count_one 26804
0.85
cost 1954134.3199999868
th 0.85
sumando k
---------------------------------
k: 1222
count_zero 196
count_one 26804
0.85
cost

In [117]:
df_collector['diff_auc'] = df_collector['train_auc'] - df_collector['test_auc']
df_collector[(df_collector["final_cost"] > 0)].sort_values(['final_cost'])

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
1207,82148,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.95413e+06,1.95261e+06,0.829057,0.825046,1523.36,0.00401119
1222,81924,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.95413e+06,1.94729e+06,0.829057,0.825046,6847.71,0.00401119
495,85982,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,125,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.82,1.96346e+06,1.95467e+06,0.82949,0.825923,8783.25,0.00356673
1220,81615,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.95413e+06,1.93994e+06,0.829057,0.825046,14192.5,0.00401119
444,43195,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,125,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.8,1.93946e+06,1.91155e+06,0.82949,0.825923,27906.9,0.00356673
...,...,...,...,...,...,...,...,...,...,...,...,...
723,26541,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.9787e+06,3265.52,0.829286,0.825548,1.97543e+06,0.00373749
721,25104,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.9787e+06,3088.72,0.829286,0.825548,1.97561e+06,0.00373749
745,23110,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.9787e+06,2843.38,0.829286,0.825548,1.97585e+06,0.00373749
705,22034,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1.9787e+06,2710.99,0.829286,0.825548,1.97599e+06,0.00373749


In [158]:
df_collector[(df_collector["final_cost"] > 0)].sort_values(['final_cost'], ascending=False).head(50)

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
741,20896,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,2570.98,0.829286,0.825548,1976130.0,0.00373749
705,22034,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,2710.99,0.829286,0.825548,1975990.0,0.00373749
745,23110,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,2843.38,0.829286,0.825548,1975850.0,0.00373749
721,25104,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,3088.72,0.829286,0.825548,1975610.0,0.00373749
723,26541,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,3265.52,0.829286,0.825548,1975430.0,0.00373749
717,31336,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,3855.48,0.829286,0.825548,1974840.0,0.00373749
722,33043,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,4065.51,0.829286,0.825548,1974630.0,0.00373749
715,37349,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,4595.3,0.829286,0.825548,1974100.0,0.00373749
748,40767,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,5015.84,0.829286,0.825548,1973680.0,0.00373749
712,45676,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1978700.0,5619.83,0.829286,0.825548,1973080.0,0.00373749


In [126]:
df_analysis["hyperparams"] = df_analysis["hyperparams"].astype(object)

In [132]:
caca = df_analysis["hyperparams"].iloc[0]

In [137]:
df_analysis.head(5)

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
0,15193,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.7,1988075.1,1047961.0,0.826748,0.821231,940114.6,0.005517
1,16394,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.7,1988075.1,1130801.0,0.826748,0.821231,857273.8,0.005517
2,83183,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,506155.7,0.829214,0.823661,1481108.0,0.005552
3,109646,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,667178.9,0.829214,0.823661,1320085.0,0.005552
4,185450,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.85,1987263.76,1128435.0,0.829214,0.823661,858829.2,0.005552


In [141]:
import re

max_depth_list = []
for caca in list(df_analysis["hyperparams"]):
    m = re.search('max_depth=(.+?)', caca)
    if m:
        max_depth_list.append(int(m.group(1)))

In [142]:
df_analysis["max_depth"] = max_depth_list

In [154]:
df_analysis

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
0,15193,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,6,0.7,1988075.1,1047961.0,0.826748,0.821231,940114.6,0.005517
1,16394,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,6,0.7,1988075.1,1130801.0,0.826748,0.821231,857273.8,0.005517
2,83183,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,506155.7,0.829214,0.823661,1481108.0,0.005552
3,109646,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,667178.9,0.829214,0.823661,1320085.0,0.005552
4,185450,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,1128435.0,0.829214,0.823661,858829.2,0.005552
5,108128,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,657942.1,0.829214,0.823661,1329322.0,0.005552
6,25348,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,154238.7,0.829214,0.823661,1833025.0,0.005552
7,31000,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,188630.2,0.829214,0.823661,1798634.0,0.005552
8,164985,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,100,0.01,6,0.85,1987263.76,1003908.0,0.829214,0.823661,983355.6,0.005552
9,23406,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,120,0.01,6,0.85,1980368.94,757157.6,0.829289,0.823735,1223211.0,0.005554


In [23]:
df_analysis = df_collector[(df_collector["threshold"] ==0.2) &  (df_collector["final_cost"] > 0)]

In [30]:
df_analysis

Unnamed: 0,alpha,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
1,37676,50,0.01,4,0.2,15306,11694,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
6,57030,50,0.01,4,0.2,15306,11694,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
11,81283,50,0.01,4,0.2,15306,11694,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
16,61622,50,0.01,4,0.2,15306,11694,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
21,54746,50,0.01,4,0.2,15306,11694,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5001,27348,200,0.01,4,0.2,14355,12645,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
5006,143943,200,0.01,4,0.2,14355,12645,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
5011,82172,200,0.01,4,0.2,14355,12645,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
5016,145278,200,0.01,4,0.2,14355,12645,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372


In [None]:
df_analysis1 = pd.read_csv("")

In [31]:

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
def plot_4d(df, col_cost):
    """ 
    Do 4d plot
    
    Args:
        - df (DataFrame Object): df to be represented.
        - col_cost (string): name of column that contains values in order to set colors
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter3d(x=df.estimators, y=df.alpha, z=df.max_depth, name = "Cost", 
        mode='markers',
        marker=dict(
        size=6,
        color=df[col_cost],  # set color to an array/list of desired values
        colorscale='RdBu',   # choose a colorscale
        opacity=0.8), hovertemplate="estimators: %{x}<br>alpha: %{y} <br>max_depth: %{z}<extra></extra>")
    ),
                 

    
    #data = [data1]
    #fig = dict(data=data)
    fig.update_layout(
                        showlegend=False,scene = dict(
                    xaxis_title='Estimators',
                    yaxis_title='Alpha',
                    zaxis_title='max_depth of Base Learner',
                        ))
    iplot(fig)

In [26]:
del df_analysis["hyperparams"]

In [27]:
df_analysis.sort_values("final_cost")

Unnamed: 0,alpha,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
4346,13241,150,0.01,6,0.2,17867,9133,1917878.44,1.733839e+06,0.826280,0.824900,1.840399e+05,0.001380
3356,17215,120,0.01,6,0.2,17762,9238,1947011.80,1.683698e+06,0.826337,0.825059,2.633136e+05,0.001278
2431,27776,100,0.01,6,0.2,17581,9419,1931177.98,1.628399e+06,0.826514,0.825068,3.027788e+05,0.001446
661,12771,50,0.01,8,0.2,22056,4944,1939660.18,1.575866e+06,0.823704,0.813038,3.637945e+05,0.010666
4266,11690,150,0.01,6,0.2,17867,9133,1917878.44,1.530743e+06,0.826280,0.824900,3.871351e+05,0.001380
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1016,104574,75,0.01,4,0.2,14681,12319,1983300.38,0.000000e+00,0.827757,0.826482,1.983300e+06,0.001275
1011,82538,75,0.01,4,0.2,14681,12319,1983300.38,0.000000e+00,0.827757,0.826482,1.983300e+06,0.001275
1006,51980,75,0.01,4,0.2,14681,12319,1983300.38,0.000000e+00,0.827757,0.826482,1.983300e+06,0.001275
1041,149556,75,0.01,4,0.2,14681,12319,1983300.38,0.000000e+00,0.827757,0.826482,1.983300e+06,0.001275


In [28]:
plot_4d(df_analysis[['alpha',  'estimators',  'max_depth', 'final_cost']], "final_cost")

In [32]:
plot_4d(df_analysis[['alpha',  'estimators',  'max_depth', 'diff_auc']], 'diff_auc')

In [65]:
ngb_clf = NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=6,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
             col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=150,
              natural_gradient=True, tol=0.0001,
              verbose=True, verbose_eval=0)
ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=6,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=150,
              natural_gradient=True,
              random_state=RandomState(MT1

In [66]:
df_analysis.sort_values("final_cost").head(20)

Unnamed: 0,alpha,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
4346,13241,150,0.01,6,0.2,17867,9133,1917878.44,1733839.0,0.82628,0.8249,184039.9,0.00138
3356,17215,120,0.01,6,0.2,17762,9238,1947011.8,1683698.0,0.826337,0.825059,263313.6,0.001278
2431,27776,100,0.01,6,0.2,17581,9419,1931177.98,1628399.0,0.826514,0.825068,302778.8,0.001446
661,12771,50,0.01,8,0.2,22056,4944,1939660.18,1575866.0,0.823704,0.813038,363794.5,0.010666
4266,11690,150,0.01,6,0.2,17867,9133,1917878.44,1530743.0,0.82628,0.8249,387135.1,0.00138
2361,26021,100,0.01,6,0.2,17581,9419,1931177.98,1525510.0,0.826514,0.825068,405667.6,0.001446
3406,14789,120,0.01,6,0.2,17762,9238,1947011.8,1446425.0,0.826337,0.825059,500586.4,0.001278
716,11213,50,0.01,8,0.2,22056,4944,1939660.18,1383618.0,0.823704,0.813038,556042.5,0.010666
631,10146,50,0.01,8,0.2,22056,4944,1939660.18,1251956.0,0.823704,0.813038,687703.9,0.010666
2261,20784,100,0.01,6,0.2,17581,9419,1931177.98,1218485.0,0.826514,0.825068,712692.6,0.001446


In [74]:
df_metric = pd.DataFrame(columns=  ["alpha", "threshold", 
                                       "cost",  "learning_cost"])
k = 0
for threshold in list(np.arange(0.2, 0.35, 0.05)):
    df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
    df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, threshold)
    df_aux['proba_predicted'] = [proba[1] for proba in ngb_clf.predict_proba(X_test)]
    df_aux['real'] = list(y_test)
    df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
    for kix in range(0,1000):
        alpha = random.randint(10000, 30000)
        #alpha = random.randint(10000, 700000)
        df_collector.ix[str(k),'alpha'] = alpha
        df_collector.ix[str(k),'threshold'] = threshold
        print("k: "+str(k))
        print("alpha: "+str(alpha))
        print(threshold)
        print("cost "+str(cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)))
        print("learning cost "+str(calculate_learning_cost(df_aux.predicted,df_aux.proba_predicted, alpha)))
        print("th "+str(threshold))
        df_metric.ix[str(k),'cost'] = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)
        df_metric.ix[str(k),'learning_cost'] = calculate_learning_cost(df_aux.predicted,df_aux.proba_predicted, alpha)
        k+=1
        print("sumando k")
        print("---------------------------------")

df_metric['final_cost'] = df_metric['cost'] - df_metric['learning_cost']
df_metric["%_inversion"] = (df_metric['final_cost']- df_metric['cost'])/(df_metric['cost'])

count_zero 937
count_one 26063
k: 0
alpha: 20536
0.2
cost 1947289.1999999927
learning cost 2770211.8750962713
th 0.2
sumando k
---------------------------------
k: 1
alpha: 13235
0.2
cost 1947289.1999999927
learning cost 1785340.5807800523
th 0.2
sumando k
---------------------------------
k: 2
alpha: 11544
0.2
cost 1947289.1999999927
learning cost 1557232.4642633079
th 0.2


KeyboardInterrupt: 

In [11]:
base0 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4)
base1 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=6)
base2 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=8)
base3 = DecisionTreeRegressor(criterion="friedman_mse", max_depth=12)

In [36]:
from utils import check_counts

In [42]:
#FIX MEEEEEEEEEEE: REFACTOR PLZZ !!!
df_collector = pd.DataFrame(columns=  ["alpha","hyperparams", "estimators", 
                                       "learning_rate", "max_depth", "threshold", "count_zero", "count_one",
                                       "cost", 
                                       "learning_cost",
                                       "train_auc", "test_auc"])
df_collector['hyperparams'] = df_collector['hyperparams'].astype('object')
k = 0
for estimator in [200,250,300]:
    for lr in [0.01]:
        for baset in [base0,base1,base2,base3]:
            ngb_clf = NGBClassifier(Dist=Bernoulli,
                                      verbose=True, Base=baset, n_estimators = estimator, 
                                    learning_rate = lr, verbose_eval = 0
                                     )
            print(ngb_clf)
            ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))
            index_th = 0
            #en el primer iteraba el threashold desde 0.1 a 0.9 obviamente cuanto mas menor me daba peor.. entonces itero
            #solamente d esde 0.70 a 0.95
            for kix in range(0,2000):
                alpha = random.randint(10000, 80000)
                cost = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)
                learning_cost = calculate_learning_cost(df_aux.predicted,df_aux.proba_predicted, alpha)
                if cost-learning_cost  > 0:
                    for threshold in list(np.arange(0.15, 0.4, 0.05)):
                        count_zero, count_one = check_counts(ngb_clf, X_test, threshold)
                        if (count_zero > 7000):
                            #alpha = random.randint(10000, 700000)
                            df_collector.ix[str(k),'alpha'] = alpha
                            df_collector.ix[str(k),'hyperparams'] = ngb_clf
                            df_collector.ix[str(k),'estimators'] = estimator
                            df_collector.ix[str(k),'learning_rate'] = lr
                            df_collector.ix[str(k),'max_depth'] = baset
                            threshold = round(threshold,2)
                            print("k: "+str(k))
                            df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
                            df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, threshold)
                            df_aux['proba_predicted'] = [proba[1] for proba in ngb_clf.predict_proba(X_test)]
                            df_aux['real'] = list(y_test)
                            df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
                            print(threshold)
                            print("cost "+str(cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)))
                            print("th "+str(threshold))
                            df_collector.ix[str(k),'threshold'] = threshold
                            df_collector.ix[str(k),'count_zero'] = count_zero
                            df_collector.ix[str(k),'count_one'] = count_one
                            df_collector.ix[str(k),'cost'] = cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real) #predicted tomo threshold (p(x|y=0))
                            df_collector.ix[str(k),'learning_cost'] = calculate_learning_cost(df_aux.predicted,df_aux.proba_predicted, alpha) #predicted tomo threshold (p(x|y=0)), probas predichas por el modelo para y = 1 y alpha
                            # predict probabilities
                            train_probs = ngb_clf.predict_proba(X_train)
                            test_probs = ngb_clf.predict_proba(X_test)
                            # keep probabilities for the positive outcome only
                            train_probs = train_probs[:, 1]
                            test_probs = test_probs[:, 1]
                            # calculate scores
                            train_auc = roc_auc_score(y_train, train_probs)
                            test_auc = roc_auc_score(y_test, test_probs)
                            df_collector.ix[str(k),'train_auc'] = train_auc
                            df_collector.ix[str(k),'test_auc'] = test_auc
                            k+=1
                            print("sumando k")
                            print("---------------------------------")
            del ngb_clf
df_collector['final_cost'] = df_collector['cost'] - df_collector['learning_cost']

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=4,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=200,
              natural_gradient=True,
              random_state=RandomState(MT1

KeyboardInterrupt: 

In [43]:
df_collector

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc


In [32]:
df_collector['diff_auc'] = df_collector['train_auc'] - df_collector['test_auc']


In [34]:
df_collector[(df_collector["final_cost"] > 0) & (df_collector["learning_cost"] > 0)].sort_values("final_cost")

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
260,46401,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,20999,6001,1953620.0,1950810.0,0.830175,0.827612,2809.41,0.00256258
2285,28494,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,300,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,21011,5989,1970440.0,1954450.0,0.829978,0.827084,15988.8,0.00289348
2445,26021,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,300,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,21011,5989,1970440.0,1784830.0,0.829978,0.827084,185616.0,0.00289348
491,10376,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.2,18226,8774,1917250.0,1719680.0,0.830175,0.827612,197568.0,0.00256258
350,37829,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,20999,6001,1953620.0,1590420.0,0.830175,0.827612,363197.0,0.00256258
1480,24584,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,250,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,20987,6013,1956980.0,1561720.0,0.830045,0.827278,395256.0,0.00276701
1265,23640,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,250,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,20987,6013,1956980.0,1501750.0,0.830045,0.827278,455224.0,0.00276701
315,34191,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,20999,6001,1953620.0,1437470.0,0.830175,0.827612,516147.0,0.00256258
1410,22494,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,250,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,20987,6013,1956980.0,1428950.0,0.830045,0.827278,528025.0,0.00276701
2345,20784,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,300,0.01,"DecisionTreeRegressor(ccp_alpha=0.0, criterion...",0.15,21011,5989,1970440.0,1425610.0,0.829978,0.827084,544831.0,0.00289348


In [None]:
ngb_clf = 

In [35]:
df_test = pd.read_csv("results_2exp_1rstpart.csv")

In [36]:
df_test.rename(columns={'count_zero':'count_one_', 'count_one':'count_zero_'}, inplace=True)

In [37]:
df_test[(df_test["final_cost"] > 0)].sort_values(["final_cost", "diff_auc"]).head(20)

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,count_one_,count_zero_,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
4260,83336,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.15,21045,5955,1960561.36,1942076.0,0.82628,0.8249,18485.543026,0.00138
4255,82833,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.15,21045,5955,1960561.36,1930354.0,0.82628,0.8249,30207.537553,0.00138
1497,16650,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,75,0.01,6,0.25,14925,12075,1915569.22,1852691.0,0.824784,0.822259,62878.152299,0.002525
448,27849,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,6,0.3,13520,13480,1967078.18,1885824.0,0.822885,0.820535,81254.503628,0.002349
4460,80217,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.15,21045,5955,1960561.36,1869390.0,0.82628,0.8249,91171.230762,0.00138
4275,79155,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.15,21045,5955,1960561.36,1844641.0,0.82628,0.8249,115920.253005,0.00138
3295,179570,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,120,0.01,6,0.15,21046,5954,1960961.02,1827497.0,0.826337,0.825059,133464.230286,0.001278
4360,77963,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.15,21045,5955,1960561.36,1816863.0,0.82628,0.8249,143698.816577,0.00138
4346,13241,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.2,17867,9133,1917878.44,1733839.0,0.82628,0.8249,184039.864054,0.00138
4485,75814,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,150,0.01,6,0.15,21045,5955,1960561.36,1766782.0,0.82628,0.8249,193779.465164,0.00138


In [49]:
ngb_clf = NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=6,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
             col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=150,
              natural_gradient=True, tol=0.0001,
              verbose=True, verbose_eval=0)
ngb_clf.fit(X_train_resampled, y_train_resampled, sample_weight= get_sample_weights(y_train, y_train_resampled))

NGBClassifier(Base=DecisionTreeRegressor(ccp_alpha=0.0,
                                         criterion='friedman_mse', max_depth=6,
                                         max_features=None, max_leaf_nodes=None,
                                         min_impurity_decrease=0.0,
                                         min_impurity_split=None,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         min_weight_fraction_leaf=0.0,
                                         presort='deprecated',
                                         random_state=None, splitter='best'),
              Dist=<class 'ngboost.distns.categorical.k_categorical.<locals>.Categorical'>,
              Score=<class 'ngboost.scores.LogScore'>, col_sample=1.0,
              learning_rate=0.01, minibatch_frac=1.0, n_estimators=150,
              natural_gradient=True,
              random_state=RandomState(MT1

In [51]:
df_aux = pd.DataFrame(X_test, columns=cols_with_missing_indicators)
df_aux['predicted'] = generate_y_pred_with_custom_threshold(ngb_clf, X_test, 0.2)
df_aux['real'] = list(y_test)
df_aux['LoanPrincipal'] = df_aux.MonthlyIncome * 2
#cost_score(df_aux.LoanPrincipal, df_aux.predicted, df_aux.real)

count_zero 937
count_one 26063


In [52]:
count_zero, count_one = check_counts(ngb_clf, X_test, 0.2)

In [53]:
count_zero

18148

In [54]:
count_one

8852

In [4]:
df_anal1 = pd.read_csv("metrics_2nd_exp.csv")

In [5]:
df_anal2 = pd.read_csv("metrics_2exp_2nd_part.csv")

In [7]:
df_anal1

Unnamed: 0,alpha,threshold,cost,learning_cost,final_cost,%_inversion
0,57739,0.15,1964824.44,4.218270e+05,1.542997e+06,0.214689
1,28496,0.15,1964824.44,2.081848e+05,1.756640e+06,0.105956
2,27779,0.15,1964824.44,2.029466e+05,1.761878e+06,0.103290
3,47810,0.15,1964824.44,3.492882e+05,1.615536e+06,0.177771
4,12678,0.15,1964824.44,9.262237e+04,1.872202e+06,0.047140
...,...,...,...,...,...,...
1039,10927,0.20,1947289.20,1.474002e+06,4.732872e+05,0.756951
1040,11336,0.20,1947289.20,1.529174e+06,4.181150e+05,0.785284
1041,10196,0.20,1947289.20,1.375393e+06,5.718957e+05,0.706312
1042,12303,0.20,1947289.20,1.659618e+06,2.876711e+05,0.852271


In [8]:
df_anal2

Unnamed: 0,alpha,threshold,cost,learning_cost,final_cost,%_inversion
0,4573,0.2,1947289.2,6.168767e+05,1.330413e+06,-0.316787
1,3462,0.2,1947289.2,4.670079e+05,1.480281e+06,-0.239825
2,9661,0.2,1947289.2,1.303224e+06,6.440648e+05,-0.669251
3,6438,0.2,1947289.2,8.684566e+05,1.078833e+06,-0.445982
4,8925,0.2,1947289.2,1.203941e+06,7.433478e+05,-0.618265
...,...,...,...,...,...,...
995,4246,0.2,1947289.2,5.727659e+05,1.374523e+06,-0.294135
996,6145,0.2,1947289.2,8.289322e+05,1.118357e+06,-0.425685
997,4855,0.2,1947289.2,6.549172e+05,1.292372e+06,-0.336322
998,6619,0.2,1947289.2,8.928726e+05,1.054417e+06,-0.458521


In [6]:
df1 = pd.read_csv("results_2exp_1rstpart.csv")

In [9]:
df1

Unnamed: 0,alpha,hyperparams,estimators,learning_rate,max_depth,threshold,count_zero,count_one,cost,learning_cost,train_auc,test_auc,final_cost,diff_auc
0,37676,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,4,0.15,24158,2842,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
1,37676,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,4,0.20,15306,11694,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
2,37676,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,4,0.25,13638,13362,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
3,37676,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,4,0.30,12464,14536,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
4,37676,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,50,0.01,4,0.35,11273,15727,1983300.38,0.0,0.826083,0.824467,1983300.38,0.001617
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5019,145278,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,4,0.35,9985,17015,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
5020,79390,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,4,0.15,16945,10055,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
5021,79390,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,4,0.20,14355,12645,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
5022,79390,NGBClassifier(Base=DecisionTreeRegressor(ccp_a...,200,0.01,4,0.25,12872,14128,1983300.38,0.0,0.828593,0.827221,1983300.38,0.001372
