# Prerequisite

Following packages must be installed to run codes below

```bash
$ pip3 install openpyxl 
```


In [1]:
%load_ext autoreload
%autoreload 2

# Add project path

In [2]:
import os
import sys
from pathlib import Path

PROJECT_PATH = Path().resolve().parent.parent
SRC_PATH = PROJECT_PATH / 'src'

sys.path.append(str(SRC_PATH))

In [3]:
exp_name = 'feature_selection_v2'

# Load data

In [4]:
from data_loader import SectorDataLoader

data_loader = SectorDataLoader()
data_finance = (
    data_loader
    .load_dataset()
    .pipe(data_loader.add_beaver_indicator)
    .pipe(data_loader.add_label)
)
data_finance.head()

Unnamed: 0,BIZ_NO,CMP_PFIX_NM,CMP_NM,CMP_NM1,CMP_SFIX_NM,CMP_ENM,BZ_TYP,CMP_TYP,CMP_SCL,PBCO_GB,...,STAT_OCR_DATE_y,Closed_Year,유동자산/부채총계,당기순이익(손실)/자산총계,부채총계/자산총계,순운전자본/자산총계,유동부채/유동자산,Years_From_Closed_Year_To_FS,Closed_In_1Yr,Closed_In_2Yrs
117,1018135422,(주),한국선박기술,,,Korea Marine Time Service,M,,2,2,...,20200930.0,2020.0,1.031451,0.038773,0.578094,0.543045,0.089273,2.0,0,1
219,1018154206,(주),드림미즈,,,"dreammiz Co., Ltd.",M,,2,2,...,20201231.0,2020.0,1.22004,0.001177,0.662356,0.608449,0.247063,2.0,0,1
279,1018163684,,디에프에스서울,,(주),DFS Seoul Ltd.,M,,2,2,...,20190917.0,2019.0,11.012914,0.046666,0.090794,0.909114,0.090802,1.0,1,1
339,1018178760,(주),대교디앤씨,,,"DAEGYO D & C CO.,LTD.",M,,2,2,...,20191010.0,2019.0,0.916364,-0.026128,1.091269,0.097514,0.902486,1.0,1,1
406,1018194173,,에코에너지,,(주),"Eco Energy Co.,Ltd.",M,,2,2,...,20200831.0,2020.0,1.183355,-0.062095,0.828142,0.194625,0.8014,2.0,0,1


In [5]:
from data_loader import IndexDataLoader
import pandas as pd

index_features = [
    '3YEAR TREASURY',
    '5YEAR TREASURY',
    '10YEAR TREASURY',
    '3YEAR COPORATE BOND',
    'CD 91DAYS',
    'CALL RATE 1DAY',
    'BASE RATE',
    'YEAR_MONTH',
    'DOW_JONES_COMMODITY_INDEX_PRICE',
    'DOW_JONES_COMMODITY_INDEX_CHANGE_RATE',
    'EXCHANGE_RATE_PRICE',
    'EXCHANGE_RATE_CHANGE_RATE',
    'NATIONWIDE',
    'CAPITAL AREA',
    'NON CAPITAL AREA',
    'CSI',
    'CCI',
    'CLI',
]

index_data_loader = IndexDataLoader()

index_data = (
    index_data_loader.load_interest_rate()
    .merge(
        (
            index_data_loader.load_dow_jones_commodity_index()
            .rename(columns={'PRICE':'DOW_JONES_COMMODITY_INDEX_PRICE',
                            'CHANGE RATE':'DOW_JONES_COMMODITY_INDEX_CHANGE_RATE'})
        ),
        how='outer',
        left_on='YEAR_MONTH',
        right_on='YEAR_MONTH'
    )
    .merge(
        (
            index_data_loader.load_exchange_rate()
            .rename(columns={'PRICE':'EXCHANGE_RATE_PRICE',
                            'CHANGE RATE':'EXCHANGE_RATE_CHANGE_RATE'})
        ),
        how='outer',
        left_on='YEAR_MONTH',
        right_on='YEAR_MONTH'
    )
    .merge(
        (
            index_data_loader.load_apartment_housing_index()
        ),
        how='outer',
        left_on='YEAR_MONTH',
        right_on='YEAR_MONTH'
        
    )
    .merge(
        (
            index_data_loader.load_csi()
        ),
        how='outer',
        left_on='YEAR_MONTH',
        right_on='YEAR_MONTH'
    )
    .merge(
        (
            index_data_loader.load_composite_index()
            .rename(columns={'PRICE':'DOW_JONES_COMMODITY_INDEX_PRICE',
                            'CHANGE RATE':'DOW_JONES_COMMODITY_INDEX_CHANGE_RATE'})
        ),
        how='outer',
        left_on='YEAR_MONTH',
        right_on='YEAR_MONTH'
    )
    .assign(year=lambda x : x['YEAR_MONTH'].dt.year)
    .assign(month=lambda x : x['YEAR_MONTH'].dt.month)
    .groupby(['year'])
    [index_features].mean()
    .reset_index(drop=False)
)

index_data

Unnamed: 0,year,3YEAR TREASURY,5YEAR TREASURY,10YEAR TREASURY,3YEAR COPORATE BOND,CD 91DAYS,CALL RATE 1DAY,BASE RATE,DOW_JONES_COMMODITY_INDEX_PRICE,EXCHANGE_RATE_PRICE,NATIONWIDE,CAPITAL AREA,NON CAPITAL AREA,CSI,CCI,CLI
0,2006,,,,,,,,,,63.841667,68.6,58.416667,,,
1,2007,,,,,,,,,,72.758333,82.441667,60.925,,,
2,2008,,,,,,,,,,76.775,89.15,62.991667,,,
3,2009,,,,,,,,,,76.058333,87.833333,64.2,,,
4,2010,,,,,,,,,,77.733333,86.833333,69.516667,,,
5,2011,,,,,,,,,,83.7,86.85,80.983333,,,
6,2012,,,,,,,,,,83.883333,81.891667,85.25,,,
7,2013,,,,,,,,,,84.325,80.333333,87.125,,,
8,2014,,,,,,,,,,88.008333,83.366667,91.325,,,
9,2015,,,,,,,,,,94.008333,89.175,97.658333,,,


In [6]:
data = (
    data_finance
    .assign(year=lambda x : x['결산년월'].astype(str).str[:4].astype(int))
    .merge(
        index_data,
        how='left',
        left_on='year',
        right_on='year'
    )
)
data

Unnamed: 0,BIZ_NO,CMP_PFIX_NM,CMP_NM,CMP_NM1,CMP_SFIX_NM,CMP_ENM,BZ_TYP,CMP_TYP,CMP_SCL,PBCO_GB,...,CALL RATE 1DAY,BASE RATE,DOW_JONES_COMMODITY_INDEX_PRICE,EXCHANGE_RATE_PRICE,NATIONWIDE,CAPITAL AREA,NON CAPITAL AREA,CSI,CCI,CLI
0,1018135422,(주),한국선박기술,,,Korea Marine Time Service,M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
1,1018154206,(주),드림미즈,,,"dreammiz Co., Ltd.",M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
2,1018163684,,디에프에스서울,,(주),DFS Seoul Ltd.,M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
3,1018178760,(주),대교디앤씨,,,"DAEGYO D & C CO.,LTD.",M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
4,1018194173,,에코에너지,,(주),"Eco Energy Co.,Ltd.",M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10516,8998700283,(주),덕성건설,,,Deokseong Construction,M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
10517,8998700283,(주),덕성건설,,,Deokseong Construction,M,,2,2,...,1.591667,1.562500,614.237500,1164.787500,100.108333,107.183333,94.075000,99.083333,100.258333,98.475000
10518,8998701263,,하나머스트제6호기업인수목적,,(주),HANA MUST SIXTH SPECIAL PURPOSE ACQUISITION CO...,M,,2,2,...,1.521667,1.541667,625.940833,1098.985000,100.608333,104.325000,97.441667,104.083333,101.425000,100.058333
10519,8998701263,,하나머스트제6호기업인수목적,,(주),HANA MUST SIXTH SPECIAL PURPOSE ACQUISITION CO...,M,,2,2,...,1.591667,1.562500,614.237500,1164.787500,100.108333,107.183333,94.075000,99.083333,100.258333,98.475000


# Baseline classifier using Beaver's features

In [7]:
from project_paths import DATA_PATH

sectors = [
    '제조업(10~34)',
    '부동산업(68)',
    '도매 및 소매업(45~47)',
    '숙박 및 음식점업(55~56)',
    '건설업(41~42)'
]

beaver_features = [
    '유동자산/부채총계',
    '당기순이익(손실)/자산총계',
    '부채총계/자산총계',
    '순운전자본/자산총계',
    '유동부채/유동자산'
]

extended_financial_features = [
    '유동자산',
    '매출채권',
    '비유동자산',
    '유형자산',
    '자산총계',
    '유동부채',
    '비유동부채',
    '부  채  총  계',
    '자본금',
    '이익잉여금(결손금）',
    '자본총계',
    '매출액',
    '판매비와관리비',
    '영업이익（손실）',
    '법인세비용차감전순손익',
    '법인세비용',
    '당기순이익(손실)',
    '기업순이익률(%)',
    '유보액/총자산(%)',
    '유보액/납입자본(%)',
    '매출액총이익률(%)',
    '매출액영업이익률(%)',
    '매출액순이익률(%)',
    '수지비율(%)',
    '경상수지비율',
    '영업비율(%)',
    '금융비용대매출액비율(%',
    '금융비용대부채비율(%)',
    '금융비용대총비용비율(%',
    '부채비율(%)',
    '차입금의존도(%)',
    '자기자본비율(%)',
    '순운전자본비율(%)',
    '유동부채비율(%)',
    '비유동부채비율(%)',
    '부채총계대 매출액(%)',
    '총자본회전율(회)',
    '재고자산회전율(회)',
    '매출채권회전율(회)',
    '매입채무회전율(회)',
    '미수금',
    '매출원가',
    '무형자산',
    '재고자산',
]

years_to_close = [
    'Closed_In_1Yr',
    'Closed_In_2Yrs',
]

x_data_year = list(range(2018, 2020))

EXP_RESULT_PATH = DATA_PATH / 'experiment_result' / exp_name

if not os.path.exists(EXP_RESULT_PATH) : 
    os.mkdir(EXP_RESULT_PATH)

In [8]:
import numpy as np
import pandas as pd
from pycaret.classification import ClassificationExperiment
from tqdm import tqdm

In [9]:
exp_result = None

for target_years_to_close in tqdm(years_to_close) : 
    for x_data_target_year in tqdm(x_data_year, leave=True) : 
        for sector in sectors : 

            filename = EXP_RESULT_PATH / f'Beaver_baseline_{sector}_{target_years_to_close}_{x_data_target_year}.csv'

            if not os.path.exists(filename) : 

                data_to_train = (
                    data
                    .loc[~data[beaver_features[0]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[1]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[2]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[3]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[4]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[lambda x : pd.to_datetime(x['결산년월'], format='%Y%m%d').dt.year==x_data_target_year]
                    .loc[lambda x : x['대분류']==sector]
                )

                exp = ClassificationExperiment()

                exp.setup(
                    (
                        data_to_train
                        .loc[:, beaver_features
                             +extended_financial_features
                             +index_data.drop(['year'], axis=1).columns.tolist()
                             +[target_years_to_close]]
                        .reset_index(drop=True)
                    ),
                    target=target_years_to_close,
                    feature_selection=True,
                    #remove_multicollinearity = True,
                )

                models = exp.compare_models(sort='AUC', n_select=3)
                tuned_models = [exp.tune_model(x) for x in models]
                
                #if target_years_to_close == 'Closed_In_1Yr' :
                blended_model = exp.blend_models(estimator_list=tuned_models, fold=5, method = 'soft')
                result = (
                    exp.pull()
                    .assign(target=target_years_to_close)
                    .assign(x_data_yaer=x_data_target_year)
                    .assign(대분류=sector)
                    .reset_index(drop=False)
                    .rename(columns={'index':'model_name'})
                )
                result['feature_selection'] = str(exp.dataset_transformed.columns.tolist())
                result.to_csv(filename)

            else : 
                result = pd.read_csv(filename)

            if exp_result is None : 
                exp_result = result
            else : 
                exp_result = pd.concat([exp_result, result], axis=0)

  0%|                                                                                    | 0/2 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 55.90it/s][A

  0%|                                                                                    | 0/2 [00:00<?, ?it/s][A

Unnamed: 0,Description,Value
0,Session id,8903
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(1800, 65)"
4,Transformed data shape,"(1800, 11)"
5,Transformed train set shape,"(1259, 11)"
6,Transformed test set shape,"(541, 11)"
7,Numeric features,64
8,Rows with missing values,84.8%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.5799,0.6005,0.4991,0.5391,0.5171,0.1467,0.1474,0.031
et,Extra Trees Classifier,0.5782,0.5981,0.4238,0.5462,0.4769,0.1328,0.1361,0.08
rf,Random Forest Classifier,0.587,0.5948,0.4081,0.5639,0.4729,0.1472,0.153,0.096
gbc,Gradient Boosting Classifier,0.5758,0.5894,0.4062,0.5436,0.4635,0.1254,0.1294,0.067
ada,Ada Boost Classifier,0.5616,0.5714,0.4201,0.5203,0.4637,0.1005,0.1023,0.039
dt,Decision Tree Classifier,0.5552,0.5607,0.5445,0.511,0.5266,0.1082,0.1084,0.013
knn,K Neighbors Classifier,0.5393,0.5402,0.4257,0.4904,0.4547,0.0599,0.0605,0.023
lr,Logistic Regression,0.4877,0.5168,0.6707,0.4563,0.5415,0.0066,0.0017,0.334
lda,Linear Discriminant Analysis,0.5322,0.5151,0.0753,0.43,0.1272,-0.0143,-0.0199,0.013
nb,Naive Bayes,0.5449,0.5107,0.0753,0.517,0.1298,0.0108,0.026,0.013


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.619,0.5852,0.4828,0.6087,0.5385,0.2214,0.2257
1,0.5873,0.5947,0.4211,0.5581,0.48,0.1489,0.1529
2,0.5873,0.5299,0.386,0.5641,0.4583,0.1435,0.1503
3,0.6111,0.6641,0.4386,0.5952,0.5051,0.1967,0.203
4,0.5794,0.6112,0.5088,0.537,0.5225,0.1471,0.1473
5,0.5556,0.5795,0.4035,0.5111,0.451,0.0862,0.0879
6,0.5794,0.584,0.4561,0.5417,0.4952,0.1392,0.1407
7,0.6508,0.5978,0.4386,0.6757,0.5319,0.273,0.2893
8,0.5476,0.5327,0.4035,0.5,0.4466,0.0714,0.0725
9,0.568,0.5789,0.3684,0.5385,0.4375,0.1064,0.1115


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5714,0.59,0.5172,0.5357,0.5263,0.1352,0.1353
1,0.4762,0.4427,0.3684,0.4118,0.3889,-0.067,-0.0673
2,0.4921,0.5154,0.4211,0.4364,0.4286,-0.0283,-0.0283
3,0.5317,0.5261,0.4737,0.4821,0.4779,0.0535,0.0535
4,0.5238,0.5517,0.4211,0.4706,0.4444,0.03,0.0302
5,0.5397,0.5708,0.3509,0.4878,0.4082,0.0477,0.0494
6,0.6429,0.6438,0.5965,0.6071,0.6018,0.2781,0.2781
7,0.5476,0.5464,0.4035,0.5,0.4466,0.0714,0.0725
8,0.5714,0.5685,0.386,0.5366,0.449,0.1134,0.1175
9,0.616,0.6081,0.5088,0.5918,0.5472,0.2171,0.219


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5873,0.6055,0.4828,0.56,0.5185,0.1609,0.1622
1,0.5952,0.6173,0.4737,0.5625,0.5143,0.1717,0.1736
2,0.5635,0.5787,0.4737,0.5192,0.4954,0.1122,0.1126
3,0.627,0.6506,0.4912,0.6087,0.5437,0.2343,0.2381
4,0.5873,0.659,0.5088,0.5472,0.5273,0.1619,0.1623
5,0.5476,0.5784,0.4386,0.5,0.4673,0.0771,0.0776
6,0.6349,0.6354,0.5439,0.6078,0.5741,0.2564,0.2576
7,0.5952,0.598,0.4561,0.5652,0.5049,0.1691,0.1719
8,0.5635,0.5423,0.4386,0.5208,0.4762,0.1067,0.1079
9,0.6,0.6244,0.4561,0.5778,0.5098,0.1798,0.1834


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5516,0.5375,0.4087,0.5109,0.4541,0.0815,0.083
1,0.5873,0.5882,0.4298,0.5568,0.4851,0.1502,0.1537
2,0.5556,0.5999,0.4561,0.5098,0.4815,0.0947,0.0951
3,0.5952,0.5883,0.3947,0.5769,0.4688,0.16,0.1675
4,0.5618,0.551,0.3684,0.525,0.433,0.0934,0.0973
Mean,0.5703,0.573,0.4116,0.5359,0.4645,0.116,0.1193
Std,0.0176,0.0242,0.0299,0.0266,0.0192,0.0324,0.0343




Unnamed: 0,Description,Value
0,Session id,5602
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(323, 65)"
4,Transformed data shape,"(323, 11)"
5,Transformed train set shape,"(226, 11)"
6,Transformed test set shape,"(97, 11)"
7,Numeric features,64
8,Rows with missing values,98.5%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.5966,0.6601,0.6159,0.6136,0.6044,0.1914,0.1997,0.057
et,Extra Trees Classifier,0.598,0.6474,0.6091,0.623,0.6088,0.1932,0.1977,0.043
lightgbm,Light Gradient Boosting Machine,0.5733,0.6392,0.5985,0.6103,0.5938,0.1412,0.1462,0.01
knn,K Neighbors Classifier,0.6158,0.6295,0.653,0.6385,0.6385,0.2279,0.2338,0.016
ada,Ada Boost Classifier,0.5966,0.6242,0.6159,0.6093,0.601,0.1912,0.2039,0.018
dt,Decision Tree Classifier,0.6192,0.6202,0.6091,0.6477,0.6249,0.2401,0.2432,0.008
gbc,Gradient Boosting Classifier,0.5915,0.6177,0.5985,0.6154,0.598,0.1802,0.1866,0.019
lr,Logistic Regression,0.5713,0.579,0.8295,0.5583,0.6638,0.1222,0.1594,0.094
nb,Naive Bayes,0.5358,0.5789,0.8144,0.5403,0.6414,0.0527,0.0671,0.008
qda,Quadratic Discriminant Analysis,0.5401,0.5786,0.8227,0.5434,0.6418,0.0622,0.0791,0.008


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6957,0.7803,0.75,0.6923,0.72,0.3878,0.3893
1,0.7391,0.7576,0.6667,0.8,0.7273,0.4812,0.4886
2,0.5217,0.5606,0.5,0.5455,0.5217,0.0453,0.0455
3,0.4783,0.5076,0.3333,0.5,0.4,-0.0299,-0.0318
4,0.6957,0.6894,0.75,0.6923,0.72,0.3878,0.3893
5,0.6957,0.7727,0.9167,0.6471,0.7586,0.3784,0.4223
6,0.6364,0.6364,0.5455,0.6667,0.6,0.2727,0.2774
7,0.5,0.5496,0.5455,0.5,0.5217,0.0,0.0
8,0.5455,0.6333,0.5833,0.5833,0.5833,0.0833,0.0833
9,0.5455,0.7167,0.3333,0.6667,0.4444,0.127,0.1491


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5652,0.6136,0.9167,0.55,0.6875,0.1016,0.1461
1,0.5217,0.6061,0.8333,0.5263,0.6452,0.0156,0.02
2,0.5652,0.5985,0.5,0.6,0.5455,0.1353,0.1374
3,0.5217,0.4924,0.5833,0.5385,0.56,0.038,0.0382
4,0.6087,0.7045,0.75,0.6,0.6667,0.2069,0.2145
5,0.6087,0.75,1.0,0.5714,0.7273,0.1882,0.3223
6,0.4091,0.4545,0.5455,0.4286,0.48,-0.1818,-0.189
7,0.5909,0.7851,1.0,0.55,0.7097,0.1818,0.3162
8,0.6364,0.525,0.8333,0.625,0.7143,0.2414,0.2609
9,0.6364,0.7417,0.5833,0.7,0.6364,0.2787,0.2833


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6087,0.7727,0.75,0.6,0.6667,0.2069,0.2145
1,0.6087,0.6742,0.5833,0.6364,0.6087,0.2189,0.2197
2,0.5217,0.5682,0.75,0.5294,0.6207,0.0232,0.0259
3,0.6522,0.6136,0.5,0.75,0.6,0.3134,0.3337
4,0.6957,0.7576,0.75,0.6923,0.72,0.3878,0.3893
5,0.6087,0.7348,0.8333,0.5882,0.6897,0.2008,0.2241
6,0.4545,0.4298,0.5455,0.4615,0.5,-0.0909,-0.0925
7,0.5455,0.5372,0.7273,0.5333,0.6154,0.0909,0.0976
8,0.5,0.6167,0.6667,0.5333,0.5926,-0.0342,-0.0356
9,0.5909,0.6917,0.5833,0.6364,0.6087,0.1818,0.1826


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.587,0.6913,0.7083,0.5862,0.6415,0.1644,0.1686
1,0.5333,0.6581,0.6087,0.5385,0.5714,0.0634,0.064
2,0.6444,0.7095,0.6957,0.64,0.6667,0.2871,0.2883
3,0.5556,0.5833,0.5833,0.5833,0.5833,0.1071,0.1071
4,0.4667,0.5218,0.4583,0.5,0.4783,-0.0651,-0.0653
Mean,0.5574,0.6328,0.6109,0.5696,0.5882,0.1114,0.1125
Std,0.0588,0.0703,0.0902,0.0474,0.0654,0.1159,0.1167




Unnamed: 0,Description,Value
0,Session id,6383
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(1107, 65)"
4,Transformed data shape,"(1107, 11)"
5,Transformed train set shape,"(774, 11)"
6,Transformed test set shape,"(333, 11)"
7,Numeric features,64
8,Rows with missing values,83.2%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.606,0.6172,0.5005,0.5991,0.5438,0.2028,0.2058,0.066
rf,Random Forest Classifier,0.584,0.6059,0.4724,0.5772,0.5169,0.1575,0.1614,0.087
lightgbm,Light Gradient Boosting Machine,0.5441,0.5838,0.4999,0.5206,0.5089,0.0836,0.0839,0.028
lda,Linear Discriminant Analysis,0.5699,0.5825,0.252,0.6255,0.3565,0.1112,0.1406,0.012
nb,Naive Bayes,0.553,0.5787,0.1178,0.6642,0.1983,0.0643,0.1121,0.01
gbc,Gradient Boosting Classifier,0.5686,0.5777,0.4697,0.5515,0.5033,0.1272,0.1293,0.049
knn,K Neighbors Classifier,0.5594,0.5756,0.5113,0.5395,0.5226,0.1147,0.1154,0.019
qda,Quadratic Discriminant Analysis,0.5633,0.5666,0.1784,0.6032,0.2624,0.0908,0.1247,0.011
ada,Ada Boost Classifier,0.5453,0.5659,0.5029,0.523,0.5112,0.0867,0.0872,0.024
lr,Logistic Regression,0.5014,0.563,0.7959,0.4833,0.598,0.0329,0.0472,0.123


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5256,0.4881,0.2162,0.5,0.3019,0.0217,0.0261
1,0.6282,0.6836,0.2703,0.8333,0.4082,0.229,0.3065
2,0.5513,0.5438,0.2973,0.55,0.386,0.0796,0.089
3,0.5641,0.5689,0.2973,0.5789,0.3929,0.1047,0.1189
4,0.6623,0.6629,0.4722,0.7083,0.5667,0.3077,0.3248
5,0.7013,0.6992,0.4167,0.8824,0.566,0.3801,0.4425
6,0.6104,0.584,0.4167,0.625,0.5,0.2012,0.2124
7,0.5844,0.4776,0.3056,0.6111,0.4074,0.1391,0.1589
8,0.5325,0.5125,0.2162,0.5333,0.3077,0.0422,0.052
9,0.5455,0.4966,0.2432,0.5625,0.3396,0.0697,0.084


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6026,0.6289,0.5676,0.5833,0.5753,0.202,0.202
1,0.6026,0.5643,0.4865,0.6,0.5373,0.1956,0.1989
2,0.5128,0.5171,0.4865,0.4865,0.4865,0.0231,0.0231
3,0.6282,0.6763,0.5135,0.6333,0.5672,0.2475,0.2517
4,0.6753,0.6582,0.6111,0.6667,0.6377,0.3446,0.3456
5,0.6104,0.6911,0.5278,0.5938,0.5588,0.2121,0.2133
6,0.6234,0.6203,0.5556,0.6061,0.5797,0.2397,0.2404
7,0.5584,0.5803,0.4444,0.5333,0.4848,0.104,0.1054
8,0.5974,0.5885,0.4865,0.6,0.5373,0.1878,0.1911
9,0.5325,0.5098,0.5676,0.5122,0.5385,0.0673,0.0677


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5,0.5709,0.2162,0.4444,0.2909,-0.0284,-0.0328
1,0.6154,0.5768,0.4054,0.6522,0.5,0.2142,0.2303
2,0.5513,0.5508,0.2703,0.5556,0.3636,0.0771,0.0891
3,0.6154,0.5906,0.4054,0.6522,0.5,0.2142,0.2303
4,0.6104,0.6399,0.4722,0.6071,0.5312,0.2067,0.2115
5,0.6883,0.7351,0.4722,0.7727,0.5862,0.3588,0.3869
6,0.6104,0.6087,0.4167,0.625,0.5,0.2012,0.2124
7,0.5584,0.5024,0.25,0.5625,0.3462,0.082,0.0975
8,0.5584,0.5054,0.2973,0.5789,0.3929,0.0991,0.1128
9,0.5325,0.4834,0.2973,0.5238,0.3793,0.0481,0.0531


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5806,0.6123,0.4521,0.569,0.5038,0.1489,0.1518
1,0.5871,0.6048,0.4384,0.5818,0.5,0.16,0.1647
2,0.6065,0.6378,0.5205,0.5938,0.5547,0.2049,0.2063
3,0.5355,0.5816,0.4189,0.5167,0.4627,0.0614,0.0624
4,0.5455,0.5315,0.4521,0.5238,0.4853,0.0822,0.083
Mean,0.571,0.5936,0.4564,0.557,0.5013,0.1315,0.1336
Std,0.0265,0.0358,0.0343,0.0311,0.0304,0.0526,0.0533




Unnamed: 0,Description,Value
0,Session id,3053
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(205, 65)"
4,Transformed data shape,"(205, 11)"
5,Transformed train set shape,"(143, 11)"
6,Transformed test set shape,"(62, 11)"
7,Numeric features,64
8,Rows with missing values,67.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.6324,0.6806,0.7522,0.6977,0.7175,0.1762,0.2021,0.048
rf,Random Forest Classifier,0.6386,0.6572,0.7944,0.6817,0.7279,0.167,0.2016,0.07
lr,Logistic Regression,0.6438,0.6413,0.9011,0.6654,0.7564,0.0759,0.0982,0.074
dt,Decision Tree Classifier,0.6505,0.6299,0.7478,0.7179,0.7288,0.226,0.2253,0.009
lda,Linear Discriminant Analysis,0.6095,0.6289,0.8311,0.6578,0.7274,0.0435,0.0441,0.008
qda,Quadratic Discriminant Analysis,0.5819,0.6042,0.78,0.6061,0.6596,0.0075,0.0082,0.009
lightgbm,Light Gradient Boosting Machine,0.6457,0.5949,0.7833,0.698,0.7333,0.1949,0.2193,0.012
knn,K Neighbors Classifier,0.619,0.5875,0.7644,0.6839,0.7181,0.128,0.1457,0.014
gbc,Gradient Boosting Classifier,0.6167,0.5713,0.7722,0.6722,0.7138,0.1174,0.1387,0.018
ada,Ada Boost Classifier,0.6243,0.5649,0.7289,0.6904,0.7026,0.1767,0.1918,0.019


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6667,0.32,1.0,0.6667,0.8,0.0,0.0
1,0.6667,0.44,1.0,0.6667,0.8,0.0,0.0
2,0.6,0.4815,1.0,0.6,0.75,0.0,0.0
3,0.6429,0.7222,1.0,0.6429,0.7826,0.0,0.0
4,0.6429,0.4444,1.0,0.6429,0.7826,0.0,0.0
5,0.6429,0.6444,1.0,0.6429,0.7826,0.0,0.0
6,0.6429,0.5556,1.0,0.6429,0.7826,0.0,0.0
7,0.6429,0.2556,1.0,0.6429,0.7826,0.0,0.0
8,0.6429,0.5778,1.0,0.6429,0.7826,0.0,0.0
9,0.6429,0.5556,1.0,0.6429,0.7826,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7333,0.82,1.0,0.7143,0.8333,0.25,0.378
1,0.6667,0.33,1.0,0.6667,0.8,0.0,0.0
2,0.5333,0.3889,0.8889,0.5714,0.6957,-0.129,-0.2182
3,0.6429,0.7222,1.0,0.6429,0.7826,0.0,0.0
4,0.6429,0.4889,0.7778,0.7,0.7368,0.186,0.1886
5,0.6429,0.8444,1.0,0.6429,0.7826,0.0,0.0
6,0.6429,0.6444,1.0,0.6429,0.7826,0.0,0.0
7,0.6429,0.4222,1.0,0.6429,0.7826,0.0,0.0
8,0.6429,0.6889,1.0,0.6429,0.7826,0.0,0.0
9,0.6429,0.7556,1.0,0.6429,0.7826,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6,0.4,0.9,0.6429,0.75,-0.125,-0.189
1,0.7333,0.42,1.0,0.7143,0.8333,0.25,0.378
2,0.5333,0.6481,0.4444,0.6667,0.5333,0.1026,0.1111
3,0.6429,0.8778,1.0,0.6429,0.7826,0.0,0.0
4,0.6429,0.4444,0.7778,0.7,0.7368,0.186,0.1886
5,0.6429,0.7556,1.0,0.6429,0.7826,0.0,0.0
6,0.7143,0.9556,1.0,0.6923,0.8182,0.2432,0.3721
7,0.6429,0.7111,1.0,0.6429,0.7826,0.0,0.0
8,0.6429,0.6667,0.8889,0.6667,0.7619,0.1026,0.1217
9,0.6429,0.5333,1.0,0.6429,0.7826,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6552,0.5316,1.0,0.6552,0.7917,0.0,0.0
1,0.6552,0.4395,1.0,0.6552,0.7917,0.0,0.0
2,0.5862,0.5909,0.9444,0.6071,0.7391,-0.0675,-0.1477
3,0.6429,0.6889,1.0,0.6429,0.7826,0.0,0.0
4,0.6429,0.6444,1.0,0.6429,0.7826,0.0,0.0
Mean,0.6365,0.5791,0.9889,0.6406,0.7775,-0.0135,-0.0295
Std,0.0257,0.0874,0.0222,0.0176,0.0196,0.027,0.0591


Unnamed: 0,Description,Value
0,Session id,1669
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(435, 65)"
4,Transformed data shape,"(435, 11)"
5,Transformed train set shape,"(304, 11)"
6,Transformed test set shape,"(131, 11)"
7,Numeric features,64
8,Rows with missing values,94.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.5723,0.5197,0.3205,0.4143,0.3544,0.0513,0.0503,0.024
nb,Naive Bayes,0.6084,0.5158,0.0167,0.2,0.0308,0.0007,0.0021,0.009
ada,Ada Boost Classifier,0.5327,0.5148,0.3311,0.3821,0.3515,-0.0091,-0.0088,0.018
knn,K Neighbors Classifier,0.5166,0.5143,0.2364,0.3251,0.2669,-0.0722,-0.0765,0.015
et,Extra Trees Classifier,0.5625,0.5048,0.2553,0.3814,0.3,0.0116,0.0079,0.052
lr,Logistic Regression,0.5126,0.5026,0.4008,0.3293,0.3483,-0.0183,-0.0232,0.092
qda,Quadratic Discriminant Analysis,0.5594,0.5003,0.2947,0.4661,0.2717,0.0284,0.0325,0.008
dummy,Dummy Classifier,0.6118,0.5,0.0,0.0,0.0,0.0,0.0,0.011
lda,Linear Discriminant Analysis,0.6148,0.4951,0.1212,0.5025,0.1768,0.0567,0.0755,0.009
rf,Random Forest Classifier,0.5726,0.4926,0.2803,0.4026,0.3239,0.0375,0.0346,0.058


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6129,0.5439,0.0,0.0,0.0,0.0,0.0
1,0.6129,0.443,0.0,0.0,0.0,0.0,0.0
2,0.6129,0.5,0.0,0.0,0.0,0.0,0.0
3,0.6129,0.5175,0.0,0.0,0.0,0.0,0.0
4,0.6333,0.4019,0.0,0.0,0.0,0.0,0.0
5,0.6333,0.4928,0.0,0.0,0.0,0.0,0.0
6,0.6,0.5694,0.0,0.0,0.0,0.0,0.0
7,0.6,0.375,0.0,0.0,0.0,0.0,0.0
8,0.6,0.5093,0.0,0.0,0.0,0.0,0.0
9,0.6,0.4306,0.0,0.0,0.0,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6452,0.5702,0.0833,1.0,0.1538,0.1003,0.2297
1,0.6452,0.4693,0.0833,1.0,0.1538,0.1003,0.2297
2,0.6129,0.5307,0.0833,0.5,0.1429,0.0363,0.0609
3,0.5806,0.4079,0.0,0.0,0.0,-0.0633,-0.1451
4,0.6,0.4689,0.0,0.0,0.0,-0.0651,-0.1413
5,0.6667,0.7081,0.0909,1.0,0.1667,0.1124,0.2441
6,0.6,0.6204,0.0,0.0,0.0,0.0,0.0
7,0.5667,0.3611,0.0,0.0,0.0,-0.0656,-0.1516
8,0.6,0.4861,0.0,0.0,0.0,0.0,0.0
9,0.6,0.4954,0.0,0.0,0.0,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6774,0.7917,0.1667,1.0,0.2857,0.1969,0.3304
1,0.5806,0.5439,0.0,0.0,0.0,-0.0633,-0.1451
2,0.6452,0.6294,0.0833,1.0,0.1538,0.1003,0.2297
3,0.6452,0.6272,0.1667,0.6667,0.2667,0.1323,0.1879
4,0.6667,0.6555,0.1818,0.6667,0.2857,0.1525,0.2075
5,0.7,0.6364,0.2727,0.75,0.4,0.2541,0.312
6,0.6,0.6667,0.0,0.0,0.0,0.0,0.0
7,0.5667,0.3519,0.0,0.0,0.0,-0.0656,-0.1516
8,0.6667,0.6644,0.1667,1.0,0.2857,0.1935,0.3273
9,0.6333,0.4444,0.0833,1.0,0.1538,0.0984,0.2274


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6393,0.5034,0.0435,1.0,0.0833,0.0536,0.1659
1,0.5902,0.5709,0.0417,0.3333,0.0741,-0.0146,-0.028
2,0.623,0.6284,0.0417,1.0,0.08,0.0501,0.1603
3,0.5902,0.598,0.0,0.0,0.0,-0.0325,-0.104
4,0.6167,0.4971,0.0,0.0,0.0,0.0,0.0
Mean,0.6119,0.5596,0.0254,0.4667,0.0475,0.0113,0.0389
Std,0.0192,0.0518,0.0207,0.4522,0.0389,0.0347,0.107



 50%|█████████████████████████████████████▌                                     | 1/2 [02:37<02:37, 157.53s/it][A

Unnamed: 0,Description,Value
0,Session id,4082
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(828, 65)"
4,Transformed data shape,"(828, 11)"
5,Transformed train set shape,"(579, 11)"
6,Transformed test set shape,"(249, 11)"
7,Numeric features,64
8,Rows with missing values,91.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.6633,0.5721,0.8457,0.7241,0.7793,0.0798,0.0854,0.016
lightgbm,Light Gradient Boosting Machine,0.6459,0.5525,0.8209,0.7185,0.7647,0.0513,0.0539,0.016
et,Extra Trees Classifier,0.6528,0.5511,0.8725,0.7047,0.779,0.0028,0.0124,0.058
rf,Random Forest Classifier,0.6856,0.5459,0.9191,0.7159,0.8043,0.0595,0.0784,0.065
gbc,Gradient Boosting Classifier,0.6562,0.5422,0.87,0.7088,0.7806,0.0174,0.0133,0.035
ada,Ada Boost Classifier,0.6476,0.5421,0.8456,0.7095,0.7705,0.0261,0.0291,0.022
qda,Quadratic Discriminant Analysis,0.4634,0.5415,0.3441,0.7592,0.4304,0.077,0.0942,0.009
nb,Naive Bayes,0.639,0.5373,0.8582,0.6984,0.7694,-0.028,-0.0299,0.01
lda,Linear Discriminant Analysis,0.7029,0.5278,0.9927,0.7056,0.8248,0.0061,0.0141,0.01
dt,Decision Tree Classifier,0.5992,0.5237,0.7082,0.7192,0.713,0.0461,0.0467,0.01


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6724,0.439,0.9268,0.7037,0.8,-0.0185,-0.0258
1,0.6379,0.4232,0.9024,0.6852,0.7789,-0.1257,-0.1753
2,0.7069,0.4921,1.0,0.7069,0.8283,0.0,0.0
3,0.6897,0.6176,0.9512,0.7091,0.8125,0.0132,0.0206
4,0.7069,0.6356,0.9024,0.74,0.8132,0.163,0.1818
5,0.6724,0.622,0.9268,0.7037,0.8,-0.0185,-0.0258
6,0.6724,0.5954,0.9512,0.6964,0.8041,-0.0658,-0.1217
7,0.7069,0.5481,1.0,0.7069,0.8283,0.0,0.0
8,0.7241,0.584,0.9,0.75,0.8182,0.2658,0.2858
9,0.7193,0.6522,1.0,0.7143,0.8333,0.0806,0.205


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7069,0.5703,1.0,0.7069,0.8283,0.0,0.0
1,0.7069,0.5057,1.0,0.7069,0.8283,0.0,0.0
2,0.7069,0.6664,1.0,0.7069,0.8283,0.0,0.0
3,0.7069,0.5402,1.0,0.7069,0.8283,0.0,0.0
4,0.7069,0.4648,1.0,0.7069,0.8283,0.0,0.0
5,0.7069,0.6987,1.0,0.7069,0.8283,0.0,0.0
6,0.7069,0.627,1.0,0.7069,0.8283,0.0,0.0
7,0.7069,0.4878,1.0,0.7069,0.8283,0.0,0.0
8,0.6897,0.6097,1.0,0.6897,0.8163,0.0,0.0
9,0.7018,0.5279,1.0,0.7018,0.8247,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7069,0.5459,1.0,0.7069,0.8283,0.0,0.0
1,0.7069,0.5022,1.0,0.7069,0.8283,0.0,0.0
2,0.7069,0.6255,1.0,0.7069,0.8283,0.0,0.0
3,0.7241,0.5244,1.0,0.7193,0.8367,0.0812,0.2057
4,0.7069,0.5373,1.0,0.7069,0.8283,0.0,0.0
5,0.7069,0.6714,1.0,0.7069,0.8283,0.0,0.0
6,0.7069,0.5725,1.0,0.7069,0.8283,0.0,0.0
7,0.7069,0.5882,1.0,0.7069,0.8283,0.0,0.0
8,0.6897,0.6653,1.0,0.6897,0.8163,0.0,0.0
9,0.7018,0.5456,1.0,0.7018,0.8247,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7069,0.4591,1.0,0.7069,0.8283,0.0,0.0
1,0.7069,0.6732,1.0,0.7069,0.8283,0.0,0.0
2,0.7069,0.5011,1.0,0.7069,0.8283,0.0,0.0
3,0.6983,0.5665,1.0,0.6983,0.8223,0.0,0.0
4,0.7043,0.6173,1.0,0.7043,0.8265,0.0,0.0
Mean,0.7047,0.5634,1.0,0.7047,0.8267,0.0,0.0
Std,0.0033,0.0771,0.0,0.0033,0.0023,0.0,0.0




Unnamed: 0,Description,Value
0,Session id,6196
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(203, 65)"
4,Transformed data shape,"(203, 11)"
5,Transformed train set shape,"(142, 11)"
6,Transformed test set shape,"(61, 11)"
7,Numeric features,64
8,Rows with missing values,100.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.839,0.7544,0.9561,0.8604,0.9055,0.3337,0.3583,0.018
lightgbm,Light Gradient Boosting Machine,0.8252,0.7477,0.9136,0.8774,0.8928,0.3654,0.3916,0.009
rf,Random Forest Classifier,0.8462,0.7103,0.9826,0.853,0.9125,0.2862,0.3072,0.051
et,Extra Trees Classifier,0.8381,0.7097,0.947,0.8661,0.904,0.3567,0.3867,0.042
ada,Ada Boost Classifier,0.8248,0.6619,0.9129,0.8787,0.8938,0.3624,0.3831,0.016
knn,K Neighbors Classifier,0.7676,0.6362,0.9023,0.8262,0.8613,0.0534,0.0536,0.017
dt,Decision Tree Classifier,0.7605,0.6136,0.8439,0.8631,0.8501,0.1998,0.2052,0.009
lr,Logistic Regression,0.4924,0.5984,0.4659,0.4212,0.4415,0.0953,0.1137,0.075
lda,Linear Discriminant Analysis,0.8095,0.5554,0.9636,0.8284,0.8906,0.0833,0.0992,0.009
nb,Naive Bayes,0.5271,0.5422,0.5318,0.8527,0.5781,-0.0049,0.0218,0.009


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.6944,1.0,0.8,0.8889,0.0,0.0
1,0.8,0.6389,1.0,0.8,0.8889,0.0,0.0
2,0.9286,0.625,1.0,0.9231,0.96,0.6316,0.6794
3,0.8571,0.375,1.0,0.8571,0.9231,0.0,0.0
4,0.9286,0.9583,1.0,0.9231,0.96,0.6316,0.6794
5,0.9286,1.0,1.0,0.9231,0.96,0.6316,0.6794
6,0.8571,0.7273,1.0,0.8462,0.9167,0.44,0.5311
7,0.7857,0.7576,1.0,0.7857,0.88,0.0,0.0
8,0.9286,0.7273,1.0,0.9167,0.9565,0.7586,0.7817
9,0.7857,0.5758,1.0,0.7857,0.88,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.6111,0.9167,0.8462,0.88,0.2857,0.2942
1,0.8,0.6667,0.9167,0.8462,0.88,0.2857,0.2942
2,0.9286,0.7083,1.0,0.9231,0.96,0.6316,0.6794
3,0.8571,0.4167,1.0,0.8571,0.9231,0.0,0.0
4,0.8571,0.9583,0.9167,0.9167,0.9167,0.4167,0.4167
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,0.8571,0.697,1.0,0.8462,0.9167,0.44,0.5311
7,0.7857,0.7576,0.9091,0.8333,0.8696,0.2759,0.2843
8,0.9286,0.697,1.0,0.9167,0.9565,0.7586,0.7817
9,0.7857,0.6667,0.9091,0.8333,0.8696,0.2759,0.2843


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.7222,0.9167,0.8462,0.88,0.2857,0.2942
1,0.6,0.6111,0.75,0.75,0.75,-0.25,-0.25
2,0.8571,0.5417,0.9167,0.9167,0.9167,0.4167,0.4167
3,0.8571,0.4167,1.0,0.8571,0.9231,0.0,0.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,0.9286,1.0,0.9167,1.0,0.9565,0.7586,0.7817
6,0.8571,0.7273,1.0,0.8462,0.9167,0.44,0.5311
7,0.7857,0.7576,0.9091,0.8333,0.8696,0.2759,0.2843
8,0.9286,0.697,1.0,0.9167,0.9565,0.7586,0.7817
9,0.8571,0.7273,0.9091,0.9091,0.9091,0.5758,0.5758


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8276,0.5417,0.9583,0.8519,0.902,0.2077,0.236
1,0.8276,0.6812,1.0,0.8214,0.902,0.2408,0.37
2,0.8214,0.8957,1.0,0.8214,0.902,0.0,0.0
3,0.8214,0.7304,0.9565,0.8462,0.898,0.2045,0.2328
4,0.8929,0.8348,1.0,0.8846,0.9388,0.5227,0.5948
Mean,0.8382,0.7367,0.983,0.8451,0.9085,0.2352,0.2867
Std,0.0275,0.1233,0.0209,0.0234,0.0152,0.1672,0.1947




Unnamed: 0,Description,Value
0,Session id,1634
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(505, 65)"
4,Transformed data shape,"(505, 11)"
5,Transformed train set shape,"(353, 11)"
6,Transformed test set shape,"(152, 11)"
7,Numeric features,64
8,Rows with missing values,91.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7283,0.6028,0.9071,0.7653,0.8294,0.1696,0.1919,0.049
rf,Random Forest Classifier,0.7111,0.5429,0.93,0.7407,0.8243,0.0531,0.0669,0.059
lightgbm,Light Gradient Boosting Machine,0.6517,0.5366,0.8411,0.7271,0.7786,-0.0327,-0.041,0.012
gbc,Gradient Boosting Classifier,0.6545,0.5221,0.8603,0.7207,0.7839,-0.0547,-0.0627,0.025
dt,Decision Tree Classifier,0.6116,0.5201,0.7329,0.7364,0.7315,0.0113,0.0123,0.009
ada,Ada Boost Classifier,0.6427,0.5072,0.8483,0.7144,0.7749,-0.0801,-0.0863,0.018
qda,Quadratic Discriminant Analysis,0.5595,0.5022,0.6198,0.632,0.5787,0.0033,-0.0015,0.009
dummy,Dummy Classifier,0.731,0.5,1.0,0.731,0.8445,0.0,0.0,0.008
lda,Linear Discriminant Analysis,0.7225,0.4886,0.9885,0.7286,0.8388,-0.0161,-0.0311,0.008
knn,K Neighbors Classifier,0.6632,0.4678,0.868,0.7235,0.7884,-0.0287,-0.0309,0.014


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7222,0.3904,1.0,0.7222,0.8387,0.0,0.0
1,0.7222,0.4423,1.0,0.7222,0.8387,0.0,0.0
2,0.7222,0.4712,1.0,0.7222,0.8387,0.0,0.0
3,0.7429,0.4679,1.0,0.7429,0.8525,0.0,0.0
4,0.7429,0.5726,1.0,0.7429,0.8525,0.0,0.0
5,0.7429,0.5812,1.0,0.7429,0.8525,0.0,0.0
6,0.7429,0.4402,1.0,0.7429,0.8525,0.0,0.0
7,0.7429,0.5855,1.0,0.7429,0.8525,0.0,0.0
8,0.7143,0.508,1.0,0.7143,0.8333,0.0,0.0
9,0.7143,0.372,1.0,0.7143,0.8333,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7222,0.3885,1.0,0.7222,0.8387,0.0,0.0
1,0.7222,0.6385,1.0,0.7222,0.8387,0.0,0.0
2,0.7222,0.4923,1.0,0.7222,0.8387,0.0,0.0
3,0.7714,0.6432,1.0,0.7647,0.8667,0.1566,0.2915
4,0.7429,0.3504,1.0,0.7429,0.8525,0.0,0.0
5,0.7429,0.5427,1.0,0.7429,0.8525,0.0,0.0
6,0.7429,0.3803,1.0,0.7429,0.8525,0.0,0.0
7,0.7429,0.3825,1.0,0.7429,0.8525,0.0,0.0
8,0.7143,0.412,1.0,0.7143,0.8333,0.0,0.0
9,0.7143,0.54,1.0,0.7143,0.8333,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7222,0.375,1.0,0.7222,0.8387,0.0,0.0
1,0.7222,0.6596,1.0,0.7222,0.8387,0.0,0.0
2,0.7222,0.5385,1.0,0.7222,0.8387,0.0,0.0
3,0.7429,0.6987,1.0,0.7429,0.8525,0.0,0.0
4,0.7429,0.4936,1.0,0.7429,0.8525,0.0,0.0
5,0.7429,0.3825,1.0,0.7429,0.8525,0.0,0.0
6,0.7429,0.5299,1.0,0.7429,0.8525,0.0,0.0
7,0.7429,0.3611,1.0,0.7429,0.8525,0.0,0.0
8,0.7143,0.514,1.0,0.7143,0.8333,0.0,0.0
9,0.7143,0.498,1.0,0.7143,0.8333,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7324,0.4848,1.0,0.7324,0.8455,0.0,0.0
1,0.7324,0.5967,1.0,0.7324,0.8455,0.0,0.0
2,0.7324,0.4737,1.0,0.7324,0.8455,0.0,0.0
3,0.7286,0.4479,1.0,0.7286,0.843,0.0,0.0
4,0.7286,0.5304,1.0,0.7286,0.843,0.0,0.0
Mean,0.7309,0.5067,1.0,0.7309,0.8445,0.0,0.0
Std,0.0019,0.0523,0.0,0.0019,0.0013,0.0,0.0




Unnamed: 0,Description,Value
0,Session id,2974
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(65, 65)"
4,Transformed data shape,"(65, 11)"
5,Transformed train set shape,"(45, 11)"
6,Transformed test set shape,"(20, 11)"
7,Numeric features,64
8,Rows with missing values,93.8%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.755,0.775,0.9333,0.7917,0.8455,0.1712,0.1856,0.008
gbc,Gradient Boosting Classifier,0.755,0.7333,0.9083,0.7983,0.8437,0.2629,0.2696,0.012
lr,Logistic Regression,0.715,0.7083,0.8833,0.7967,0.8125,0.1452,0.1652,0.077
knn,K Neighbors Classifier,0.78,0.7083,0.9167,0.8317,0.8544,0.3498,0.3687,0.013
rf,Random Forest Classifier,0.76,0.7083,0.9167,0.7983,0.8448,0.2848,0.2871,0.045
dt,Decision Tree Classifier,0.795,0.7042,0.9083,0.8383,0.8651,0.397,0.417,0.007
qda,Quadratic Discriminant Analysis,0.805,0.65,1.0,0.805,0.8849,0.3,0.3,0.01
ada,Ada Boost Classifier,0.73,0.6417,0.85,0.805,0.821,0.2386,0.2587,0.016
et,Extra Trees Classifier,0.735,0.6417,0.8833,0.79,0.8258,0.2515,0.2537,0.037
nb,Naive Bayes,0.56,0.6167,0.7417,0.665,0.6848,-0.1459,-0.1765,0.007


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.5,1.0,0.8,0.8889,0.0,0.0
1,0.8,0.5,1.0,0.8,0.8889,0.0,0.0
2,0.8,0.5,1.0,0.8,0.8889,0.0,0.0
3,0.6,0.5,1.0,0.6,0.75,0.0,0.0
4,0.6,0.5,1.0,0.6,0.75,0.0,0.0
5,0.75,0.5,1.0,0.75,0.8571,0.0,0.0
6,0.75,0.5,1.0,0.75,0.8571,0.0,0.0
7,0.75,0.5,1.0,0.75,0.8571,0.0,0.0
8,0.75,0.5,1.0,0.75,0.8571,0.0,0.0
9,0.75,0.5,1.0,0.75,0.8571,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.75,1.0,0.8,0.8889,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.8,0.75,0.75,1.0,0.8571,0.5455,0.6124
3,0.8,0.8333,1.0,0.75,0.8571,0.5455,0.6124
4,0.8,0.8333,1.0,0.75,0.8571,0.5455,0.6124
5,0.5,0.0,0.6667,0.6667,0.6667,-0.3333,-0.3333
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,0.75,0.3333,1.0,0.75,0.8571,0.0,0.0
9,0.75,1.0,1.0,0.75,0.8571,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.75,1.0,0.8,0.8889,0.0,0.0
1,0.6,0.5,0.5,1.0,0.6667,0.2857,0.4082
2,0.8,1.0,1.0,0.8,0.8889,0.0,0.0
3,0.6,0.5,1.0,0.6,0.75,0.0,0.0
4,0.6,1.0,1.0,0.6,0.75,0.0,0.0
5,0.75,0.0,1.0,0.75,0.8571,0.0,0.0
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,0.75,1.0,0.6667,1.0,0.8,0.5,0.5774
8,0.5,0.6667,0.6667,0.6667,0.6667,-0.3333,-0.3333
9,0.75,0.6667,1.0,0.75,0.8571,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5556,0.7143,0.5714,0.8,0.6667,0.0526,0.0598
1,0.4444,0.3929,0.5714,0.6667,0.6154,-0.3636,-0.378
2,0.7778,1.0,1.0,0.7778,0.875,0.0,0.0
3,0.7778,0.6667,1.0,0.75,0.8571,0.4,0.5
4,0.7778,0.7778,1.0,0.75,0.8571,0.4,0.5
Mean,0.6667,0.7103,0.8286,0.7489,0.7743,0.0978,0.1364
Std,0.1405,0.1955,0.21,0.0452,0.1102,0.2854,0.3327


Unnamed: 0,Description,Value
0,Session id,3845
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(294, 65)"
4,Transformed data shape,"(294, 11)"
5,Transformed train set shape,"(205, 11)"
6,Transformed test set shape,"(89, 11)"
7,Numeric features,64
8,Rows with missing values,97.6%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.7119,0.6538,0.8924,0.7574,0.8159,0.1518,0.1677,0.008
nb,Naive Bayes,0.6105,0.636,0.6343,0.7769,0.6612,0.1666,0.1777,0.008
knn,K Neighbors Classifier,0.7021,0.6348,0.8457,0.7675,0.8026,0.1956,0.2058,0.014
et,Extra Trees Classifier,0.6936,0.6211,0.8324,0.7654,0.7954,0.1765,0.1858,0.042
qda,Quadratic Discriminant Analysis,0.564,0.6141,0.5733,0.7052,0.5643,0.0795,0.0986,0.01
ada,Ada Boost Classifier,0.6633,0.5793,0.8052,0.7517,0.7745,0.0993,0.1123,0.017
lr,Logistic Regression,0.6819,0.5754,0.8367,0.7519,0.7844,0.1252,0.1572,0.077
rf,Random Forest Classifier,0.6933,0.5714,0.8857,0.7438,0.8075,0.0791,0.0702,0.054
gbc,Gradient Boosting Classifier,0.6593,0.5479,0.8114,0.7431,0.7742,0.0748,0.0754,0.018
lightgbm,Light Gradient Boosting Machine,0.6829,0.5219,0.8386,0.7521,0.7916,0.1246,0.14,0.01


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7619,0.5611,1.0,0.75,0.8571,0.2222,0.3536
1,0.7143,0.6,1.0,0.7143,0.8333,0.0,0.0
2,0.7619,0.8611,0.9333,0.7778,0.8485,0.3137,0.3443
3,0.7619,0.7167,1.0,0.75,0.8571,0.2222,0.3536
4,0.7143,0.7556,1.0,0.7143,0.8333,0.0,0.0
5,0.7,0.6467,0.8,0.8,0.8,0.2,0.2
6,0.8,0.9533,1.0,0.7895,0.8824,0.2727,0.3974
7,0.75,0.64,1.0,0.75,0.8571,0.0,0.0
8,0.65,0.42,0.8,0.75,0.7742,0.0,0.0
9,0.65,0.3274,0.9286,0.6842,0.7879,-0.0938,-0.1502


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7143,0.5556,0.9333,0.7368,0.8235,0.125,0.1539
1,0.5714,0.5444,0.8,0.6667,0.7273,-0.2353,-0.2582
2,0.7619,0.8889,0.9333,0.7778,0.8485,0.3137,0.3443
3,0.5714,0.7,0.7333,0.6875,0.7097,-0.1053,-0.1061
4,0.5714,0.7444,0.4667,0.875,0.6087,0.2222,0.2791
5,0.65,0.7067,0.7333,0.7857,0.7586,0.125,0.126
6,0.95,0.96,0.9333,1.0,0.9655,0.875,0.8819
7,0.8,0.6533,0.9333,0.8235,0.875,0.3846,0.4042
8,0.5,0.3533,0.6,0.6923,0.6429,-0.1765,-0.1816
9,0.4,0.2976,0.3571,0.625,0.4545,-0.1111,-0.1336


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7143,0.65,1.0,0.7143,0.8333,0.0,0.0
1,0.7143,0.7222,1.0,0.7143,0.8333,0.0,0.0
2,0.7619,0.6667,1.0,0.75,0.8571,0.2222,0.3536
3,0.7143,0.7111,1.0,0.7143,0.8333,0.0,0.0
4,0.7143,0.6667,1.0,0.7143,0.8333,0.0,0.0
5,0.75,0.6267,0.9333,0.7778,0.8485,0.1667,0.1925
6,0.75,0.9333,1.0,0.75,0.8571,0.0,0.0
7,0.75,0.3933,1.0,0.75,0.8571,0.0,0.0
8,0.7,0.5467,0.9333,0.7368,0.8235,-0.0909,-0.1325
9,0.7,0.3512,1.0,0.7,0.8235,0.0,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7317,0.4455,0.9333,0.7568,0.8358,0.1442,0.1719
1,0.6585,0.7212,0.8333,0.7353,0.7812,0.0171,0.0178
2,0.7073,0.4864,0.9667,0.725,0.8286,-0.0468,-0.0957
3,0.7317,0.7333,1.0,0.7317,0.8451,0.0,0.0
4,0.5366,0.4756,0.5862,0.7083,0.6415,0.0026,0.0027
Mean,0.6732,0.5724,0.8639,0.7314,0.7864,0.0234,0.0193
Std,0.0733,0.1272,0.1496,0.0157,0.0757,0.0641,0.0862



100%|███████████████████████████████████████████████████████████████████████████| 2/2 [04:16<00:00, 128.48s/it][A
100%|███████████████████████████████████████████████████████████████████████████| 2/2 [04:16<00:00, 128.50s/it]


In [10]:
aggregated_result_path = EXP_RESULT_PATH / 'result.csv'

if not os.path.exists(aggregated_result_path) :
    exp_result.to_csv(aggregated_result_path)
    
exp_result

Unnamed: 0.1,Unnamed: 0,Fold,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,target,x_data_yaer,대분류,feature_selection
0,0.0,0,0.8690,0.6121,0.0000,0.0000,0.0000,0.0000,0.0000,Closed_In_1Yr,2018,제조업(10~34),"['순운전자본/자산총계', '매출채권', '비유동자산', '유형자산', '자본금',..."
1,1.0,1,0.8690,0.6165,0.0000,0.0000,0.0000,0.0000,0.0000,Closed_In_1Yr,2018,제조업(10~34),"['순운전자본/자산총계', '매출채권', '비유동자산', '유형자산', '자본금',..."
2,2.0,2,0.8690,0.7103,0.0000,0.0000,0.0000,0.0000,0.0000,Closed_In_1Yr,2018,제조업(10~34),"['순운전자본/자산총계', '매출채권', '비유동자산', '유형자산', '자본금',..."
3,3.0,3,0.8690,0.5995,0.0000,0.0000,0.0000,0.0000,0.0000,Closed_In_1Yr,2018,제조업(10~34),"['순운전자본/자산총계', '매출채권', '비유동자산', '유형자산', '자본금',..."
4,4.0,4,0.8725,0.6568,0.0000,0.0000,0.0000,0.0000,0.0000,Closed_In_1Yr,2018,제조업(10~34),"['순운전자본/자산총계', '매출채권', '비유동자산', '유형자산', '자본금',..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,,2,0.7073,0.4864,0.9667,0.7250,0.8286,-0.0468,-0.0957,Closed_In_2Yrs,2019,건설업(41~42),"['매출채권', '비유동자산', '유형자산', '판매비와관리비', '기업순이익률(%..."
3,,3,0.7317,0.7333,1.0000,0.7317,0.8451,0.0000,0.0000,Closed_In_2Yrs,2019,건설업(41~42),"['매출채권', '비유동자산', '유형자산', '판매비와관리비', '기업순이익률(%..."
4,,4,0.5366,0.4756,0.5862,0.7083,0.6415,0.0026,0.0027,Closed_In_2Yrs,2019,건설업(41~42),"['매출채권', '비유동자산', '유형자산', '판매비와관리비', '기업순이익률(%..."
5,,Mean,0.6732,0.5724,0.8639,0.7314,0.7864,0.0234,0.0193,Closed_In_2Yrs,2019,건설업(41~42),"['매출채권', '비유동자산', '유형자산', '판매비와관리비', '기업순이익률(%..."
