# Prerequisite

Following packages must be installed to run codes below

```bash
$ pip3 install openpyxl 
```


In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Add project path

In [5]:
import os
import sys
from pathlib import Path

PROJECT_PATH = Path().resolve().parent.parent
SRC_PATH = PROJECT_PATH / 'src'

sys.path.append(str(SRC_PATH))

In [1]:
exp_name = 'extended_beavers_model'

# Load data

In [6]:
from data_loader import SectorDataLoader

data_loader = SectorDataLoader()
data = (
    data_loader
    .load_dataset()
    .pipe(data_loader.add_beaver_indicator)
    .pipe(data_loader.add_label)
)

# Baseline classifier using Beaver's features

In [16]:
from project_paths import DATA_PATH

sectors = [
    '제조업(10~34)',
    '부동산업(68)',
    '도매 및 소매업(45~47)',
    '숙박 및 음식점업(55~56)',
    '건설업(41~42)'
]

beaver_features = [
    '유동자산/부채총계',
    '당기순이익(손실)/자산총계',
    '부채총계/자산총계',
    '순운전자본/자산총계',
    '유동부채/유동자산'
]

extended_financial_features = [
    '부채비율(%)',
    '매출액총이익률(%)',
    '매출액영업이익률(%)',
    '매출액순이익률(%)',
    '금융비용대매출액비율(%', 
    '금융비용대부채비율(%)', 
    '금융비용대총비용비율(%',
    '유보액/총자산(%)', 
    '유보액/납입자본(%)'
]

years_to_close = [
    'Closed_In_1Yr',
    'Closed_In_2Yrs',
]

x_data_year = list(range(2018, 2021))

EXP_RESULT_PATH = DATA_PATH / 'experiment_result' / exp_name

if not os.path.exists(EXP_RESULT_PATH) : 
    os.mkdir(EXP_RESULT_PATH)

In [23]:
import numpy as np
import pandas as pd
from pycaret.classification import ClassificationExperiment
from tqdm import tqdm

In [25]:
exp_result = None

for target_years_to_close in tqdm(years_to_close) : 
    for x_data_target_year in tqdm(x_data_year, leave=True) : 
        for sector in sectors : 

            filename = EXP_RESULT_PATH / f'Beaver_baseline_{sector}_{target_years_to_close}_{x_data_target_year}.csv'

            if not os.path.exists(filename) : 

                data_to_train = (
                    data
                    .loc[~data[beaver_features[0]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[1]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[2]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[3]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[4]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[lambda x : pd.to_datetime(x['결산년월'], format='%Y%m%d').dt.year==x_data_target_year]
                    .loc[lambda x : x['대분류']==sector]
                )

                exp = ClassificationExperiment()

                exp.setup(
                    (
                        data_to_train
                        .loc[:, beaver_features+extended_financial_features+[target_years_to_close]]
                        .reset_index(drop=True)
                    ),
                    target=target_years_to_close
                )

                models = exp.compare_models()

                result = (
                    exp.pull()
                    .assign(target=target_years_to_close)
                    .assign(x_data_yaer=x_data_target_year)
                    .assign(대분류=sector)
                    .reset_index(drop=False)
                    .rename(columns={'index':'model_name'})
                )
                result.to_csv(filename)

            else : 
                result = pd.read_csv(filename)

            if exp_result is None : 
                exp_result = result
            else : 
                exp_result = pd.concat([exp_result, result], axis=0)

  0%|                                                                                    | 0/2 [00:00<?, ?it/s]
  0%|                                                                                    | 0/3 [00:00<?, ?it/s][A

Unnamed: 0,Description,Value
0,Session id,5341
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(1800, 15)"
4,Transformed data shape,"(1800, 15)"
5,Transformed train set shape,"(1259, 15)"
6,Transformed test set shape,"(541, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8777,0.5934,0.1085,0.7133,0.1827,0.1548,0.2398,0.095
et,Extra Trees Classifier,0.8745,0.6159,0.121,0.5817,0.1933,0.1587,0.2192,0.068
lightgbm,Light Gradient Boosting Machine,0.8721,0.6308,0.1335,0.5369,0.2083,0.1675,0.2178,0.053
qda,Quadratic Discriminant Analysis,0.8697,0.5,0.0,0.0,0.0,0.0,0.0,0.01
dummy,Dummy Classifier,0.8697,0.5,0.0,0.0,0.0,0.0,0.0,0.006
lda,Linear Discriminant Analysis,0.8674,0.5534,0.0,0.0,0.0,-0.0044,-0.0085,0.008
gbc,Gradient Boosting Classifier,0.865,0.6321,0.0485,0.3483,0.0827,0.0552,0.0887,0.081
ada,Ada Boost Classifier,0.8642,0.6217,0.0246,0.1333,0.0411,0.0217,0.0287,0.032
ridge,Ridge Classifier,0.8594,0.0,0.0125,0.0167,0.0143,-0.0034,-0.0092,0.007
knn,K Neighbors Classifier,0.8515,0.5693,0.0485,0.1867,0.0764,0.0292,0.0355,0.016


Unnamed: 0,Description,Value
0,Session id,4553
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(323, 15)"
4,Transformed data shape,"(323, 15)"
5,Transformed train set shape,"(226, 15)"
6,Transformed test set shape,"(97, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8006,0.6017,0.235,0.61,0.3107,0.2274,0.279,0.054
dummy,Dummy Classifier,0.7923,0.5,0.0,0.0,0.0,0.0,0.0,0.006
lr,Logistic Regression,0.7921,0.5197,0.02,0.1,0.0333,0.0203,0.0302,0.03
lda,Linear Discriminant Analysis,0.7879,0.4631,0.0,0.0,0.0,-0.0078,-0.0112,0.007
et,Extra Trees Classifier,0.7875,0.6443,0.29,0.4367,0.333,0.2242,0.2327,0.048
gbc,Gradient Boosting Classifier,0.783,0.6028,0.25,0.4233,0.301,0.1965,0.211,0.023
svm,SVM - Linear Kernel,0.7565,0.0,0.0,0.0,0.0,-0.0561,-0.0668,0.006
ada,Ada Boost Classifier,0.7565,0.5301,0.17,0.3283,0.2111,0.0967,0.1083,0.018
lightgbm,Light Gradient Boosting Machine,0.7522,0.5432,0.275,0.4417,0.3196,0.1755,0.1941,0.008
dt,Decision Tree Classifier,0.734,0.65,0.52,0.4197,0.4465,0.2814,0.2943,0.006


Unnamed: 0,Description,Value
0,Session id,6004
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(1107, 15)"
4,Transformed data shape,"(1107, 15)"
5,Transformed train set shape,"(774, 15)"
6,Transformed test set shape,"(333, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8515,0.6913,0.1386,0.5317,0.2149,0.1628,0.2109,0.083
qda,Quadratic Discriminant Analysis,0.8514,0.5,0.0,0.0,0.0,0.0,0.0,0.012
dummy,Dummy Classifier,0.8514,0.5,0.0,0.0,0.0,0.0,0.0,0.007
lr,Logistic Regression,0.8501,0.586,0.025,0.2,0.044,0.0292,0.0483,0.026
et,Extra Trees Classifier,0.8489,0.7028,0.1303,0.4933,0.2032,0.1492,0.1919,0.06
lda,Linear Discriminant Analysis,0.845,0.6606,0.0083,0.1,0.0154,-0.0013,-0.0019,0.007
lightgbm,Light Gradient Boosting Machine,0.8386,0.6967,0.1576,0.3986,0.2195,0.151,0.1732,0.022
ada,Ada Boost Classifier,0.8385,0.6577,0.0856,0.4067,0.133,0.08,0.118,0.027
gbc,Gradient Boosting Classifier,0.8359,0.6727,0.1129,0.3743,0.1635,0.102,0.1277,0.055
nb,Naive Bayes,0.8333,0.6066,0.0348,0.245,0.0571,0.0127,0.0307,0.007


Unnamed: 0,Description,Value
0,Session id,3183
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(205, 15)"
4,Transformed data shape,"(205, 15)"
5,Transformed train set shape,"(143, 15)"
6,Transformed test set shape,"(62, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7986,0.7517,0.5083,0.7333,0.5654,0.4434,0.474,0.046
rf,Random Forest Classifier,0.7852,0.7309,0.5083,0.6633,0.5472,0.4136,0.4356,0.059
knn,K Neighbors Classifier,0.7838,0.7475,0.45,0.6733,0.5167,0.3888,0.414,0.013
gbc,Gradient Boosting Classifier,0.7514,0.6774,0.5333,0.5779,0.5326,0.368,0.3825,0.019
lightgbm,Light Gradient Boosting Machine,0.7352,0.6691,0.4833,0.5417,0.4813,0.3086,0.3282,0.007
ada,Ada Boost Classifier,0.7343,0.6562,0.4833,0.5512,0.4798,0.31,0.3325,0.016
dummy,Dummy Classifier,0.7343,0.5,0.0,0.0,0.0,0.0,0.0,0.006
dt,Decision Tree Classifier,0.7162,0.6603,0.5833,0.4788,0.5118,0.3181,0.3297,0.006
lda,Linear Discriminant Analysis,0.7129,0.5952,0.1917,0.3233,0.2244,0.1079,0.1219,0.006
svm,SVM - Linear Kernel,0.6719,0.0,0.125,0.1367,0.1294,-0.0184,-0.0347,0.005


Unnamed: 0,Description,Value
0,Session id,5839
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(435, 15)"
4,Transformed data shape,"(435, 15)"
5,Transformed train set shape,"(304, 15)"
6,Transformed test set shape,"(131, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.9113,0.5,0.0,0.0,0.0,0.0,0.0,0.006
rf,Random Forest Classifier,0.9112,0.685,0.0833,0.2,0.1167,0.1027,0.1141,0.066
lightgbm,Light Gradient Boosting Machine,0.9112,0.6522,0.0833,0.2,0.1167,0.1027,0.1141,0.011
et,Extra Trees Classifier,0.8981,0.6323,0.0833,0.0833,0.08,0.0547,0.0543,0.045
knn,K Neighbors Classifier,0.8951,0.6396,0.0833,0.2,0.1167,0.0828,0.0935,0.014
lda,Linear Discriminant Analysis,0.8949,0.6459,0.1833,0.3333,0.2233,0.1801,0.1963,0.006
gbc,Gradient Boosting Classifier,0.8782,0.6007,0.1167,0.1667,0.1233,0.077,0.0846,0.029
ada,Ada Boost Classifier,0.8649,0.4818,0.1,0.15,0.12,0.0538,0.0547,0.02
dt,Decision Tree Classifier,0.8518,0.5982,0.2167,0.1567,0.1706,0.0999,0.1064,0.007
nb,Naive Bayes,0.798,0.5526,0.2,0.1767,0.1571,0.0871,0.0397,0.006



 33%|█████████████████████████▎                                                  | 1/3 [00:30<01:00, 30.22s/it][A

Unnamed: 0,Description,Value
0,Session id,573
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(828, 15)"
4,Transformed data shape,"(828, 15)"
5,Transformed train set shape,"(579, 15)"
6,Transformed test set shape,"(249, 15)"
7,Numeric features,14
8,Rows with missing values,5.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.81,0.5,0.0,0.0,0.0,0.0,0.0,0.007
lr,Logistic Regression,0.8066,0.5398,0.0182,0.0333,0.0235,0.0117,0.0124,0.035
lda,Linear Discriminant Analysis,0.8048,0.5138,0.0,0.0,0.0,-0.0088,-0.0113,0.008
et,Extra Trees Classifier,0.8031,0.5233,0.0273,0.2,0.0474,0.0196,0.0376,0.055
rf,Random Forest Classifier,0.8014,0.5842,0.0273,0.15,0.0462,0.0143,0.0164,0.069
knn,K Neighbors Classifier,0.7928,0.5217,0.0909,0.3233,0.1382,0.0666,0.0869,0.014
gbc,Gradient Boosting Classifier,0.7893,0.5935,0.0909,0.235,0.1282,0.054,0.0536,0.044
lightgbm,Light Gradient Boosting Machine,0.7755,0.5743,0.0727,0.1712,0.0966,0.0132,0.0075,0.018
ada,Ada Boost Classifier,0.772,0.5467,0.0818,0.2869,0.1178,0.0215,0.0369,0.024
svm,SVM - Linear Kernel,0.691,0.0,0.1909,0.1252,0.1451,-0.0118,-0.0112,0.005


Unnamed: 0,Description,Value
0,Session id,6546
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(203, 15)"
4,Transformed data shape,"(203, 15)"
5,Transformed train set shape,"(142, 15)"
6,Transformed test set shape,"(61, 15)"
7,Numeric features,14
8,Rows with missing values,25.1%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.6976,0.6486,0.305,0.46,0.3532,0.181,0.1852,0.016
lightgbm,Light Gradient Boosting Machine,0.691,0.6544,0.29,0.4333,0.3397,0.171,0.1767,0.008
dummy,Dummy Classifier,0.6905,0.5,0.0,0.0,0.0,0.0,0.0,0.007
rf,Random Forest Classifier,0.6776,0.6901,0.23,0.3867,0.2835,0.1213,0.1218,0.052
et,Extra Trees Classifier,0.6562,0.5904,0.255,0.3483,0.2904,0.0954,0.0847,0.043
qda,Quadratic Discriminant Analysis,0.6548,0.505,0.1,0.0308,0.0471,0.006,0.0175,0.01
dt,Decision Tree Classifier,0.6481,0.5683,0.32,0.375,0.3418,0.1109,0.1079,0.006
lda,Linear Discriminant Analysis,0.6414,0.473,0.0,0.0,0.0,-0.0832,-0.1059,0.007
ada,Ada Boost Classifier,0.641,0.55,0.32,0.3717,0.3386,0.1043,0.1008,0.018
knn,K Neighbors Classifier,0.6352,0.548,0.17,0.265,0.19,0.014,0.0166,0.013


Unnamed: 0,Description,Value
0,Session id,5568
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(505, 15)"
4,Transformed data shape,"(505, 15)"
5,Transformed train set shape,"(353, 15)"
6,Transformed test set shape,"(152, 15)"
7,Numeric features,14
8,Rows with missing values,10.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.7793,0.6659,0.1554,0.425,0.2236,0.1457,0.1678,0.06
dummy,Dummy Classifier,0.7762,0.5,0.0,0.0,0.0,0.0,0.0,0.006
et,Extra Trees Classifier,0.774,0.6684,0.1804,0.545,0.2588,0.1648,0.2101,0.047
gbc,Gradient Boosting Classifier,0.768,0.6466,0.2054,0.5131,0.2727,0.1674,0.2008,0.03
knn,K Neighbors Classifier,0.7679,0.667,0.2804,0.4767,0.3416,0.218,0.2329,0.012
lda,Linear Discriminant Analysis,0.7676,0.5368,0.0125,0.1,0.0222,-0.0026,0.0002,0.007
lightgbm,Light Gradient Boosting Machine,0.7428,0.655,0.1821,0.3261,0.2217,0.1004,0.1058,0.011
ada,Ada Boost Classifier,0.7425,0.615,0.2286,0.3783,0.2738,0.1352,0.1441,0.018
dt,Decision Tree Classifier,0.7256,0.5873,0.3179,0.3696,0.3352,0.1697,0.1709,0.007
lr,Logistic Regression,0.6938,0.432,0.05,0.0235,0.032,-0.0933,-0.115,0.027


Unnamed: 0,Description,Value
0,Session id,3110
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(65, 15)"
4,Transformed data shape,"(65, 15)"
5,Transformed train set shape,"(45, 15)"
6,Transformed test set shape,"(20, 15)"
7,Numeric features,14
8,Rows with missing values,7.7%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.71,0.7375,0.65,0.7417,0.6467,0.4208,0.4379,0.041
gbc,Gradient Boosting Classifier,0.705,0.6625,0.7,0.6833,0.6567,0.4177,0.4431,0.01
dt,Decision Tree Classifier,0.69,0.6875,0.65,0.6167,0.6,0.3643,0.3859,0.005
ada,Ada Boost Classifier,0.69,0.6708,0.7,0.65,0.6367,0.3844,0.4175,0.012
knn,K Neighbors Classifier,0.66,0.6792,0.75,0.6233,0.6338,0.3328,0.3755,0.011
lda,Linear Discriminant Analysis,0.65,0.5542,0.75,0.5567,0.6171,0.317,0.3413,0.005
rf,Random Forest Classifier,0.645,0.6292,0.55,0.675,0.5667,0.2874,0.2969,0.043
lr,Logistic Regression,0.6,0.7042,0.0,0.0,0.0,0.0,0.0,0.024
dummy,Dummy Classifier,0.6,0.5,0.0,0.0,0.0,0.0,0.0,0.007
lightgbm,Light Gradient Boosting Machine,0.595,0.6167,0.15,0.15,0.1333,0.0712,0.0856,0.007


Unnamed: 0,Description,Value
0,Session id,1766
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(294, 15)"
4,Transformed data shape,"(294, 15)"
5,Transformed train set shape,"(205, 15)"
6,Transformed test set shape,"(89, 15)"
7,Numeric features,14
8,Rows with missing values,13.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.7464,0.5,0.0,0.0,0.0,0.0,0.0,0.005
lda,Linear Discriminant Analysis,0.7367,0.4362,0.0,0.0,0.0,-0.0177,-0.0257,0.006
rf,Random Forest Classifier,0.7219,0.4441,0.0,0.0,0.0,-0.043,-0.0575,0.056
lr,Logistic Regression,0.7171,0.5481,0.1133,0.2375,0.1283,0.0315,0.0372,0.023
knn,K Neighbors Classifier,0.6931,0.5061,0.0933,0.275,0.1341,-0.008,-0.0005,0.01
et,Extra Trees Classifier,0.6876,0.5513,0.04,0.0667,0.05,-0.0675,-0.0864,0.042
svm,SVM - Linear Kernel,0.6831,0.0,0.2333,0.2241,0.208,0.0571,0.0529,0.004
gbc,Gradient Boosting Classifier,0.6731,0.5055,0.0767,0.2,0.1016,-0.0575,-0.058,0.019
lightgbm,Light Gradient Boosting Machine,0.6731,0.5147,0.12,0.1483,0.1317,-0.0289,-0.0406,0.008
ada,Ada Boost Classifier,0.6388,0.4897,0.0933,0.115,0.103,-0.1018,-0.1136,0.013



 67%|██████████████████████████████████████████████████▋                         | 2/3 [00:54<00:26, 26.92s/it][A

Unnamed: 0,Description,Value
0,Session id,326
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(263, 15)"
4,Transformed data shape,"(263, 15)"
5,Transformed train set shape,"(184, 15)"
6,Transformed test set shape,"(79, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.5985,0.6244,0.4375,0.5781,0.4842,0.1641,0.1749,0.059
lightgbm,Light Gradient Boosting Machine,0.5927,0.61,0.5,0.5311,0.509,0.1623,0.1659,0.007
dt,Decision Tree Classifier,0.5863,0.5809,0.55,0.519,0.5282,0.1596,0.1636,0.005
ada,Ada Boost Classifier,0.5825,0.5922,0.525,0.5008,0.5048,0.1438,0.1495,0.014
et,Extra Trees Classifier,0.5825,0.6093,0.4875,0.5301,0.4921,0.1441,0.1489,0.04
gbc,Gradient Boosting Classifier,0.5713,0.6148,0.4625,0.5074,0.4762,0.1159,0.1197,0.017
dummy,Dummy Classifier,0.5649,0.5,0.0,0.0,0.0,0.0,0.0,0.005
lda,Linear Discriminant Analysis,0.555,0.5784,0.25,0.4607,0.3079,0.0414,0.0421,0.004
lr,Logistic Regression,0.5538,0.6199,0.1875,0.4567,0.2526,0.025,0.0403,0.024
svm,SVM - Linear Kernel,0.5532,0.0,0.3625,0.4882,0.3055,0.055,0.0969,0.004


Unnamed: 0,Description,Value
0,Session id,8284
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(82, 15)"
4,Transformed data shape,"(82, 15)"
5,Transformed train set shape,"(57, 15)"
6,Transformed test set shape,"(25, 15)"
7,Numeric features,14
8,Rows with missing values,3.7%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.7867,0.7125,0.85,0.8617,0.8433,0.425,0.4342,0.011
et,Extra Trees Classifier,0.7567,0.7312,0.825,0.8317,0.8202,0.3534,0.3702,0.035
rf,Random Forest Classifier,0.7233,0.7125,0.825,0.7867,0.7984,0.2688,0.2877,0.045
gbc,Gradient Boosting Classifier,0.7167,0.875,0.775,0.8283,0.7829,0.3083,0.347,0.011
dummy,Dummy Classifier,0.7067,0.5,1.0,0.7067,0.8267,0.0,0.0,0.004
dt,Decision Tree Classifier,0.7033,0.6625,0.725,0.8517,0.7647,0.2834,0.3202,0.005
lr,Logistic Regression,0.62,0.4625,0.8,0.7067,0.7467,-0.0143,-0.0265,0.025
lightgbm,Light Gradient Boosting Machine,0.62,0.65,0.8,0.7,0.74,0.0,0.0,0.006
knn,K Neighbors Classifier,0.6,0.5312,0.8,0.6783,0.7294,-0.0964,-0.0934,0.011
qda,Quadratic Discriminant Analysis,0.58,0.5,0.7,0.4933,0.5778,0.0,0.0,0.007


Unnamed: 0,Description,Value
0,Session id,1596
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(110, 15)"
4,Transformed data shape,"(110, 15)"
5,Transformed train set shape,"(76, 15)"
6,Transformed test set shape,"(34, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.6375,0.6583,0.6583,0.5767,0.6046,0.2846,0.2816,0.012
dt,Decision Tree Classifier,0.6339,0.6292,0.6083,0.5988,0.5882,0.2546,0.269,0.005
rf,Random Forest Classifier,0.5946,0.5198,0.5167,0.575,0.525,0.1963,0.2072,0.046
ridge,Ridge Classifier,0.55,0.0,0.5417,0.4579,0.4547,0.0964,0.1088,0.004
lightgbm,Light Gradient Boosting Machine,0.5446,0.5438,0.525,0.51,0.4956,0.1078,0.0817,0.006
qda,Quadratic Discriminant Analysis,0.5286,0.5,0.1,0.05,0.0667,0.0,0.0,0.008
ada,Ada Boost Classifier,0.5286,0.5771,0.5583,0.5017,0.5183,0.0583,0.0625,0.013
dummy,Dummy Classifier,0.5286,0.5,0.0,0.0,0.0,0.0,0.0,0.006
nb,Naive Bayes,0.5107,0.3781,0.875,0.45,0.5921,0.0449,0.0756,0.005
et,Extra Trees Classifier,0.5054,0.4406,0.4083,0.4667,0.4081,0.0117,0.0305,0.037


Unnamed: 0,Description,Value
0,Session id,3435
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(11, 15)"
4,Transformed data shape,"(11, 15)"
5,Transformed train set shape,"(7, 15)"
6,Transformed test set shape,"(4, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Description,Value
0,Session id,7410
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(100, 15)"
4,Transformed data shape,"(100, 15)"
5,Transformed train set shape,"(69, 15)"
6,Transformed test set shape,"(31, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,0.5952,0.5775,0.5,0.5,0.4786,0.1531,0.1585,0.013
dummy,Dummy Classifier,0.5952,0.5,0.0,0.0,0.0,0.0,0.0,0.005
qda,Quadratic Discriminant Analysis,0.581,0.5,0.1,0.0429,0.06,0.0,0.0,0.007
lightgbm,Light Gradient Boosting Machine,0.5762,0.5125,0.4167,0.4,0.3867,0.1228,0.1241,0.005
rf,Random Forest Classifier,0.5619,0.5192,0.4,0.4667,0.4057,0.0901,0.0962,0.042
lda,Linear Discriminant Analysis,0.5595,0.3975,0.35,0.57,0.3957,0.0835,0.0914,0.004
svm,SVM - Linear Kernel,0.5524,0.0,0.2667,0.3167,0.2778,-0.0014,-0.0246,0.005
gbc,Gradient Boosting Classifier,0.5476,0.5067,0.4667,0.4417,0.4217,0.0805,0.0944,0.011
et,Extra Trees Classifier,0.519,0.5225,0.3833,0.375,0.3648,-0.0045,-0.0138,0.037
ada,Ada Boost Classifier,0.5048,0.5283,0.4667,0.39,0.4129,0.0048,-0.0088,0.012



100%|████████████████████████████████████████████████████████████████████████████| 3/3 [01:13<00:00, 24.58s/it][A
 50%|██████████████████████████████████████                                      | 1/2 [01:13<01:13, 73.75s/it]
  0%|                                                                                    | 0/3 [00:00<?, ?it/s][A

Unnamed: 0,Description,Value
0,Session id,5961
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(1800, 15)"
4,Transformed data shape,"(1800, 15)"
5,Transformed train set shape,"(1259, 15)"
6,Transformed test set shape,"(541, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.5632,0.5688,0.3923,0.52,0.4458,0.0991,0.1008,0.076
rf,Random Forest Classifier,0.556,0.5561,0.3904,0.5155,0.4418,0.0856,0.0886,0.089
lda,Linear Discriminant Analysis,0.5528,0.5406,0.1979,0.5186,0.2844,0.0476,0.0594,0.006
ridge,Ridge Classifier,0.552,0.0,0.2066,0.5157,0.293,0.0476,0.0584,0.006
lr,Logistic Regression,0.5481,0.5363,0.2801,0.5051,0.3484,0.0522,0.0592,0.022
dummy,Dummy Classifier,0.5465,0.5,0.0,0.0,0.0,0.0,0.0,0.006
dt,Decision Tree Classifier,0.5449,0.545,0.5273,0.4987,0.5119,0.0862,0.0865,0.007
lightgbm,Light Gradient Boosting Machine,0.5322,0.5412,0.4219,0.4797,0.4479,0.0457,0.0457,0.024
gbc,Gradient Boosting Classifier,0.5258,0.5288,0.3395,0.4656,0.3915,0.0199,0.02,0.081
ada,Ada Boost Classifier,0.525,0.5169,0.359,0.4674,0.4051,0.0221,0.0222,0.026


Unnamed: 0,Description,Value
0,Session id,861
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(323, 15)"
4,Transformed data shape,"(323, 15)"
5,Transformed train set shape,"(226, 15)"
6,Transformed test set shape,"(97, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.6324,0.6577,0.6265,0.6577,0.6377,0.2637,0.2678,0.044
lightgbm,Light Gradient Boosting Machine,0.6283,0.6491,0.6614,0.6388,0.6482,0.2554,0.2564,0.008
gbc,Gradient Boosting Classifier,0.6105,0.6263,0.6705,0.6225,0.6426,0.2131,0.2147,0.018
rf,Random Forest Classifier,0.5755,0.6472,0.628,0.5934,0.6069,0.1463,0.1495,0.05
ridge,Ridge Classifier,0.5488,0.0,0.6242,0.5857,0.5768,0.0859,0.107,0.005
lr,Logistic Regression,0.5366,0.5562,0.5,0.556,0.5178,0.0753,0.0768,0.023
dt,Decision Tree Classifier,0.5304,0.5313,0.4758,0.5626,0.5096,0.0665,0.0679,0.005
dummy,Dummy Classifier,0.5221,0.5,1.0,0.5221,0.6859,0.0,0.0,0.005
svm,SVM - Linear Kernel,0.518,0.0,0.5826,0.51,0.4976,0.027,0.0168,0.004
qda,Quadratic Discriminant Analysis,0.5087,0.4867,0.9735,0.515,0.6735,-0.0268,-0.052,0.005


Unnamed: 0,Description,Value
0,Session id,7817
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(1107, 15)"
4,Transformed data shape,"(1107, 15)"
5,Transformed train set shape,"(774, 15)"
6,Transformed test set shape,"(333, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.5997,0.6479,0.5196,0.5839,0.5479,0.192,0.1936,0.072
lightgbm,Light Gradient Boosting Machine,0.5984,0.6217,0.5383,0.5814,0.5574,0.1908,0.1921,0.019
gbc,Gradient Boosting Classifier,0.5881,0.61,0.5276,0.5689,0.5461,0.1704,0.1714,0.05
et,Extra Trees Classifier,0.5803,0.611,0.4916,0.5643,0.5226,0.1518,0.154,0.057
ada,Ada Boost Classifier,0.5751,0.5857,0.4949,0.5563,0.5234,0.1426,0.1433,0.021
lda,Linear Discriminant Analysis,0.5672,0.5617,0.3443,0.585,0.4278,0.1138,0.1286,0.005
dt,Decision Tree Classifier,0.5467,0.546,0.4923,0.5168,0.502,0.0875,0.0874,0.006
knn,K Neighbors Classifier,0.5347,0.5357,0.4642,0.5074,0.483,0.062,0.0622,0.014
lr,Logistic Regression,0.5296,0.5065,0.0764,0.633,0.1318,0.0132,0.0474,0.029
qda,Quadratic Discriminant Analysis,0.5271,0.5,0.0,0.0,0.0,0.0,0.0,0.008


Unnamed: 0,Description,Value
0,Session id,1909
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(205, 15)"
4,Transformed data shape,"(205, 15)"
5,Transformed train set shape,"(143, 15)"
6,Transformed test set shape,"(62, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,0.6857,0.6853,0.7367,0.7694,0.7399,0.3181,0.3266,0.005
rf,Random Forest Classifier,0.6648,0.64,0.8244,0.6998,0.7478,0.2111,0.2478,0.051
gbc,Gradient Boosting Classifier,0.6643,0.6987,0.8344,0.6993,0.7578,0.2019,0.2276,0.016
lightgbm,Light Gradient Boosting Machine,0.6638,0.7184,0.8122,0.7086,0.7496,0.2118,0.2291,0.007
lda,Linear Discriminant Analysis,0.659,0.5882,0.8922,0.6774,0.7671,0.1507,0.1931,0.005
et,Extra Trees Classifier,0.6576,0.643,0.8356,0.6879,0.751,0.1874,0.2139,0.038
dummy,Dummy Classifier,0.6433,0.5,1.0,0.6433,0.7828,0.0,0.0,0.004
lr,Logistic Regression,0.6362,0.4299,0.9889,0.6406,0.7773,-0.0135,-0.0207,0.019
knn,K Neighbors Classifier,0.6157,0.6404,0.7211,0.7228,0.6943,0.1393,0.1737,0.012
ada,Ada Boost Classifier,0.6014,0.65,0.7267,0.6761,0.6976,0.1037,0.1078,0.012


Unnamed: 0,Description,Value
0,Session id,7994
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(435, 15)"
4,Transformed data shape,"(435, 15)"
5,Transformed train set shape,"(304, 15)"
6,Transformed test set shape,"(131, 15)"
7,Numeric features,14
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
nb,Naive Bayes,0.6122,0.574,0.0341,0.4,0.0628,0.0161,0.0511,0.005
dummy,Dummy Classifier,0.6118,0.5,0.0,0.0,0.0,0.0,0.0,0.006
lda,Linear Discriminant Analysis,0.5987,0.5181,0.0341,0.2833,0.0597,-0.0095,-0.008,0.006
gbc,Gradient Boosting Classifier,0.5659,0.4748,0.3053,0.4157,0.3463,0.0377,0.0388,0.023
et,Extra Trees Classifier,0.5591,0.4871,0.2129,0.3824,0.2693,-0.0084,-0.0084,0.045
rf,Random Forest Classifier,0.5523,0.4614,0.1871,0.3574,0.2425,-0.0309,-0.0339,0.051
knn,K Neighbors Classifier,0.5363,0.5317,0.2811,0.3655,0.3151,-0.0215,-0.0238,0.01
lr,Logistic Regression,0.533,0.5338,0.603,0.3891,0.4713,0.0812,0.0813,0.026
ada,Ada Boost Classifier,0.5295,0.466,0.2856,0.3788,0.3166,-0.0296,-0.0273,0.015
lightgbm,Light Gradient Boosting Machine,0.5228,0.4697,0.2803,0.3442,0.3071,-0.046,-0.0492,0.009



 33%|█████████████████████████▎                                                  | 1/3 [00:25<00:51, 25.68s/it][A

Unnamed: 0,Description,Value
0,Session id,6646
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(828, 15)"
4,Transformed data shape,"(828, 15)"
5,Transformed train set shape,"(579, 15)"
6,Transformed test set shape,"(249, 15)"
7,Numeric features,14
8,Rows with missing values,5.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
nb,Naive Bayes,0.7064,0.4777,0.9754,0.7135,0.824,0.0524,0.0847,0.007
dummy,Dummy Classifier,0.7047,0.5,1.0,0.7047,0.8267,0.0,0.0,0.004
lda,Linear Discriminant Analysis,0.6961,0.4217,0.9755,0.7059,0.8189,0.0055,-0.001,0.005
et,Extra Trees Classifier,0.6908,0.5411,0.9215,0.7192,0.8075,0.0751,0.0904,0.066
lr,Logistic Regression,0.6839,0.4646,0.9581,0.7019,0.8098,-0.0165,-0.0317,0.034
rf,Random Forest Classifier,0.677,0.5334,0.9118,0.7115,0.7987,0.0355,0.0447,0.073
lightgbm,Light Gradient Boosting Machine,0.6632,0.5327,0.8603,0.7181,0.7824,0.0596,0.0627,0.016
qda,Quadratic Discriminant Analysis,0.6599,0.5206,0.8924,0.7009,0.7737,0.0011,-0.0075,0.005
ada,Ada Boost Classifier,0.6511,0.4795,0.8627,0.7069,0.7765,0.0116,0.0155,0.024
knn,K Neighbors Classifier,0.6459,0.5009,0.8555,0.7054,0.7727,-0.001,-0.0032,0.013


Unnamed: 0,Description,Value
0,Session id,730
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(203, 15)"
4,Transformed data shape,"(203, 15)"
5,Transformed train set shape,"(142, 15)"
6,Transformed test set shape,"(61, 15)"
7,Numeric features,14
8,Rows with missing values,25.1%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.8319,0.7275,0.9576,0.8573,0.903,0.2274,0.2457,0.014
lightgbm,Light Gradient Boosting Machine,0.8243,0.7747,0.9477,0.8557,0.898,0.2072,0.214,0.006
qda,Quadratic Discriminant Analysis,0.8171,0.5,1.0,0.8171,0.899,0.0,0.0,0.008
dummy,Dummy Classifier,0.8171,0.5,1.0,0.8171,0.899,0.0,0.0,0.004
et,Extra Trees Classifier,0.8105,0.6898,0.922,0.8606,0.8883,0.2141,0.2142,0.04
rf,Random Forest Classifier,0.7957,0.7513,0.9477,0.8286,0.8826,0.0682,0.0674,0.05
ada,Ada Boost Classifier,0.7757,0.6639,0.8788,0.8519,0.8639,0.1884,0.1874,0.014
knn,K Neighbors Classifier,0.7752,0.6613,0.9477,0.8077,0.8714,-0.0623,-0.0667,0.012
lda,Linear Discriminant Analysis,0.7752,0.5424,0.9485,0.8088,0.8725,-0.0673,-0.0783,0.006
dt,Decision Tree Classifier,0.761,0.658,0.8159,0.8815,0.8445,0.2638,0.2755,0.006


Unnamed: 0,Description,Value
0,Session id,3810
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(505, 15)"
4,Transformed data shape,"(505, 15)"
5,Transformed train set shape,"(353, 15)"
6,Transformed test set shape,"(152, 15)"
7,Numeric features,14
8,Rows with missing values,10.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.731,0.5,1.0,0.731,0.8445,0.0,0.0,0.004
lda,Linear Discriminant Analysis,0.7194,0.6117,0.9769,0.7307,0.8358,-0.006,-0.0186,0.005
rf,Random Forest Classifier,0.6998,0.5493,0.9305,0.7321,0.8192,0.003,0.0,0.073
gbc,Gradient Boosting Classifier,0.6942,0.5255,0.8954,0.7405,0.8093,0.0493,0.061,0.026
qda,Quadratic Discriminant Analysis,0.6865,0.5,0.9,0.6587,0.7606,0.0,0.0,0.007
et,Extra Trees Classifier,0.6855,0.5571,0.8838,0.7386,0.8039,0.0371,0.0439,0.048
nb,Naive Bayes,0.6723,0.5292,0.8885,0.689,0.7598,-0.0306,-0.0608,0.005
lightgbm,Light Gradient Boosting Machine,0.6713,0.5301,0.8565,0.7361,0.79,0.0303,0.0326,0.01
knn,K Neighbors Classifier,0.6658,0.5583,0.8755,0.7239,0.7918,-0.036,-0.0302,0.01
ada,Ada Boost Classifier,0.6573,0.5536,0.8452,0.7292,0.7818,-0.0118,-0.0128,0.017


Unnamed: 0,Description,Value
0,Session id,2796
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(65, 15)"
4,Transformed data shape,"(65, 15)"
5,Transformed train set shape,"(45, 15)"
6,Transformed test set shape,"(20, 15)"
7,Numeric features,14
8,Rows with missing values,7.7%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.78,0.7,0.9417,0.805,0.8639,0.2386,0.2342,0.039
knn,K Neighbors Classifier,0.765,0.6667,0.9,0.8317,0.8448,0.2922,0.3,0.012
rf,Random Forest Classifier,0.755,0.725,0.9167,0.7983,0.8448,0.2303,0.2258,0.045
lda,Linear Discriminant Analysis,0.745,0.575,0.8333,0.8417,0.8264,0.3068,0.3123,0.005
ridge,Ridge Classifier,0.735,0.0,0.8333,0.86,0.8036,0.3209,0.3629,0.004
lightgbm,Light Gradient Boosting Machine,0.735,0.7208,0.9667,0.7517,0.8405,0.0667,0.0667,0.006
dummy,Dummy Classifier,0.735,0.5,1.0,0.735,0.8452,0.0,0.0,0.004
ada,Ada Boost Classifier,0.725,0.5167,0.8417,0.8133,0.8165,0.2015,0.2115,0.012
qda,Quadratic Discriminant Analysis,0.715,0.5,0.9,0.675,0.7702,0.0,0.0,0.007
lr,Logistic Regression,0.695,0.225,0.9333,0.7083,0.8036,-0.0667,-0.0667,0.02


Unnamed: 0,Description,Value
0,Session id,5576
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(294, 15)"
4,Transformed data shape,"(294, 15)"
5,Transformed train set shape,"(205, 15)"
6,Transformed test set shape,"(89, 15)"
7,Numeric features,14
8,Rows with missing values,13.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.7271,0.5,1.0,0.7271,0.8419,0.0,0.0,0.004
lr,Logistic Regression,0.7221,0.5189,0.9867,0.7281,0.8375,0.0034,0.0021,0.02
lda,Linear Discriminant Analysis,0.7079,0.5456,0.9595,0.7265,0.8265,-0.0132,-0.0275,0.005
knn,K Neighbors Classifier,0.6988,0.546,0.8867,0.7466,0.809,0.1077,0.12,0.012
rf,Random Forest Classifier,0.6979,0.5528,0.9062,0.7385,0.8125,0.0666,0.0792,0.058
et,Extra Trees Classifier,0.6974,0.5563,0.899,0.7407,0.8113,0.0769,0.0919,0.04
ada,Ada Boost Classifier,0.6821,0.6042,0.8381,0.7553,0.7927,0.1055,0.1123,0.014
qda,Quadratic Discriminant Analysis,0.6795,0.4867,0.9067,0.689,0.7697,-0.0167,-0.0344,0.009
nb,Naive Bayes,0.6698,0.5224,0.8933,0.6863,0.763,-0.0346,-0.0618,0.005
lightgbm,Light Gradient Boosting Machine,0.625,0.5177,0.7981,0.7174,0.7522,-0.0367,-0.0374,0.007



 67%|██████████████████████████████████████████████████▋                         | 2/3 [00:49<00:24, 24.70s/it][A

Unnamed: 0,Description,Value
0,Session id,2763
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(263, 15)"
5,Transformed data shape,"(263, 15)"
6,Transformed train set shape,"(184, 15)"
7,Transformed test set shape,"(79, 15)"
8,Numeric features,14
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.022
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.005
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.043
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.037
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006


Unnamed: 0,Description,Value
0,Session id,2988
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(82, 15)"
5,Transformed data shape,"(82, 15)"
6,Transformed train set shape,"(57, 15)"
7,Transformed test set shape,"(25, 15)"
8,Numeric features,14
9,Rows with missing values,3.7%


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.022
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.042
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.037
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006


Unnamed: 0,Description,Value
0,Session id,8105
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(110, 15)"
5,Transformed data shape,"(110, 15)"
6,Transformed train set shape,"(76, 15)"
7,Transformed test set shape,"(34, 15)"
8,Numeric features,14
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.022
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.01
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.044
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.034
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007


Unnamed: 0,Description,Value
0,Session id,1338
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(11, 15)"
5,Transformed data shape,"(11, 15)"
6,Transformed train set shape,"(7, 15)"
7,Transformed test set shape,"(4, 15)"
8,Numeric features,14
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,6139
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(100, 15)"
5,Transformed data shape,"(100, 15)"
6,Transformed train set shape,"(69, 15)"
7,Transformed test set shape,"(31, 15)"
8,Numeric features,14
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.022
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.043
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.035
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006



100%|████████████████████████████████████████████████████████████████████████████| 3/3 [01:07<00:00, 22.46s/it][A
100%|████████████████████████████████████████████████████████████████████████████| 2/2 [02:21<00:00, 70.56s/it]


In [28]:
aggregated_result_path = EXP_RESULT_PATH / 'result.csv'

if not os.path.exists(aggregated_result_path) :
    exp_result.to_csv(aggregated_result_path)
    
exp_result

Unnamed: 0,model_name,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec),target,x_data_yaer,대분류
0,rf,Random Forest Classifier,0.8777,0.5934,0.1085,0.7133,0.1827,0.1548,0.2398,0.095,Closed_In_1Yr,2018,제조업(10~34)
1,et,Extra Trees Classifier,0.8745,0.6159,0.1210,0.5817,0.1933,0.1587,0.2192,0.068,Closed_In_1Yr,2018,제조업(10~34)
2,lightgbm,Light Gradient Boosting Machine,0.8721,0.6308,0.1335,0.5369,0.2083,0.1675,0.2178,0.053,Closed_In_1Yr,2018,제조업(10~34)
3,qda,Quadratic Discriminant Analysis,0.8697,0.5000,0.0000,0.0000,0.0000,0.0000,0.0000,0.010,Closed_In_1Yr,2018,제조업(10~34)
4,dummy,Dummy Classifier,0.8697,0.5000,0.0000,0.0000,0.0000,0.0000,0.0000,0.006,Closed_In_1Yr,2018,제조업(10~34)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,ada,Ada Boost Classifier,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.007,Closed_In_2Yrs,2020,건설업(41~42)
6,lda,Linear Discriminant Analysis,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.006,Closed_In_2Yrs,2020,건설업(41~42)
7,et,Extra Trees Classifier,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.035,Closed_In_2Yrs,2020,건설업(41~42)
8,lightgbm,Light Gradient Boosting Machine,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.006,Closed_In_2Yrs,2020,건설업(41~42)
