# Prerequisite

Following packages must be installed to run codes below

```bash
$ pip3 install openpyxl 
```


In [1]:
%load_ext autoreload
%autoreload 2

# Add project path

In [2]:
import os
import sys
from pathlib import Path

PROJECT_PATH = Path().resolve().parent.parent
SRC_PATH = PROJECT_PATH / 'src'

sys.path.append(str(SRC_PATH))

In [3]:
exp_name = 'feature_seleciton_v1'

# Load data

In [4]:
from data_loader import SectorDataLoader

data_loader = SectorDataLoader()
data_finance = (
    data_loader
    .load_dataset()
    .pipe(data_loader.add_beaver_indicator)
    .pipe(data_loader.add_label)
)
data_finance.head()

Unnamed: 0,BIZ_NO,CMP_PFIX_NM,CMP_NM,CMP_NM1,CMP_SFIX_NM,CMP_ENM,BZ_TYP,CMP_TYP,CMP_SCL,PBCO_GB,...,STAT_OCR_DATE_y,Closed_Year,유동자산/부채총계,당기순이익(손실)/자산총계,부채총계/자산총계,순운전자본/자산총계,유동부채/유동자산,Years_From_Closed_Year_To_FS,Closed_In_1Yr,Closed_In_2Yrs
117,1018135422,(주),한국선박기술,,,Korea Marine Time Service,M,,2,2,...,20200930.0,2020.0,1.031451,0.038773,0.578094,0.543045,0.089273,2.0,0,1
219,1018154206,(주),드림미즈,,,"dreammiz Co., Ltd.",M,,2,2,...,20201231.0,2020.0,1.22004,0.001177,0.662356,0.608449,0.247063,2.0,0,1
279,1018163684,,디에프에스서울,,(주),DFS Seoul Ltd.,M,,2,2,...,20190917.0,2019.0,11.012914,0.046666,0.090794,0.909114,0.090802,1.0,1,1
339,1018178760,(주),대교디앤씨,,,"DAEGYO D & C CO.,LTD.",M,,2,2,...,20191010.0,2019.0,0.916364,-0.026128,1.091269,0.097514,0.902486,1.0,1,1
406,1018194173,,에코에너지,,(주),"Eco Energy Co.,Ltd.",M,,2,2,...,20200831.0,2020.0,1.183355,-0.062095,0.828142,0.194625,0.8014,2.0,0,1


In [5]:
data = data_finance

# Baseline classifier using Beaver's features

In [6]:
from project_paths import DATA_PATH

sectors = [
    '제조업(10~34)',
    '부동산업(68)',
    '도매 및 소매업(45~47)',
    '숙박 및 음식점업(55~56)',
    '건설업(41~42)'
]

beaver_features = [
    '유동자산/부채총계',
    '당기순이익(손실)/자산총계',
    '부채총계/자산총계',
    '순운전자본/자산총계',
    '유동부채/유동자산'
]

extended_financial_features = [
    '유동자산',
    '매출채권',
    '비유동자산',
    '유형자산',
    '자산총계',
    '유동부채',
    '비유동부채',
    '부  채  총  계',
    '자본금',
    '이익잉여금(결손금）',
    '자본총계',
    '매출액',
    '판매비와관리비',
    '영업이익（손실）',
    '법인세비용차감전순손익',
    '법인세비용',
    '당기순이익(손실)',
    '기업순이익률(%)',
    '유보액/총자산(%)',
    '유보액/납입자본(%)',
    '매출액총이익률(%)',
    '매출액영업이익률(%)',
    '매출액순이익률(%)',
    '수지비율(%)',
    '경상수지비율',
    '영업비율(%)',
    '금융비용대매출액비율(%',
    '금융비용대부채비율(%)',
    '금융비용대총비용비율(%',
    '부채비율(%)',
    '차입금의존도(%)',
    '자기자본비율(%)',
    '순운전자본비율(%)',
    '유동부채비율(%)',
    '비유동부채비율(%)',
    '부채총계대 매출액(%)',
    '총자본회전율(회)',
    '재고자산회전율(회)',
    '매출채권회전율(회)',
    '매입채무회전율(회)',
    '미수금',
    '매출원가',
    '무형자산',
    '재고자산',
]

years_to_close = [
    'Closed_In_1Yr',
    'Closed_In_2Yrs',
]

x_data_year = list(range(2018, 2021))

EXP_RESULT_PATH = DATA_PATH / 'experiment_result' / exp_name

if not os.path.exists(EXP_RESULT_PATH) : 
    os.mkdir(EXP_RESULT_PATH)

In [7]:
import numpy as np
import pandas as pd
from pycaret.classification import ClassificationExperiment
from tqdm import tqdm

In [8]:
exp_result = None

for target_years_to_close in tqdm(years_to_close) : 
    for x_data_target_year in tqdm(x_data_year, leave=True) : 
        for sector in sectors : 

            filename = EXP_RESULT_PATH / f'Beaver_baseline_{sector}_{target_years_to_close}_{x_data_target_year}.csv'

            if not os.path.exists(filename) : 

                data_to_train = (
                    data
                    .loc[~data[beaver_features[0]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[1]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[2]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[3]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[~data[beaver_features[4]].isin([np.nan, np.inf, -np.inf]), :]
                    .loc[lambda x : pd.to_datetime(x['결산년월'], format='%Y%m%d').dt.year==x_data_target_year]
                    .loc[lambda x : x['대분류']==sector]
                )

                exp = ClassificationExperiment()

                exp.setup(
                    (
                        data_to_train
                        .loc[:, beaver_features+extended_financial_features+[target_years_to_close]]
                        .reset_index(drop=True)
                    ),
                    target=target_years_to_close
                )

                models = exp.compare_models()

                result = (
                    exp.pull()
                    .assign(target=target_years_to_close)
                    .assign(x_data_yaer=x_data_target_year)
                    .assign(대분류=sector)
                    .reset_index(drop=False)
                    .rename(columns={'index':'model_name'})
                )
                result.to_csv(filename)

            else : 
                result = pd.read_csv(filename)

            if exp_result is None : 
                exp_result = result
            else : 
                exp_result = pd.concat([exp_result, result], axis=0)

  0%|                                                                                    | 0/2 [00:00<?, ?it/s]
  0%|                                                                                    | 0/3 [00:00<?, ?it/s][A

Unnamed: 0,Description,Value
0,Session id,5814
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(1800, 50)"
4,Transformed data shape,"(1800, 50)"
5,Transformed train set shape,"(1259, 50)"
6,Transformed test set shape,"(541, 50)"
7,Numeric features,49
8,Rows with missing values,84.8%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8769,0.69,0.1349,0.6762,0.2162,0.181,0.2532,0.119
et,Extra Trees Classifier,0.8745,0.6817,0.1349,0.6492,0.2148,0.1766,0.2444,0.083
lightgbm,Light Gradient Boosting Machine,0.8713,0.6923,0.1221,0.5667,0.1974,0.1568,0.2146,0.107
lr,Logistic Regression,0.8697,0.5062,0.0,0.0,0.0,0.0,0.0,0.228
qda,Quadratic Discriminant Analysis,0.8697,0.5,0.0,0.0,0.0,0.0,0.0,0.012
dummy,Dummy Classifier,0.8697,0.5,0.0,0.0,0.0,0.0,0.0,0.009
gbc,Gradient Boosting Classifier,0.8681,0.6823,0.1221,0.5142,0.1884,0.1453,0.1922,0.229
ridge,Ridge Classifier,0.8626,0.0,0.0419,0.3483,0.0717,0.0429,0.0767,0.007
lda,Linear Discriminant Analysis,0.8554,0.5946,0.0426,0.1783,0.0671,0.0282,0.0332,0.008
ada,Ada Boost Classifier,0.8546,0.6869,0.0735,0.2804,0.1102,0.0623,0.0796,0.067


Unnamed: 0,Description,Value
0,Session id,4026
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(323, 50)"
4,Transformed data shape,"(323, 50)"
5,Transformed train set shape,"(226, 50)"
6,Transformed test set shape,"(97, 50)"
7,Numeric features,49
8,Rows with missing values,98.5%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.8227,0.6798,0.3,0.65,0.3988,0.3164,0.3521,0.046
rf,Random Forest Classifier,0.8097,0.6751,0.235,0.6,0.3254,0.2477,0.2914,0.067
lightgbm,Light Gradient Boosting Machine,0.7964,0.6601,0.255,0.5667,0.3421,0.2396,0.2719,0.012
dummy,Dummy Classifier,0.7923,0.5,0.0,0.0,0.0,0.0,0.0,0.007
lda,Linear Discriminant Analysis,0.7528,0.6081,0.22,0.2667,0.2305,0.1113,0.1106,0.006
gbc,Gradient Boosting Classifier,0.7476,0.669,0.27,0.3467,0.3002,0.1516,0.1539,0.039
ada,Ada Boost Classifier,0.7437,0.6128,0.22,0.3019,0.2421,0.1071,0.1088,0.024
lr,Logistic Regression,0.7344,0.5577,0.24,0.3126,0.265,0.114,0.1143,0.059
knn,K Neighbors Classifier,0.7215,0.5556,0.08,0.25,0.1111,-0.0291,-0.0131,0.015
ridge,Ridge Classifier,0.6988,0.0,0.41,0.3359,0.344,0.1627,0.1762,0.005


Unnamed: 0,Description,Value
0,Session id,5060
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(1107, 50)"
4,Transformed data shape,"(1107, 50)"
5,Transformed train set shape,"(774, 50)"
6,Transformed test set shape,"(333, 50)"
7,Numeric features,49
8,Rows with missing values,83.2%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
qda,Quadratic Discriminant Analysis,0.8514,0.5,0.0,0.0,0.0,0.0,0.0,0.009
dummy,Dummy Classifier,0.8514,0.5,0.0,0.0,0.0,0.0,0.0,0.009
lr,Logistic Regression,0.8502,0.5222,0.0,0.0,0.0,-0.0024,-0.0049,0.056
et,Extra Trees Classifier,0.8489,0.6708,0.1742,0.485,0.2539,0.1904,0.2223,0.064
rf,Random Forest Classifier,0.8464,0.6939,0.1568,0.4933,0.2346,0.1715,0.2095,0.086
lightgbm,Light Gradient Boosting Machine,0.8386,0.6619,0.1477,0.3933,0.211,0.142,0.1644,0.056
knn,K Neighbors Classifier,0.8373,0.6097,0.0886,0.4508,0.1354,0.0825,0.1275,0.017
gbc,Gradient Boosting Classifier,0.832,0.6834,0.1735,0.3545,0.2304,0.1503,0.1627,0.136
lda,Linear Discriminant Analysis,0.8282,0.6498,0.078,0.215,0.1119,0.0487,0.0559,0.009
ada,Ada Boost Classifier,0.823,0.6352,0.1742,0.3092,0.2205,0.1331,0.1395,0.04


Unnamed: 0,Description,Value
0,Session id,5664
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(205, 50)"
4,Transformed data shape,"(205, 50)"
5,Transformed train set shape,"(143, 50)"
6,Transformed test set shape,"(62, 50)"
7,Numeric features,49
8,Rows with missing values,67.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
qda,Quadratic Discriminant Analysis,0.7695,0.6565,0.4167,0.6,0.4562,0.3263,0.3503,0.007
et,Extra Trees Classifier,0.7419,0.7684,0.3917,0.4567,0.4079,0.256,0.2608,0.051
gbc,Gradient Boosting Classifier,0.7367,0.7262,0.3917,0.4717,0.4025,0.2518,0.2589,0.029
rf,Random Forest Classifier,0.7352,0.7539,0.3667,0.45,0.3871,0.2333,0.2413,0.057
dummy,Dummy Classifier,0.7343,0.5,0.0,0.0,0.0,0.0,0.0,0.006
lightgbm,Light Gradient Boosting Machine,0.7205,0.7473,0.45,0.4367,0.4271,0.2535,0.2586,0.009
lr,Logistic Regression,0.72,0.5158,0.1833,0.3533,0.2269,0.119,0.135,0.059
dt,Decision Tree Classifier,0.7129,0.6699,0.5417,0.505,0.4835,0.2941,0.3178,0.006
knn,K Neighbors Classifier,0.7057,0.7182,0.4833,0.45,0.4517,0.262,0.2666,0.015
ada,Ada Boost Classifier,0.701,0.63,0.4917,0.4505,0.4262,0.2422,0.2666,0.021


Unnamed: 0,Description,Value
0,Session id,5977
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(435, 50)"
4,Transformed data shape,"(435, 50)"
5,Transformed train set shape,"(304, 50)"
6,Transformed test set shape,"(131, 50)"
7,Numeric features,49
8,Rows with missing values,94.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.9113,0.5,0.0,0.0,0.0,0.0,0.0,0.007
lr,Logistic Regression,0.9016,0.5062,0.0,0.0,0.0,-0.0135,-0.0146,0.056
rf,Random Forest Classifier,0.9015,0.7478,0.0833,0.15,0.1,0.0765,0.083,0.066
et,Extra Trees Classifier,0.8948,0.6614,0.0833,0.15,0.1,0.0697,0.0751,0.05
knn,K Neighbors Classifier,0.8915,0.5329,0.0,0.0,0.0,-0.0245,-0.0262,0.014
gbc,Gradient Boosting Classifier,0.8885,0.6915,0.1833,0.2417,0.1952,0.1473,0.1546,0.062
lightgbm,Light Gradient Boosting Machine,0.8884,0.6632,0.1167,0.1083,0.1086,0.0709,0.0715,0.019
ada,Ada Boost Classifier,0.8717,0.5512,0.1833,0.2833,0.21,0.1454,0.1553,0.028
lda,Linear Discriminant Analysis,0.8717,0.6061,0.1,0.1667,0.1167,0.058,0.064,0.007
dt,Decision Tree Classifier,0.8522,0.5628,0.1833,0.1617,0.1638,0.0899,0.0912,0.008



 33%|█████████████████████████▎                                                  | 1/3 [00:37<01:14, 37.01s/it][A

Unnamed: 0,Description,Value
0,Session id,4850
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(828, 50)"
4,Transformed data shape,"(828, 50)"
5,Transformed train set shape,"(579, 50)"
6,Transformed test set shape,"(249, 50)"
7,Numeric features,49
8,Rows with missing values,91.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.81,0.5,0.0,0.0,0.0,0.0,0.0,0.007
rf,Random Forest Classifier,0.8066,0.5465,0.0455,0.3,0.0762,0.0441,0.0698,0.079
lightgbm,Light Gradient Boosting Machine,0.7996,0.5589,0.0909,0.4233,0.1401,0.0781,0.1113,0.032
et,Extra Trees Classifier,0.7962,0.5751,0.0364,0.2083,0.0597,0.0154,0.024,0.058
lda,Linear Discriminant Analysis,0.791,0.5885,0.0818,0.44,0.1277,0.0566,0.0957,0.006
knn,K Neighbors Classifier,0.7824,0.4917,0.0364,0.2083,0.0597,-0.0065,0.0018,0.014
gbc,Gradient Boosting Classifier,0.7772,0.558,0.0727,0.2489,0.1027,0.0195,0.0273,0.104
qda,Quadratic Discriminant Analysis,0.7755,0.5066,0.0727,0.0438,0.0545,0.01,0.0108,0.008
lr,Logistic Regression,0.7721,0.5417,0.0455,0.101,0.062,-0.0172,-0.0283,0.066
ridge,Ridge Classifier,0.758,0.0,0.1727,0.2668,0.1731,0.0637,0.0749,0.006


Unnamed: 0,Description,Value
0,Session id,5778
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(203, 50)"
4,Transformed data shape,"(203, 50)"
5,Transformed train set shape,"(142, 50)"
6,Transformed test set shape,"(61, 50)"
7,Numeric features,49
8,Rows with missing values,100.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.741,0.735,0.465,0.545,0.48,0.3281,0.3363,0.032
lightgbm,Light Gradient Boosting Machine,0.7338,0.6729,0.415,0.5583,0.4368,0.2917,0.3111,0.009
et,Extra Trees Classifier,0.7271,0.7304,0.435,0.5667,0.4778,0.3047,0.3157,0.049
dt,Decision Tree Classifier,0.72,0.6758,0.555,0.575,0.5419,0.345,0.3635,0.006
rf,Random Forest Classifier,0.6914,0.7176,0.3,0.435,0.3405,0.1709,0.1726,0.051
dummy,Dummy Classifier,0.6905,0.5,0.0,0.0,0.0,0.0,0.0,0.007
lda,Linear Discriminant Analysis,0.669,0.6445,0.4,0.4267,0.3968,0.1791,0.1825,0.006
ada,Ada Boost Classifier,0.6629,0.6137,0.34,0.4429,0.37,0.1525,0.1617,0.018
svm,SVM - Linear Kernel,0.6443,0.0,0.51,0.5077,0.4485,0.2046,0.241,0.005
knn,K Neighbors Classifier,0.6424,0.5729,0.255,0.435,0.3019,0.0847,0.0993,0.013


Unnamed: 0,Description,Value
0,Session id,8868
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(505, 50)"
4,Transformed data shape,"(505, 50)"
5,Transformed train set shape,"(353, 50)"
6,Transformed test set shape,"(152, 50)"
7,Numeric features,49
8,Rows with missing values,91.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.7789,0.6155,0.1125,0.4317,0.1682,0.1101,0.1498,0.07
dummy,Dummy Classifier,0.7762,0.5,0.0,0.0,0.0,0.0,0.0,0.007
et,Extra Trees Classifier,0.7761,0.6444,0.1875,0.4333,0.253,0.1603,0.1771,0.059
gbc,Gradient Boosting Classifier,0.762,0.585,0.2143,0.5262,0.285,0.1685,0.2035,0.069
lda,Linear Discriminant Analysis,0.7421,0.6359,0.1911,0.356,0.2394,0.1106,0.1188,0.007
knn,K Neighbors Classifier,0.7395,0.6047,0.2268,0.3646,0.2619,0.1261,0.1356,0.015
ada,Ada Boost Classifier,0.7307,0.5489,0.2518,0.3481,0.2901,0.1318,0.1342,0.029
lr,Logistic Regression,0.7306,0.5423,0.0661,0.1917,0.0895,-0.0126,-0.0127,0.058
lightgbm,Light Gradient Boosting Machine,0.7278,0.5636,0.1375,0.2476,0.1744,0.0385,0.0359,0.018
ridge,Ridge Classifier,0.6771,0.0,0.2393,0.2153,0.22,0.0339,0.0279,0.005


Unnamed: 0,Description,Value
0,Session id,7584
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(65, 50)"
4,Transformed data shape,"(65, 50)"
5,Transformed train set shape,"(45, 50)"
6,Transformed test set shape,"(20, 50)"
7,Numeric features,49
8,Rows with missing values,93.8%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.685,0.7542,0.85,0.6,0.6833,0.4063,0.4486,0.005
lr,Logistic Regression,0.63,0.6208,0.75,0.5833,0.6033,0.2951,0.3626,0.064
et,Extra Trees Classifier,0.615,0.7375,0.5,0.55,0.48,0.2194,0.2523,0.036
dummy,Dummy Classifier,0.6,0.5,0.0,0.0,0.0,0.0,0.0,0.006
rf,Random Forest Classifier,0.595,0.6208,0.4,0.35,0.3467,0.1315,0.1577,0.047
knn,K Neighbors Classifier,0.59,0.6375,0.5,0.525,0.46,0.1719,0.1969,0.015
dt,Decision Tree Classifier,0.575,0.55,0.55,0.5,0.48,0.1571,0.1911,0.006
lightgbm,Light Gradient Boosting Machine,0.57,0.6208,0.5,0.3333,0.3933,0.132,0.15,0.007
ridge,Ridge Classifier,0.55,0.0,0.15,0.2333,0.1733,0.0058,0.0112,0.008
ada,Ada Boost Classifier,0.505,0.4958,0.55,0.3583,0.3933,0.0584,0.0833,0.015


Unnamed: 0,Description,Value
0,Session id,2973
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(294, 50)"
4,Transformed data shape,"(294, 50)"
5,Transformed train set shape,"(205, 50)"
6,Transformed test set shape,"(89, 50)"
7,Numeric features,49
8,Rows with missing values,97.6%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.7567,0.5838,0.1,0.3,0.1476,0.1024,0.1225,0.055
dummy,Dummy Classifier,0.7464,0.5,0.0,0.0,0.0,0.0,0.0,0.007
gbc,Gradient Boosting Classifier,0.7321,0.6387,0.1933,0.395,0.2458,0.1236,0.1326,0.039
et,Extra Trees Classifier,0.7314,0.6816,0.1533,0.3262,0.1857,0.0899,0.1097,0.047
lr,Logistic Regression,0.7271,0.6129,0.1367,0.225,0.17,0.0723,0.0675,0.053
lightgbm,Light Gradient Boosting Machine,0.7074,0.6638,0.19,0.3183,0.2195,0.0799,0.086,0.01
knn,K Neighbors Classifier,0.7029,0.5724,0.1933,0.3417,0.2365,0.0776,0.084,0.012
ada,Ada Boost Classifier,0.6881,0.6295,0.23,0.3595,0.2688,0.0843,0.0944,0.022
dt,Decision Tree Classifier,0.629,0.4999,0.2633,0.2725,0.2514,0.0199,0.0191,0.006
lda,Linear Discriminant Analysis,0.6198,0.5128,0.15,0.19,0.1616,-0.0738,-0.0746,0.006



 67%|██████████████████████████████████████████████████▋                         | 2/3 [01:06<00:32, 32.36s/it][A

Unnamed: 0,Description,Value
0,Session id,8475
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(263, 50)"
4,Transformed data shape,"(263, 50)"
5,Transformed train set shape,"(184, 50)"
6,Transformed test set shape,"(79, 50)"
7,Numeric features,49
8,Rows with missing values,90.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.6582,0.6337,0.5,0.6442,0.5477,0.2846,0.2984,0.011
rf,Random Forest Classifier,0.6363,0.6403,0.475,0.6037,0.5207,0.2421,0.2483,0.054
lr,Logistic Regression,0.6082,0.6177,0.45,0.5819,0.5002,0.1862,0.1958,0.058
ada,Ada Boost Classifier,0.5974,0.6518,0.425,0.5131,0.4538,0.1578,0.1537,0.02
et,Extra Trees Classifier,0.5763,0.6305,0.45,0.5089,0.4685,0.1258,0.1267,0.048
dummy,Dummy Classifier,0.5649,0.5,0.0,0.0,0.0,0.0,0.0,0.008
dt,Decision Tree Classifier,0.5602,0.5553,0.5125,0.4934,0.4933,0.1089,0.1132,0.006
gbc,Gradient Boosting Classifier,0.5596,0.5564,0.4125,0.4654,0.4317,0.0863,0.0799,0.037
ridge,Ridge Classifier,0.5541,0.0,0.5125,0.4807,0.4884,0.098,0.1007,0.006
lda,Linear Discriminant Analysis,0.5386,0.5186,0.2875,0.4571,0.3406,0.0208,0.0249,0.007


Unnamed: 0,Description,Value
0,Session id,3798
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(82, 50)"
4,Transformed data shape,"(82, 49)"
5,Transformed train set shape,"(57, 49)"
6,Transformed test set shape,"(25, 49)"
7,Numeric features,49
8,Rows with missing values,100.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.72,0.4625,0.925,0.7533,0.8216,0.1922,0.1908,0.007
rf,Random Forest Classifier,0.7067,0.5875,0.9,0.7617,0.8061,0.1436,0.1566,0.046
qda,Quadratic Discriminant Analysis,0.7067,0.5,1.0,0.7067,0.8267,0.0,0.0,0.008
ada,Ada Boost Classifier,0.7067,0.6875,0.85,0.7867,0.7816,0.2675,0.2857,0.016
dummy,Dummy Classifier,0.7067,0.5,1.0,0.7067,0.8267,0.0,0.0,0.005
gbc,Gradient Boosting Classifier,0.6867,0.5875,0.8,0.7883,0.7582,0.2068,0.2158,0.015
et,Extra Trees Classifier,0.6867,0.675,0.825,0.78,0.7683,0.1818,0.1908,0.038
lr,Logistic Regression,0.6533,0.5625,0.775,0.7617,0.7493,0.1464,0.1526,0.069
knn,K Neighbors Classifier,0.6367,0.6,0.75,0.7683,0.735,0.115,0.1342,0.014
dt,Decision Tree Classifier,0.6333,0.55,0.75,0.76,0.72,0.0935,0.1153,0.007


Unnamed: 0,Description,Value
0,Session id,7010
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(110, 50)"
4,Transformed data shape,"(110, 50)"
5,Transformed train set shape,"(76, 50)"
6,Transformed test set shape,"(34, 50)"
7,Numeric features,49
8,Rows with missing values,82.7%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.6946,0.725,0.6833,0.6988,0.6622,0.3872,0.4151,0.018
rf,Random Forest Classifier,0.6714,0.6833,0.6167,0.715,0.6402,0.3474,0.3585,0.053
qda,Quadratic Discriminant Analysis,0.6554,0.6677,0.5583,0.6312,0.5513,0.3061,0.3133,0.006
et,Extra Trees Classifier,0.6554,0.6802,0.5083,0.7167,0.569,0.2875,0.3252,0.038
lightgbm,Light Gradient Boosting Machine,0.6286,0.6458,0.575,0.6333,0.5867,0.2497,0.2695,0.006
dt,Decision Tree Classifier,0.6107,0.6083,0.5667,0.6071,0.5594,0.2206,0.2366,0.006
lda,Linear Discriminant Analysis,0.5804,0.5479,0.525,0.6167,0.5421,0.1524,0.1724,0.005
ada,Ada Boost Classifier,0.5768,0.6187,0.55,0.6105,0.5411,0.1575,0.1735,0.014
svm,SVM - Linear Kernel,0.5536,0.0,0.6917,0.5188,0.5849,0.1136,0.1356,0.005
knn,K Neighbors Classifier,0.5357,0.5885,0.575,0.4917,0.5235,0.0705,0.0781,0.012


Unnamed: 0,Description,Value
0,Session id,1779
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(11, 50)"
4,Transformed data shape,"(11, 50)"
5,Transformed train set shape,"(7, 50)"
6,Transformed test set shape,"(4, 50)"
7,Numeric features,49
8,Rows with missing values,81.8%
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,226
1,Target,Closed_In_1Yr
2,Target type,Binary
3,Original data shape,"(100, 50)"
4,Transformed data shape,"(100, 50)"
5,Transformed train set shape,"(69, 50)"
6,Transformed test set shape,"(31, 50)"
7,Numeric features,49
8,Rows with missing values,95.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.5952,0.5,0.0,0.0,0.0,0.0,0.0,0.007
rf,Random Forest Classifier,0.5833,0.55,0.3333,0.5,0.3867,0.0951,0.1026,0.047
lda,Linear Discriminant Analysis,0.569,0.5675,0.5,0.5167,0.4924,0.1258,0.1299,0.005
ridge,Ridge Classifier,0.5667,0.0,0.45,0.4833,0.4576,0.1001,0.1032,0.004
et,Extra Trees Classifier,0.5548,0.5075,0.2667,0.4083,0.3052,0.0337,0.0345,0.044
dt,Decision Tree Classifier,0.5524,0.5358,0.4667,0.4867,0.466,0.0791,0.0733,0.006
lightgbm,Light Gradient Boosting Machine,0.5357,0.5283,0.45,0.41,0.4031,0.0215,0.038,0.008
gbc,Gradient Boosting Classifier,0.5071,0.5725,0.4,0.3833,0.3838,-0.0145,-0.0246,0.024
lr,Logistic Regression,0.4929,0.4383,0.35,0.3167,0.32,-0.0553,-0.0484,0.049
knn,K Neighbors Classifier,0.4833,0.5438,0.2333,0.275,0.2371,-0.0944,-0.0963,0.012



100%|████████████████████████████████████████████████████████████████████████████| 3/3 [01:28<00:00, 29.62s/it][A
 50%|██████████████████████████████████████                                      | 1/2 [01:28<01:28, 88.87s/it]
  0%|                                                                                    | 0/3 [00:00<?, ?it/s][A

Unnamed: 0,Description,Value
0,Session id,2372
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(1800, 50)"
4,Transformed data shape,"(1800, 50)"
5,Transformed train set shape,"(1259, 50)"
6,Transformed test set shape,"(541, 50)"
7,Numeric features,49
8,Rows with missing values,84.8%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.5607,0.5766,0.3977,0.5197,0.4484,0.0956,0.0981,0.118
et,Extra Trees Classifier,0.5592,0.5824,0.4187,0.5194,0.4604,0.0963,0.0989,0.08
gbc,Gradient Boosting Classifier,0.5567,0.5682,0.3993,0.5167,0.4496,0.0886,0.0913,0.22
lda,Linear Discriminant Analysis,0.5567,0.5676,0.3448,0.5172,0.4134,0.0796,0.084,0.007
ada,Ada Boost Classifier,0.5551,0.5791,0.4309,0.5113,0.4663,0.0904,0.0916,0.061
dummy,Dummy Classifier,0.5465,0.5,0.0,0.0,0.0,0.0,0.0,0.006
qda,Quadratic Discriminant Analysis,0.5457,0.4993,0.0,0.0,0.0,-0.0016,-0.0081,0.008
ridge,Ridge Classifier,0.5441,0.0,0.3853,0.501,0.4303,0.0627,0.0653,0.005
lr,Logistic Regression,0.5354,0.5873,0.7091,0.4908,0.5797,0.0963,0.1062,0.063
lightgbm,Light Gradient Boosting Machine,0.5289,0.564,0.4065,0.4755,0.4371,0.0375,0.0375,0.061


Unnamed: 0,Description,Value
0,Session id,2756
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(323, 50)"
4,Transformed data shape,"(323, 50)"
5,Transformed train set shape,"(226, 50)"
6,Transformed test set shape,"(97, 50)"
7,Numeric features,49
8,Rows with missing values,98.5%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.5978,0.6422,0.6288,0.6086,0.6118,0.1934,0.1951,0.038
rf,Random Forest Classifier,0.5794,0.6394,0.6197,0.5969,0.6057,0.1544,0.156,0.053
ada,Ada Boost Classifier,0.5713,0.6381,0.5598,0.5904,0.5704,0.1434,0.1442,0.018
dt,Decision Tree Classifier,0.5709,0.5883,0.5591,0.592,0.5727,0.1412,0.1416,0.006
lightgbm,Light Gradient Boosting Machine,0.566,0.5968,0.5788,0.5811,0.5712,0.1326,0.1374,0.01
et,Extra Trees Classifier,0.5474,0.6226,0.5417,0.5663,0.5524,0.0967,0.096,0.049
svm,SVM - Linear Kernel,0.5395,0.0,0.6379,0.5757,0.5638,0.0589,0.0835,0.004
lda,Linear Discriminant Analysis,0.5358,0.5443,0.5341,0.5701,0.5406,0.0724,0.0766,0.005
knn,K Neighbors Classifier,0.5344,0.5566,0.5667,0.5629,0.5595,0.0652,0.0668,0.012
nb,Naive Bayes,0.5259,0.5117,0.4098,0.5531,0.4485,0.0682,0.0669,0.005


Unnamed: 0,Description,Value
0,Session id,6503
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(1107, 50)"
4,Transformed data shape,"(1107, 50)"
5,Transformed train set shape,"(774, 50)"
6,Transformed test set shape,"(333, 50)"
7,Numeric features,49
8,Rows with missing values,83.2%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ada,Ada Boost Classifier,0.5944,0.617,0.5494,0.5779,0.5616,0.1849,0.1861,0.039
rf,Random Forest Classifier,0.593,0.6274,0.4946,0.5821,0.5317,0.1773,0.1802,0.093
lightgbm,Light Gradient Boosting Machine,0.5918,0.6249,0.5195,0.5763,0.5444,0.1774,0.179,0.044
gbc,Gradient Boosting Classifier,0.584,0.6177,0.4727,0.5725,0.5155,0.1579,0.161,0.133
dt,Decision Tree Classifier,0.5838,0.5842,0.5459,0.5613,0.5524,0.1638,0.1643,0.009
et,Extra Trees Classifier,0.5581,0.6063,0.4893,0.5362,0.5098,0.1099,0.1108,0.067
nb,Naive Bayes,0.5503,0.5728,0.2631,0.6188,0.3189,0.0751,0.1005,0.006
lr,Logistic Regression,0.549,0.5681,0.5161,0.5272,0.5183,0.0948,0.0963,0.052
knn,K Neighbors Classifier,0.54,0.5446,0.4785,0.5151,0.4943,0.0743,0.0749,0.013
qda,Quadratic Discriminant Analysis,0.5284,0.5018,0.0135,0.0625,0.0222,0.0036,-0.0009,0.009


Unnamed: 0,Description,Value
0,Session id,3758
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(205, 50)"
4,Transformed data shape,"(205, 50)"
5,Transformed train set shape,"(143, 50)"
6,Transformed test set shape,"(62, 50)"
7,Numeric features,49
8,Rows with missing values,67.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.6805,0.6137,0.8311,0.7195,0.7651,0.2611,0.2772,0.009
knn,K Neighbors Classifier,0.6714,0.5323,0.8267,0.7163,0.7638,0.2266,0.2388,0.012
lda,Linear Discriminant Analysis,0.6657,0.6713,0.8078,0.7179,0.7543,0.2341,0.2584,0.006
lr,Logistic Regression,0.6519,0.5773,0.9278,0.6631,0.7712,0.1047,0.167,0.045
nb,Naive Bayes,0.651,0.5356,0.9789,0.6543,0.7831,0.0435,0.052,0.006
dummy,Dummy Classifier,0.6433,0.5,1.0,0.6433,0.7828,0.0,0.0,0.009
gbc,Gradient Boosting Classifier,0.6367,0.6116,0.7733,0.7019,0.731,0.1716,0.1698,0.033
rf,Random Forest Classifier,0.6252,0.6536,0.7867,0.6842,0.7257,0.133,0.1439,0.056
ada,Ada Boost Classifier,0.611,0.5912,0.7233,0.6817,0.6957,0.1514,0.1574,0.022
dt,Decision Tree Classifier,0.6038,0.56,0.7433,0.6704,0.7013,0.1096,0.1196,0.005


Unnamed: 0,Description,Value
0,Session id,4635
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(435, 50)"
4,Transformed data shape,"(435, 50)"
5,Transformed train set shape,"(304, 50)"
6,Transformed test set shape,"(131, 50)"
7,Numeric features,49
8,Rows with missing values,94.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
nb,Naive Bayes,0.6281,0.5461,0.0758,0.5667,0.1311,0.0636,0.1176,0.007
lightgbm,Light Gradient Boosting Machine,0.6194,0.5953,0.375,0.5275,0.4289,0.1586,0.1682,0.017
ada,Ada Boost Classifier,0.6154,0.6127,0.4341,0.524,0.4631,0.1705,0.1796,0.028
dummy,Dummy Classifier,0.6118,0.5,0.0,0.0,0.0,0.0,0.0,0.008
gbc,Gradient Boosting Classifier,0.609,0.5752,0.3417,0.491,0.3955,0.1258,0.1318,0.067
lr,Logistic Regression,0.6078,0.6046,0.4871,0.491,0.4865,0.1701,0.1717,0.069
ridge,Ridge Classifier,0.5953,0.0,0.4333,0.4779,0.4354,0.1315,0.1378,0.006
rf,Random Forest Classifier,0.5827,0.5916,0.2553,0.4403,0.3192,0.0496,0.0545,0.074
et,Extra Trees Classifier,0.5825,0.5655,0.3053,0.4568,0.3537,0.0674,0.0749,0.059
lda,Linear Discriminant Analysis,0.5791,0.5514,0.2636,0.4419,0.319,0.0479,0.0538,0.007



 33%|█████████████████████████▎                                                  | 1/3 [00:34<01:09, 34.71s/it][A

Unnamed: 0,Description,Value
0,Session id,3163
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(828, 50)"
4,Transformed data shape,"(828, 50)"
5,Transformed train set shape,"(579, 50)"
6,Transformed test set shape,"(249, 50)"
7,Numeric features,49
8,Rows with missing values,91.3%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7167,0.6298,0.9168,0.7424,0.82,0.1871,0.2198,0.068
rf,Random Forest Classifier,0.7149,0.5831,0.9584,0.7255,0.8256,0.1189,0.1756,0.085
dummy,Dummy Classifier,0.7047,0.5,1.0,0.7047,0.8267,0.0,0.0,0.006
lr,Logistic Regression,0.7029,0.5239,0.9607,0.7156,0.82,0.0623,0.0911,0.072
lightgbm,Light Gradient Boosting Machine,0.6943,0.631,0.8798,0.7376,0.8018,0.1502,0.1666,0.031
gbc,Gradient Boosting Classifier,0.689,0.5741,0.8847,0.7314,0.8002,0.1208,0.1253,0.123
lda,Linear Discriminant Analysis,0.6804,0.5665,0.9215,0.7108,0.8023,0.0333,0.0346,0.007
ada,Ada Boost Classifier,0.6736,0.573,0.831,0.7401,0.7822,0.1362,0.1384,0.041
ridge,Ridge Classifier,0.6701,0.0,0.8774,0.7175,0.7886,0.0622,0.0709,0.005
knn,K Neighbors Classifier,0.6597,0.6005,0.7989,0.7409,0.7672,0.1303,0.1317,0.013


Unnamed: 0,Description,Value
0,Session id,5694
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(203, 50)"
4,Transformed data shape,"(203, 50)"
5,Transformed train set shape,"(142, 50)"
6,Transformed test set shape,"(61, 50)"
7,Numeric features,49
8,Rows with missing values,100.0%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.8805,0.7288,0.9833,0.8864,0.9312,0.463,0.4927,0.008
rf,Random Forest Classifier,0.8529,0.7399,0.9917,0.8549,0.9174,0.2484,0.2661,0.059
et,Extra Trees Classifier,0.8462,0.7759,0.9735,0.8582,0.9117,0.2825,0.2928,0.045
knn,K Neighbors Classifier,0.8386,0.6393,0.9917,0.8408,0.9094,0.1846,0.2159,0.015
gbc,Gradient Boosting Classifier,0.8176,0.6967,0.9318,0.8605,0.893,0.2229,0.2329,0.033
qda,Quadratic Discriminant Analysis,0.8171,0.5,1.0,0.8171,0.899,0.0,0.0,0.009
dummy,Dummy Classifier,0.8171,0.5,1.0,0.8171,0.899,0.0,0.0,0.005
svm,SVM - Linear Kernel,0.789,0.0,0.9318,0.833,0.8764,0.0724,0.0738,0.008
ada,Ada Boost Classifier,0.7829,0.5929,0.8902,0.8545,0.8684,0.2237,0.2336,0.02
lda,Linear Discriminant Analysis,0.769,0.7212,0.872,0.8505,0.8595,0.167,0.1692,0.007


Unnamed: 0,Description,Value
0,Session id,1522
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(505, 50)"
4,Transformed data shape,"(505, 50)"
5,Transformed train set shape,"(353, 50)"
6,Transformed test set shape,"(152, 50)"
7,Numeric features,49
8,Rows with missing values,91.9%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.731,0.5,1.0,0.731,0.8445,0.0,0.0,0.006
rf,Random Forest Classifier,0.7167,0.5317,0.9651,0.7323,0.8325,0.0115,0.0347,0.069
lr,Logistic Regression,0.7138,0.5418,0.9691,0.7289,0.8318,-0.0128,-0.0141,0.069
nb,Naive Bayes,0.7053,0.4943,0.9572,0.7264,0.8256,-0.0291,-0.0487,0.006
gbc,Gradient Boosting Classifier,0.6967,0.5144,0.9066,0.7389,0.8134,0.0416,0.046,0.071
et,Extra Trees Classifier,0.6939,0.5274,0.8954,0.7412,0.8105,0.0463,0.0477,0.054
lightgbm,Light Gradient Boosting Machine,0.6883,0.5031,0.8994,0.7346,0.8079,0.0188,0.0168,0.017
lda,Linear Discriminant Analysis,0.6714,0.5185,0.8991,0.7204,0.7996,-0.0588,-0.0727,0.006
knn,K Neighbors Classifier,0.6683,0.5781,0.8603,0.7326,0.7908,0.0062,0.0048,0.014
ada,Ada Boost Classifier,0.6569,0.548,0.8291,0.7379,0.779,0.0088,-0.001,0.03


Unnamed: 0,Description,Value
0,Session id,3159
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(65, 50)"
4,Transformed data shape,"(65, 50)"
5,Transformed train set shape,"(45, 50)"
6,Transformed test set shape,"(20, 50)"
7,Numeric features,49
8,Rows with missing values,93.8%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.835,0.8,0.9167,0.8667,0.8857,0.5818,0.5796,0.045
knn,K Neighbors Classifier,0.81,0.7417,0.9417,0.8317,0.877,0.3917,0.3917,0.013
et,Extra Trees Classifier,0.79,0.7667,0.8833,0.8467,0.8546,0.4318,0.4373,0.039
lightgbm,Light Gradient Boosting Machine,0.785,0.675,0.9667,0.8017,0.869,0.2667,0.2667,0.007
ada,Ada Boost Classifier,0.77,0.7833,0.85,0.83,0.8308,0.4015,0.4115,0.015
gbc,Gradient Boosting Classifier,0.765,0.8125,0.85,0.8383,0.8356,0.3985,0.404,0.015
lr,Logistic Regression,0.74,0.5417,0.8583,0.8267,0.8288,0.2758,0.2969,0.061
dt,Decision Tree Classifier,0.74,0.7667,0.7333,0.9,0.8019,0.4176,0.4471,0.006
ridge,Ridge Classifier,0.735,0.0,0.9167,0.79,0.8367,0.1,0.1077,0.006
dummy,Dummy Classifier,0.735,0.5,1.0,0.735,0.8452,0.0,0.0,0.005


Unnamed: 0,Description,Value
0,Session id,8788
1,Target,Closed_In_2Yrs
2,Target type,Binary
3,Original data shape,"(294, 50)"
4,Transformed data shape,"(294, 50)"
5,Transformed train set shape,"(205, 50)"
6,Transformed test set shape,"(89, 50)"
7,Numeric features,49
8,Rows with missing values,97.6%
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7474,0.7072,0.8933,0.7978,0.8374,0.2745,0.2963,0.046
dummy,Dummy Classifier,0.7271,0.5,1.0,0.7271,0.8419,0.0,0.0,0.005
lda,Linear Discriminant Analysis,0.7171,0.6987,0.8119,0.809,0.8059,0.2773,0.2962,0.006
knn,K Neighbors Classifier,0.7124,0.6685,0.8652,0.7721,0.8138,0.179,0.1782,0.012
ada,Ada Boost Classifier,0.709,0.7232,0.8262,0.7874,0.8026,0.2327,0.2375,0.021
gbc,Gradient Boosting Classifier,0.7086,0.7085,0.8595,0.7709,0.8097,0.1834,0.2028,0.041
lr,Logistic Regression,0.7069,0.6666,0.8252,0.7858,0.8026,0.2214,0.2277,0.052
rf,Random Forest Classifier,0.7033,0.6798,0.9067,0.7433,0.8159,0.0798,0.0929,0.057
lightgbm,Light Gradient Boosting Machine,0.6995,0.7124,0.86,0.7588,0.804,0.1585,0.1827,0.01
ridge,Ridge Classifier,0.6779,0.0,0.7724,0.7848,0.7754,0.2016,0.2071,0.005



 67%|██████████████████████████████████████████████████▋                         | 2/3 [01:05<00:32, 32.37s/it][A

Unnamed: 0,Description,Value
0,Session id,8768
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(263, 50)"
5,Transformed data shape,"(263, 50)"
6,Transformed train set shape,"(184, 50)"
7,Transformed test set shape,"(79, 50)"
8,Numeric features,49
9,Rows with missing values,90.9%


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.045
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.046
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.04
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007


Unnamed: 0,Description,Value
0,Session id,1447
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(82, 50)"
5,Transformed data shape,"(82, 50)"
6,Transformed train set shape,"(57, 50)"
7,Transformed test set shape,"(25, 50)"
8,Numeric features,49
9,Rows with missing values,100.0%


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.037
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.045
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.038
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.008


Unnamed: 0,Description,Value
0,Session id,8503
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(110, 50)"
5,Transformed data shape,"(110, 50)"
6,Transformed train set shape,"(76, 50)"
7,Transformed test set shape,"(34, 50)"
8,Numeric features,49
9,Rows with missing values,82.7%


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.038
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.009
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.046
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.037
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007


Unnamed: 0,Description,Value
0,Session id,8718
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(11, 50)"
5,Transformed data shape,"(11, 50)"
6,Transformed train set shape,"(7, 50)"
7,Transformed test set shape,"(4, 50)"
8,Numeric features,49
9,Rows with missing values,81.8%


Unnamed: 0,Description,Value
0,Session id,695
1,Target,Closed_In_2Yrs
2,Target type,Multiclass
3,Target mapping,1: 0
4,Original data shape,"(100, 50)"
5,Transformed data shape,"(100, 50)"
6,Transformed train set shape,"(69, 50)"
7,Transformed test set shape,"(31, 50)"
8,Numeric features,49
9,Rows with missing values,95.0%


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.039
nb,Naive Bayes,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dt,Decision Tree Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
ridge,Ridge Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.006
rf,Random Forest Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.048
ada,Ada Boost Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007
lda,Linear Discriminant Analysis,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
et,Extra Trees Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.038
lightgbm,Light Gradient Boosting Machine,1.0,0.0,1.0,1.0,1.0,,0.0,0.008
dummy,Dummy Classifier,1.0,0.0,1.0,1.0,1.0,,0.0,0.007



100%|████████████████████████████████████████████████████████████████████████████| 3/3 [01:25<00:00, 28.54s/it][A
100%|████████████████████████████████████████████████████████████████████████████| 2/2 [02:54<00:00, 87.25s/it]


In [9]:
aggregated_result_path = EXP_RESULT_PATH / 'result.csv'

if not os.path.exists(aggregated_result_path) :
    exp_result.to_csv(aggregated_result_path)
    
exp_result

Unnamed: 0,model_name,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec),target,x_data_yaer,대분류
0,rf,Random Forest Classifier,0.8769,0.6900,0.1349,0.6762,0.2162,0.1810,0.2532,0.119,Closed_In_1Yr,2018,제조업(10~34)
1,et,Extra Trees Classifier,0.8745,0.6817,0.1349,0.6492,0.2148,0.1766,0.2444,0.083,Closed_In_1Yr,2018,제조업(10~34)
2,lightgbm,Light Gradient Boosting Machine,0.8713,0.6923,0.1221,0.5667,0.1974,0.1568,0.2146,0.107,Closed_In_1Yr,2018,제조업(10~34)
3,lr,Logistic Regression,0.8697,0.5062,0.0000,0.0000,0.0000,0.0000,0.0000,0.228,Closed_In_1Yr,2018,제조업(10~34)
4,qda,Quadratic Discriminant Analysis,0.8697,0.5000,0.0000,0.0000,0.0000,0.0000,0.0000,0.012,Closed_In_1Yr,2018,제조업(10~34)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,ada,Ada Boost Classifier,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.007,Closed_In_2Yrs,2020,건설업(41~42)
6,lda,Linear Discriminant Analysis,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.008,Closed_In_2Yrs,2020,건설업(41~42)
7,et,Extra Trees Classifier,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.038,Closed_In_2Yrs,2020,건설업(41~42)
8,lightgbm,Light Gradient Boosting Machine,1.0000,0.0000,1.0000,1.0000,1.0000,,0.0000,0.008,Closed_In_2Yrs,2020,건설업(41~42)
