## Import

In [1]:
from datetime import datetime
import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier

from pycaret.classification import *
from pycaret.utils import version
SEED =37

In [2]:
def seed_everything(seed):
    """Seed the global random number generators used by this notebook.

    Args:
        seed: Integer seed. It is passed to ``random.seed`` and
            ``np.random.seed`` and stored, as a string, in the
            ``PYTHONHASHSEED`` environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Both seeders take the same argument, so apply them in one pass.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    
seed_everything(SEED) # Fix the random seeds so runs are repeatable
def norm(df):
    """Z-score normalize every column of ``df`` and drop columns containing NaN.

    Each column is centred on its mean and divided by its standard deviation
    (``DataFrame.std`` with pandas' default settings). Any column that holds a
    NaN afterwards is removed; this includes constant columns, whose standard
    deviation is 0 so that every entry becomes 0/0.

    Args:
        df: Numeric ``pandas.DataFrame`` to normalize. It is not modified.

    Returns:
        A new ``pandas.DataFrame`` with the normalized, NaN-free columns.
    """
    df_normalized = (df - df.mean()) / df.std()
    # dropna() returns a new frame. The previous code discarded that result,
    # so the NaN columns it meant to remove were still returned.
    return df_normalized.dropna(axis=1)

## Data Load

In [3]:
# Read the training and test tables from the working directory.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

In [4]:
# PRODUCT_ID and TIMESTAMP are removed from both feature frames; the training
# frame additionally loses the two Y_* columns. Y_Class is the target.
id_columns = ['PRODUCT_ID', 'TIMESTAMP']
target_columns = ['Y_Class', 'Y_Quality']

train_y = train_df['Y_Class']
train_x = train_df.drop(columns=id_columns + target_columns)

test_x = test_df.drop(columns=id_columns)

In [5]:
# Quick look at the feature frame and at the distinct values of the two
# qualitative columns. The recorded output below shows only the result of
# the last expression (the PRODUCT_CODE values).
train_x.head()
train_x.LINE.unique()
train_x.PRODUCT_CODE.unique()

# NOTE(review): "onehot" was jotted here — presumably one-hot encoding of
# LINE / PRODUCT_CODE was being considered; the notebook label-encodes them.

array(['A_31', 'T_31', 'O_31'], dtype=object)

## Data Pre-processing

In [6]:
# Replace every missing value with 0 in both feature frames.
train_x, test_x = (frame.fillna(0) for frame in (train_x, test_x))

In [7]:
# Turn the qualitative columns into integer codes.
qual_col = ['LINE', 'PRODUCT_CODE']

for col in qual_col:
    # The encoder is fitted on the training values only.
    le = LabelEncoder().fit(train_x[col])
    train_x[col] = le.transform(train_x[col])

    # Labels that occur only in the test frame are appended to the fitted
    # classes (in sorted order) before the test column is transformed.
    unseen = [label for label in np.unique(test_x[col]) if label not in le.classes_]
    if unseen:
        le.classes_ = np.append(le.classes_, unseen)
    test_x[col] = le.transform(test_x[col])
print('Done.')

Done.


In [8]:
# Split the columns of train_x into three groups:
#   cate_feature  - names of columns whose unique values are all
#                   integer-valued and that have more than two distinct values
#   drop_feature  - positional indices (not names) of integer-valued columns
#                   with at most two distinct values whose first unique value
#                   is -1; collected but not used
#   float_feature - names of columns with at least one non-integer value
cate_feature = []
drop_feature = []
float_feature = []


def _is_integer_valued(value):
    """Return True for integer scalars and for floats without a fractional part."""
    # Accept integer scalars of any width. The previous check compared the
    # type against np.int32 only, so the outcome depended on the platform's
    # default integer size whenever the scalar had no is_integer() method.
    if isinstance(value, (int, np.integer)):
        return True
    try:
        return bool(value.is_integer())
    except AttributeError:
        # Values without is_integer() (e.g. strings) are not integer-valued.
        return False


for i, col in enumerate(train_x.columns):
    unique_values = train_x[col].unique()  # computed once per column

    # Float features: anything that is not purely integer-valued.
    if not all(_is_integer_valued(v) for v in unique_values):
        float_feature.append(col)
        continue

    if len(unique_values) > 2:
        cate_feature.append(col)
    elif len(unique_values) > 0 and unique_values[0] == -1:
        drop_feature.append(i)  # not used

# Idea kept from the original notes (not implemented): fill the float
# features with the overall column mean, or with the per-class mean,
# instead of the constant used earlier.
print(len(cate_feature), len(drop_feature), len(float_feature))

568 3 1864


In [9]:
# Show which class labels occur, then glue the features and the target back
# together column-wise into the single frame handed to PyCaret.
print(train_y.unique())
train = pd.concat((train_x, train_y), axis='columns')
train.head()

[1 2 0]


Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,X_9,X_10,X_11,X_12,X_13,X_14,X_15,X_16,X_17,X_18,X_19,X_20,X_21,X_22,X_23,X_24,X_25,X_26,X_27,X_28,X_29,X_30,X_31,X_32,X_33,X_34,X_35,X_36,X_37,X_38,X_39,X_40,X_41,X_42,X_43,X_44,X_45,X_46,X_47,X_48,X_49,X_50,X_51,X_52,X_53,X_54,X_55,X_56,X_57,X_58,X_59,X_60,X_61,X_62,X_63,X_64,X_65,X_66,X_67,X_68,X_69,X_70,X_71,X_72,X_73,X_74,X_75,X_76,X_77,X_78,X_79,X_80,X_81,X_82,X_83,X_84,X_85,X_86,X_87,X_88,X_89,X_90,X_91,X_92,X_93,X_94,X_95,X_96,X_97,X_98,X_99,X_100,X_101,X_102,X_103,X_104,X_105,X_106,X_107,X_108,X_109,X_110,X_111,X_112,X_113,X_114,X_115,X_116,X_117,X_118,X_119,X_120,X_121,X_122,X_123,X_124,X_125,X_126,X_127,X_128,X_129,X_130,X_131,X_132,X_133,X_134,X_135,X_136,X_137,X_138,X_139,X_140,X_141,X_142,X_143,X_144,X_145,X_146,X_147,X_148,X_149,X_150,X_151,X_152,X_153,X_154,X_155,X_156,X_157,X_158,X_159,X_160,X_161,X_162,X_163,X_164,X_165,X_166,X_167,X_168,X_169,X_170,X_171,X_172,X_173,X_174,X_175,X_176,X_177,X_178,X_179,X_180,X_181,X_182,X_183,X_184,X_185,X_186,X_187,X_188,X_189,X_190,X_191,X_192,X_193,X_194,X_195,X_196,X_197,X_198,X_199,X_200,X_201,X_202,X_203,X_204,X_205,X_206,X_207,X_208,X_209,X_210,X_211,X_212,X_213,X_214,X_215,X_216,X_217,X_218,X_219,X_220,X_221,X_222,X_223,X_224,X_225,X_226,X_227,X_228,X_229,X_230,X_231,X_232,X_233,X_234,X_235,X_236,X_237,X_238,X_239,X_240,X_241,X_242,X_243,X_244,X_245,X_246,X_247,X_248,...,X_2627,X_2628,X_2629,X_2630,X_2631,X_2632,X_2633,X_2634,X_2635,X_2636,X_2637,X_2638,X_2639,X_2640,X_2641,X_2642,X_2643,X_2644,X_2645,X_2646,X_2647,X_2648,X_2649,X_2650,X_2651,X_2652,X_2653,X_2654,X_2655,X_2656,X_2657,X_2658,X_2659,X_2660,X_2661,X_2662,X_2663,X_2664,X_2665,X_2666,X_2667,X_2668,X_2669,X_2670,X_2671,X_2672,X_2673,X_2674,X_2675,X_2676,X_2677,X_2678,X_2679,X_2680,X_2681,X_2682,X_2683,X_2684,X_2685,X_2686,X_2687,X_2688,X_2689,X_2690,X_2691,X_2692,X_2693,X_2694,X_2695,X_2696,X_2697,X_2698,X_2699,X_2700,X_2701,X_2702,X_2703,X_2704,X_2705,X_2706,X_2707,X_2708,X_2709,X_2710
,X_2711,X_2712,X_2713,X_2714,X_2715,X_2716,X_2717,X_2718,X_2719,X_2720,X_2721,X_2722,X_2723,X_2724,X_2725,X_2726,X_2727,X_2728,X_2729,X_2730,X_2731,X_2732,X_2733,X_2734,X_2735,X_2736,X_2737,X_2738,X_2739,X_2740,X_2741,X_2742,X_2743,X_2744,X_2745,X_2746,X_2747,X_2748,X_2749,X_2750,X_2751,X_2752,X_2753,X_2754,X_2755,X_2756,X_2757,X_2758,X_2759,X_2760,X_2761,X_2762,X_2763,X_2764,X_2765,X_2766,X_2767,X_2768,X_2769,X_2770,X_2771,X_2772,X_2773,X_2774,X_2775,X_2776,X_2777,X_2778,X_2779,X_2780,X_2781,X_2782,X_2783,X_2784,X_2785,X_2786,X_2787,X_2788,X_2789,X_2790,X_2791,X_2792,X_2793,X_2794,X_2795,X_2796,X_2797,X_2798,X_2799,X_2800,X_2801,X_2802,X_2803,X_2804,X_2805,X_2806,X_2807,X_2808,X_2809,X_2810,X_2811,X_2812,X_2813,X_2814,X_2815,X_2816,X_2817,X_2818,X_2819,X_2820,X_2821,X_2822,X_2823,X_2824,X_2825,X_2826,X_2827,X_2828,X_2829,X_2830,X_2831,X_2832,X_2833,X_2834,X_2835,X_2836,X_2837,X_2838,X_2839,X_2840,X_2841,X_2842,X_2843,X_2844,X_2845,X_2846,X_2847,X_2848,X_2849,X_2850,X_2851,X_2852,X_2853,X_2854,X_2855,X_2856,X_2857,X_2858,X_2859,X_2860,X_2861,X_2862,X_2863,X_2864,X_2865,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875,Y_Class
0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7813.0,7813.0,0.0,0.0,0.19,0.2,0.19,0.2,228.0,228.0,225.0,582.9,587.1,567.3,0.0,0.0,249.96,249.99,247.46,250.0,428.0,435.0,426.0,179.9,180.0,179.9,180.0,215.0,215.0,214.9,215.0,214.9,215.0,214.9,215.0,209.9,210.0,209.9,210.0,209.9,210.0,209.9,210.0,205.0,205.0,204.9,205.0,204.9,205.0,204.9,205.0,219.9,220.0,219.9,220.0,229.9,230.0,229.9,230.0,-70.0,70.0,8.23,0.0,0.0,0.0,0.0,0.0,0.0,51.4,51.4,51.1,48.9,55.5,55.5,52.5,47.3,55.2,55.2,49.7,48.8,49.6,55.2,55.2,50.2,50.2,52.6,52.6,52.0,53.2,50.9,52.3,52.3,58.7,55.4,50.5,50.5,50.1,50.9,51.8,59.4,52.6,52.6,54.9,53.2,0.0,0.0,0.0,0.0,290.0,290.0,290.0,290.0,40.4,999.0,35.0,35.0,3.0,3.0,0.0,0.0,0.0,...,50.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.221,0.220333,0.22,3.3e-05,2.8e-05,2.5e-05,0.000991,0.000972,0.000949,0.000127,0.000121,0.000114,3.8e-05,3.3e-05,3e-05,3.3e-05,2.5e-05,2.2e-05,0.193,0.192444,0.19,0.0,0.0,0.0,1.1,1.1,1.1,1.1,1.096296,1.0,1.2,1.122222,1.1,0.0,587.1,583.72963,576.2,0.0,249.99,249.980741,249.97,7810.0,7810.0,434.0,428.111111,426.0,180.0,179.996296,179.9,215.1,215.0,214.9,215.0,215.0,215.0,210.0,209.974074,209.9,210.1,210.011111,209.9,205.1,205.011111,205.0,205.0,204.988889,204.9,220.0,219.992593,219.9,230.0,229.974074,229.9,0.0,0.0,0.0,290.0,290.0,290.0,37
.4,32.0,25.0,22.518519,20.0,23.0,22.185185,21.0,24.0,22.444444,21.0,25.0,23.074074,21.0,24.0,22.296296,21.0,24.0,22.592593,21.0,25.0,22.925926,20.0,49.7,50.9,50.9,51.8,59.4,52.6,52.6,54.9,53.2,51.4,51.4,51.1,48.9,55.5,55.5,52.5,47.3,55.2,55.2,48.8,49.6,55.2,55.2,50.2,52.6,52.6,52.0,53.2,52.3,52.3,58.7,55.4,50.5,50.5,50.1,9.04,0.0,5.02,7.0,40.06,0.000331,3.3e-05,0.0,47.2,47.094444,47.0,39.0,32.722222,26.0,184.7,175.903333,164.9,201.2,191.8,181.6,139.6,131.646667,115.4,209.0,197.286667,189.0,383.0,368.296296,353.0,39.34,40.89,32.56,34.09,77.77,0.0,0.0,0.0,0.0,0.0,1
1,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19854.0,19854.0,0.2,0.21,0.2,0.2,413.0,414.0,414.0,589.3,595.8,575.9,0.0,0.0,249.97,249.99,249.43,250.0,423.0,433.0,419.0,179.9,180.0,179.8,180.0,214.9,215.1,214.8,215.0,214.9,215.0,214.9,215.0,210.0,210.0,210.0,210.0,210.0,210.0,209.8,210.0,204.9,205.0,204.9,205.0,204.9,205.0,204.9,205.0,219.9,220.0,219.9,220.0,229.9,230.0,229.9,230.0,-70.0,70.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,53.8,53.8,54.0,59.4,60.7,60.7,60.4,57.4,59.2,59.2,51.6,57.7,58.5,61.0,61.0,59.8,53.6,53.8,53.8,57.0,56.7,51.2,54.4,54.4,61.1,56.4,52.0,52.0,51.9,51.2,53.7,60.9,54.4,54.4,56.9,58.3,0.0,0.0,0.0,0.0,290.0,290.0,290.0,290.0,37.0,999.0,36.0,36.0,3.0,6.0,0.0,0.0,0.0,...,51.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.435,0.43475,0.434,2e-05,1.7e-05,1.1e-05,0.00411,0.004097,0.00408,7.7e-05,7.6e-05,7.3e-05,3.6e-05,3.4e-05,3.3e-05,7e-06,6e-06,4e-06,0.2,0.19955,0.197,2.0,1.9955,1.97,1.1,1.1,1.1,1.0,1.0,1.0,1.2,1.155,1.1,0.0,595.7,592.205,584.4,0.0,249.99,249.981,249.97,19851.0,19851.0,428.0,421.9,419.0,180.0,179.98,179.9,215.1,214.975,214.8,215.1,214.97,214.9,210.0,210.0,210.0,210.1,210.0,209.9,205.0,205.0,205.0,205.1,205.005,204.9,220.0,220.0,220.0,230.0,229.98,229.9,0.0,0.0,0.0,290.0,290.0,290.0,34.0,33.0,28.0,26.55,25.0,24.0,23.5,23.0,24.0,22.45,21.
0,24.0,23.1,22.0,25.0,24.0,23.0,25.0,23.6,22.0,26.0,25.05,24.0,51.6,51.2,51.2,53.7,60.9,54.4,54.4,56.9,58.3,53.8,53.8,54.0,59.4,60.7,60.7,60.4,57.4,59.2,59.2,57.7,58.5,61.0,61.0,53.6,53.8,53.8,57.0,56.7,54.4,54.4,61.1,56.4,52.0,52.0,51.9,8.0,0.0,5.0,7.05,36.54,0.00115,9e-06,0.0,47.1,47.084906,47.0,34.0,30.849057,27.0,186.0,168.417241,151.2,194.9,188.755172,177.1,145.5,128.748276,119.7,198.0,193.296552,185.6,383.0,367.735849,353.0,38.89,42.82,43.92,35.34,72.55,0.0,0.0,0.0,0.0,0.0,2
2,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7815.0,7815.0,0.0,0.0,0.19,0.2,0.19,0.2,228.0,228.0,225.0,583.8,587.6,572.9,0.0,0.0,249.97,249.99,249.1,250.0,427.0,434.0,425.0,179.9,180.0,179.8,180.0,215.0,215.0,214.9,215.0,215.0,215.0,214.9,215.0,209.9,210.0,209.9,210.0,209.9,210.0,209.9,210.0,204.9,205.0,204.9,205.0,204.9,205.0,204.9,205.0,220.0,220.0,219.9,220.0,230.0,230.0,229.9,230.0,-70.0,70.0,8.23,0.0,0.0,0.0,0.0,0.0,0.0,51.4,51.4,51.1,48.9,55.5,55.5,52.5,47.3,55.2,55.2,49.7,48.8,49.6,55.2,55.2,50.2,50.2,52.6,52.6,52.0,53.2,50.9,52.3,52.3,58.7,55.4,50.5,50.5,50.1,50.9,51.8,59.4,52.6,52.6,54.9,53.2,0.0,0.0,0.0,0.0,290.0,290.0,290.0,290.0,40.5,999.0,35.0,35.0,3.0,3.0,0.0,0.0,0.0,...,50.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.221,0.220185,0.22,3.2e-05,2.9e-05,2.7e-05,0.000991,0.000972,0.000952,0.00014,0.000127,0.000112,3.6e-05,3.1e-05,2.8e-05,2.8e-05,2.4e-05,2.1e-05,0.193,0.192556,0.19,0.0,0.0,0.0,1.1,1.1,1.1,1.1,1.1,1.1,1.2,1.125926,1.1,0.0,587.5,584.322222,576.9,0.0,249.99,249.981482,249.98,7813.0,7813.0,433.0,427.703704,425.0,180.1,179.985185,179.8,215.1,215.011111,215.0,215.0,215.0,215.0,210.0,209.985185,209.9,210.1,209.981482,209.9,205.0,204.996296,204.9,205.0,205.0,205.0,220.1,220.003704,220.0,230.1,229.992593,229.9,0.0,0.0,0.0,290.0,290.0,290.0,37.1,3
1.6,25.0,23.148148,21.0,24.0,23.0,22.0,25.0,23.185185,22.0,25.0,23.925926,22.0,24.0,23.111111,22.0,25.0,23.37037,22.0,26.0,23.962963,22.0,49.7,50.9,50.9,51.8,59.4,52.6,52.6,54.9,53.2,51.4,51.4,51.1,48.9,55.5,55.5,52.5,47.3,55.2,55.2,48.8,49.6,55.2,55.2,50.2,52.6,52.6,52.0,53.2,52.3,52.3,58.7,55.4,50.5,50.5,50.1,9.04,0.0,5.02,6.52,40.05,0.000332,3.3e-05,0.0,47.2,47.088679,47.0,35.0,30.603774,27.0,181.4,176.137931,167.4,194.5,187.310345,183.6,128.0,115.365517,104.0,193.4,179.82069,165.5,383.0,367.320755,353.0,39.19,36.65,42.47,36.53,78.35,0.0,0.0,0.0,0.0,0.0,1
3,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19856.0,19856.0,0.2,0.21,0.2,0.2,414.0,414.0,414.0,589.8,596.1,562.2,0.0,0.0,249.97,249.99,243.26,250.0,423.0,433.0,419.0,179.9,180.0,179.9,180.0,215.0,215.0,214.9,215.0,215.0,215.0,214.9,215.0,210.0,210.1,210.0,210.0,210.0,210.0,209.9,210.0,204.9,205.0,204.9,205.0,205.0,205.0,204.9,205.0,220.0,220.1,220.0,220.0,229.9,230.0,229.9,230.0,-70.0,70.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,53.8,53.8,54.0,59.4,60.7,60.7,60.4,57.4,59.2,59.2,51.6,57.7,58.5,61.0,61.0,59.8,53.6,53.8,53.8,57.0,56.7,51.2,54.4,54.4,61.1,56.4,52.0,52.0,51.9,51.2,53.7,60.9,54.4,54.4,56.9,58.3,0.0,0.0,0.0,0.0,290.0,290.0,290.0,290.0,40.5,999.0,36.0,36.0,3.0,6.0,0.0,0.0,0.0,...,51.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.435,0.43463,0.433,2.9e-05,1.4e-05,9e-06,0.00413,0.0041,0.00405,7.8e-05,7.6e-05,7.4e-05,3.4e-05,3.2e-05,3e-05,7e-06,5e-06,3e-06,0.2,0.199333,0.197,2.0,1.993333,1.97,1.1,1.1,1.1,1.0,1.0,1.0,1.2,1.148148,1.1,0.0,596.1,592.1,584.4,0.0,249.99,249.981852,249.97,19854.0,19854.0,427.0,421.925926,419.0,180.0,179.97037,179.9,215.1,215.011111,214.9,215.1,215.007407,214.9,210.0,210.0,210.0,210.1,209.996296,209.9,205.1,205.0,204.9,205.1,205.0,204.9,220.1,220.003704,220.0,230.1,230.018518,229.9,0.0,0.0,0.0,290.0,290.0,290.0,37.3,32.8,29.0,26.962963,
25.0,25.0,23.666667,22.0,24.0,22.444444,21.0,24.0,23.185185,22.0,25.0,24.037037,23.0,25.0,23.592593,22.0,26.0,25.037037,23.0,51.6,51.2,51.2,53.7,60.9,54.4,54.4,56.9,58.3,53.8,53.8,54.0,59.4,60.7,60.7,60.4,57.4,59.2,59.2,57.7,58.5,61.0,61.0,53.6,53.8,53.8,57.0,56.7,54.4,54.4,61.1,56.4,52.0,52.0,51.9,7.49,0.0,5.01,7.03,40.03,0.00121,8e-06,0.0,47.2,47.10566,46.9,38.0,32.698113,27.0,180.5,173.637931,166.7,193.0,187.203448,179.5,126.2,112.082759,94.5,190.3,181.92069,165.8,384.0,369.188679,353.0,37.74,39.17,52.17,30.58,71.78,0.0,0.0,0.0,0.0,0.0,2
4,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7817.0,7817.0,0.0,0.0,0.19,0.2,0.18,0.2,228.0,228.0,225.0,583.2,587.3,565.9,0.0,0.0,249.96,249.99,246.46,250.0,428.0,434.0,426.0,180.0,180.0,179.8,180.0,215.0,215.1,215.0,215.0,214.9,215.0,214.9,215.0,210.0,210.1,210.0,210.0,209.9,210.0,209.9,210.0,204.9,205.0,204.9,205.0,204.9,205.0,204.9,205.0,219.9,220.0,219.9,220.0,230.0,230.0,229.9,230.0,-70.0,70.0,8.23,0.0,0.0,0.0,0.0,0.0,0.0,51.4,51.4,51.1,48.9,55.5,55.5,52.5,47.3,55.2,55.2,49.7,48.8,49.6,55.2,55.2,50.2,50.2,52.6,52.6,52.0,53.2,50.9,52.3,52.3,58.7,55.4,50.5,50.5,50.1,50.9,51.8,59.4,52.6,52.6,54.9,53.2,0.0,0.0,0.0,0.0,288.0,290.0,46.0,290.0,40.6,999.0,35.0,35.0,3.0,3.0,0.0,0.0,0.0,...,50.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.22,0.218111,0.213,3.4e-05,2.9e-05,2.4e-05,0.000987,0.00096,0.000929,0.000121,0.000116,0.000108,3.7e-05,3.3e-05,3e-05,3.3e-05,2.6e-05,2e-05,0.193,0.19063,0.183,0.0,0.0,0.0,1.1,1.1,1.1,1.1,1.1,1.1,1.2,1.122222,1.1,0.0,587.2,583.388889,576.4,0.0,249.99,249.982222,249.97,7815.0,7815.0,433.0,428.407407,426.0,180.1,180.007407,179.9,215.0,215.0,215.0,215.0,214.992593,214.9,210.1,210.003704,210.0,210.1,210.0,209.9,205.0,204.996296,204.9,205.0,204.992593,204.9,220.0,219.992593,219.9,230.1,230.007407,229.9,0.0,0.0,0.0,291.0,289.0,262.0,37.2,31.6,
25.0,23.185185,21.0,24.0,22.925926,22.0,25.0,23.111111,22.0,25.0,23.888889,22.0,24.0,23.037037,22.0,25.0,23.296296,22.0,26.0,24.0,22.0,49.7,50.9,50.9,51.8,59.4,52.6,52.6,54.9,53.2,51.4,51.4,51.1,48.9,55.5,55.5,52.5,47.3,55.2,55.2,48.8,49.6,55.2,55.2,50.2,52.6,52.6,52.0,53.2,52.3,52.3,58.7,55.4,50.5,50.5,50.1,9.04,0.0,5.03,6.52,40.07,0.000334,4.1e-05,0.0,47.2,47.107407,47.0,36.0,32.888889,28.0,184.6,175.366667,167.6,198.1,194.546667,188.0,152.1,138.066667,109.7,208.6,196.393333,182.6,383.0,367.351852,352.0,38.7,41.89,46.93,33.09,76.97,0.0,0.0,0.0,0.0,0.0,1


In [10]:
# Initialise the PyCaret classification experiment on the combined frame.
# NOTE(review): the recorded output of this cell is a MemoryError (7.68 GiB
# allocation). Presumably declaring every column in cate_feature as
# categorical is what inflates memory — confirm before re-running.
setup_options = dict(
    data=train,
    target='Y_Class',
    silent=True,
    session_id=SEED,
    use_gpu=True,
    remove_outliers=False,
    fold=5,
    categorical_features=cate_feature,
    train_size=0.8,
)
reg = setup(**setup_options)

MemoryError: Unable to allocate 7.68 GiB for an array with shape (2, 515153809) and data type object

## Classification Model Fit

In [None]:
# Cross-validate the candidate estimators with 10 folds and keep the best.
# NOTE: despite its name, best5_model holds three models (n_select=3).
best5_model = compare_models(
    n_select=3,
    fold=10,
)
print('Done.')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.7365,0.7732,0.529,0.751,0.6962,0.3579,0.4068,0.38
et,Extra Trees Classifier,0.7363,0.7782,0.5393,0.7378,0.7002,0.3658,0.4021,0.371
catboost,CatBoost Classifier,0.7322,0.7553,0.5287,0.7251,0.6962,0.3582,0.3981,21.971
gbc,Gradient Boosting Classifier,0.7258,0.7339,0.5278,0.7073,0.6931,0.3599,0.3872,2.674
knn,K Neighbors Classifier,0.7238,0.7444,0.5317,0.7079,0.695,0.3636,0.3852,0.487
lightgbm,Light Gradient Boosting Machine,0.7154,0.7264,0.5178,0.7058,0.6808,0.3275,0.357,1.794
xgboost,Extreme Gradient Boosting,0.7008,0.7137,0.5032,0.6844,0.6674,0.3028,0.3285,1.124
ada,Ada Boost Classifier,0.6839,0.6586,0.4832,0.664,0.6539,0.2783,0.2941,0.261
dummy,Dummy Classifier,0.6674,0.5,0.3333,0.4455,0.5343,0.0,0.0,0.007
svm,SVM - Linear Kernel,0.657,0.0,0.4791,0.6385,0.6348,0.2492,0.2581,0.032


Done.


In [None]:
# Tune each selected model (5-fold CV, 15 search iterations) and collect the
# results in the same order. The loop variable is deliberately named `model`
# and left at module level because a later cell reads it.
tune_models = []
for model in best5_model:
    tuned = tune_model(model, fold=5, n_iter=15)
    tune_models.append(tuned)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7188,0.6952,0.4857,0.721,0.666,0.2953,0.3607
1,0.7708,0.8344,0.5451,0.7651,0.7304,0.4356,0.4911
2,0.7188,0.7995,0.5112,0.6884,0.6912,0.3571,0.3728
3,0.7263,0.7393,0.5016,0.7069,0.6876,0.3179,0.3551
4,0.6947,0.77,0.5562,0.682,0.6849,0.3361,0.3395
Mean,0.7259,0.7677,0.52,0.7127,0.692,0.3484,0.3838
Std,0.0249,0.048,0.0266,0.0296,0.0211,0.0481,0.0547


In [None]:
# Blend the tuned models first; the boosting ensemble (commented out below) is planned as the next experiment.

In [None]:
# Combine the tuned models into one blended estimator (5-fold CV), then refit
# it with finalize_model.
# NOTE(review): final_model is not the estimator used in the inference cell,
# which predicts with f2 — confirm which one is intended for submission.
blended = blend_models(fold=5, estimator_list=tune_models)
final_model = finalize_model(blended)
# ensemble = ensemble_model (blended, fold = 5, method = 'Boosting')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7083,0.6816,0.454,0.7697,0.6354,0.2316,0.3321
1,0.75,0.8188,0.5033,0.7586,0.6953,0.3514,0.4371
2,0.6875,0.7908,0.4642,0.6634,0.6575,0.2777,0.2929
3,0.6947,0.748,0.4386,0.6735,0.6408,0.2083,0.249
4,0.7263,0.7735,0.5574,0.7199,0.7064,0.3701,0.3853
Mean,0.7134,0.7625,0.4835,0.717,0.6671,0.2878,0.3393
Std,0.0226,0.0466,0.0427,0.0431,0.0288,0.0639,0.0664


In [None]:
# Finalize one untuned model and its tuned counterpart.
# The original cell read `model` (the variable leaked by the tuning loop, i.e.
# the last entry of best5_model) and `tune`, which is never defined in this
# notebook and raises NameError on a fresh Restart & Run All. Both are now
# taken explicitly from the lists built above.
# NOTE(review): `tune` presumably meant the tuned version of `model`; confirm
# that the last selected model, rather than the top-ranked one, is intended.
f1 = finalize_model(best5_model[-1])
f2 = finalize_model(tune_models[-1])

## Inference

In [None]:
# Score the test features with the finalized model f2 and keep only the
# predicted class column.
scored_test = predict_model(f2, data=test_x)
preds = scored_test['Label']
print('Done.')

Done.


In [None]:
# Print, for each class index 0..k-1 (k = number of distinct training labels),
# how many test rows were predicted as that class.
n_classes = len(train_y.unique())
for class_id in range(n_classes):
    print(int((preds == class_id).sum()))

10
277
23


## Submit

In [None]:
# Write the predictions into a copy of the sample submission and save it
# under ./result with a minute-resolution timestamp in the file name.
os.makedirs('./result', exist_ok=True)

current_datetime = datetime.now()
formatted_datetime = current_datetime.strftime("%Y_%m_%d_%H_%M")

submit = pd.read_csv('./sample_submission.csv').assign(Y_Class=preds)
submit.to_csv(
    f'./result/baseline_submission_{formatted_datetime}.csv',
    index=False,
)