In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore')

pd.options.display.max_rows = 50
pd.options.display.max_columns = 100

train_df = pd.read_csv('pure_.csv')
train_edit_df = pd.read_csv('pure_가공.csv')

variables1 = train_df.drop('voted', axis=1)
predictor1 = train_df['voted']

variables2 = train_edit_df.drop('voted', axis=1)
predictor2 = train_edit_df['voted']
    
x_train1, x_test1, y_train1, y_test1 = train_test_split(variables1, predictor1)
x_train2, x_test2, y_train2, y_test2 = train_test_split(variables2, predictor2)

In [35]:
import numpy as np
from sklearn.decomposition import PCA

def pca_function(data, p):
    X_cen = data - data.mean(axis=0)
    X_cov = np.dot(X_cen.T, X_cen) / 59
    w, v = np.linalg.eig(X_cov)
    explained_variance_ratio= w / w.sum()
    cumsum = np.cumsum(explained_variance_ratio)
    d = np.argmax(cumsum >= p) + 1
    
    pca = PCA(n_components=d)
    data_pca = pca.fit(data)
    print('x_pca_train:',data_pca)

pca_function(variables1, 0.90)
pca_function(variables1, 0.95)
pca_function(variables1, 0.99)
pca_function(variables2, 0.90)
pca_function(variables2, 0.95)
pca_function(variables2, 0.99)

x_pca_train: PCA(n_components=3)
x_pca_train: PCA(n_components=5)
x_pca_train: PCA(n_components=8)
x_pca_train: PCA(n_components=4)
x_pca_train: PCA(n_components=5)
x_pca_train: PCA(n_components=9)


In [36]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, roc_auc_score
import statsmodels.api as sm

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline, make_pipeline

In [45]:
def star_print(word):
    
    print('                              ****************************************************************')
    print('                              **************** {0:^30} ****************'.format(word))
    print('                              ****************************************************************')
    print(' ')

def scores(model, scaler, y_test, y_pred, pca_print, edit_print):
    
    if edit_print == 0:
        edit = 'Raw Data       '
    else:
        edit = 'Processed Data '
        
    model_name = str(model)[:str(model).find('(')]
    
    if pca_print == 4:
        pca_num = 90
    elif pca_print == 5:
        pca_num = 95
    else:
        pca_num = 99
    pca_percent = ' & PCA(' + str(pca_num) + '%)'
    
    if pca_print == 0:
        print(edit + '[ {0:<55}] f1 score : {1:0.04f}, acc : {2:0.04f}, auc_score : {3:0.4f}'
          .format(model_name + ' / ' + str(scaler)[:-2], f1_score(y_test, y_pred)
                  , accuracy_score(y_test, y_pred), roc_auc_score(y_test, y_pred)))
    else:
        print(edit + '[ {0:<55}] f1 score : {1:0.04f}, acc : {2:0.04f}, auc_score : {3:0.4f}'
          .format(model_name + ' / ' + str(scaler)[:-2] + pca_percent, f1_score(y_test, y_pred)
                  , accuracy_score(y_test, y_pred), roc_auc_score(y_test, y_pred)))
        
    return f1_score(y_test, y_pred), accuracy_score(y_test, y_pred), roc_auc_score(y_test, y_pred)
    
    
def run_model(model):
    
    scalers_list = ['NoScaler()', StandardScaler(), RobustScaler(), MinMaxScaler()]
    acc_max = []
    
    star_print(str(model)[:str(model).find('(')])
    
    for scaler in scalers_list:
        for pca_check in [0, 4, 5, 9]:
            for edit_check in range(2):
                
                in_model = model
                    
                if pca_check == 0:
                    if scaler == 'NoScaler()':
                        pipeline = make_pipeline(in_model)
                    else:
                        pipeline = make_pipeline(scaler, in_model)
                else:
                    if scaler == 'NoScaler()':
                        pipeline = make_pipeline(PCA(n_components=pca_check), in_model)
                    else:
                        pipeline = make_pipeline(scaler, PCA(n_components=pca_check), in_model)
                        
                if edit_check == 0:
                    x_train, y_train, x_test, y_test = x_train1, y_train1, x_test1, y_test1
                else:
                    x_train, y_train, x_test, y_test = x_train2, y_train2, x_test2, y_test2
                
                pipeline.fit(x_train, y_train)
                y_pred = pipeline.predict(x_test)
                                
                if str(model)[:3] == 'Rid' or str(model)[:3] == 'Las' or str(model)[:3] == 'Ela' or str(model)[:3] == 'Lin':
                    y_pred = [1 if i > 0.5 else 0 for i in y_pred]

                f1, acc, auc = scores(model, scaler, y_test, y_pred, pca_check, edit_check)
                
                if len(acc_max) == 0 or acc_max[0] <= acc:
                    acc_max = []
                    acc_max.append(acc)
                    acc_max.append(f1)
                    acc_max.append(auc)
                    acc_max.append(model)
                    acc_max.append(scaler)
                    acc_max.append(pca_check)
                    acc_max.append(edit_check)
    
    print(' ')
    print(' ')
    
    return acc_max


def print_max_list(max_list):
    
    star_print('S U M M A R Y')
    
    for i in max_list:
        
        if i[5] == 0:
            pca_percent = 'PCA( X )'
        else:
            if i[5] == 4:
                pca_num = 90
            elif i[5] == 5:
                pca_num = 95
            else:
                pca_num = 99        
            pca_percent = 'PCA(' + str(pca_num) + '%)'
        
        if i[6] == 0:
            is_edit = 'Raw Data'
        else:
            is_edit = 'Processed Data'
            
        print('[ {0:<28} / {1:<14} / {2:<14} / {3} ] f1 score : {4:0.04f}, acc : {5:0.04f}, auc_score : {6:0.4f}'.format(
            str(i[3])[:str(i[3]).find('(')], str(i[4])[:-2], is_edit, pca_percent, i[1], i[0], i[2]))

In [38]:
models_list = [LinearRegression(), Ridge(), Lasso(), ElasticNet()
               , LogisticRegression(), GaussianNB(), LinearDiscriminantAnalysis(), LinearSVC()
               , DecisionTreeClassifier(), KNeighborsClassifier(), RandomForestClassifier()
               , AdaBoostClassifier(), GradientBoostingClassifier(), BaggingClassifier()
               , LGBMClassifier(), XGBClassifier(verbosity=0)]

max_list = []

for model in models_list:
    max_list.append(run_model(model))
    
print_max_list(max_list)

                              ****************************************************************
                              ****************        LinearRegression        ****************
                              ****************************************************************
 
Raw Data       [ LinearRegression / NoScaler                            ] f1 score : 0.6248, acc : 0.6727, auc_score : 0.6666
Processed Data [ LinearRegression / NoScaler                            ] f1 score : 0.6259, acc : 0.6745, auc_score : 0.6683
Raw Data       [ LinearRegression / NoScaler & PCA(90%)                 ] f1 score : 0.0000, acc : 0.5495, auc_score : 0.5000
Processed Data [ LinearRegression / NoScaler & PCA(90%)                 ] f1 score : 0.5557, acc : 0.6441, auc_score : 0.6319
Raw Data       [ LinearRegression / NoScaler & PCA(95%)                 ] f1 score : 0.0004, acc : 0.5494, auc_score : 0.4999
Processed Data [ LinearRegression / NoScaler & PCA(95%)                 ] f1 score 

Raw Data       [ Ridge / MinMaxScaler & PCA(99%)                        ] f1 score : 0.5420, acc : 0.6393, auc_score : 0.6244
Processed Data [ Ridge / MinMaxScaler & PCA(99%)                        ] f1 score : 0.5202, acc : 0.6356, auc_score : 0.6198
 
 
                              ****************************************************************
                              ****************             Lasso              ****************
                              ****************************************************************
 
Raw Data       [ Lasso / NoScaler                                       ] f1 score : 0.4197, acc : 0.6147, auc_score : 0.5872
Processed Data [ Lasso / NoScaler                                       ] f1 score : 0.4204, acc : 0.6144, auc_score : 0.5905
Raw Data       [ Lasso / NoScaler & PCA(90%)                            ] f1 score : 0.0000, acc : 0.5495, auc_score : 0.5000
Processed Data [ Lasso / NoScaler & PCA(90%)                            ] f1 sc

Raw Data       [ ElasticNet / MinMaxScaler & PCA(95%)                   ] f1 score : 0.0000, acc : 0.5495, auc_score : 0.5000
Processed Data [ ElasticNet / MinMaxScaler & PCA(95%)                   ] f1 score : 0.0000, acc : 0.5419, auc_score : 0.5000
Raw Data       [ ElasticNet / MinMaxScaler & PCA(99%)                   ] f1 score : 0.0000, acc : 0.5495, auc_score : 0.5000
Processed Data [ ElasticNet / MinMaxScaler & PCA(99%)                   ] f1 score : 0.0000, acc : 0.5419, auc_score : 0.5000
 
 
                              ****************************************************************
                              ****************       LogisticRegression       ****************
                              ****************************************************************
 
Raw Data       [ LogisticRegression / NoScaler                          ] f1 score : 0.6471, acc : 0.6480, auc_score : 0.6541
Processed Data [ LogisticRegression / NoScaler                          ] f1 sc

Raw Data       [ GaussianNB / MinMaxScaler & PCA(90%)                   ] f1 score : 0.5518, acc : 0.6349, auc_score : 0.6226
Processed Data [ GaussianNB / MinMaxScaler & PCA(90%)                   ] f1 score : 0.5130, acc : 0.6344, auc_score : 0.6178
Raw Data       [ GaussianNB / MinMaxScaler & PCA(95%)                   ] f1 score : 0.5547, acc : 0.6396, auc_score : 0.6268
Processed Data [ GaussianNB / MinMaxScaler & PCA(95%)                   ] f1 score : 0.5119, acc : 0.6341, auc_score : 0.6175
Raw Data       [ GaussianNB / MinMaxScaler & PCA(99%)                   ] f1 score : 0.5549, acc : 0.6393, auc_score : 0.6267
Processed Data [ GaussianNB / MinMaxScaler & PCA(99%)                   ] f1 score : 0.5094, acc : 0.6308, auc_score : 0.6144
 
 
                              ****************************************************************
                              ****************   LinearDiscriminantAnalysis   ****************
                              ********************

Processed Data [ LinearSVC / RobustScaler & PCA(99%)                    ] f1 score : 0.6076, acc : 0.6633, auc_score : 0.6560
Raw Data       [ LinearSVC / MinMaxScaler                               ] f1 score : 0.6254, acc : 0.6730, auc_score : 0.6670
Processed Data [ LinearSVC / MinMaxScaler                               ] f1 score : 0.6259, acc : 0.6743, auc_score : 0.6682
Raw Data       [ LinearSVC / MinMaxScaler & PCA(90%)                    ] f1 score : 0.5561, acc : 0.6381, auc_score : 0.6259
Processed Data [ LinearSVC / MinMaxScaler & PCA(90%)                    ] f1 score : 0.5180, acc : 0.6348, auc_score : 0.6189
Raw Data       [ LinearSVC / MinMaxScaler & PCA(95%)                    ] f1 score : 0.5419, acc : 0.6410, auc_score : 0.6258
Processed Data [ LinearSVC / MinMaxScaler & PCA(95%)                    ] f1 score : 0.5173, acc : 0.6346, auc_score : 0.6186
Raw Data       [ LinearSVC / MinMaxScaler & PCA(99%)                    ] f1 score : 0.5422, acc : 0.6395, auc_score :

Raw Data       [ KNeighborsClassifier / RobustScaler & PCA(95%)         ] f1 score : 0.4363, acc : 0.5100, auc_score : 0.5020
Processed Data [ KNeighborsClassifier / RobustScaler & PCA(95%)         ] f1 score : 0.5969, acc : 0.6352, auc_score : 0.6317
Raw Data       [ KNeighborsClassifier / RobustScaler & PCA(99%)         ] f1 score : 0.4490, acc : 0.5085, auc_score : 0.5027
Processed Data [ KNeighborsClassifier / RobustScaler & PCA(99%)         ] f1 score : 0.6038, acc : 0.6424, auc_score : 0.6387
Raw Data       [ KNeighborsClassifier / MinMaxScaler                    ] f1 score : 0.5587, acc : 0.6157, auc_score : 0.6089
Processed Data [ KNeighborsClassifier / MinMaxScaler                    ] f1 score : 0.6038, acc : 0.6451, auc_score : 0.6409
Raw Data       [ KNeighborsClassifier / MinMaxScaler & PCA(90%)         ] f1 score : 0.5523, acc : 0.6053, auc_score : 0.5994
Processed Data [ KNeighborsClassifier / MinMaxScaler & PCA(90%)         ] f1 score : 0.6096, acc : 0.6468, auc_score :

Processed Data [ AdaBoostClassifier / RobustScaler                      ] f1 score : 0.6841, acc : 0.6929, auc_score : 0.6954
Raw Data       [ AdaBoostClassifier / RobustScaler & PCA(90%)           ] f1 score : 0.1899, acc : 0.5458, auc_score : 0.5073
Processed Data [ AdaBoostClassifier / RobustScaler & PCA(90%)           ] f1 score : 0.6491, acc : 0.6699, auc_score : 0.6696
Raw Data       [ AdaBoostClassifier / RobustScaler & PCA(95%)           ] f1 score : 0.1886, acc : 0.5457, auc_score : 0.5071
Processed Data [ AdaBoostClassifier / RobustScaler & PCA(95%)           ] f1 score : 0.6463, acc : 0.6692, auc_score : 0.6684
Raw Data       [ AdaBoostClassifier / RobustScaler & PCA(99%)           ] f1 score : 0.2796, acc : 0.5504, auc_score : 0.5182
Processed Data [ AdaBoostClassifier / RobustScaler & PCA(99%)           ] f1 score : 0.6422, acc : 0.6706, auc_score : 0.6687
Raw Data       [ AdaBoostClassifier / MinMaxScaler                      ] f1 score : 0.6822, acc : 0.6918, auc_score :

Raw Data       [ BaggingClassifier / StandardScaler & PCA(99%)          ] f1 score : 0.5507, acc : 0.6273, auc_score : 0.6164
Processed Data [ BaggingClassifier / StandardScaler & PCA(99%)          ] f1 score : 0.5763, acc : 0.6418, auc_score : 0.6333
Raw Data       [ BaggingClassifier / RobustScaler                       ] f1 score : 0.5873, acc : 0.6434, auc_score : 0.6362
Processed Data [ BaggingClassifier / RobustScaler                       ] f1 score : 0.5867, acc : 0.6428, auc_score : 0.6359
Raw Data       [ BaggingClassifier / RobustScaler & PCA(90%)            ] f1 score : 0.3738, acc : 0.5230, auc_score : 0.5043
Processed Data [ BaggingClassifier / RobustScaler & PCA(90%)            ] f1 score : 0.5610, acc : 0.6286, auc_score : 0.6200
Raw Data       [ BaggingClassifier / RobustScaler & PCA(95%)            ] f1 score : 0.3833, acc : 0.5293, auc_score : 0.5109
Processed Data [ BaggingClassifier / RobustScaler & PCA(95%)            ] f1 score : 0.5701, acc : 0.6320, auc_score :

Processed Data [ XGBClassifier / StandardScaler & PCA(90%)              ] f1 score : 0.6575, acc : 0.6751, auc_score : 0.6755
Raw Data       [ XGBClassifier / StandardScaler & PCA(95%)              ] f1 score : 0.6054, acc : 0.6469, auc_score : 0.6428
Processed Data [ XGBClassifier / StandardScaler & PCA(95%)              ] f1 score : 0.6519, acc : 0.6721, auc_score : 0.6719
Raw Data       [ XGBClassifier / StandardScaler & PCA(99%)              ] f1 score : 0.6206, acc : 0.6562, auc_score : 0.6533
Processed Data [ XGBClassifier / StandardScaler & PCA(99%)              ] f1 score : 0.6525, acc : 0.6733, auc_score : 0.6730
Raw Data       [ XGBClassifier / RobustScaler                           ] f1 score : 0.6611, acc : 0.6754, auc_score : 0.6779
Processed Data [ XGBClassifier / RobustScaler                           ] f1 score : 0.6717, acc : 0.6829, auc_score : 0.6849
Raw Data       [ XGBClassifier / RobustScaler & PCA(90%)                ] f1 score : 0.3579, acc : 0.5325, auc_score :

# Result

In [46]:
print_max_list(max_list)

                              ****************************************************************
                              ****************         S U M M A R Y          ****************
                              ****************************************************************
 
[ LinearRegression             / StandardScaler / Processed Data / PCA( X ) ] f1 score : 0.6279, acc : 0.6760, auc_score : 0.6699
[ Ridge                        / RobustScaler   / Processed Data / PCA( X ) ] f1 score : 0.6259, acc : 0.6745, auc_score : 0.6683
[ Lasso                        / NoScaler       / Raw Data       / PCA( X ) ] f1 score : 0.4197, acc : 0.6147, auc_score : 0.5872
[ ElasticNet                   / NoScaler       / Processed Data / PCA(99%) ] f1 score : 0.5800, acc : 0.6482, auc_score : 0.6391
[ LogisticRegression           / NoScaler       / Processed Data / PCA( X ) ] f1 score : 0.6317, acc : 0.6779, auc_score : 0.6721
[ GaussianNB                   / RobustScaler   / Processed Dat

In [52]:
def run_model_cv(max_model_list, params):
    
    reg_check = ['Lin', 'Rid', 'Las', 'Ela']
    model, scaler, is_pca, is_edit = max_model_list[3], max_model_list[4], max_model_list[5], max_model_list[6]
    
    print(' ')
    print('*** {0} ***'.format(str(model)[:str(model).find('(')]))  
    
    if str(model)[:3] in reg_check:
        in_model = GridSearchCV(model, param_grid=params, cv=5, n_jobs = -1, verbose=1)
    else:
        in_model = GridSearchCV(model, param_grid=params, cv=5, scoring='accuracy', n_jobs = -1, verbose=1)
    
    if is_pca == 0:
        if scaler == 'NoScaler()':
            pipeline = make_pipeline(in_model)
        else:
            pipeline = make_pipeline(scaler, in_model)
    else:
        if scaler == 'NoScaler()':
            pipeline = make_pipeline(PCA(n_components=is_pca), in_model)
        else:
            pipeline = make_pipeline(scaler, PCA(n_components=is_pca), in_model)
            
    if is_edit == 0:
        x_train, y_train, x_test, y_test = x_train1, y_train1, x_test1, y_test1
    else:
        x_train, y_train, x_test, y_test = x_train2, y_train2, x_test2, y_test2
        
    pipeline.fit(x_train, y_train)
    y_pred = pipeline.predict(x_test)
    
    if str(model)[:3] in reg_check:
        y_pred = [1 if i > 0.5 else 0 for i in y_pred]
        
    f1 = f1_score(y_test, y_pred)
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)

    return f1, acc, auc, model, scaler, is_pca, is_edit    


def print_max_cv_list(max_list):
    
    print(' ')
    star_print('After Grid Search')
    
    for i in max_list:
        
        if i[5] == 0:
            pca_percent = 'PCA( X )'
        else:
            if i[5] == 4:
                pca_num = 90
            elif i[5] == 5:
                pca_num = 95
            else:
                pca_num = 99        
            pca_percent = 'PCA(' + str(pca_num) + '%)'
            
        if i[6] == 0:
            is_edit = 'Raw Data'
        else:
            is_edit = 'Processed Data'
            
        print('[ {0:<28} / {1:<14} / {2:<14} / {3} ] f1 score : {4:0.04f}, acc : {5:0.04f}, auc_score : {6:0.4f}'.format(
            str(i[3])[:str(i[3]).find('(')], str(i[4])[:-2], is_edit, pca_percent, i[0], i[1], i[2]))

In [48]:
param_lix = ['lix', {'alpha': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 1000]}]
param_ada = ['ada', {'n_estimators':list(range(100, 600, 100)),
               'algorithm':['SAMME.R'],
               'learning_rate':[0.3, 0.5, 0.7]
             }]
param_ran = ['ran', {'n_estimators':list(range(100, 600, 100)),
               'max_depth': [2, 3, 4],
               'min_samples_split': [2, 5, 10, 100],
               'min_samples_leaf': [1, 4, 8],
             }]
param_gra = ['gra', {'n_estimators' : [100, 500],
                'max_depth' : [6, 9],
                'learning_rate' : [0.01, 0.1]
            }]
param_bag = ['bag', {'n_estimators':list(range(100, 600, 100)),
               'max_samples':[100, 200],
               'bootstrap': [True, False],
            }]
param_lgb = ['lgb', {'n_estimators':list(range(100, 600, 100)),
               'max_depth':[-1, 5, 10],
               'learning_rate': [0.1, 0.4, 0.7],
            }]
param_xgb = ['xgb', {'n_estimators':list(range(100, 600, 100)),
               'max_depth':[1, 5, 10],
               'learning_rate': [0.1, 0.4, 0.7],
            }]
param_svc = ['svc', {'C':[0.1, 0.5, 1, 5, 10]}]

param_s = [param_lix, param_ada, param_ran, param_gra, param_bag, param_lgb, param_xgb]
param_str_s = []
for param in param_s:
    param_str_s.append(param[0])
lin_s = ['rid', 'las', 'ela']

In [49]:
max_cv_list = []
for i in max_list:
    param_use = 'param_' + str(i[3])[:3].lower()
    
    for idx, param in enumerate(param_str_s):ㅇ
        if str(i[3])[:3].lower() in lin_s:
            max_cv_list.append(run_model_cv(i, param_s[0][1]))
            break
        elif str(i[3])[:3].lower() == param:
            max_cv_list.append(run_model_cv(i, param_s[idx][1]))
            
max_cv_list.append(run_model_cv(max_list[7], param_svc[1])) # LinearSVC

print_max_cv_list(max_cv_list)

 
*** Ridge ***
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    2.6s finished


 
*** Lasso ***
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    1.9s finished


 
*** ElasticNet ***
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


 
*** RandomForestClassifier ***
Fitting 5 folds for each of 180 candidates, totalling 900 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 776 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 900 out of 900 | elapsed:  6.0min finished


 
*** AdaBoostClassifier ***
Fitting 5 folds for each of 15 candidates, totalling 75 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   57.4s
[Parallel(n_jobs=-1)]: Done  75 out of  75 | elapsed:  2.6min finished


 
*** GradientBoostingClassifier ***
Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:  2.8min finished


 
*** BaggingClassifier ***
Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  5.3min finished


 
*** LGBMClassifier ***
Fitting 5 folds for each of 45 candidates, totalling 225 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:   15.7s
[Parallel(n_jobs=-1)]: Done 225 out of 225 | elapsed:   20.5s finished


 
*** XGBClassifier ***
Fitting 5 folds for each of 45 candidates, totalling 225 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   12.5s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 225 out of 225 | elapsed:  5.6min finished


 
*** LinearSVC ***
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:   22.8s finished


 
                              ****************************************************************
                              ****************       After Grid Search        ****************
                              ****************************************************************
 
[ Ridge                        / RobustScaler   / Processed Data / PCA( X ) ] f1 score : 0.6741, acc : 0.6254, auc_score : 0.6679
[ Lasso                        / NoScaler       / Raw Data       / PCA( X ) ] f1 score : 0.6564, acc : 0.5963, auc_score : 0.6480
[ ElasticNet                   / NoScaler       / Processed Data / PCA(99%) ] f1 score : 0.6561, acc : 0.6014, auc_score : 0.6491
[ RandomForestClassifier       / MinMaxScaler   / Processed Data / PCA( X ) ] f1 score : 0.6870, acc : 0.6657, auc_score : 0.6865
[ AdaBoostClassifier           / MinMaxScaler   / Processed Data / PCA( X ) ] f1 score : 0.6953, acc : 0.6867, auc_score : 0.6979
[ GradientBoostingClassifier   / MinMaxScaler   / Processed D