# 데이터 로드 및 전처리

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib

# Prevent broken Hangul glyphs in plots: switch the font family to
# 'Malgun Gothic' and turn off the Unicode minus sign for axis labels.
matplotlib.rcParams.update({
    'font.family': 'Malgun Gothic',
    'axes.unicode_minus': False,
})

In [2]:
# Load the dataset from a CSV file that is read with the EUC-KR encoding.
data=pd.read_csv("./data/class_balance.csv",encoding="EUC-KR")

In [3]:
# Recode the target column to 0/1: the value -1 becomes 0 and every other
# value becomes 1.
data['Y'] = data['Y'].ne(-1).astype('int64')

In [4]:
# Separate the target column Y from the remaining (feature) columns.
Y = data["Y"].copy()
X = data.drop(columns="Y")

# Hold out 20% of the rows as a test set; rows are shuffled with a fixed seed
# so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=22, shuffle=True
)

In [5]:
# 평가 지표 출력 함수
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.metrics import confusion_matrix

def print_all_reg(Y_test,pred):
    """Print a set of binary-classification metrics for the given predictions.

    Prints recall, type II error rate (1 - recall), specificity, type I error
    rate (1 - specificity), precision, F1 score, G-mean (square root of
    recall * specificity) and accuracy, each formatted to three decimals.

    Args:
        Y_test: True labels.
        pred: Predicted labels, aligned with ``Y_test``.

    Returns:
        None. The metrics are written to standard output only.

    Note:
        Specificity is read from row 0 of the confusion matrix, so the labels
        must produce at least a 2x2 matrix (both classes present in
        ``Y_test``/``pred``); otherwise the indexing below fails.
    """
    # Specificity is not provided as a ready-made scorer here, so derive it
    # from the first row of the confusion matrix.
    cm = confusion_matrix(Y_test, pred)
    true_neg, false_pos = cm[0, 0], cm[0, 1]
    specificity = true_neg / (true_neg + false_pos)

    # Recall feeds three of the printed values; compute it once.
    recall = recall_score(Y_test, pred)
    g_mean = np.sqrt(recall * specificity)

    print("model의 recall 값은 {:.3f}".format(recall))
    print("model의 2종 오류 확률 값은 {:.3f}".format(1 - recall))
    print("model의 Specificity 값은 {:.3f}".format(specificity))
    print("model의 1종 오류 확률 값은 {:.3f}".format(1 - specificity))
    print("model의 precision 값은 {:.3f}".format(precision_score(Y_test, pred)))
    print("model의 f1_score 값은 {:.3f}".format(f1_score(Y_test, pred)))
    print("model의 G-mean 값은 {:.3f}".format(g_mean))
    print("model의 accuracy 값은 {:.3f}".format(accuracy_score(Y_test, pred)))


In [6]:
# Redo the 80/20 shuffled split with random_state=7; this rebinds X_train,
# X_test, Y_train and Y_test, replacing any split assigned to them earlier.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=7,shuffle =True)

In [51]:

from xgboost import XGBClassifier

# Evaluation set monitored during fitting (drives early stopping).
# NOTE(review): this is the same X_test/Y_test that is scored below, so the
# reported test metrics are influenced by the early-stopping choice —
# consider a separate validation split.
eval_set = [(X_test, Y_test)]

# Candidate ensemble sizes, and the F1 scores collected for each of them.
n_esti = [100, 500, 1000]
train_scores = []
test_scores = []

# Hyperparameters shared by every run; only n_estimators varies.
fixed_params = dict(
    learning_rate=0.01,
    min_child_weight=10,
    colsample_bytree=0.5,
    max_depth=8,
    gamma=3,
    reg_lambda=0.8,
    eval_metric='logloss',
)

for n_ in n_esti:
    clf = XGBClassifier(n_estimators=n_, **fixed_params)

    # NOTE(review): newer xgboost releases appear to expect
    # early_stopping_rounds on the constructor rather than on fit() —
    # confirm against the installed version.
    clf.fit(X_train, Y_train, eval_set=eval_set, early_stopping_rounds=20)

    preds_train = clf.predict(X_train)
    preds = clf.predict(X_test)

    # Report the full metric set for the training data, then the test data.
    print_all_reg(Y_train, preds_train)
    print()
    print_all_reg(Y_test, preds)

    train_scores.append(f1_score(Y_train, preds_train))
    test_scores.append(f1_score(Y_test, preds))

[0]	validation_0-logloss:0.68951
[1]	validation_0-logloss:0.68641
[2]	validation_0-logloss:0.68357
[3]	validation_0-logloss:0.68037
[4]	validation_0-logloss:0.67723
[5]	validation_0-logloss:0.67416
[6]	validation_0-logloss:0.67097
[7]	validation_0-logloss:0.66851
[8]	validation_0-logloss:0.66562
[9]	validation_0-logloss:0.66274
[10]	validation_0-logloss:0.65996
[11]	validation_0-logloss:0.65787
[12]	validation_0-logloss:0.65514
[13]	validation_0-logloss:0.65282
[14]	validation_0-logloss:0.65019
[15]	validation_0-logloss:0.64762
[16]	validation_0-logloss:0.64549
[17]	validation_0-logloss:0.64291
[18]	validation_0-logloss:0.64044
[19]	validation_0-logloss:0.63842
[20]	validation_0-logloss:0.63641
[21]	validation_0-logloss:0.63432
[22]	validation_0-logloss:0.63200
[23]	validation_0-logloss:0.63022
[24]	validation_0-logloss:0.62779
[25]	validation_0-logloss:0.62582
[26]	validation_0-logloss:0.62354
[27]	validation_0-logloss:0.62125
[28]	validation_0-logloss:0.61884
[29]	validation_0-loglos



[65]	validation_0-logloss:0.56399
[66]	validation_0-logloss:0.56289
[67]	validation_0-logloss:0.56117
[68]	validation_0-logloss:0.55902
[69]	validation_0-logloss:0.55759
[70]	validation_0-logloss:0.55591
[71]	validation_0-logloss:0.55475
[72]	validation_0-logloss:0.55356
[73]	validation_0-logloss:0.55281
[74]	validation_0-logloss:0.55208
[75]	validation_0-logloss:0.55136
[76]	validation_0-logloss:0.55065
[77]	validation_0-logloss:0.54955
[78]	validation_0-logloss:0.54822
[79]	validation_0-logloss:0.54756
[80]	validation_0-logloss:0.54615
[81]	validation_0-logloss:0.54569
[82]	validation_0-logloss:0.54377
[83]	validation_0-logloss:0.54265
[84]	validation_0-logloss:0.54205
[85]	validation_0-logloss:0.54100
[86]	validation_0-logloss:0.54038
[87]	validation_0-logloss:0.53852
[88]	validation_0-logloss:0.53792
[89]	validation_0-logloss:0.53634
[90]	validation_0-logloss:0.53494
[91]	validation_0-logloss:0.53430
[92]	validation_0-logloss:0.53276
[93]	validation_0-logloss:0.53162
[94]	validatio



[64]	validation_0-logloss:0.56482
[65]	validation_0-logloss:0.56399
[66]	validation_0-logloss:0.56289
[67]	validation_0-logloss:0.56117
[68]	validation_0-logloss:0.55902
[69]	validation_0-logloss:0.55759
[70]	validation_0-logloss:0.55591
[71]	validation_0-logloss:0.55475
[72]	validation_0-logloss:0.55356
[73]	validation_0-logloss:0.55281
[74]	validation_0-logloss:0.55208
[75]	validation_0-logloss:0.55136
[76]	validation_0-logloss:0.55065
[77]	validation_0-logloss:0.54955
[78]	validation_0-logloss:0.54822
[79]	validation_0-logloss:0.54756
[80]	validation_0-logloss:0.54615
[81]	validation_0-logloss:0.54569
[82]	validation_0-logloss:0.54377
[83]	validation_0-logloss:0.54265
[84]	validation_0-logloss:0.54205
[85]	validation_0-logloss:0.54100
[86]	validation_0-logloss:0.54038
[87]	validation_0-logloss:0.53852
[88]	validation_0-logloss:0.53792
[89]	validation_0-logloss:0.53634
[90]	validation_0-logloss:0.53494
[91]	validation_0-logloss:0.53430
[92]	validation_0-logloss:0.53276
[93]	validatio

[300]	validation_0-logloss:0.44744
[301]	validation_0-logloss:0.44699
[302]	validation_0-logloss:0.44736
[303]	validation_0-logloss:0.44714
[304]	validation_0-logloss:0.44728
[305]	validation_0-logloss:0.44743
[306]	validation_0-logloss:0.44783
[307]	validation_0-logloss:0.44806
[308]	validation_0-logloss:0.44846
[309]	validation_0-logloss:0.44823
[310]	validation_0-logloss:0.44780
[311]	validation_0-logloss:0.44742
[312]	validation_0-logloss:0.44753
[313]	validation_0-logloss:0.44767
[314]	validation_0-logloss:0.44746
[315]	validation_0-logloss:0.44769
[316]	validation_0-logloss:0.44760
[317]	validation_0-logloss:0.44723
[318]	validation_0-logloss:0.44698
[319]	validation_0-logloss:0.44728
[320]	validation_0-logloss:0.44680
[321]	validation_0-logloss:0.44648
[322]	validation_0-logloss:0.44593
[323]	validation_0-logloss:0.44616
[324]	validation_0-logloss:0.44621
[325]	validation_0-logloss:0.44635
[326]	validation_0-logloss:0.44580
[327]	validation_0-logloss:0.44585
[328]	validation_0-l

[24]	validation_0-logloss:0.62779
[25]	validation_0-logloss:0.62582
[26]	validation_0-logloss:0.62354
[27]	validation_0-logloss:0.62125
[28]	validation_0-logloss:0.61884
[29]	validation_0-logloss:0.61666
[30]	validation_0-logloss:0.61471
[31]	validation_0-logloss:0.61290
[32]	validation_0-logloss:0.61072
[33]	validation_0-logloss:0.60896
[34]	validation_0-logloss:0.60723
[35]	validation_0-logloss:0.60501
[36]	validation_0-logloss:0.60303
[37]	validation_0-logloss:0.60137
[38]	validation_0-logloss:0.59942
[39]	validation_0-logloss:0.59797
[40]	validation_0-logloss:0.59612
[41]	validation_0-logloss:0.59497
[42]	validation_0-logloss:0.59302
[43]	validation_0-logloss:0.59196
[44]	validation_0-logloss:0.59055
[45]	validation_0-logloss:0.58867
[46]	validation_0-logloss:0.58714
[47]	validation_0-logloss:0.58581
[48]	validation_0-logloss:0.58429
[49]	validation_0-logloss:0.58326
[50]	validation_0-logloss:0.58182
[51]	validation_0-logloss:0.58050
[52]	validation_0-logloss:0.57955
[53]	validatio



[65]	validation_0-logloss:0.56399
[66]	validation_0-logloss:0.56289
[67]	validation_0-logloss:0.56117
[68]	validation_0-logloss:0.55902
[69]	validation_0-logloss:0.55759
[70]	validation_0-logloss:0.55591
[71]	validation_0-logloss:0.55475
[72]	validation_0-logloss:0.55356
[73]	validation_0-logloss:0.55281
[74]	validation_0-logloss:0.55208
[75]	validation_0-logloss:0.55136
[76]	validation_0-logloss:0.55065
[77]	validation_0-logloss:0.54955
[78]	validation_0-logloss:0.54822
[79]	validation_0-logloss:0.54756
[80]	validation_0-logloss:0.54615
[81]	validation_0-logloss:0.54569
[82]	validation_0-logloss:0.54377
[83]	validation_0-logloss:0.54265
[84]	validation_0-logloss:0.54205
[85]	validation_0-logloss:0.54100
[86]	validation_0-logloss:0.54038
[87]	validation_0-logloss:0.53852
[88]	validation_0-logloss:0.53792
[89]	validation_0-logloss:0.53634
[90]	validation_0-logloss:0.53494
[91]	validation_0-logloss:0.53430
[92]	validation_0-logloss:0.53276
[93]	validation_0-logloss:0.53162
[94]	validatio

[301]	validation_0-logloss:0.44699
[302]	validation_0-logloss:0.44736
[303]	validation_0-logloss:0.44714
[304]	validation_0-logloss:0.44728
[305]	validation_0-logloss:0.44743
[306]	validation_0-logloss:0.44783
[307]	validation_0-logloss:0.44806
[308]	validation_0-logloss:0.44846
[309]	validation_0-logloss:0.44823
[310]	validation_0-logloss:0.44780
[311]	validation_0-logloss:0.44742
[312]	validation_0-logloss:0.44753
[313]	validation_0-logloss:0.44767
[314]	validation_0-logloss:0.44746
[315]	validation_0-logloss:0.44769
[316]	validation_0-logloss:0.44760
[317]	validation_0-logloss:0.44723
[318]	validation_0-logloss:0.44698
[319]	validation_0-logloss:0.44728
[320]	validation_0-logloss:0.44680
[321]	validation_0-logloss:0.44648
[322]	validation_0-logloss:0.44593
[323]	validation_0-logloss:0.44616
[324]	validation_0-logloss:0.44621
[325]	validation_0-logloss:0.44635
[326]	validation_0-logloss:0.44580
[327]	validation_0-logloss:0.44585
[328]	validation_0-logloss:0.44562
[329]	validation_0-l

[536]	validation_0-logloss:0.43268
[537]	validation_0-logloss:0.43264
[538]	validation_0-logloss:0.43281
[539]	validation_0-logloss:0.43301
[540]	validation_0-logloss:0.43302
[541]	validation_0-logloss:0.43296
[542]	validation_0-logloss:0.43287
[543]	validation_0-logloss:0.43288
[544]	validation_0-logloss:0.43275
[545]	validation_0-logloss:0.43255
[546]	validation_0-logloss:0.43239
[547]	validation_0-logloss:0.43233
[548]	validation_0-logloss:0.43237
[549]	validation_0-logloss:0.43230
[550]	validation_0-logloss:0.43187
[551]	validation_0-logloss:0.43189
[552]	validation_0-logloss:0.43209
[553]	validation_0-logloss:0.43211
[554]	validation_0-logloss:0.43207
[555]	validation_0-logloss:0.43197
[556]	validation_0-logloss:0.43189
[557]	validation_0-logloss:0.43146
[558]	validation_0-logloss:0.43151
[559]	validation_0-logloss:0.43138
[560]	validation_0-logloss:0.43121
[561]	validation_0-logloss:0.43112
[562]	validation_0-logloss:0.43114
[563]	validation_0-logloss:0.43135
[564]	validation_0-l

In [52]:
# Display the training-set F1 scores, one per n_estimators value in loop order.
train_scores

[0.8809523809523809, 0.9529411764705882, 0.9529411764705882]

In [53]:
# Display the test-set F1 scores, one per n_estimators value in loop order.
test_scores

[0.6666666666666666, 0.7407407407407406, 0.7857142857142857]