In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

In [3]:
titanic = sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [4]:
titanic = sns.load_dataset('titanic') 
titanic = titanic[["survived","pclass","fare","age"]].dropna()
features = titanic[["pclass","fare","age"]].values
label = titanic[["survived"]].values

In [7]:
# KFold 교차검증

model = [DecisionTreeClassifier(), RandomForestClassifier(), LogisticRegression()]
name = ['DT','RF','LR']
cv_acc = []
kfold = KFold(n_splits=5)

for model, name in zip(model,name):
    print("### 사용할 알고리즘 :",name,'###')
    for score in ['accuracy','precision','recall','f1']:
        for train_idx, test_idx in kfold.split(features):
            X_train, X_test = features[train_idx], features[test_idx]
            y_train, y_test = label[train_idx], label[test_idx]
            y_train = y_train.ravel(); y_test = y_test.ravel()
            
            model.fit(X_train, y_train)
            kf_pred = model.predict(X_test)
            acc_test = np.round(accuracy_score(y_test, kf_pred),4)
            cv_acc.append(acc_test)
        
        print('---------')
        print(score)
        print(round(np.mean(cv_acc),4))
        
    print("\n")

### 사용할 알고리즘 : DT ###
---------
accuracy
0.6415
---------
precision
0.638
---------
recall
0.6373
---------
f1
0.6362


### 사용할 알고리즘 : RF ###
---------
accuracy
0.6398
---------
precision
0.642
---------
recall
0.6453
---------
f1
0.6473


### 사용할 알고리즘 : LR ###
---------
accuracy
0.6529
---------
precision
0.6573
---------
recall
0.661
---------
f1
0.664




In [6]:
# stratified KFold 검증

model = [DecisionTreeClassifier(), RandomForestClassifier(), LogisticRegression()]
name = ['DT','RF','LR']
cv_acc = []
St_kfold = StratifiedKFold(n_splits=5)

for model, name in zip(model,name):
    print("### 사용할 알고리즘 :",name,'###')
    for score in ['accuracy','precision','recall','f1']:
        for train_idx, test_idx in St_kfold.split(features,label):
            X_train, X_test = features[train_idx], features[test_idx]
            y_train, y_test = label[train_idx], label[test_idx]
            y_train = y_train.ravel(); y_test = y_test.ravel()
            
            model.fit(X_train, y_train)
            kf_pred = model.predict(X_test)
            acc_test = np.round(accuracy_score(y_test, kf_pred),4)
            cv_acc.append(acc_test)
        
        print('---------')
        print(score)
        print(round(np.mean(cv_acc),4))
        
    print("\n")

### 사용할 알고리즘 : DT ###
---------
accuracy
0.6274
---------
precision
0.6302
---------
recall
0.6279
---------
f1
0.6299


### 사용할 알고리즘 : RF ###
---------
accuracy
0.6356
---------
precision
0.6403
---------
recall
0.6439
---------
f1
0.6466


### 사용할 알고리즘 : LR ###
---------
accuracy
0.6524
---------
precision
0.657
---------
recall
0.6608
---------
f1
0.664


