In [23]:
from sklearn.datasets import load_iris
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score

## Iris데이터 검증
### 1. iris 데이터 불러오기

In [2]:
iris = load_iris()
iris_data = iris.data
X_data = iris.data
y_data = iris.target

### 2. 8:2 train_test_split 검증

In [20]:
lr_model = LogisticRegression(solver = 'lbfgs', multi_class='auto')

X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size = 0.2)
lr_model.fit(X_train, y_train)
y_predict = lr_model.predict(X_test)
scores = accuracy_score(y_predict, y_test)
print("train_test_split scores :", scores)

train_test_split scores : 0.9333333333333333


### 3. cross_val_score

In [21]:
lr_model = LogisticRegression(solver = 'lbfgs', multi_class='auto')

scores = cross_val_score(lr_model, X_data, y_data)
print("cross_val_score :", scores)

cross_val_score : [0.98039216 0.94117647 1.        ]




### 4. StratifiedKFold

In [24]:
lr_model = LogisticRegression(solver = 'lbfgs', multi_class='auto')

skf = StratifiedKFold()
scores = cross_val_score(lr_model, X_data, y_data, cv=skf)
print("StratifiedKFold score :", scores)

StratifiedKFold score : [0.98039216 0.94117647 1.        ]




### 5. KFold - 3등분, 5등분 검증

In [28]:
lr_model = LogisticRegression(solver = 'lbfgs', multi_class='auto')

kf = StratifiedKFold(n_splits=3)
scores = cross_val_score(lr_model, X_data, y_data, cv=kf)
print("n_split = 3 score :", scores)
kf = StratifiedKFold(n_splits=5)
scores = cross_val_score(lr_model, X_data, y_data, cv=kf)
print("n_split = 5 score :", scores)

n_split = 3 score : [0.98039216 0.94117647 1.        ]
n_split = 5 score : [0.96666667 1.         0.93333333 0.96666667 1.        ]




### 6. KFold - Shuffle =  True 을 이용한 검증

In [37]:
lr_model = LogisticRegression(solver = 'lbfgs', multi_class='auto')

kf = KFold(shuffle=True)
scores = cross_val_score(lr_model, X_data, y_data, cv=kf)
print("shuffle=True score :", scores)

shuffle=True score : [0.96 0.96 0.98]




### 7.LOOCV

In [44]:
lr_model = LogisticRegression(solver = 'lbfgs', multi_class='auto')

loocv = LeaveOneOut()
scores = cross_val_score(lr_model, X_data, y_data, cv=loocv)
print("LOOCV score :", scores)







LOOCV score : [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
