In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
iris = load_iris()

In [3]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

In [11]:
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.9111111111111111

In [17]:
svm = SVC(max_iter=100)
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

0.9111111111111111

In [25]:
rf = RandomForestClassifier(n_estimators=50)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9333333333333333

In [27]:
from sklearn.model_selection import KFold
kf = KFold()
kf

KFold(n_splits=5, random_state=None, shuffle=False)

In [28]:
def get_score(model, X_train, X_test, y_train, y_test):
  model.fit(X_train, y_train)
  return model.score(X_test, y_test)

In [30]:
for train_index, test_index in kf.split([1,2,3,4,5,6,7,8,9]):
  print(train_index, test_index)

[2 3 4 5 6 7 8] [0 1]
[0 1 4 5 6 7 8] [2 3]
[0 1 2 3 6 7 8] [4 5]
[0 1 2 3 4 5 8] [6 7]
[0 1 2 3 4 5 6 7] [8]


In [29]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)

In [31]:
scores_lr = []
scores_sv = []
scores_rf = []

for train_index, test_index in kf.split(iris.data):
  X_train, X_test, y_train, y_test = iris.data[train_index], iris.data[test_index], \
                                      iris.target[train_index], iris.target[test_index]
  scores_lr.append(get_score(LogisticRegression(max_iter=1000), X_train, X_test, y_train, y_test))
  scores_sv.append(get_score(SVC(max_iter=1000), X_train, X_test, y_train, y_test))
  scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [32]:
scores_lr

[1.0, 1.0, 0.8666666666666667, 0.9333333333333333, 0.8333333333333334]

In [33]:
scores_sv

[1.0, 1.0, 0.8333333333333334, 0.9333333333333333, 0.7]

In [34]:
scores_rf

[1.0, 1.0, 0.8666666666666667, 0.9333333333333333, 0.7333333333333333]

In [35]:
from sklearn.model_selection import cross_val_score


In [37]:
cross_val_score(LogisticRegression(max_iter=1000), iris.data, iris.target)

array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [38]:
cross_val_score(SVC(max_iter=1000), iris.data, iris.target)

array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ])

In [39]:
cross_val_score(RandomForestClassifier(n_estimators=10), iris.data, iris.target)

array([0.96666667, 0.96666667, 0.93333333, 0.86666667, 1.        ])