In [56]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.datasets import load_digits

In [57]:
# Load the digits dataset bundled with scikit-learn; later cells read the
# feature matrix from `digits.data` and the labels from `digits.target`.
digits = load_digits()

In [58]:
# Inspect which attributes the loaded dataset object exposes.
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [59]:
# Hold out 30% of the samples for evaluation. The fixed random_state makes the
# split (and every score derived from it) repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=42
)

In [60]:
# Logistic-regression baseline. fit() returns the estimator itself, so
# construction and training are chained; the last line shows the held-out score.
model_log = LogisticRegression(max_iter=5000).fit(X_train, y_train)
model_log.score(X_test, y_test)

0.9555555555555556

In [61]:
# Single decision tree. Tree construction is randomised (features are permuted
# at each split), so the seed is pinned; without it the held-out score drifts
# from run to run even on the very same train/test split.
model_dtree = DecisionTreeClassifier(random_state=42)
model_dtree.fit(X_train, y_train)
model_dtree.score(X_test, y_test)

0.8666666666666667

In [62]:
# Support-vector classifier with default settings, trained and then scored on
# the held-out split.
model_svm = SVC().fit(X_train, y_train)
model_svm.score(X_test, y_test)

0.9851851851851852

In [63]:
# Random forest with the default number of trees. The seed is pinned because
# the bootstrap samples and per-split feature subsets are drawn at random, which
# otherwise changes the held-out score on every run.
model_rf = RandomForestClassifier(random_state=42)
model_rf.fit(X_train, y_train)
model_rf.score(X_test, y_test)

0.9629629629629629

In [64]:
from sklearn.model_selection import KFold

# Plain (non-stratified) 3-fold splitter; with the default shuffle=False the
# folds are contiguous index ranges, as the toy split in the next cell shows.
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [65]:
# Toy demonstration of the splitter: print the train and test index arrays
# that each of the three folds yields for a nine-element sequence.
toy_values = [5, 6, 7, 8, 9, 10, 11, 12, 13]
for fold_indices in kf.split(toy_values):
    print(*fold_indices)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [66]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and return its score on the test split.

    Args:
        model: Object exposing ``fit(X, y)`` and ``score(X, y)``. It is
            trained in place.
        X_train: Training features passed to ``model.fit``.
        X_test: Evaluation features passed to ``model.score``.
        y_train: Training labels passed to ``model.fit``.
        y_test: Evaluation labels passed to ``model.score``.

    Returns:
        Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    held_out_score = model.score(X_test, y_test)
    return held_out_score

In [67]:
# Same logistic-regression evaluation as before, now routed through get_score.
score_log = get_score(
    model=LogisticRegression(max_iter=5000),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
score_log

0.9555555555555556

In [68]:
# Decision tree via get_score. The seed is pinned so that re-running this cell
# on the same split reproduces the same score.
score_dtree = get_score(DecisionTreeClassifier(random_state=42), X_train, X_test, y_train, y_test)
score_dtree

0.8611111111111112

In [69]:
# Default SVC evaluated through get_score on the current train/test split.
score_svm = get_score(
    model=SVC(),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
score_svm

0.9851851851851852

In [70]:
# 40-tree random forest via get_score. The seed is pinned so the score is
# repeatable instead of varying with each run's random bootstrap draws.
score_rf = get_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    X_train, X_test, y_train, y_test,
)
score_rf

0.9555555555555556

In [71]:
from sklearn.model_selection import StratifiedKFold

# Stratified 3-fold splitter used by the `folds.split(...)` loops further down.
# NOTE(review): the saved output of this cell shows a `KFold(...)` repr, and the
# logistic-regression / SVC scores recorded for the `folds` loop are identical
# to those of the plain `kf` loop. This cell appears to have been edited without
# being re-run -- restart the kernel and run all cells before relying on the
# "stratified" numbers.
folds = StratifiedKFold(n_splits=3)
folds

KFold(n_splits=3, random_state=None, shuffle=False)

In [76]:
# Evaluate the four classifiers on every plain K-fold split and print one score
# per model per fold, in the order: logistic regression, decision tree, SVM,
# random forest.
# The score_* accumulators are deliberately left alone here: this cell only
# prints, so resetting them to empty lists would just clobber earlier results.
for train_index, test_index in kf.split(digits.data):
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]
    print(get_score(LogisticRegression(max_iter=5000), X_train, X_test, y_train, y_test))
    # Tree-based models are seeded so their per-fold scores are repeatable.
    print(get_score(DecisionTreeClassifier(random_state=42), X_train, X_test, y_train, y_test))
    print(get_score(SVC(), X_train, X_test, y_train, y_test))
    print(get_score(RandomForestClassifier(n_estimators=40, random_state=42), X_train, X_test, y_train, y_test))


0.9282136894824707
0.7512520868113522
0.9666110183639399
0.9232053422370617
0.9415692821368948
0.8096828046744574
0.9816360601001669
0.9499165275459098
0.9165275459098498
0.7929883138564274
0.9549248747913188
0.9248747913188647


In [75]:
# Same per-fold comparison as the plain K-fold cell, but the splits come from
# `folds` and are built from both the features and the labels. Scores are
# printed in the order: logistic regression, decision tree, SVM, random forest.
# The score_* accumulators are deliberately left alone here: this cell only
# prints, so resetting them to empty lists would just clobber earlier results.
for train_index, test_index in folds.split(digits.data, digits.target):
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]
    print(get_score(LogisticRegression(max_iter=5000), X_train, X_test, y_train, y_test))
    # Tree-based models are seeded so their per-fold scores are repeatable.
    print(get_score(DecisionTreeClassifier(random_state=42), X_train, X_test, y_train, y_test))
    print(get_score(SVC(), X_train, X_test, y_train, y_test))
    print(get_score(RandomForestClassifier(n_estimators=40, random_state=42), X_train, X_test, y_train, y_test))

0.9282136894824707
0.7495826377295493
0.9666110183639399
0.9165275459098498
0.9415692821368948
0.8146911519198664
0.9816360601001669
0.9549248747913188
0.9165275459098498
0.7779632721202003
0.9549248747913188
0.9048414023372288


In [78]:
# Collect one held-out score per fold for each classifier so the per-model
# means can be compared in the cells that follow. The lists are reset here so
# that re-running this cell never appends to results from a previous run.
score_log, score_dtree, score_svm, score_rf = [], [], [], []

for train_index, test_index in folds.split(digits.data, digits.target):
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]
    score_log.append(get_score(LogisticRegression(max_iter=5000), X_train, X_test, y_train, y_test))
    # Tree-based models are seeded so the averaged scores are repeatable.
    score_dtree.append(get_score(DecisionTreeClassifier(random_state=42), X_train, X_test, y_train, y_test))
    score_svm.append(get_score(SVC(), X_train, X_test, y_train, y_test))
    score_rf.append(get_score(RandomForestClassifier(n_estimators=40, random_state=42), X_train, X_test, y_train, y_test))

In [86]:
# Mean logistic-regression score across the folds. Only the last expression of
# a cell is displayed, so the mean is the sole statement here.
np.mean(score_log)

0.9287701725097385

In [87]:
# Mean decision-tree score across the folds.
np.mean(score_dtree)

0.7857540345019477

In [88]:
# Mean SVM score across the folds.
np.mean(score_svm)

0.9677239844184752

In [89]:
# Mean random-forest score across the folds.
np.mean(score_rf)

0.9315525876460767

In [90]:
from sklearn.model_selection import cross_val_score

In [93]:
# Let scikit-learn do the fold loop: 3-fold cross-validated scores for
# logistic regression on the full dataset.
cross_val_score(
    estimator=LogisticRegression(max_iter=5000),
    X=digits.data,
    y=digits.target,
    cv=3,
)

array([0.92487479, 0.93823038, 0.92320534])

In [94]:
# 3-fold cross-validated scores for a decision tree; the seed is pinned so the
# reported fold scores are repeatable.
cross_val_score(DecisionTreeClassifier(random_state=42), digits.data, digits.target, cv=3)

array([0.75292154, 0.81803005, 0.77295492])

In [95]:
# 3-fold cross-validated scores for the default SVC on the full dataset.
cross_val_score(
    estimator=SVC(),
    X=digits.data,
    y=digits.target,
    cv=3,
)

array([0.96494157, 0.97996661, 0.96494157])

In [98]:
# 3-fold cross-validated scores for a random forest with the default number of
# trees; the seed is pinned so the reported fold scores are repeatable.
cross_val_score(RandomForestClassifier(random_state=42), digits.data, digits.target, cv=3)

array([0.93989983, 0.95492487, 0.92988314])

In [99]:
# Mean 3-fold cross-validated score for a 40-tree random forest; the seed is
# pinned so this number can be compared fairly against other n_estimators values.
cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    digits.data,
    digits.target,
    cv=3,
).mean()

0.9360044518642181