In [108]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [109]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset. The cells that follow use
# `digits.data` as the feature matrix and `digits.target` as the labels.
digits = load_digits()

In [110]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing. `random_state` pins the shuffle so
# the baseline scores are reproducible after Restart Kernel -> Run All
# (without it every run produces a different split and different scores).
X_train, X_test, Y_train, Y_test = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=42
)

In [111]:
# Baseline 1: logistic regression, scored on the hold-out split.
# fit() returns the estimator itself, so `lr` is bound to the fitted model.
lr = LogisticRegression(max_iter=200).fit(X_train, Y_train)
lr.score(X_test, Y_test)

0.9629629629629629

In [112]:
# Baseline 2: support vector classifier with default hyper-parameters,
# scored on the same hold-out split as the other baselines.
svm = SVC().fit(X_train, Y_train)
svm.score(X_test, Y_test)

0.9851851851851852

In [113]:
# Baseline 3: random forest with 70 trees, scored on the hold-out split.
# A random forest is stochastic (bootstrap samples and feature sub-sampling),
# so `random_state` is fixed to make the reported score reproducible.
rf = RandomForestClassifier(n_estimators=70, random_state=42)
rf.fit(X_train, Y_train)
rf.score(X_test, Y_test)

0.9685185185185186

Generic Example of KFold

In [114]:
from sklearn.model_selection import KFold
# Splitter that cuts the samples into 3 folds. The trailing bare `kf`
# displays its repr so the remaining (default) settings are visible.
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [115]:
# Toy illustration on nine samples: every iteration yields the positional
# indices of the training part and of the held-out part of one fold.
for train_index, test_index in kf.split(list(range(1, 10))):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [116]:
def get_score(model, X_train, X_test, Y_train, Y_test):
    """Fit ``model`` on the training split and return its score on the test split.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``score(X, y)``
        The estimator to evaluate. It is fitted in place, so the caller's
        object is modified.
    X_train, Y_train : training features and labels passed to ``model.fit``.
    X_test, Y_test : evaluation features and labels passed to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, Y_test)`` returns (for scikit-learn
    classifiers this is the mean accuracy).
    """
    model.fit(X_train, Y_train)
    test_score = model.score(X_test, Y_test)
    return test_score

Use StratifiedKFold, because it keeps the class proportions of the dataset roughly the same in every fold.

In [117]:
from sklearn.model_selection import StratifiedKFold
# Three-fold stratified splitter; it is consumed further down through
# skf.split(digits.data, digits.target).
skf = StratifiedKFold(n_splits=3)

In [118]:
# One independent, initially empty list of per-fold scores for each model.
scores_lr, scores_svm, scores_rf = [], [], []

The following for loop does by hand what the cross_val_score function (used further below) does for us.

In [119]:
# Start from empty lists so that re-running this cell does not append a second
# set of fold scores to the results of a previous run.
scores_lr, scores_svm, scores_rf = [], [], []

for train_index, test_index in skf.split(digits.data, digits.target):
    # Fold-local names: the hold-out split produced by train_test_split
    # (X_train, X_test, Y_train, Y_test) is left intact instead of being
    # overwritten on every fold.
    X_fold_train, X_fold_test = digits.data[train_index], digits.data[test_index]
    Y_fold_train, Y_fold_test = digits.target[train_index], digits.target[test_index]

    # get_score refits the estimator it is given, so after the loop lr, svm
    # and rf hold the fit from the last fold.
    scores_lr.append(get_score(lr, X_fold_train, X_fold_test, Y_fold_train, Y_fold_test))
    scores_svm.append(get_score(svm, X_fold_train, X_fold_test, Y_fold_train, Y_fold_test))
    scores_rf.append(get_score(rf, X_fold_train, X_fold_test, Y_fold_train, Y_fold_test))
    

In [120]:
# Per-fold scores, one line per model: logistic regression, SVM, random forest.
print(scores_lr, scores_svm, scores_rf, sep="\n")


[0.9198664440734557, 0.9415692821368948, 0.9165275459098498]
[0.9649415692821369, 0.9799666110183639, 0.9649415692821369]
[0.9398998330550918, 0.9549248747913188, 0.9232053422370617]


Now we will use the cross_val_score function

In [121]:
from sklearn.model_selection import cross_val_score

Logistic regression score using cross_val_score

In [122]:
# 3-fold cross-validation of a logistic regression in a single call.
# This estimator (liblinear solver, one-vs-rest) is configured differently
# from the `lr` model used in the manual loop, so its fold scores are not
# expected to match `scores_lr`.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases -
# confirm against the installed version.
cross_val_score(
    LogisticRegression(solver='liblinear', multi_class='ovr'),
    digits.data,
    digits.target,
    cv=3,
)

array([0.89482471, 0.95325543, 0.90984975])

SVM score using cross_val_score

In [123]:
# 3-fold cross-validation of a default SVC; same estimator settings as the
# `svm` model evaluated in the manual StratifiedKFold loop.
cross_val_score(
    SVC(),
    digits.data,
    digits.target,
    cv=3,
)

array([0.96494157, 0.97996661, 0.96494157])

RFC score using cross_val_score

In [124]:
# 3-fold cross-validation of a 40-tree random forest (the manual loop used a
# 70-tree forest, so the scores are not directly comparable with scores_rf).
# random_state is fixed because the forest is stochastic; without it the
# fold scores change on every run.
cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    digits.data,
    digits.target,
    cv=3,
)

array([0.93322204, 0.94657763, 0.93322204])

RandomForestClassifier Parameter tuning using cross_val_score

In [125]:
# 10-fold CV accuracy of a 5-tree forest. The seed is fixed so that the
# comparison across n_estimators reflects the number of trees rather than
# run-to-run randomness of the forest.
scores1 = cross_val_score(
    RandomForestClassifier(n_estimators=5, random_state=42),
    digits.data,
    digits.target,
    cv=10,
)
scores1.mean()

0.8648075729360645

In [126]:
# 10-fold CV accuracy of a 10-tree forest. The seed is fixed so that the
# comparison across n_estimators reflects the number of trees rather than
# run-to-run randomness of the forest.
scores2 = cross_val_score(
    RandomForestClassifier(n_estimators=10, random_state=42),
    digits.data,
    digits.target,
    cv=10,
)
scores2.mean()

0.9187492240844197

In [127]:
# 10-fold CV accuracy of a 20-tree forest. The seed is fixed so that the
# comparison across n_estimators reflects the number of trees rather than
# run-to-run randomness of the forest.
scores3 = cross_val_score(
    RandomForestClassifier(n_estimators=20, random_state=42),
    digits.data,
    digits.target,
    cv=10,
)
scores3.mean()

0.9359931719428924

In [128]:
# 10-fold CV accuracy of a 50-tree forest. The seed is fixed so that the
# comparison across n_estimators reflects the number of trees rather than
# run-to-run randomness of the forest.
scores4 = cross_val_score(
    RandomForestClassifier(n_estimators=50, random_state=42),
    digits.data,
    digits.target,
    cv=10,
)
scores4.mean()

0.9432247051520793