In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [3]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset; its `data` and `target` attributes
# supply the features and labels for every experiment below.
digits = load_digits()

In [4]:
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

# Use StratifiedKFold for our digits example

In [5]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the held-out split.

    Parameters
    ----------
    model
        Object exposing ``fit(X, y)`` and ``score(X, y)``. ``fit`` is called
        on the object that is passed in, so the caller's instance is trained.
    X_train, y_train
        Training features and labels, forwarded to ``model.fit``.
    X_test, y_test
        Held-out features and labels, forwarded to ``model.score``.

    Returns
    -------
    Whatever ``model.score`` returns for the held-out split (mean accuracy
    for the scikit-learn classifiers used in this notebook).
    """
    model.fit(X_train, y_train)
    held_out_score = model.score(X_test, y_test)
    return held_out_score

In [6]:
from sklearn.model_selection import StratifiedKFold

# One splitter shared by all three models so every model is scored on the
# same 8 stratified train/test partitions.
folds = StratifiedKFold(n_splits=8)

# Per-fold scores, one entry per fold, in fold order.
scores_logistic = []
scores_svm = []
scores_rf = []

for train_index, test_index in folds.split(digits.data, digits.target):
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]

    # A fresh, unfitted estimator is built for each fold so nothing learned
    # on one fold carries over to the next.
    scores_logistic.append(
        get_score(LogisticRegression(solver='liblinear', multi_class='ovr'),
                  X_train, X_test, y_train, y_test))

    # NOTE(review): the SVC fold scores come out far below the other two
    # models. gamma='auto' (1 / n_features) on unscaled pixel features is the
    # likely cause -- consider gamma='scale' or scaling the features.
    scores_svm.append(
        get_score(SVC(gamma='auto'), X_train, X_test, y_train, y_test))

    # random_state pins the forest's internal randomness; without it the
    # fold scores (and their mean) change on every run.
    scores_rf.append(
        get_score(RandomForestClassifier(n_estimators=40, random_state=42),
                  X_train, X_test, y_train, y_test))

# Logistic Regression Score

In [7]:
 scores_logistic

[0.92,
 0.9644444444444444,
 0.8844444444444445,
 0.9466666666666667,
 0.96,
 0.9464285714285714,
 0.9330357142857143,
 0.8928571428571429]

# Average Logistic Regression Score

In [8]:
# Wrap the per-fold logistic-regression scores in a Series so pandas can average them.
ser_lr = pd.Series( scores_logistic)

In [9]:
ser_lr

0    0.920000
1    0.964444
2    0.884444
3    0.946667
4    0.960000
5    0.946429
6    0.933036
7    0.892857
dtype: float64

In [10]:
ser_lr.mean()

0.9309846230158731

In [11]:
# Sanity check: average the fold scores by hand; the result should agree
# with ser_lr.mean().
sums = 0
for fold_score in scores_logistic:
    sums += fold_score
avg = sums / len(scores_logistic)
avg

0.9309846230158731

# Support Vector Machine

In [12]:
scores_svm

[0.41333333333333333,
 0.5377777777777778,
 0.4177777777777778,
 0.4533333333333333,
 0.40444444444444444,
 0.5044642857142857,
 0.6517857142857143,
 0.4419642857142857]

In [13]:
# Per-fold SVC scores as a Series; the bare name on the last line displays it.
ser_svm = pd.Series( scores_svm)
ser_svm

0    0.413333
1    0.537778
2    0.417778
3    0.453333
4    0.404444
5    0.504464
6    0.651786
7    0.441964
dtype: float64

In [14]:
ser_svm.mean()

0.478110119047619

# Random Forest

In [15]:
# Per-fold random-forest scores as a Series; the bare name on the last line displays it.
ser_rf = pd.Series( scores_rf)
ser_rf

0    0.920000
1    0.977778
2    0.911111
3    0.951111
4    0.960000
5    0.982143
6    0.973214
7    0.888393
dtype: float64

In [16]:
ser_rf.mean()

0.9454687499999999

# The cross_val_score function

# Logistic regression model performance using cross_val_score

In [18]:
from sklearn.model_selection import cross_val_score

In [22]:
# With an integer `cv` and a classifier, cross_val_score splits with
# stratified K-fold, so this single call replaces the manual 8-fold loop.
cross_val_score_logistic = cross_val_score(
    LogisticRegression(solver='liblinear', multi_class='ovr'),
    digits.data,
    digits.target,
    cv=8,
)

In [23]:
cross_val_score_logistic

array([0.92      , 0.96444444, 0.88444444, 0.94666667, 0.96      ,
       0.94642857, 0.93303571, 0.89285714])

In [24]:
cross_val_score_logistic.mean()

0.930984623015873

# Support Vector Machine model performance using cross_val_score

In [25]:
# Score the SVC on 8 cross-validation folds in one call; the estimator is
# cloned and refitted for each fold by cross_val_score.
cross_val_score_svm = cross_val_score(
    SVC(gamma='auto'),
    digits.data,
    digits.target,
    cv=8,
)

In [26]:
cross_val_score_svm

array([0.41333333, 0.53777778, 0.41777778, 0.45333333, 0.40444444,
       0.50446429, 0.65178571, 0.44196429])

In [27]:
cross_val_score_svm.mean()

0.4781101190476191

# Random Forest performance using cross_val_score

In [30]:
# random_state pins the forest's internal randomness so these fold scores are
# identical on every run; cv=8 yields 8 stratified folds for a classifier.
cross_val_score_rf = cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    digits.data,
    digits.target,
    cv=8,
)

In [31]:
cross_val_score_rf

array([0.91111111, 0.97333333, 0.91111111, 0.94222222, 0.97333333,
       0.97767857, 0.96428571, 0.89732143])

In [32]:
cross_val_score_rf.mean()

0.9437996031746032