In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_digits

digits = load_digits()

In [3]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(digits.data, digits.target, test_size=0.3)

In [7]:
lr = LogisticRegression()
lr.fit(x_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [8]:
lr.score(x_test,y_test)

0.9351851851851852

In [9]:
svm = SVC()
svm.fit(x_train,y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [10]:
svm.score(x_test,y_test)

0.4203703703703704

In [12]:
rf = RandomForestClassifier(n_estimators=40)
rf.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=40, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [13]:
rf.score(x_test,y_test)

0.9685185185185186

In [14]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [15]:
for train_index,test_index in kf.split([1,2,3,4,5,6,7,8,9]):
    print(train_index,test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [16]:
def get_score(model,x_train,x_test,y_train,y_test):
    model.fit(x_train,y_train)
    return model.score(x_test,y_test)

In [21]:
# get_score(LogisticRegression(),x_train,x_test,y_train,y_test)

In [23]:
# get_score(SVC(),x_train,x_test,y_train,y_test)

In [22]:
# get_score(RandomForestClassifier(),x_train,x_test,y_train,y_test)

In [24]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)

In [34]:
scores_lr = []
scores_svm = []
scores_rf = []

for train_index,test_index in kf.split(digits.data):
    x_train,x_test,y_train,y_test = digits.data[train_index],digits.data[test_index],digits.target[train_index], \
    digits.target[test_index]
    scores_lr.append(get_score(LogisticRegression(),x_train,x_test,y_train,y_test))
    scores_svm.append(get_score(SVC(),x_train,x_test,y_train,y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40),x_train,x_test,y_train,y_test))



In [38]:
len(x_train)

1198

In [39]:
len(x_test)

599

In [35]:
scores_lr

[0.8964941569282137, 0.9515859766277128, 0.9115191986644408]

In [36]:
scores_svm

[0.41068447412353926, 0.41569282136894825, 0.4273789649415693]

In [37]:
scores_rf

[0.9382303839732888, 0.9382303839732888, 0.9232053422370617]

In [40]:
from sklearn.model_selection import cross_val_score

In [48]:
print(cross_val_score(LogisticRegression(),digits.data,digits.target))
print(cross_val_score(LogisticRegression(),digits.data,digits.target).mean())



[0.89534884 0.94991653 0.90939597]




0.9182204459698582


In [49]:
print(cross_val_score(SVC(),digits.data,digits.target))
print(cross_val_score(SVC(),digits.data,digits.target).mean())



[0.39368771 0.41068447 0.45973154]




0.4213679084629655


In [50]:
print(cross_val_score(RandomForestClassifier(n_estimators=40),digits.data,digits.target))
print(cross_val_score(RandomForestClassifier(n_estimators=40),digits.data,digits.target).mean())



[0.93023256 0.93989983 0.92449664]




0.9320827441585258


#### Exercise on iris dataset

In [52]:
import pandas as pd
df = pd.read_csv('iris.csv')
df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [53]:
x = df.drop(['variety'], axis='columns')
x.shape

(150, 4)

In [54]:
y = df.variety
y.shape

(150,)

In [55]:
print(cross_val_score(LogisticRegression(),x,y))
print(cross_val_score(LogisticRegression(),x,y).mean())



[0.96078431 0.92156863 0.95833333]
0.9468954248366014




In [59]:
print(cross_val_score(SVC(),x,y))
print(cross_val_score(SVC(),x,y).mean())

[0.98039216 0.96078431 0.97916667]
0.9734477124183006




In [65]:
print(cross_val_score(RandomForestClassifier(n_estimators=40),x,y))
print(cross_val_score(RandomForestClassifier(n_estimators=40),x,y).mean())

[0.98039216 0.92156863 0.95833333]
0.9669117647058822


