In [96]:
import pandas as pd
import matplotlib.pyplot as plt

# Loading Data
from sklearn import datasets

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Cross Validation
from sklearn.model_selection import cross_val_score

# Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

Loading Data

In [97]:
# Load the handwritten-digits dataset bundled with scikit-learn.
# `x` is the feature matrix (one column per pixel, pixel_0_0 .. pixel_7_7)
# and `y` is the target vector used by every model below.
digits = datasets.load_digits()
x, y = digits.data, digits.target

# Wrap the features in a DataFrame purely for inspection; the models below
# are fitted on the raw `x` / `y` arrays, not on this frame.
digits_df = pd.DataFrame(data=x, columns=digits.feature_names)
# Backward-compatible alias: the frame was originally (mis)named after the
# iris dataset even though it holds the digits data.
irisDF = digits_df
digits_df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0


Cross Validation

In [98]:
# Compare cross-validated scores (cross_val_score defaults: 5 folds, the
# estimator's own `score`, i.e. accuracy for these classifiers).
# random_state is pinned on every random forest so that a
# Restart-and-Run-All reproduces the same numbers.

# Among various models
models = {
    "LogisticRegression": LogisticRegression(max_iter=370),
    "RandomForestClassifier": RandomForestClassifier(random_state=42),
    "SVC": SVC(),
}
for model_name, model in models.items():
    print(model_name, cross_val_score(model, x, y))
print()
# Among various parameters
for n_estimators in (10, 37, 69):
    forest = RandomForestClassifier(n_estimators=n_estimators, random_state=42)
    print(f"n_estimators={n_estimators}", cross_val_score(forest, x, y))

[0.92222222 0.86944444 0.94428969 0.94150418 0.89693593]
[0.93055556 0.90833333 0.95543175 0.95821727 0.92479109]
[0.96111111 0.94444444 0.98328691 0.98885794 0.93871866]

[0.91944444 0.86111111 0.93593315 0.95543175 0.85236769]
[0.93333333 0.90555556 0.94707521 0.94707521 0.91922006]
[0.93333333 0.91666667 0.95821727 0.96100279 0.93314763]


Hyperparameter Tuning

In [99]:
# Grid Search
# Exhaustively evaluates every combination in `parameters`
# (3 n_estimators x 4 max_depth = 12 candidates) with 5-fold CV.
parameters = {'n_estimators': [10, 37, 69], 'max_depth': [None, 5, 10, 15]}
# random_state is pinned on the forest so the winning combination and its
# score are reproducible; without it the ranking can change between runs.
classifier = GridSearchCV(RandomForestClassifier(random_state=42), param_grid=parameters, cv=5)
classifier.fit(x, y)

print(classifier.best_params_)
print(classifier.best_score_)

# One row per candidate: the two tuned parameters and the mean CV score.
pd.DataFrame(classifier.cv_results_)[["param_max_depth", "param_n_estimators", "mean_test_score"]]

{'max_depth': 15, 'n_estimators': 69}
0.9399195295574125


Unnamed: 0,param_max_depth,param_n_estimators,mean_test_score
0,,10,0.895972
1,,37,0.932686
2,,69,0.936017
3,5.0,10,0.845305
4,5.0,37,0.893726
5,5.0,69,0.90598
6,10.0,10,0.895401
7,10.0,37,0.929913
8,10.0,69,0.933801
9,15.0,10,0.894868


In [101]:
# Random Search
# Samples candidates from `parameters` instead of trying them all; with the
# default n_iter=10 it evaluates 10 of the 12 possible combinations.
parameters = {'n_estimators': [10, 37, 69], 'max_depth': [None, 5, 10, 15]}
# Two sources of randomness are pinned for reproducibility:
#   - the forest's own random_state (tree construction), and
#   - the search's random_state (which candidates get sampled).
classifier = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=parameters,
    cv=5,
    random_state=42,
)
classifier.fit(x, y)

print(classifier.best_params_)
print(classifier.best_score_)

# One row per sampled candidate: the two tuned parameters and the mean CV score.
pd.DataFrame(classifier.cv_results_)[["param_max_depth", "param_n_estimators", "mean_test_score"]]

{'n_estimators': 37, 'max_depth': None}
0.9365769111730116


Unnamed: 0,param_max_depth,param_n_estimators,mean_test_score
0,10.0,37,0.93268
1,5.0,37,0.888723
2,5.0,69,0.897632
3,15.0,10,0.900963
4,,10,0.911543
5,15.0,69,0.93602
6,10.0,69,0.934915
7,,37,0.936577
8,10.0,10,0.912643
9,15.0,37,0.931565
