In [31]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier, export_graphviz, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import numpy as np

In [11]:
dataset = load_digits()
X = dataset.data
y = dataset.target

In [12]:
X.shape

(1797, 64)

In [13]:
pca = PCA(n_components=0.8)
X_reduce = pca.fit_transform(X, y)

In [14]:
X_reduce.shape

(1797, 13)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_reduce, y, test_size = 0.3)

In [7]:
decision_tree = DecisionTreeClassifier(criterion = 'entropy', max_depth = 19, min_samples_leaf = 4, min_samples_split = 7)
decision_tree.fit(X_train, y_train)

In [8]:
decision_tree.score(X_test, y_test)

0.8537037037037037

In [9]:
export_graphviz(decision_tree, out_file = './tree.dot')

In [11]:
rfc_clf = RandomForestClassifier()
param_grid = {
    'n_estimators': range(100, 110),
    'criterion': ['entropy', 'gini'],
    'max_depth': range(10, 31, 5),
    'min_samples_leaf': range(1, 10),
    'min_samples_split': range(1,10)
}

random_search = RandomizedSearchCV(rfc_clf, param_grid, cv = 5)
random_search.fit(X_train, y_train)
rfc_clf = random_search.best_estimator_

5 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\denka\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\denka\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "C:\Users\denka\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\denka\AppData\Local\Programs\Python\Python312\Lib\si

In [17]:
svm_clf = SVC()
param_grid = {
    'C': range(1, 3),
    'kernel': ['linear', 'poly', 'rbf']
}
random_search = RandomizedSearchCV(svm_clf, param_grid, cv = 5)
random_search.fit(X_train, y_train)
svm_clf = random_search.best_estimator_



In [18]:
log_clf = LogisticRegression()
dt = DecisionTreeClassifier(criterion = 'entropy', max_depth = 19, min_samples_leaf = 4, min_samples_split = 7)

In [22]:
voting_clf = VotingClassifier(estimators=[('lr', log_clf), ('rf', rfc_clf), ('svc', svm_clf), ('dt', dt)], voting='hard')
voting_clf.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
voting_clf.score(X_test, y_test)

0.9629629629629629

In [26]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=200, algorithm="SAMME", learning_rate=0.5)
ada_clf.fit(X_train, y_train)

In [27]:
ada_clf.score(X_test, y_test)

0.9296296296296296

In [30]:
np.average(cross_val_score(ada_clf, X_test, y_test, cv = 5))

0.9092592592592593