In [103]:
# %pip install scikit-learn

# Libraries

In [104]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Data

In [105]:
# Load the scikit-learn digits dataset and carve it into train / validation / test
# partitions (60% / 20% / 20% of the samples).
digits = load_digits()
x, y = digits.data, digits.target

# First hold out 20% of all samples as the final test set ...
x_train_temp, x_test, y_train_temp, y_test = train_test_split(
    x, y, test_size=0.2, random_state=67
)
# ... then take 25% of the remaining 80% (i.e. 20% overall) for validation.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_temp, y_train_temp, test_size=0.25, random_state=67
)


# Feature scaling (min-max normalization)

In [106]:
# Fit the min-max scaler on the training split only, so the validation and test
# splits never influence the scaling, then apply it to all three splits.
scaler = MinMaxScaler().fit(x_train)
x_train, x_valid, x_test = [
    scaler.transform(split) for split in (x_train, x_valid, x_test)
]

# Logistic regression

In [107]:
# Logistic regression: grid search over regularization strength and penalty type,
# keeping the model with the highest validation accuracy.
C_values = [0.01, 0.1, 1, 10, 100]
penalty_types = ['l1', 'l2']
best_paramsLR, best_modelLR, best_accLR = {}, None, 0

# Flattened grid; visiting order matches a nested loop (C outermost), so the first
# candidate to reach the top score is the one kept on ties.
lr_grid = [(c_value, penalty) for c_value in C_values for penalty in penalty_types]
for c_value, penalty in lr_grid:
    # liblinear is used for every candidate, with both the l1 and l2 penalties.
    candidate = LogisticRegression(
        C=c_value, penalty=penalty, solver='liblinear', max_iter=1000, random_state=67
    ).fit(x_train, y_train)
    valid_acc = candidate.score(x_valid, y_valid)

    if valid_acc > best_accLR:
        best_accLR, best_modelLR = valid_acc, candidate
        best_paramsLR = {'penalty types': penalty, 'C value': c_value}

# Predict the held-out test set with the selected model.
y_pred_LR = best_modelLR.predict(x_test)



In [108]:
# Test-set metrics for the selected logistic regression, then its hyperparameters.
print(classification_report(y_test, y_pred_LR), best_paramsLR, sep='\n')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.98      0.87      0.92        46
           2       0.97      1.00      0.98        31
           3       0.92      0.95      0.93        37
           4       0.98      0.98      0.98        42
           5       0.97      0.89      0.93        36
           6       0.89      1.00      0.94        33
           7       0.91      1.00      0.95        31
           8       0.80      0.83      0.81        29
           9       0.93      0.88      0.90        42

    accuracy                           0.94       360
   macro avg       0.93      0.94      0.94       360
weighted avg       0.94      0.94      0.94       360

{'penalty types': 'l1', 'C value': 10}


# Decision tree

In [128]:
# Decision tree: grid search over tree depth and minimum leaf size, keeping the
# tree with the highest validation accuracy.
max_depth_value = [5, 10, 15, None]
min_samples_leaf_value = [1, 5, 10]
best_paramsDT, best_modelDT, best_accDT = {}, None, 0

# Flattened grid; visiting order matches a nested loop (depth outermost), so the
# first candidate to reach the top score is the one kept on ties.
dt_grid = [(depth, leaf) for depth in max_depth_value for leaf in min_samples_leaf_value]
for depth, leaf in dt_grid:
    candidate = DecisionTreeClassifier(
        max_depth=depth, min_samples_leaf=leaf, random_state=67
    ).fit(x_train, y_train)
    valid_acc = candidate.score(x_valid, y_valid)

    if valid_acc > best_accDT:
        best_accDT, best_modelDT = valid_acc, candidate
        best_paramsDT = {'max depth': depth, 'min samples leaf': leaf}

# Predict the held-out test set with the selected tree.
y_pred_DT = best_modelDT.predict(x_test)

In [129]:
# Test-set metrics for the selected decision tree, then its hyperparameters.
print(classification_report(y_test, y_pred_DT), best_paramsDT, sep='\n')

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        33
           1       0.88      0.80      0.84        46
           2       0.88      0.71      0.79        31
           3       0.74      0.76      0.75        37
           4       0.95      0.93      0.94        42
           5       0.91      0.81      0.85        36
           6       0.80      0.97      0.88        33
           7       0.76      0.94      0.84        31
           8       0.62      0.69      0.66        29
           9       0.74      0.69      0.72        42

    accuracy                           0.82       360
   macro avg       0.83      0.83      0.82       360
weighted avg       0.83      0.82      0.82       360

{'max depth': 15, 'min samples leaf': 1}


# KNN

In [111]:
# KNN: grid search over neighbour count, vote weighting and distance metric,
# keeping the classifier with the highest validation accuracy.
k_values = [1, 3, 5, 7, 9, 11]
weight_types = ['uniform', 'distance']
metric_types = ['euclidean', 'manhattan']
best_modelKNN, best_accKNN, best_paramsKNN = None, 0, {}

# Flattened grid; visiting order matches a triple nested loop (k outermost, metric
# innermost), so the first candidate to reach the top score is the one kept on ties.
knn_grid = [
    (n_neighbors, weighting, metric)
    for n_neighbors in k_values
    for weighting in weight_types
    for metric in metric_types
]
for n_neighbors, weighting, metric in knn_grid:
    candidate = KNeighborsClassifier(
        n_neighbors=n_neighbors, weights=weighting, metric=metric
    ).fit(x_train, y_train)
    valid_acc = candidate.score(x_valid, y_valid)

    if valid_acc > best_accKNN:
        best_accKNN, best_modelKNN = valid_acc, candidate
        best_paramsKNN = {
            'k value': n_neighbors,
            'weight type': weighting,
            'metric type': metric,
        }

# Predict the held-out test set with the selected classifier.
y_pred_KNN = best_modelKNN.predict(x_test)

In [112]:
# Test-set metrics for the selected KNN classifier, then its hyperparameters.
print(classification_report(y_test, y_pred_KNN), best_paramsKNN, sep='\n')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.98      0.98      0.98        46
           2       1.00      1.00      1.00        31
           3       0.95      0.97      0.96        37
           4       0.98      1.00      0.99        42
           5       0.97      0.94      0.96        36
           6       0.97      1.00      0.99        33
           7       0.97      1.00      0.98        31
           8       0.96      0.93      0.95        29
           9       0.97      0.93      0.95        42

    accuracy                           0.97       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.97      0.97       360

{'k value': 3, 'weight type': 'uniform', 'metric type': 'euclidean'}


# Artificial Neural Network

In [119]:
# ANN (multi-layer perceptron): grid search over hidden-layer layout, L2 penalty
# (alpha) and initial learning rate, keeping the network with the highest
# validation accuracy.
hidden_layer_options = [(50,), (100,), (50, 50), (100, 100, 50)]
alpha_values = [0.0001, 0.001, 0.01]
learning_rates = [0.001, 0.0001]
best_modelANN, best_accANN, best_paramsANN = None, 0, {}

# Flattened grid; visiting order matches a triple nested loop (layout outermost,
# learning rate innermost), so the first candidate to reach the top score is the
# one kept on ties.
ann_grid = [
    (layout, alpha, lr_init)
    for layout in hidden_layer_options
    for alpha in alpha_values
    for lr_init in learning_rates
]
for layout, alpha, lr_init in ann_grid:
    candidate = MLPClassifier(
        hidden_layer_sizes=layout,
        alpha=alpha,
        learning_rate_init=lr_init,
        solver='adam',
        max_iter=1000,
        random_state=67,
    ).fit(x_train, y_train)
    valid_acc = candidate.score(x_valid, y_valid)

    if valid_acc > best_accANN:
        best_accANN, best_modelANN = valid_acc, candidate
        best_paramsANN = {'hidden layer': layout, 'alpha': alpha, 'learning rate': lr_init}

# Predict the held-out test set with the selected network.
y_pred_ANN = best_modelANN.predict(x_test)



In [123]:
# Test-set metrics for the selected neural network, then its hyperparameters.
print(classification_report(y_test, y_pred_ANN), best_paramsANN, sep='\n')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.98      0.98      0.98        46
           2       0.97      1.00      0.98        31
           3       1.00      0.97      0.99        37
           4       1.00      0.98      0.99        42
           5       0.94      0.92      0.93        36
           6       0.92      1.00      0.96        33
           7       1.00      1.00      1.00        31
           8       0.89      0.83      0.86        29
           9       0.98      1.00      0.99        42

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360

{'hidden layer': (100,), 'alpha': 0.0001, 'learning rate': 0.001}
