In [1]:
import numpy as np
from sklearn.datasets import load_digits

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier 
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression 

from sklearn.metrics import classification_report
from sklearn.metrics import recall_score

In [2]:
# Single random seed reused by the train/test split and by every estimator,
# so the whole notebook is reproducible from one value.
seed: int = 24

In [3]:
# Load the scikit-learn digits dataset and pull out the feature matrix and
# the class labels in one step.
digits = load_digits()
digits_data, digits_label = digits.data, digits.target

In [4]:
# Count the samples per label value (entry i is the number of samples
# labelled i) to check how balanced the classes are.
np.bincount(digits_label)

array([178, 182, 177, 183, 181, 182, 181, 179, 174, 180])

In [26]:
# Hold out 20% of the samples as a test set; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    digits_data,
    digits_label,
    test_size=0.2,
    random_state=seed,
)

In [27]:
# Sanity-check the split by displaying the shape of each of the four arrays,
# in the order X_train, y_train, X_test, y_test.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((1437, 64), (1437,), (360, 64), (360,))

In [28]:
# Decision tree: fit on the training split (fit() returns the estimator
# itself), then report per-class metrics on the held-out test split.
decision_tree = DecisionTreeClassifier(random_state=seed).fit(X_train, y_train)
decision_y = decision_tree.predict(X_test)
print(classification_report(y_test, decision_y))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94        32
           1       0.89      0.89      0.89        36
           2       0.89      0.84      0.86        38
           3       0.85      0.76      0.80        45
           4       0.78      0.84      0.81        37
           5       0.79      0.91      0.85        33
           6       0.95      0.98      0.97        43
           7       0.85      0.85      0.85        26
           8       0.68      0.78      0.72        27
           9       0.86      0.74      0.80        43

    accuracy                           0.85       360
   macro avg       0.85      0.85      0.85       360
weighted avg       0.85      0.85      0.85       360



In [29]:
# Random forest: train on the training split and score on the test split.
random_forest = RandomForestClassifier(random_state=seed)
random_y = random_forest.fit(X_train, y_train).predict(X_test)
print(classification_report(y_true=y_test, y_pred=random_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.97      1.00      0.99        36
           2       1.00      1.00      1.00        38
           3       1.00      0.93      0.97        45
           4       1.00      1.00      1.00        37
           5       0.92      1.00      0.96        33
           6       1.00      1.00      1.00        43
           7       0.93      1.00      0.96        26
           8       0.96      0.93      0.94        27
           9       0.98      0.93      0.95        43

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



In [30]:
# Support-vector classifier using SVC's default kernel (no `kernel` argument
# is passed). Only one estimator is constructed, so it is unambiguous which
# configuration is fitted and evaluated below.
svm_model = svm.SVC(random_state=seed)
svm_model.fit(X_train, y_train)
svm_y = svm_model.predict(X_test)
# Per-class precision/recall/F1 on the held-out test split.
print(classification_report(y_test, svm_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.97      1.00      0.99        36
           2       1.00      1.00      1.00        38
           3       1.00      0.96      0.98        45
           4       1.00      0.97      0.99        37
           5       0.92      1.00      0.96        33
           6       1.00      1.00      1.00        43
           7       1.00      1.00      1.00        26
           8       0.96      0.96      0.96        27
           9       1.00      0.98      0.99        43

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [32]:
# Linear classifier trained with stochastic gradient descent; fit on the
# training split and evaluate on the test split.
sgd_model = SGDClassifier(random_state=seed)
sgd_y = sgd_model.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, sgd_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.83      0.97      0.90        36
           2       1.00      0.97      0.99        38
           3       0.90      0.96      0.92        45
           4       1.00      0.95      0.97        37
           5       0.94      0.91      0.92        33
           6       0.98      1.00      0.99        43
           7       0.96      1.00      0.98        26
           8       0.93      0.93      0.93        27
           9       1.00      0.84      0.91        43

    accuracy                           0.95       360
   macro avg       0.95      0.95      0.95       360
weighted avg       0.95      0.95      0.95       360



In [11]:
# Logistic regression with a large iteration cap (max_iter=10000) --
# presumably to let the solver converge on these features; confirm if tuning.
logistic_model = LogisticRegression(max_iter=10000, random_state=seed)
logistic_model.fit(X_train, y_train)
log_y = logistic_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=log_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.97      1.00      0.99        36
           2       1.00      1.00      1.00        38
           3       0.93      0.96      0.95        45
           4       1.00      1.00      1.00        37
           5       0.94      0.94      0.94        33
           6       1.00      1.00      1.00        43
           7       0.96      1.00      0.98        26
           8       1.00      0.93      0.96        27
           9       0.98      0.95      0.96        43

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



10개 target 클래스의 샘플 수가 클래스당 174~183개로 거의 균등하다.

클래스가 균등하기 때문에, 각 모델에서 precision, recall, f1-score의 평균값(macro avg, weighted avg)과 accuracy가 서로 동일함을 확인하였다. (클래스별 값은 서로 다르다.)

테스트 세트 기준 accuracy 0.99로 다섯 모델 중 가장 높은 성능을 보인 SVM(기본 커널 사용)이 최적의 모델이다.