In [4]:
import sklearn
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Print the scikit-learn version alongside the results for reproducibility.
print(sklearn.__version__)

# Load the handwritten-digits dataset: X is the feature matrix, y the labels.
digits_data = load_digits()
X = digits_data.data
y = digits_data.target

# String versions of the class labels, used as display names in reports.
target_names = [str(i) for i in digits_data.target_names]
print("Target Names:", target_names)

# Per-feature summary statistics as a quick sanity check of the raw data.
digits_df = pd.DataFrame(X, columns=digits_data.feature_names)
print(digits_df.describe())

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test samples influence the mean/std used for preprocessing (data leakage),
# which makes the reported test scores optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Data scaling: learn mean/std from the training split only, then apply the
# same transformation to the held-out test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Retained so later code that reads X_scaled keeps working; it is the full
# dataset transformed with the training-split statistics.
X_scaled = scaler.transform(X)

# Classifiers to compare, keyed by the label printed in the results.
# Insertion order determines the order in which they are evaluated.
models = dict([
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=42)),
    ("SVM", SVC(kernel='rbf', C=1, random_state=42)),
    (
        "SGD Classifier",
        SGDClassifier(loss='hinge', alpha=0.0001, max_iter=1000, tol=1e-3, random_state=42),
    ),
    ("Logistic Regression", LogisticRegression(C=1, max_iter=1000, random_state=42)),
])

# Train each classifier on the training split, then print its accuracy and
# per-class classification report on the test split.
for model_name in models:
    model = models[model_name]
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=target_names)

    print(f"{model_name}:\nAccuracy: {accuracy:.4f}")
    print("Classification Report:\n", report)
    print("\n")

# Data split
# - Training data: 80% (1437 samples)
# - Test data: 20% (360 samples)

# Model training and evaluation
# Five models were trained and evaluated:
# 1. Decision Tree: accuracy 84.17%
# 2. Random Forest: accuracy 97.22%
# 3. SVM: accuracy 98.06%
# 4. SGD Classifier: accuracy 96.11%
# 5. Logistic Regression: accuracy 97.22%
# SVM achieved the highest accuracy.

# Evaluation metrics
# 1. Accuracy: gives a quick overall picture of model performance.
# 2. Classification report: provides precision, recall, and f1-score for each digit class, allowing a more detailed performance analysis.

1.0
Target Names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
       pixel_0_0    pixel_0_1    pixel_0_2    pixel_0_3    pixel_0_4  \
count     1797.0  1797.000000  1797.000000  1797.000000  1797.000000   
mean         0.0     0.303840     5.204786    11.835838    11.848080   
std          0.0     0.907192     4.754826     4.248842     4.287388   
min          0.0     0.000000     0.000000     0.000000     0.000000   
25%          0.0     0.000000     1.000000    10.000000    10.000000   
50%          0.0     0.000000     4.000000    13.000000    13.000000   
75%          0.0     0.000000     9.000000    15.000000    15.000000   
max          0.0     8.000000    16.000000    16.000000    16.000000   

         pixel_0_5    pixel_0_6    pixel_0_7    pixel_1_0    pixel_1_1  ...  \
count  1797.000000  1797.000000  1797.000000  1797.000000  1797.000000  ...   
mean      5.781859     1.362270     0.129661     0.005565     1.993879  ...   
std       5.666418     3.325775     1.037383 