In [31]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
import pandas as pd
import numpy as np

# Fetch the digits dataset and unpack the feature matrix and label vector
data = load_digits()
X, y = data.data, data.target

# Tabular view for inspection: one column per feature name, plus the label
# appended as a final 'target' column
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)

display(df.head())

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [32]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Logistic regression is wrapped in a pipeline so the features are standardised
# before the solver runs
logreg_pipeline = make_pipeline(
    StandardScaler(),
    LogisticRegression(solver='lbfgs', max_iter=1000),
)

# Candidate classifiers as (display name, estimator) pairs, evaluated in this order
models = [
    ("Logistic Regression", logreg_pipeline),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Gradient Boosting", GradientBoostingClassifier(random_state=42)),
    ("Support Vector Classifier", SVC(probability=True, random_state=42)),
]


In [33]:
# Fit every candidate on the training split and report its test-set metrics.
# Precision, recall and F1 are support-weighted averages over the classes;
# zero_division=0 scores a class with no predicted/true samples as 0.
weighted_opts = {"average": "weighted", "zero_division": 0}

for name, model in models:
    # fit() returns the fitted estimator, so prediction can be chained
    y_pred = model.fit(X_train, y_train).predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, **weighted_opts)
    recall = recall_score(y_test, y_pred, **weighted_opts)
    f1 = f1_score(y_test, y_pred, **weighted_opts)

    # Assemble the whole report first, then emit it with a single print
    report_lines = [
        f"{name}:",
        f"  Accuracy = {acc:.4f}",
        f"  Precision = {precision:.4f}",
        f"  Recall = {recall:.4f}",
        f"  F1 Score = {f1:.4f}",
        f"  Sample actuals    : {y_test[:5]}",
        f"  Sample predictions: {y_pred[:5]}",
        "-" * 40,
    ]
    print("\n".join(report_lines))

Logistic Regression:
  Accuracy = 0.9722
  Precision = 0.9725
  Recall = 0.9722
  F1 Score = 0.9723
  Sample actuals    : [6 9 3 7 2]
  Sample predictions: [6 9 3 7 2]
----------------------------------------
Decision Tree:
  Accuracy = 0.8417
  Precision = 0.8456
  Recall = 0.8417
  F1 Score = 0.8418
  Sample actuals    : [6 9 3 7 2]
  Sample predictions: [6 9 3 7 2]
----------------------------------------
Random Forest:
  Accuracy = 0.9722
  Precision = 0.9726
  Recall = 0.9722
  F1 Score = 0.9722
  Sample actuals    : [6 9 3 7 2]
  Sample predictions: [6 9 3 7 2]
----------------------------------------
Gradient Boosting:
  Accuracy = 0.9694
  Precision = 0.9709
  Recall = 0.9694
  F1 Score = 0.9696
  Sample actuals    : [6 9 3 7 2]
  Sample predictions: [6 9 3 7 2]
----------------------------------------
Support Vector Classifier:
  Accuracy = 0.9861
  Precision = 0.9862
  Recall = 0.9861
  F1 Score = 0.9861
  Sample actuals    : [6 9 3 7 2]
  Sample predictions: [6 9 3 7 2]
----

### Insights:
- Support Vector Classifier (SVC) achieved the best performance, with the highest accuracy (98.6%) and weighted precision, recall, and F1 score all at about 98.6% as well.

- Logistic Regression and Random Forest both performed excellently, with accuracy, precision, recall, and F1 score all around 97.2%.

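- Gradient Boosting also performed very well (accuracy ≈ 96.9%), only slightly below Logistic Regression and Random Forest; on this 360-sample test set that gap corresponds to a single misclassified sample, so the ranking among these three models is not conclusive from one split.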

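- Decision Tree lagged behind the others, with noticeably lower scores (accuracy ≈ 84.2%). The most likely cause is overfitting: it is a single tree grown with default settings (no depth limit), whereas Random Forest and Gradient Boosting combine many trees to reduce that variance. Comparing its training accuracy with its test accuracy would confirm this.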