**Imports**

In [2]:
import numpy as np
import pickle
import matplotlib.pyplot as plt

from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    ConfusionMatrixDisplay
)


**Logistic Regression Class (FROM SCRATCH)**

In [3]:
class LogisticRegressionOVA:
    """Multi-class logistic regression trained one-vs-all with batch gradient descent.

    One binary classifier is fitted for every integer label in
    ``range(num_labels)``; ``predict`` returns, for each sample, the label whose
    classifier produces the highest score.

    Parameters
    ----------
    alpha : float
        Gradient-descent step size.
    iterations : int
        Number of gradient-descent updates performed per binary classifier.
    num_labels : int
        Number of classes. ``fit`` compares the targets against each integer
        in ``range(num_labels)``.
    verbose : bool
        When True (the default) training progress is printed: one header per
        classifier and the cost every 100 iterations.

    Attributes
    ----------
    all_theta : numpy.ndarray or None
        ``(num_labels, n_features)`` weight matrix, one row per label.
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=0.1, iterations=1000, num_labels=10, verbose=True):
        self.alpha = alpha
        self.iterations = iterations
        self.num_labels = num_labels
        self.verbose = verbose
        self.all_theta = None

    # --------------------------------------------------
    # Sigmoid Function
    # --------------------------------------------------
    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        It is evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp``
        so that large negative ``z`` decays to 0 without the overflow that
        ``np.exp(-z)`` would trigger in the naive formula.
        """
        return np.exp(-np.logaddexp(0, -z))

    # --------------------------------------------------
    # Cost Function
    # --------------------------------------------------
    def compute_cost(self, X, y, theta):
        """Return the mean binary cross-entropy of ``theta`` on ``(X, y)``.

        Parameters
        ----------
        X : array of shape (m, n)
            Feature matrix.
        y : array of shape (m,)
            Binary targets (0 or 1).
        theta : array of shape (n,)
            Weight vector being evaluated.
        """
        m = len(y)
        h = self.sigmoid(X @ theta)

        # The 1e-8 offsets keep np.log away from log(0) when h saturates
        # at exactly 0 or 1.
        cost = (-1 / m) * np.sum(
            y * np.log(h + 1e-8) +
            (1 - y) * np.log(1 - h + 1e-8)
        )
        return cost

    # --------------------------------------------------
    # Gradient Descent
    # --------------------------------------------------
    def gradient_descent(self, X, y):
        """Fit one binary classifier with batch gradient descent.

        Starts from an all-zero weight vector and performs ``self.iterations``
        updates with step size ``self.alpha``.

        Returns
        -------
        theta : numpy.ndarray of shape (n,)
            Learned weights.
        cost_history : list of float
            Cost measured after each update.
        """
        m, n = X.shape
        theta = np.zeros(n)
        cost_history = []

        for i in range(self.iterations):
            h = self.sigmoid(X @ theta)
            grad = (X.T @ (h - y)) / m
            theta -= self.alpha * grad

            # Cost is evaluated with the freshly updated weights.
            cost = self.compute_cost(X, y, theta)
            cost_history.append(cost)

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i:4d} | Cost = {cost:.6f}")

        return theta, cost_history

    # --------------------------------------------------
    # One-vs-All Training
    # --------------------------------------------------
    def fit(self, X, y):
        """Train ``num_labels`` binary classifiers, one per label.

        Parameters
        ----------
        X : array of shape (m, n)
            Feature matrix. No intercept column is added here, so the caller
            must include one if an intercept is wanted.
        y : array-like of shape (m,)
            Integer class labels.
        """
        _, n = X.shape
        # Accept lists as well as arrays: comparing a plain list with an
        # integer would yield a single bool instead of an element-wise mask.
        y = np.asarray(y)
        self.all_theta = np.zeros((self.num_labels, n))

        for label in range(self.num_labels):
            if self.verbose:
                print(f"\nTraining classifier for digit {label}")
            y_binary = np.where(y == label, 1, 0)

            theta, _ = self.gradient_descent(X, y_binary)
            self.all_theta[label] = theta

    # --------------------------------------------------
    # Prediction
    # --------------------------------------------------
    def predict(self, X):
        """Return the predicted label for every row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.all_theta is None:
            raise RuntimeError(
                "LogisticRegressionOVA.predict() called before fit()"
            )
        # The sigmoid is monotonic, so the arg-max of the raw scores is the
        # arg-max of the probabilities. Skipping it avoids ties between
        # classes whose probability rounds to exactly 1.0 in floating point.
        scores = X @ self.all_theta.T
        return np.argmax(scores, axis=1)


**Load Data**

In [4]:
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# open a preprocessed_data.pkl that comes from a trusted source.
with open('preprocessed_data.pkl', 'rb') as f:
    data = pickle.load(f)

# Pull the four splits out of the loaded object.
X_train, X_test = data['X_train'], data['X_test']
y_train, y_test = data['y_train'], data['y_test']

# Prepend a constant column of ones to both feature matrices (bias term).
X_train, X_test = (
    np.insert(features, 0, 1, axis=1) for features in (X_train, X_test)
)

print("Training set:", X_train.shape)
print("Testing set :", X_test.shape)



Training set: (8000, 785)
Testing set : (2000, 785)


Train the Model

In [5]:
# Baseline run: step size 0.1, 1000 updates per classifier, 10 classes.
model = LogisticRegressionOVA(alpha=0.1, iterations=1000, num_labels=10)
model.fit(X_train, y_train)


Training classifier for digit 0
Iteration    0 | Cost = 0.606377
Iteration  100 | Cost = 0.233773
Iteration  200 | Cost = 0.201506
Iteration  300 | Cost = 0.186260
Iteration  400 | Cost = 0.176965
Iteration  500 | Cost = 0.170541
Iteration  600 | Cost = 0.165768
Iteration  700 | Cost = 0.162050
Iteration  800 | Cost = 0.159057
Iteration  900 | Cost = 0.156585

Training classifier for digit 1
Iteration    0 | Cost = 0.563244
Iteration  100 | Cost = 0.237021
Iteration  200 | Cost = 0.207099
Iteration  300 | Cost = 0.192308
Iteration  400 | Cost = 0.183221
Iteration  500 | Cost = 0.176911
Iteration  600 | Cost = 0.172187
Iteration  700 | Cost = 0.168471
Iteration  800 | Cost = 0.165443
Iteration  900 | Cost = 0.162910

Training classifier for digit 2
Iteration    0 | Cost = 0.579318
Iteration  100 | Cost = 0.280824
Iteration  200 | Cost = 0.250326
Iteration  300 | Cost = 0.230477
Iteration  400 | Cost = 0.216781
Iteration  500 | Cost = 0.206829
Iteration  600 | Cost = 0.199290
Iteration 

Predictions

In [6]:
# Predict one label per row of the test feature matrix with the trained model.
y_pred = model.predict(X_test)


**Evaluation Metrics (Accuracy, Precision, Recall, F1, Confusion Matrix)**

In [7]:
# Overall accuracy plus macro-averaged precision / recall / F1
# (macro = unweighted mean of the per-class scores).
accuracy  = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall    = recall_score(y_test, y_pred, average='macro')
f1        = f1_score(y_test, y_pred, average='macro')

print("📊 MODEL PERFORMANCE")
print("====================")
print(f"Accuracy : {accuracy*100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-score : {f1:.4f}")

#================================================
# Confusion Matrix
#================================================

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)
print("=================================================")

# Optional plot, left disabled. The recipe passes an explicit Axes to
# disp.plot(): calling plt.figure() first would leave an empty extra figure,
# because ConfusionMatrixDisplay.plot creates its own figure when ax is None.
#
# disp = ConfusionMatrixDisplay(
#     confusion_matrix=cm,
#     display_labels=[str(i) for i in range(10)]
# )
#
# fig, ax = plt.subplots(figsize=(8, 8))
# disp.plot(ax=ax, cmap='Blues', values_format='d')
# ax.set_title("Confusion Matrix - Logistic Regression (One-vs-All)")
# plt.show()


📊 MODEL PERFORMANCE
Accuracy : 74.20%
Precision: 0.7440
Recall   : 0.7420
F1-score : 0.7408

Confusion Matrix:
 [[158  11   1   2   8   4   8   0   2   6]
 [  3 157   5   7   3   3   5   9   6   2]
 [  3   7 146  11   7   1   3   7   5  10]
 [  6   3   7 157  11   2   0   3   7   4]
 [  6   8   3   2 151   3  10   4   6   7]
 [  8   6   1   4   5 146  17   3   8   2]
 [  0  13   0   1   1  11 170   1   3   0]
 [  3  13   7   3   8   3   3 130   7  23]
 [ 10   7   3   9  16  16   9   8 117   5]
 [  6   4   3   9  12   0   0  11   3 152]]


Hyperparameter Sensitivity Analysis

In [12]:
def hyperparameter_sensitivity_analysis(
    X_train, y_train, X_test, y_test,
    iterations=1000,
    num_labels=10,
    learning_rates=None
):
    """
    Study the impact of different learning rates (alpha).

    A fresh LogisticRegressionOVA is trained on the training split for every
    learning rate and scored on the test split.

    Parameters
    ----------
    X_train, y_train : training features and labels, passed to ``fit``.
    X_test, y_test   : evaluation features and labels.
    iterations       : gradient-descent updates per binary classifier.
    num_labels       : number of classes.
    learning_rates   : optional mapping ``{name: alpha}`` of settings to try.
                       Defaults to three settings: 0.05, 0.1 and 0.2.

    Returns
    -------
    dict
        ``{name: accuracy}`` with accuracy as a fraction in [0, 1], in the
        iteration order of ``learning_rates``.
    """

    if learning_rates is None:
        # The spelling of these names is kept as-is: they are the keys of the
        # returned dict, so renaming them would change the result.
        learning_rates = {
            "LowerALpha"   : 0.05,
            "BaseAlpha"    : 0.1,
            "HigerAlpha"    : 0.2
        }

    results = {}

    for name, alpha in learning_rates.items():
        print("\n==============================")
        print(f"Training with {name} (alpha = {alpha})")
        print("==============================")

        model = LogisticRegressionOVA(
            alpha=alpha,
            iterations=iterations,
            num_labels=num_labels
        )
        model.fit(X_train, y_train)

        acc = accuracy_score(y_test, model.predict(X_test))
        results[name] = acc

        print(f"Accuracy with {name}: {acc * 100:.2f}%")

    return results


In [13]:
# NOTE(review): iterations=202 looks chosen to keep the run short while still
# printing the progress line at iteration 200 - confirm.
results = hyperparameter_sensitivity_analysis(
    X_train, y_train,
    X_test, y_test,
    iterations=202,
    num_labels=10
)


# One line per learning-rate setting: name, then test accuracy in percent.
print("\n📌 Hyperparameter Sensitivity Results")
print("===================================")
for k, v in results.items():
    print(f"{k:<15}: {v*100:.2f}%")




Training with LowerALpha (alpha = 0.05)

Training classifier for digit 0
Iteration    0 | Cost = 0.364381
Iteration  100 | Cost = 0.270407
Iteration  200 | Cost = 0.234088

Training classifier for digit 1
Iteration    0 | Cost = 0.339386
Iteration  100 | Cost = 0.267727
Iteration  200 | Cost = 0.237336

Training classifier for digit 2
Iteration    0 | Cost = 0.348564
Iteration  100 | Cost = 0.302651
Iteration  200 | Cost = 0.281024

Training classifier for digit 3
Iteration    0 | Cost = 0.361045
Iteration  100 | Cost = 0.290165
Iteration  200 | Cost = 0.256469

Training classifier for digit 4
Iteration    0 | Cost = 0.351598
Iteration  100 | Cost = 0.292464
Iteration  200 | Cost = 0.265253

Training classifier for digit 5
Iteration    0 | Cost = 0.351864
Iteration  100 | Cost = 0.296071
Iteration  200 | Cost = 0.271237

Training classifier for digit 6
Iteration    0 | Cost = 0.356047
Iteration  100 | Cost = 0.277664
Iteration  200 | Cost = 0.241791

Training classifier for digit 7
It

5-FOLD CROSS-VALIDATION 

In [14]:
from sklearn.model_selection import KFold
import time

# =========================================================
# 5-Fold Cross Validation (NO Regularization)
# =========================================================
def k_fold_cross_validation(X, y, k=5, alpha=0.1, iterations=300,
                            num_labels=10, random_state=42):
    """Evaluate LogisticRegressionOVA with shuffled k-fold cross-validation.

    For every fold a fresh model is trained on the training part and scored
    on the validation part.

    Parameters
    ----------
    X, y : arrays indexable by integer index arrays
        Features and labels to split.
    k : int
        Number of folds.
    alpha : float
        Learning rate passed to the model.
    iterations : int
        Gradient-descent updates per binary classifier.
    num_labels : int
        Number of classes passed to the model.
    random_state : int
        Seed for the shuffled KFold split.

    Returns
    -------
    dict
        Keys "accuracy", "precision", "recall", "f1" and "time", each mapping
        to a list with one value per fold. Precision, recall and F1 are
        macro-averaged; "time" is the training time of ``fit`` in seconds.
    """
    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)

    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    training_time_list = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X), start=1):
        print(f"\n🔁 Fold {fold}/{k}")

        X_train_cv, X_val_cv = X[train_idx], X[val_idx]
        y_train_cv, y_val_cv = y[train_idx], y[val_idx]

        model = LogisticRegressionOVA(
            alpha=alpha,
            iterations=iterations,
            num_labels=num_labels
        )

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        model.fit(X_train_cv, y_train_cv)
        training_time_list.append(time.perf_counter() - start_time)

        y_pred_cv = model.predict(X_val_cv)

        accuracy_list.append(accuracy_score(y_val_cv, y_pred_cv))
        precision_list.append(precision_score(y_val_cv, y_pred_cv, average='macro'))
        recall_list.append(recall_score(y_val_cv, y_pred_cv, average='macro'))
        f1_list.append(f1_score(y_val_cv, y_pred_cv, average='macro'))

    return {
        "accuracy": accuracy_list,
        "precision": precision_list,
        "recall": recall_list,
        "f1": f1_list,
        "time": training_time_list
    }


Run Cross-Validation

In [15]:
# 5 folds on the training split; iterations reduced to 300 for time.
cv_results = k_fold_cross_validation(X_train, y_train, k=5, alpha=0.1, iterations=300)



🔁 Fold 1/5

Training classifier for digit 0
Iteration    0 | Cost = 0.606684
Iteration  100 | Cost = 0.233807
Iteration  200 | Cost = 0.201758

Training classifier for digit 1
Iteration    0 | Cost = 0.574561
Iteration  100 | Cost = 0.238712
Iteration  200 | Cost = 0.208993

Training classifier for digit 2
Iteration    0 | Cost = 0.571982
Iteration  100 | Cost = 0.277824
Iteration  200 | Cost = 0.248249

Training classifier for digit 3
Iteration    0 | Cost = 0.599598
Iteration  100 | Cost = 0.257039
Iteration  200 | Cost = 0.218518

Training classifier for digit 4
Iteration    0 | Cost = 0.590590
Iteration  100 | Cost = 0.268979
Iteration  200 | Cost = 0.237213

Training classifier for digit 5
Iteration    0 | Cost = 0.583669
Iteration  100 | Cost = 0.271365
Iteration  200 | Cost = 0.242748

Training classifier for digit 6
Iteration    0 | Cost = 0.601835
Iteration  100 | Cost = 0.243360
Iteration  200 | Cost = 0.202943

Training classifier for digit 7
Iteration    0 | Cost = 0.5754

Mean ± Std Report

In [16]:
print("\n📊 5-FOLD CROSS-VALIDATION RESULTS (Mean ± Std)")
print("=============================================")

# One line per entry of cv_results: mean and spread across the folds.
# np.std defaults to ddof=0, i.e. the population standard deviation.
for metric in cv_results:
    mean = np.mean(cv_results[metric])
    std  = np.std(cv_results[metric])
    print(f"{metric.capitalize():10s}: {mean:.4f} ± {std:.4f}")



📊 5-FOLD CROSS-VALIDATION RESULTS (Mean ± Std)
Accuracy  : 0.6975 ± 0.0139
Precision : 0.6979 ± 0.0140
Recall    : 0.6976 ± 0.0136
F1        : 0.6956 ± 0.0141
Time      : 176.2878 ± 2.9360
