## 1. Import Libraries and Setup

In [1]:
# Import all necessary libraries
import numpy as np
import pandas as pd
import time
import warnings
import pickle

# Scikit-learn imports
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)

# Settings
# NOTE(review): this silences every warning category from here on, including
# the NumPy overflow / invalid-value RuntimeWarnings that would otherwise flag
# a diverging gradient-descent run. Consider narrowing the filter.
warnings.filterwarnings('ignore')


# Report the library versions so saved outputs can be tied to an environment.
print("All libraries imported successfully!")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")

All libraries imported successfully!
NumPy version: 2.3.5
Pandas version: 2.3.3


## 2. Data Loading

In [2]:
def load_preprocessed_data(input_file='preprocessed_data.pkl'):
    """Load the train/test split stored in a pickle file.

    The file must contain a mapping with the keys ``'X_train'``, ``'X_test'``,
    ``'y_train'`` and ``'y_test'``.

    Args:
        input_file: Path of the pickle file to read.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        KeyError: If one or more of the four expected keys is missing; the
            message lists every missing key.
        OSError: If the file cannot be opened.
    """
    print(f"Loading preprocessed data from {input_file}...")

    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by your own preprocessing step, never untrusted input.
    with open(input_file, 'rb') as f:
        data = pickle.load(f)

    # Validate the schema up front so a bad file is reported in one message
    # instead of failing on whichever key happens to be looked up first.
    required_keys = ('X_train', 'X_test', 'y_train', 'y_test')
    missing = [key for key in required_keys if key not in data]
    if missing:
        raise KeyError(f"{input_file} is missing required keys: {missing}")

    X_train, X_test, y_train, y_test = (data[key] for key in required_keys)

    print("Preprocessed data loaded successfully!")
    print(f"Training set: {X_train.shape[0]} samples")
    print(f"Testing set: {X_test.shape[0]} samples")

    return X_train, X_test, y_train, y_test


def print_data_statistics(X_train, y_train, X_test, y_test):
    """Print a short summary of the train/test split.

    Reports sample counts, feature count, number of distinct training labels,
    per-class counts for both splits, and the mean / standard deviation of all
    training feature values.

    Args:
        X_train: 2-D training feature array.
        y_train: Training labels (non-negative integers, as required by
            ``np.bincount``).
        X_test: Test feature array.
        y_test: Test labels (non-negative integers).
    """
    rule = "-" * 40
    n_train, n_test = len(X_train), len(X_test)

    print("\nDATASET STATISTICS")
    print(rule)
    for label, value in (
        ("Total Samples", n_train + n_test),
        ("Training Samples", n_train),
        ("Test Samples", n_test),
        ("Features", X_train.shape[1]),
        ("Number of Classes", len(np.unique(y_train))),
    ):
        print(f"{label}: {value}")

    # Per-class sample counts, indexed by label value.
    print(f"\nTraining Set Distribution: {np.bincount(y_train)}")
    print(f"Test Set Distribution: {np.bincount(y_test)}")

    pixel_mean = np.mean(X_train)
    pixel_std = np.std(X_train)
    print(f"\nPixel Value Mean: {pixel_mean:.4f}")
    print(f"Pixel Value Std: {pixel_std:.4f}")
    print(rule)

In [3]:
# Load the cached split from the default pickle file and print its summary.
# These four names are reused by every later cell in the notebook.
X_train, X_test, y_train, y_test = load_preprocessed_data()
print_data_statistics(X_train, y_train, X_test, y_test)

Loading preprocessed data from preprocessed_data.pkl...
Preprocessed data loaded successfully!
Training set: 8000 samples
Testing set: 2000 samples

DATASET STATISTICS
----------------------------------------
Total Samples: 10000
Training Samples: 8000
Test Samples: 2000
Features: 784
Number of Classes: 10

Training Set Distribution: [800 800 800 800 800 800 800 800 800 800]
Test Set Distribution: [200 200 200 200 200 200 200 200 200 200]

Pixel Value Mean: 0.4265
Pixel Value Std: 0.1665
----------------------------------------


## 3. Linear Regression (One-vs-All) Functions

In [4]:
class LinearRegressionBGD:
    """Least-squares linear regression fitted with batch gradient descent.

    A bias column of ones is prepended to the features, so ``theta[0]`` is the
    intercept and ``theta[1:]`` are the feature weights.

    Args:
        alpha: Learning rate (step size) of each gradient update.
        n_iters: Maximum number of gradient updates.
        random_state: Stored for API compatibility only. Weights always start
            at zero, so training is deterministic and this value is unused.
        verbose: If True (default) print the cost at every checkpoint; pass
            False to train silently.
        log_every: Interval, in iterations, between checkpoints. The cost is
            only computed, and the stopping rules only checked, at checkpoints.
        patience: Training stops once the cost has risen at this many
            checkpoints.

    Attributes:
        theta: Learned weights (bias first), or None before ``fit``.
        n_iter_: Number of gradient updates performed by the last fit.

    Raises:
        ValueError: If ``log_every`` or ``patience`` is smaller than 1.
    """

    def __init__(self, alpha=0.01, n_iters=1000, random_state=42,
                 verbose=True, log_every=100, patience=5):
        if log_every < 1:
            raise ValueError("log_every must be at least 1")
        if patience < 1:
            raise ValueError("patience must be at least 1")
        self.alpha = alpha
        self.n_iters = n_iters
        self.random_state = random_state
        self.verbose = verbose
        self.log_every = log_every
        self.patience = patience
        self.theta = None
        self.n_iter_ = 0

    def _add_bias(self, X):
        """Return ``X`` as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def _compute_cost(self, X, y, theta):
        """Mean squared error of ``X @ theta`` against ``y``."""
        m = len(y)
        predictions = X @ theta
        cost = (1/m) * np.sum((predictions - y)**2)
        return cost

    def _bgd(self, X, y):
        """Run batch gradient descent on bias-augmented ``X``; return theta."""
        m, n = X.shape
        self.theta = np.zeros(n)
        self.n_iter_ = 0
        previous_cost = 1e9
        n_increases = 0
        for i in range(self.n_iters):
            error = X @ self.theta - y
            # Gradient of the mean squared error used by _compute_cost.
            grad = (2/m) * (X.T @ error)
            self.theta -= self.alpha * grad
            self.n_iter_ = i + 1

            if i % self.log_every != 0:
                continue

            cost = self._compute_cost(X, y, self.theta)
            if self.verbose:
                print(f"Cost\t:\t{cost}")

            # A NaN cost compares False against everything, so the
            # "cost went up" rule below can never fire once the run has blown
            # up. Detect divergence explicitly instead of spending the
            # remaining iterations on non-finite weights.
            if not np.isfinite(cost):
                if self.verbose:
                    print("Cost is no longer finite; stopping early "
                          "(learning rate is probably too high).")
                break

            if cost > previous_cost:
                n_increases += 1
                if n_increases == self.patience:
                    break
            previous_cost = cost

        return self.theta

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix of shape (n_samples, n_features).
            y: Targets; any shape holding n_samples values (flattened to 1-D).

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` disagree on the
                number of samples.
        """
        X_b = self._add_bias(X)
        # Flatten so an (n, 1) column vector cannot broadcast against the
        # (n,) prediction vector inside the gradient step.
        y = np.asarray(y, dtype=float).ravel()
        if X_b.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != X_b.shape[0]:
            raise ValueError(
                f"X has {X_b.shape[0]} samples but y has {y.shape[0]}"
            )
        self.theta = self._bgd(X_b, y)
        return self

    def predict(self, X):
        """Return the continuous predictions ``[1, X] @ theta``.

        Raises:
            ValueError: If the model has not been fitted yet.
        """
        if self.theta is None:
            raise ValueError("Model must be trained before prediction")

        X_b = self._add_bias(X)
        return X_b @ self.theta


def train_one_vs_all_linear_regression(X_train, y_train, alpha=0.01, n_iters=1000, verbose=True):
    """Train one binary linear-regression model per class (one-vs-all).

    For each distinct label in ``y_train`` a ``LinearRegressionBGD`` model is
    fitted against a 0/1 target that is 1 for samples of that class.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        alpha: Learning rate passed to every model.
        n_iters: Iteration budget passed to every model.
        verbose: If True, print a banner and per-class progress lines.
            NOTE(review): this flag is not forwarded to the models themselves.

    Returns:
        ``(models, training_time)`` where ``models`` maps each class label to
        its fitted model and ``training_time`` is the elapsed wall-clock time
        in seconds.
    """
    heavy_rule = "=" * 60

    if verbose:
        print("\n" + heavy_rule)
        print("TRAINING LINEAR REGRESSION (ONE-VS-ALL)")
        print(heavy_rule)
        print(f"Learning rate (alpha): {alpha}")
        print(f"Iterations: {n_iters}")
        print("-" * 60)

    classes = np.unique(y_train)
    models = {}

    started_at = time.time()
    for cls in classes:
        if verbose:
            print(f"Training classifier for class {cls}...")

        # 1.0 for samples of the current class, 0.0 for all others.
        is_current_class = (y_train == cls).astype(float)
        models[cls] = LinearRegressionBGD(
            alpha=alpha, n_iters=n_iters, random_state=42
        ).fit(X_train, is_current_class)
    training_time = time.time() - started_at

    if verbose:
        print(f"\nTraining completed in {training_time:.2f} seconds")
        print(heavy_rule)

    return models, training_time


def predict_linear_regression(models, X):
    """Predict class labels with a set of one-vs-all regressors.

    Each model scores every sample; a sample is assigned the label of the
    model with the highest score. Ties go to the smallest label.

    Args:
        models: Mapping of class label -> fitted model exposing ``predict(X)``
            that returns one score per sample.
        X: Feature matrix of shape (n_samples, n_features).

    Returns:
        1-D array of predicted class labels, one per row of ``X``.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("models must contain at least one trained classifier")

    # Map score columns to labels explicitly rather than using the label as a
    # column index, so labels need not be exactly 0..n_classes-1. Sorting
    # keeps the column order (and therefore tie-breaking) of 0-based labels.
    class_labels = sorted(models)
    scores = np.column_stack([models[label].predict(X) for label in class_labels])

    winning_columns = np.argmax(scores, axis=1)
    return np.asarray(class_labels)[winning_columns]


In [23]:
# Fit one regressor per class on the full training split. `models` is reused
# by the prediction cell below.
models, training_time = train_one_vs_all_linear_regression(
    X_train, y_train,
    alpha=0.0055,
    n_iters=1000
)


TRAINING LINEAR REGRESSION (ONE-VS-ALL)
Learning rate (alpha): 0.0055
Iterations: 1000
------------------------------------------------------------
Training classifier for class 0...
Cost	:	0.09370051701806588
Cost	:	0.0663244753410007
Cost	:	0.059513742263717355
Cost	:	0.05658776532838408
Cost	:	0.05502476565145476
Cost	:	0.054090555589155986
Cost	:	0.05349084545264712
Cost	:	0.053082789904571374
Cost	:	0.052790285383783414
Cost	:	0.052570628092225534
Training classifier for class 1...
Cost	:	0.09477709200656395
Cost	:	0.07202848531995386
Cost	:	0.06682740660643509
Cost	:	0.06480564563851925
Cost	:	0.0637636022273404
Cost	:	0.06312346131356039
Cost	:	0.0626870244938394
Cost	:	0.06236850093123314
Cost	:	0.06212414436455294
Cost	:	0.061929166426882634
Training classifier for class 2...
Cost	:	0.09465305234123923
Cost	:	0.07989902175202246
Cost	:	0.07366517770240211
Cost	:	0.06967669960825938
Cost	:	0.06699118399645397
Cost	:	0.06511770592599289
Cost	:	0.06377292041940902
Cost	:	0.06278

In [24]:
# Predicted label for every test sample; later evaluation cells read `y_pred`.
y_pred = predict_linear_regression(models, X_test)

### Evaluation Metrics (Accuracy, Precision, Recall, F1, Confusion Matrix)

In [25]:
# Test-set scores. average='macro' is the unweighted mean of the per-class
# scores, so every class counts equally.
accuracy  = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall    = recall_score(y_test, y_pred, average='macro')
f1        = f1_score(y_test, y_pred, average='macro')

print("MODEL PERFORMANCE")
print("====================")
print(f"Accuracy : {accuracy*100:.2f}%")
print(f"Precision: {precision*100:.4f}%")
print(f"Recall   : {recall*100:.4f}%")
print(f"F1-score : {f1*100:.4f}%")

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)
print("=================================================")

MODEL PERFORMANCE
Accuracy : 72.35%
Precision: 72.5494%
Recall   : 72.3500%
F1-score : 72.2062%

Confusion Matrix:
 [[153   6   2   5  10   6  10   3   0   5]
 [  7 158   0   1  13   6   3   4   7   1]
 [  3   3 146   8   5   6  10   9   3   7]
 [  3   6  10 165   4   2   0   2   4   4]
 [  2  23   2   3 140   4   9   2  10   5]
 [ 19   4   1   5   5 131  17   9   5   4]
 [  4   7   4   1   8  11 162   0   3   0]
 [  3  14   5   1   4   3   1 136   7  26]
 [  5   7   3  14  21  12  10   8 110  10]
 [  5   2   7   6  10   2   1  19   2 146]]


## 4. Linear Regression Tuning

In [None]:
def tune_linear_regression_alpha(X_train, y_train, X_val, y_val, alphas=None, n_iters=30):
    """Grid-search the learning rate of the one-vs-all linear regressors.

    For every candidate learning rate a full set of one-vs-all models is
    trained on the training split and scored by accuracy on both the training
    and the validation split. The candidate with the highest validation
    accuracy wins; ties keep the earliest candidate.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_val: Validation feature matrix.
        y_val: Validation labels.
        alphas: Iterable of learning rates to try. Defaults to
            ``[0.001, 0.005, 0.0055, 0.01, 0.1]``.
        n_iters: Gradient-descent iterations per candidate (default 30).
            NOTE(review): this is a much smaller budget than the final model
            uses, so the ranking may not carry over; confirm before relying
            on it.

    Returns:
        ``(best_alpha, results)`` where ``results`` holds one dict per
        candidate with the keys ``'alpha'``, ``'train_accuracy'``,
        ``'val_accuracy'`` and ``'training_time'``.

    Raises:
        ValueError: If ``alphas`` is given but empty.
    """
    candidate_alphas = (
        [0.001, 0.005, 0.0055, 0.01, 0.1] if alphas is None else list(alphas)
    )
    if not candidate_alphas:
        raise ValueError("alphas must contain at least one learning rate")

    print("\n" + "="*60)
    print("LINEAR REGRESSION HYPERPARAMETER TUNING")
    print("="*60)

    # Start below any reachable accuracy so the winner is always one of the
    # candidates, even when every candidate scores 0.
    best_score = float('-inf')
    best_alpha = None
    results = []

    for alpha in candidate_alphas:
        print(f"\nTesting alpha={alpha}")
        models, train_time = train_one_vs_all_linear_regression(
            X_train, y_train, alpha=alpha, n_iters=n_iters, verbose=False
        )

        y_pred_val = predict_linear_regression(models, X_val)
        y_pred_train = predict_linear_regression(models, X_train)

        val_accuracy = np.mean(y_pred_val == y_val)
        train_accuracy = np.mean(y_pred_train == y_train)

        results.append({
            'alpha': alpha,
            'train_accuracy': train_accuracy,
            'val_accuracy': val_accuracy,
            'training_time': train_time
        })

        print(f"\tTrain: {train_accuracy:.4f}, Val: {val_accuracy:.4f}, Time: {train_time:.2f}s")

        if val_accuracy > best_score:
            best_score = val_accuracy
            best_alpha = alpha

    print("\n" + "="*60)
    print("BEST ALPHA")
    print("="*60)
    print(f"Alpha: {best_alpha}")
    print(f"Validation Accuracy: {best_score:.4f}")
    print("="*60)

    return best_alpha,  results

## 4.1. Linear Regression Tuning
###     Create Validation Set for Hyperparameter Tuning

In [81]:
# Hold out 20% of the training split, stratified by label, as the validation
# set used for hyperparameter tuning.
X_train_tune, X_val_tune, y_train_tune, y_val_tune = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

print(f"Training set for tuning: {X_train_tune.shape[0]} samples")
print(f"Validation set for tuning: {X_val_tune.shape[0]} samples")

Training set for tuning: 6400 samples
Validation set for tuning: 1600 samples


## 4.2. Linear Regression Tuning

**Parameter to test:**
- **Alpha**: Learning rate 

In [87]:
# Search the learning-rate grid on the tuning split created above.
linear_best_alpha, linear_tuning_results = tune_linear_regression_alpha(
    X_train_tune, y_train_tune, X_val_tune, y_val_tune
)


LINEAR REGRESSION HYPERPARAMETER TUNING

Testing alpha=0.005
Cost	:	0.11580866463789818
Cost	:	0.11831960706867392
Cost	:	0.11759277536804337
Cost	:	0.11625586858398337
Cost	:	0.11731107555773708
Cost	:	0.11707781431231054
Cost	:	0.11642887494325482
Cost	:	0.11771246595048811
Cost	:	0.11658407807551832
Cost	:	0.11753950912587559
	Train: 0.1000, Val: 0.1000, Time: 2.55s

Testing alpha=0.0055
Cost	:	0.12455448943344212
Cost	:	0.128380559605793
Cost	:	0.12718056715502463
Cost	:	0.12516597137087465
Cost	:	0.12677941866713752
Cost	:	0.1264077805422388
Cost	:	0.1254591868840837
Cost	:	0.1273943768563181
Cost	:	0.12563112675713353
Cost	:	0.1271240829398073
	Train: 0.1000, Val: 0.1000, Time: 2.83s

BEST ALPHA
Alpha: 0.005
Validation Accuracy: 0.1000


## 5 . Performance Analysis

### 5.1 . CROSS-VALIDATION (k=5)

In [None]:
def perform_cross_validation(X, y, alpha=0.005, n_iters=1000, k=5):
    """Evaluate the one-vs-all linear regressors with shuffled k-fold CV.

    For each fold a fresh set of models is trained on the remaining folds and
    scored on the held-out fold with accuracy and weighted precision, recall
    and F1.

    Args:
        X: Feature matrix (array-like, n_samples x n_features).
        y: Labels (array-like, n_samples).
        alpha: Learning rate passed to training.
        n_iters: Iteration budget passed to training.
        k: Number of folds.

    Returns:
        Dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``, each holding a list with one score per fold.
    """
    # The fold indices are positional; converting to arrays keeps pandas
    # inputs from being indexed by label instead.
    X = np.asarray(X)
    y = np.asarray(y)

    print("\n" + "="*60)
    print(f"K-FOLD CROSS-VALIDATION (k={k})")
    print("="*60)

    # NOTE(review): KFold does not stratify by label; with shuffled, balanced
    # data the folds should be close to balanced, but that is not guaranteed.
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    cv_scores = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': []
    }

    for fold, (train_idx, val_idx) in enumerate(kf.split(X), start=1):
        print(f"Fold {fold}/{k}... ")

        X_train_fold, y_train_fold = X[train_idx], y[train_idx]
        X_val_fold, y_val_fold = X[val_idx], y[val_idx]

        # Train model
        models, _ = train_one_vs_all_linear_regression(
            X_train_fold, y_train_fold,
            alpha=alpha, n_iters=n_iters, verbose=False
        )

        # Predict
        y_pred = predict_linear_regression(models, X_val_fold)

        # Calculate metrics; zero_division=0 scores a class that was never
        # predicted as 0 instead of emitting a warning.
        accuracy = accuracy_score(y_val_fold, y_pred)
        precision = precision_score(y_val_fold, y_pred, average='weighted', zero_division=0)
        recall = recall_score(y_val_fold, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_val_fold, y_pred, average='weighted', zero_division=0)

        cv_scores['accuracy'].append(accuracy)
        cv_scores['precision'].append(precision)
        cv_scores['recall'].append(recall)
        cv_scores['f1'].append(f1)

        print(f"Accuracy: {accuracy:.4f}")

    # Calculate mean and std
    print("\n" + "="*60)
    print("CROSS-VALIDATION SUMMARY")
    print("="*60)
    print(f"Accuracy:  {np.mean(cv_scores['accuracy']):.4f} ± {np.std(cv_scores['accuracy']):.4f}")
    print(f"Precision: {np.mean(cv_scores['precision']):.4f} ± {np.std(cv_scores['precision']):.4f}")
    print(f"Recall:    {np.mean(cv_scores['recall']):.4f} ± {np.std(cv_scores['recall']):.4f}")
    print(f"F1-Score:  {np.mean(cv_scores['f1']):.4f} ± {np.std(cv_scores['f1']):.4f}")
    print("="*60)

    return cv_scores


### 5.2 . COMPREHENSIVE METRICS ANALYSIS

In [None]:
def show_all_metrics(y_test, y_pred):
    """Print overall scores, a per-class report and the confusion matrix.

    Overall precision, recall and F1 use weighted averaging, with undefined
    scores reported as 0.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("COMPREHENSIVE METRICS")
    print(banner)

    # Overall metrics, computed up front and printed as aligned rows.
    weighted = {'average': 'weighted', 'zero_division': 0}
    overall = (
        ("Accuracy:  ", accuracy_score(y_test, y_pred)),
        ("Precision: ", precision_score(y_test, y_pred, **weighted)),
        ("Recall:    ", recall_score(y_test, y_pred, **weighted)),
        ("F1-Score:  ", f1_score(y_test, y_pred, **weighted)),
    )

    print("\nOverall Performance:")
    for label, value in overall:
        print(f"  {label}{value:.4f}")

    # Per-class report
    print("\nPer-Class Performance:")
    print(classification_report(y_test, y_pred))

    # Confusion matrix
    print(confusion_matrix(y_test, y_pred))

In [None]:
# Run this cell only after the model has been trained and `y_pred` has been
# computed by the prediction cell; it reads X_train, y_train, y_test and y_pred.

# 1. Cross-validation on the training split (trains a full set of models per fold).
cv_scores = perform_cross_validation(X_train, y_train, alpha=0.005, n_iters=100)

# 2. Full metric report for the existing test-set predictions.
show_all_metrics(y_test, y_pred)


K-FOLD CROSS-VALIDATION (k=5)
Fold 1/5... 
Cost	:	0.09197371988013903
Cost	:	0.09498329823925492
Cost	:	0.09118665777898045
Cost	:	0.09150396118333694
Cost	:	0.09315716204683754
Cost	:	0.0921772240023099
Cost	:	0.09375840429131575
Cost	:	0.09306289002847581
Cost	:	0.09386977794232367
Cost	:	0.09058506952713029


KeyboardInterrupt: 