<a href="https://colab.research.google.com/github/chasslayy/MNIST-Ensemble-Learning-Project/blob/main/ensemble_learning_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ensemble Learning on MNIST

This notebook implements Questions **8 and 9** from the Ensemble Learning assignment:

- Train multiple classifiers on the MNIST dataset (Random Forest, Extra Trees, SVM).
- Combine them using **voting** (hard/soft).
- Build a **stacking ensemble** (manual blender + `StackingClassifier`).

In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

RANDOM_STATE = 42


## 1. Load and split the MNIST dataset

In [2]:
# Fetch MNIST from OpenML as plain arrays (as_frame=False). The first call
# downloads the dataset, so this cell may take a moment.
mnist = fetch_openml('mnist_784', as_frame=False)

# Features are cast to float32 and labels are converted to integers.
X, y = mnist.data.astype('float32'), mnist.target.astype('int64')

print('Full dataset shape:', X.shape, 'Labels shape:', y.shape)

Full dataset shape: (70000, 784) Labels shape: (70000,)


In [3]:
# Target layout: 50,000 training / 10,000 validation / 10,000 test samples.
# Both splits hold out the same number of samples with the same seed, and both
# are stratified so every subset keeps the class proportions of its parent.
holdout_kwargs = dict(test_size=10000, random_state=RANDOM_STATE)

# Step 1: carve the 10k test set out of the full data (leaves 60k).
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, stratify=y, **holdout_kwargs
)

# Step 2: carve the 10k validation set out of the remaining 60k (leaves 50k).
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, stratify=y_train_full, **holdout_kwargs
)

print('Train set:', X_train.shape, 'Validation set:', X_valid.shape, 'Test set:', X_test.shape)

Train set: (50000, 784) Validation set: (10000, 784) Test set: (10000, 784)


## 2. Define base classifiers

In [4]:
# The two tree ensembles share identical hyper-parameters. Tree-based models
# do not require feature scaling, so they are used on the raw features.
forest_params = dict(
    n_estimators=200,
    max_depth=None,
    n_jobs=-1,
    random_state=RANDOM_STATE,
)
rf_clf = RandomForestClassifier(**forest_params)
et_clf = ExtraTreesClassifier(**forest_params)

# The SVM works better on scaled features, so the scaler and the classifier
# are chained in a pipeline. probability=True makes predict_proba available,
# which the soft-voting ensemble built later in the notebook relies on.
svm_clf = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', gamma='scale', probability=True, random_state=RANDOM_STATE)),
])

# (name, estimator) pairs in the format expected by the ensemble classes.
base_estimators = [('rf', rf_clf), ('et', et_clf), ('svm', svm_clf)]
base_estimators

[('rf', RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=42)),
 ('et', ExtraTreesClassifier(n_estimators=200, n_jobs=-1, random_state=42)),
 ('svm',
  Pipeline(steps=[('scaler', StandardScaler()),
                  ('svc', SVC(probability=True, random_state=42))]))]

## 3. Train and evaluate individual classifiers

In [None]:
def train_and_eval(clf, X_train, y_train, X_valid, y_valid, name='model'):
    """Fit ``clf`` on the training data and report its validation accuracy.

    Parameters
    ----------
    clf : estimator exposing ``fit`` and ``predict``; it is fitted in place.
    X_train, y_train : samples and labels used for fitting.
    X_valid, y_valid : held-out samples and labels used for scoring.
    name : label used in the progress messages.

    Returns
    -------
    tuple
        ``(clf, valid_acc)`` - the fitted estimator (same object that was
        passed in) and its accuracy on the validation data.
    """
    print(f'\nTraining {name}...')
    clf.fit(X_train, y_train)

    # Score the freshly fitted model on the held-out data.
    valid_acc = accuracy_score(y_valid, clf.predict(X_valid))
    print(f'{name} validation accuracy: {valid_acc:.4f}')
    return clf, valid_acc

# Fitted estimators and their validation accuracies, keyed by estimator name.
trained_clfs, valid_scores = {}, {}

# Fit every base estimator in turn and record the results.
for name, clf in base_estimators:
    trained_clfs[name], valid_scores[name] = train_and_eval(
        clf, X_train, y_train, X_valid, y_valid, name=name
    )

print('\nValidation accuracies:')
for name, acc in valid_scores.items():
    print(f'{name}: {acc:.4f}')


Training rf...
rf validation accuracy: 0.9717

Training et...
et validation accuracy: 0.9739

Training svm...


## 4. Voting ensemble (hard and soft voting)

In [None]:
# VotingClassifier.fit() always clones the estimators it is given and refits
# the clones on the data passed to fit(); already fitted models are NOT
# re-used. The ensembles are therefore fitted on the training set and scored
# on the held-out validation set. Fitting on (X_valid, y_valid) and then
# predicting X_valid would train and score on the same samples and report an
# inflated accuracy, which would also bias the model selection done later.
# NOTE: each fit() retrains all three base models (including the slow
# probability=True SVC), so this cell is expensive.
voting_estimators = [(name, trained_clfs[name]) for name in ('rf', 'et', 'svm')]

# Hard voting: each base model casts one vote; the majority class wins.
hard_voting_clf = VotingClassifier(
    estimators=list(voting_estimators),
    voting='hard'
)

print('\nFitting hard voting classifier on the training set...')
hard_voting_clf.fit(X_train, y_train)
y_valid_pred_hard = hard_voting_clf.predict(X_valid)
hard_acc = accuracy_score(y_valid, y_valid_pred_hard)
print(f'Hard voting validation accuracy: {hard_acc:.4f}')

# Soft voting: class probabilities are averaged across the base models, so
# every base model must expose predict_proba.
soft_voting_clf = VotingClassifier(
    estimators=list(voting_estimators),
    voting='soft'
)

print('\nFitting soft voting classifier on the training set...')
soft_voting_clf.fit(X_train, y_train)
y_valid_pred_soft = soft_voting_clf.predict(X_valid)
soft_acc = accuracy_score(y_valid, y_valid_pred_soft)
print(f'Soft voting validation accuracy: {soft_acc:.4f}')

## 5. Evaluate best model on the test set

In [None]:
# Gather the validation accuracy of every candidate: the individual base
# models first, then the two voting ensembles.
all_valid_scores = {**valid_scores, 'hard_voting': hard_acc, 'soft_voting': soft_acc}

print('\nAll validation scores:')
for name, acc in all_valid_scores.items():
    print(f'{name}: {acc:.4f}')

# The winner is the candidate with the highest validation accuracy.
best_name = max(all_valid_scores, key=all_valid_scores.get)
print(f'\nBest model on validation set: {best_name}')

# Map every candidate name to its fitted model and look the winner up.
candidate_models = {
    **trained_clfs,
    'hard_voting': hard_voting_clf,
    'soft_voting': soft_voting_clf,
}
best_model = candidate_models[best_name]

# Only the selected model is scored on the test set.
print(f'\nEvaluating {best_name} on the test set...')
y_test_pred = best_model.predict(X_test)
test_acc = accuracy_score(y_test, y_test_pred)
print(f'Test accuracy: {test_acc:.4f}')

## 6. Manual stacking ensemble (blender)

In [None]:
# For stacking, we use the trained base models to generate predictions on the validation set.
# Each instance in the new training set is a vector of predictions from each classifier.

def get_meta_features(models, X):
    """Build the blender's input matrix from the base models' predictions.

    Parameters
    ----------
    models : dict mapping a name to a fitted estimator exposing ``predict``.
    X : samples to run through every model.

    Returns
    -------
    numpy.ndarray
        Array of shape ``[n_samples, n_models]``; column ``j`` holds the
        predictions of the ``j``-th model, in the dict's iteration order.
    """
    per_model_preds = [clf.predict(X) for clf in models.values()]
    # One column per model, one row per sample.
    return np.column_stack(per_model_preds)

# The blender is trained on the base models' predictions for the validation
# set (data the base models were not fitted on) and evaluated on their
# predictions for the test set.
meta_X_train = get_meta_features(trained_clfs, X_valid)
meta_y_train = y_valid

meta_X_test = get_meta_features(trained_clfs, X_test)
meta_y_test = y_test

print('Meta-feature shape (train):', meta_X_train.shape)

# Use a simple logistic regression as the blender.
# `multi_class` is intentionally not passed: the parameter was deprecated in
# scikit-learn 1.5 (passing 'auto' emits a FutureWarning and the argument is
# scheduled for removal), and the default already gives the same multinomial
# behaviour for this multi-class problem.
# NOTE(review): the meta-features are predicted class labels, which the
# logistic regression treats as numeric values - consider class probabilities
# or one-hot encoded predictions instead; confirm with the assignment.
blender = LogisticRegression(max_iter=1000)
blender.fit(meta_X_train, meta_y_train)

meta_test_pred = blender.predict(meta_X_test)
stacking_acc = accuracy_score(meta_y_test, meta_test_pred)
print(f'Stacking ensemble (manual blender) test accuracy: {stacking_acc:.4f}')

## 7. Using `StackingClassifier`

In [None]:
# Final estimator that combines the base models' outputs.
meta_learner = LogisticRegression(max_iter=1000)

# Stack the same three base estimators used throughout the notebook.
stack_clf = StackingClassifier(
    estimators=[('rf', rf_clf), ('et', et_clf), ('svm', svm_clf)],
    final_estimator=meta_learner,
    n_jobs=-1,
)

print('\nTraining StackingClassifier on the training set...')
stack_clf.fit(X_train, y_train)

# Accuracy on the validation set.
y_valid_stack = stack_clf.predict(X_valid)
stack_valid_acc = accuracy_score(y_valid, y_valid_stack)
print(f'StackingClassifier validation accuracy: {stack_valid_acc:.4f}')

# Accuracy on the test set.
y_test_stack = stack_clf.predict(X_test)
stack_test_acc = accuracy_score(y_test, y_test_stack)
print(f'StackingClassifier test accuracy: {stack_test_acc:.4f}')

## 8. Summary of results

In [None]:
# Validation-set comparison: individual base models first, then the ensembles.
print('\n=== FINAL COMPARISON (Validation) ===', 'Base models:', sep='\n')
for name, acc in valid_scores.items():
    print(f'{name}: {acc:.4f}')
print(
    f'Hard voting: {hard_acc:.4f}',
    f'Soft voting: {soft_acc:.4f}',
    f'StackingClassifier (valid): {stack_valid_acc:.4f}',
    sep='\n',
)

# Test-set comparison of the selected model and the two stacking variants.
print(
    '\n=== FINAL COMPARISON (Test) ===',
    f'Best voting/test ({best_name}): {test_acc:.4f}',
    f'Manual stacking (blender) test: {stacking_acc:.4f}',
    f'StackingClassifier test: {stack_test_acc:.4f}',
    sep='\n',
)