<a href="https://colab.research.google.com/github/frankfurtmacmoses/cnn_bagging/blob/main/bagging_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

# Directory inside the Colab VM where Google Drive is attached.
DRIVE_MOUNT_POINT = '/content/drive'

# Mounting prompts for authorisation the first time the cell is executed.
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
import itertools

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from plotnine import (
    aes,
    element_text,
    facet_wrap,
    geom_line,
    geom_text,
    geom_tile,
    ggplot,
    labs,
    scale_fill_gradient,
    theme,
    theme_minimal,
)
from sklearn.metrics import accuracy_score, confusion_matrix

print(tf.__version__)

2.17.0


In [None]:
# Fetch CIFAR-100 through the Keras dataset helper and unpack the two splits.
cifar100 = tf.keras.datasets.cifar100
train_split, test_split = cifar100.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
[1m169001437/169001437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [None]:
# Collapse both label arrays to 1-D copies so each entry is a single class id.
y_train, y_test = y_train.flatten(), y_test.flatten()

In [None]:
# CIFAR-100 *fine* label names, listed in label-id order (id 0 -> 'apple').
# The fine label ids index an alphabetically sorted name list, NOT a list
# grouped by superclass, so the position in this list must match the id.
classes = [
    'apple', 'aquarium_fish', 'baby', 'bear', 'beaver',
    'bed', 'bee', 'beetle', 'bicycle', 'bottle',
    'bowl', 'boy', 'bridge', 'bus', 'butterfly',
    'camel', 'can', 'castle', 'caterpillar', 'cattle',
    'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach',
    'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
    'dolphin', 'elephant', 'flatfish', 'forest', 'fox',
    'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
    'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard',
    'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
    'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid',
    'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree',
    'plain', 'plate', 'poppy', 'porcupine', 'possum',
    'rabbit', 'raccoon', 'ray', 'road', 'rocket',
    'rose', 'sea', 'seal', 'shark', 'shrew',
    'skunk', 'skyscraper', 'snail', 'snake', 'spider',
    'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table',
    'tank', 'telephone', 'television', 'tiger', 'tractor',
    'train', 'trout', 'tulip', 'turtle', 'wardrobe',
    'whale', 'willow_tree', 'wolf', 'woman', 'worm',
]
assert len(classes) == 100, "CIFAR-100 has exactly 100 fine classes"

# Class-frequency bar chart of the training labels.
# `x=` is passed by keyword because the first positional argument of
# `sns.countplot` is `data`, not the vector to count.
fig, p = plt.subplots(figsize=(24, 6))
sns.countplot(x=np.ravel(y_train), order=list(range(len(classes))), ax=p)
# Pin the tick positions before relabelling so names cannot drift off their bars.
p.set_xticks(range(len(classes)))
p.set_xticklabels(classes, rotation=90)
p.set(
    title="CIFAR-100 training images per class",
    xlabel="Class",
    ylabel="Number of images",
)
plt.show()

In [None]:
input_shape = (32, 32, 3)

# The arrays are expected to be channel-last already, so validate the layout
# instead of reshaping (the previous reshape was a no-op on a 4-D array and
# only failed, with an opaque message, when the layout was wrong).
assert x_train.shape[1:] == input_shape, f"unexpected train image shape {x_train.shape[1:]}"
assert x_test.shape[1:] == input_shape, f"unexpected test image shape {x_test.shape[1:]}"

# Scale pixel values to [0, 1].
# - float32 halves the memory of NumPy's default float64 result.
# - The integer-dtype guard makes the cell idempotent: re-running it will not
#   divide already-scaled floats by 255 a second time.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype(np.float32) / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype(np.float32) / 255.0

In [None]:
num_classes = 100  # number of CIFAR-100 fine classes


def _to_one_hot(labels, depth):
    """Return `labels` as a float32 NumPy one-hot matrix of width `depth`.

    Args:
        labels: Integer class ids (any shape; flattened before encoding), or
            an array that is already one-hot with `depth` columns.
        depth: Number of classes, i.e. the width of the encoded matrix.

    Returns:
        A ``(n_samples, depth)`` float32 ``np.ndarray``. Input that is
        already one-hot is passed through, so re-running the cell is safe.
    """
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == depth:
        return labels.astype(np.float32)
    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(depth, dtype=np.float32)[labels.reshape(-1).astype(np.int64)]


# Keep the labels as NumPy arrays (not tf.Tensor) so they can be
# fancy-indexed with bootstrap index arrays when the ensemble is trained.
y_train = _to_one_hot(y_train, num_classes)
y_test = _to_one_hot(y_test, num_classes)

# Define CNN Model Architecture


In [None]:
def create_cnn_model(num_classes=100, input_shape=(32, 32, 3), learning_rate=0.00001):
    """Build and compile the CNN used as a single member of the ensemble.

    The network is three Conv-Conv-MaxPool stages (32, 32 and 128 filters;
    dropout after the first two stages), followed by a 512-unit dense layer,
    dropout, and a softmax output layer.

    Args:
        num_classes: Width of the softmax output layer.
        input_shape: Shape of one input image, without the batch axis.
        learning_rate: Learning rate handed to the RMSprop optimizer.

    Returns:
        A compiled ``tf.keras.Sequential`` model trained with categorical
        cross-entropy (so it expects one-hot labels) that reports the
        ``accuracy`` metric.
    """
    model = tf.keras.models.Sequential([
        tf.keras.Input(shape=input_shape),

        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Dropout(0.25),

        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Dropout(0.25),

        tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
        tf.keras.layers.Conv2D(128, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])
    # The legacy `decay=1e-06` optimizer argument is intentionally omitted:
    # Keras 3 no longer supports it. Use a learning-rate schedule if decay
    # is required.
    # The metric is spelled 'accuracy' (not 'acc') so the training history
    # exposes the 'accuracy' / 'val_accuracy' keys used when plotting curves.
    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


# Train the bagging ensemble

In [None]:
# Parameters
num_samples = 5         # Number of bootstrap subsets (one model is trained per subset)
subset_fraction = 0.25  # Fraction of the training set drawn for each subset
subset_size = int(len(x_train) * subset_fraction)
num_models = num_samples

# Seed the bootstrap sampler and Keras so a Restart & Run All reproduces the
# same subsets and the same initial weights.
SEED = 42
rng = np.random.default_rng(SEED)
tf.keras.utils.set_random_seed(SEED)

# Fancy indexing with an index array needs a NumPy array; `np.asarray` is a
# no-op for ndarrays and converts an eager tf.Tensor if the labels are one.
y_train_array = np.asarray(y_train)

models = []
for _ in range(num_models):
    model = create_cnn_model()
    # Bootstrap sample: draw `subset_size` row indices *with replacement*.
    indices = rng.integers(0, len(x_train), size=subset_size)
    x_train_bootstrap = x_train[indices]
    y_train_bootstrap = y_train_array[indices]

    model.fit(x_train_bootstrap, y_train_bootstrap, epochs=10, batch_size=32, verbose=0)
    models.append(model)

# Generate predictions with the bagging ensemble





In [None]:
def ensemble_predictions(models, x_test):
    """Predict class ids by averaging the outputs of every model (soft voting).

    Args:
        models: Iterable of objects exposing ``predict(x)`` that returns an
            array of per-class scores with shape ``(n_samples, n_classes)``.
        x_test: Input batch forwarded unchanged to each model's ``predict``.

    Returns:
        1-D ``np.ndarray`` holding, for every sample, the index of the class
        with the highest mean score across the models.

    Raises:
        ValueError: If ``models`` is empty (the mean would be undefined).
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble_predictions requires at least one model")

    # Accumulate in float64; the output width is taken from the models
    # themselves rather than being hard-coded to a fixed number of classes.
    summed = None
    for model in models:
        scores = np.asarray(model.predict(x_test), dtype=np.float64)
        summed = scores.copy() if summed is None else summed + scores

    # Average predictions
    summed /= len(models)
    return np.argmax(summed, axis=1)

# Class ids predicted by the averaged ensemble for every test image.
y_pred = ensemble_predictions(models, x_test)

# Score the ensemble: the one-hot test labels are reduced to class ids with
# argmax and compared against the predicted ids.
ensemble_accuracy = accuracy_score(
    y_true=np.argmax(y_test, axis=1),
    y_pred=y_pred,
)
print(f'Ensemble Accuracy: {ensemble_accuracy * 100:.2f}%')


# Generate loss and accuracy curves

In [None]:
history = []  # To store loss and accuracy for each model

# Record learning curves for every ensemble member.
# NOTE: this runs *additional* epochs on the full training set, starting from
# each model's current weights and using the test split as validation data.
# The weights are snapshotted and restored afterwards so the bagged ensemble
# itself is left untouched and keeps matching the predictions already made.
for i, model in enumerate(models):
    print(f"Training model {i+1}")
    bagged_weights = model.get_weights()
    try:
        hist = model.fit(
            x_train,
            y_train,
            validation_data=(x_test, y_test),
            epochs=10,  # Number of epochs
            batch_size=64,
            verbose=1,
        )
    finally:
        # Restore even if training is interrupted part-way through.
        model.set_weights(bagged_weights)
    # Append training history for plotting
    history.append(hist.history)


In [None]:

# Combine history from all models into one tidy frame.
if not history:
    raise ValueError("`history` is empty - run the training-history cell first")

history_frames = []
for i, hist in enumerate(history):
    # Keras names the metric after the string given to `compile`; normalise
    # the short 'acc' spelling so downstream code sees one set of column names.
    temp_df = pd.DataFrame(hist).rename(
        columns={"acc": "accuracy", "val_acc": "val_accuracy"}
    )
    temp_df["Epoch"] = range(1, len(temp_df) + 1)
    temp_df["Model"] = f"Model {i+1}"
    history_frames.append(temp_df)

# Concatenate once (instead of growing the frame inside the loop) and rebuild
# the index so rows from different models do not share index labels.
history_df = pd.concat(history_frames, ignore_index=True)

# Melt only the metric columns that were actually recorded, e.g. the `val_*`
# columns are absent when no validation data was supplied to `fit`.
metric_columns = [
    name
    for name in ("loss", "val_loss", "accuracy", "val_accuracy")
    if name in history_df.columns
]

melted_df = pd.melt(
    history_df,
    id_vars=["Epoch", "Model"],
    value_vars=metric_columns,
    var_name="Metric",
    value_name="Value",
)


# Plot the loss and accuracy curves


In [None]:
# Training vs. validation loss per epoch, one facet per ensemble member.
# (The plotnine names used here are imported in the notebook's import cell.)
loss_rows = melted_df[melted_df["Metric"].isin(["loss", "val_loss"])]

loss_plot = (
    ggplot(loss_rows, aes(x="Epoch", y="Value", color="Metric"))
    + geom_line()
    + facet_wrap("~Model")
    + labs(
        title="Training and Validation Loss",
        x="Epoch",
        y="Loss",
        color="Metric",
    )
    + theme_minimal()
)
# End the cell with the plot object so the notebook renders it through its
# rich display; `print()` may only emit a text summary instead of the figure.
loss_plot


In [None]:
# Training vs. validation accuracy per epoch, one facet per ensemble member.
accuracy_rows = melted_df[melted_df["Metric"].isin(["accuracy", "val_accuracy"])]

accuracy_plot = (
    ggplot(accuracy_rows, aes(x="Epoch", y="Value", color="Metric"))
    + geom_line()
    + facet_wrap("~Model")
    + labs(
        title="Training and Validation Accuracy",
        x="Epoch",
        y="Accuracy",
        color="Metric",
    )
    + theme_minimal()
)
# End the cell with the plot object so the notebook renders it through its
# rich display; `print()` may only emit a text summary instead of the figure.
accuracy_plot


# Plot confusion matrix

In [None]:
# Compute confusion matrix (rows = true class, columns = predicted class).
cm = confusion_matrix(np.argmax(y_test, axis=1), y_pred)
class_labels = [f"Class {i}" for i in range(cm.shape[0])]
cm_df = pd.DataFrame(cm, index=class_labels, columns=class_labels)

# Long format: one row per (true class, predicted class) cell.
cm_melted = cm_df.reset_index().melt(id_vars="index")
cm_melted.columns = ["True Class", "Predicted Class", "Count"]

# Plain strings would be ordered lexicographically on the axes
# ("Class 10" before "Class 2"); ordered categoricals keep the numeric order.
for axis_column in ("True Class", "Predicted Class"):
    cm_melted[axis_column] = pd.Categorical(
        cm_melted[axis_column], categories=class_labels, ordered=True
    )

confusion_matrix_plot = (
    ggplot(cm_melted, aes(x="Predicted Class", y="True Class", fill="Count"))
    + geom_tile(color="white")  # Create the heatmap tiles
    + geom_text(aes(label="Count"), size=6, color="black")  # Add text annotations
    + scale_fill_gradient(low="white", high="blue")  # Set color gradient
    + labs(
        title="Confusion Matrix",
        x="Predicted Class",
        y="True Class",
        fill="Count",
    )
    + theme_minimal()
    # A large canvas and vertical x tick labels keep the per-class grid legible.
    + theme(figure_size=(20, 20), axis_text_x=element_text(rotation=90))
)
# End the cell with the plot object so the notebook renders it through its
# rich display; `print()` may only emit a text summary instead of the figure.
confusion_matrix_plot
