
# CIFAR-10 CNN (4 Conv Layers) — ReLU vs Tanh  
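**Each model trains until its training error drops to 25% (training accuracy ≥ 0.75); the time taken by every epoch is recorded and plotted together with the training error in a single combined figure.**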



## 0) Setup & Imports


In [4]:

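# If TensorFlow is not installed in this kernel's environment, uncomment the next line
# (%pip installs into the running kernel; the recorded outputs below used TensorFlow 2.20.0):
# %pip install -q tensorflow==2.20.0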

import os, random, sys, time, math
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Report the runtime so the per-epoch timings further down can be put in context
# (an empty GPU list means TensorFlow found no GPU device).
visible_gpus = tf.config.list_physical_devices('GPU')
print("TensorFlow:", tf.__version__)
print("GPUs:", visible_gpus)


TensorFlow: 2.20.0
GPUs: []



## 1) Reproducibility & Config


In [5]:

# Reproducibility: a single Keras call seeds Python's `random`, NumPy and the
# TensorFlow backend together, so re-running this cell restores the same seeds.
SEED = 42
keras.utils.set_random_seed(SEED)

# Training configuration shared by both activation runs.
BATCH_SIZE = 128               # samples per batch for the train/val/test pipelines
MAX_EPOCHS = 100               # upper bound on epochs passed to fit()
TARGET_TRAIN_ACCURACY = 0.75   # 1 - 0.25 training error



## 2) Load CIFAR-10 & Preprocess


In [6]:

# Fetch the CIFAR-10 arrays as (images, labels) pairs for the train and test splits.
train_split, test_split = keras.datasets.cifar10.load_data()

# Images: cast to float32 and rescale by 1/255.
x_train = train_split[0].astype("float32") / 255.0
x_test = test_split[0].astype("float32") / 255.0

# Labels: squeeze() drops the singleton axis, leaving 1-D int32 class ids.
y_train = train_split[1].squeeze().astype("int32")
y_test = test_split[1].squeeze().astype("int32")

# The raw arrays are no longer needed once the converted copies exist.
del train_split, test_split

print("Train:", x_train.shape, y_train.shape, " Test:", x_test.shape, y_test.shape)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step
Train: (50000, 32, 32, 3) (50000,)  Test: (10000, 32, 32, 3) (10000,)



## 3) Datasets & (Optional) Augmentation


In [7]:

AUTOTUNE = tf.data.AUTOTUNE

# Fixed validation split: the leading 10% of the training arrays is held out,
# the remainder is used for fitting.
val_fraction = 0.1
val_size = int(len(x_train) * val_fraction)
x_val, x_train_sub = x_train[:val_size], x_train[val_size:]
y_val, y_train_sub = y_train[:val_size], y_train[val_size:]

# Training pipeline: shuffle (re-drawn every epoch) -> batch -> prefetch.
# NOTE(review): the 10_000-element shuffle buffer is smaller than the training
# subset, so each epoch is only approximately shuffled.
train_ds = tf.data.Dataset.from_tensor_slices((x_train_sub, y_train_sub))
train_ds = train_ds.shuffle(10_000, seed=SEED, reshuffle_each_iteration=True)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Validation and test pipelines keep their original order: batch -> prefetch only.
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_ds = val_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Augmentation pipeline; build_cnn() places it right after the model input
# whenever `do_augment` is true.
do_augment = True
augment_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
]
data_augmentation = keras.Sequential(augment_layers, name="augment")



## 4) Model Builder (Four Conv Layers)


In [8]:

def build_cnn(activation: str = "relu", input_shape=(32, 32, 3), n_classes: int = 10) -> keras.Model:
    """Assemble the four-conv-layer CNN used for the activation comparison.

    Architecture: two blocks of Conv-Conv-MaxPool-Dropout(0.25) with 32 and then
    64 filters, followed by Flatten, a 256-unit dense layer, Dropout(0.5) and a
    softmax output over `n_classes`.

    Args:
        activation: activation applied to every conv layer and to the hidden
            dense layer; it also becomes part of the model name.
        input_shape: shape of one input image.
        n_classes: number of output classes.

    Returns:
        An uncompiled `keras.Model` named ``cnn_4conv_<activation>``.

    Reads the notebook-level `do_augment` flag; when it is true the shared
    `data_augmentation` pipeline is applied directly to the input.
    """
    inputs = keras.Input(shape=input_shape, name="images")
    features = data_augmentation(inputs) if do_augment else inputs

    # Block A (32 filters) then Block B (64 filters): two convs -> pool -> dropout.
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, padding="same", activation=activation)(features)
        features = layers.Conv2D(n_filters, 3, padding="same", activation=activation)(features)
        features = layers.MaxPooling2D()(features)
        features = layers.Dropout(0.25)(features)

    # Classifier head.
    hidden = layers.Flatten()(features)
    hidden = layers.Dense(256, activation=activation)(hidden)
    hidden = layers.Dropout(0.5)(hidden)
    outputs = layers.Dense(n_classes, activation="softmax")(hidden)

    return keras.Model(inputs, outputs, name=f"cnn_4conv_{activation}")

# Print the layer table of a throwaway ReLU model. summary() prints and returns
# None, so nothing needs to be bound; assigning to `_` would overwrite IPython's
# last-output variable.
build_cnn("relu").summary()



## 5) Callbacks: Early Stop (train acc ≥ 0.75) + Epoch Timing


In [9]:

class StopOnTrainingAccuracy(keras.callbacks.Callback):
    """Stop training after the first epoch whose training accuracy reaches a target.

    At the end of each epoch the callback reads the training accuracy from the
    epoch logs and sets `model.stop_training` once it is at least
    `target_accuracy`.

    Args:
        target_accuracy: accuracy threshold (fraction in [0, 1]) at which
            training is stopped.
    """

    # Log keys checked for the training accuracy, in lookup order.
    _ACCURACY_KEYS = ("accuracy", "sparse_categorical_accuracy")

    def __init__(self, target_accuracy: float = 0.75):
        super().__init__()
        self.target_accuracy = target_accuracy
        self._warned_missing_metric = False

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        # Explicit lookup: `logs.get(a) or logs.get(b)` would treat a legitimate
        # accuracy of 0.0 as if the metric were missing.
        train_acc = next(
            (logs[key] for key in self._ACCURACY_KEYS if logs.get(key) is not None),
            None,
        )

        if train_acc is None:
            # Without an accuracy metric the callback can never fire; say so once
            # instead of silently training for the full epoch budget.
            if not self._warned_missing_metric:
                print(
                    f"\nStopOnTrainingAccuracy: none of {self._ACCURACY_KEYS} found in the "
                    f"epoch logs (available: {sorted(logs)}); early stopping is inactive."
                )
                self._warned_missing_metric = True
            return

        if train_acc >= self.target_accuracy:
            print(f"\nReached training accuracy {train_acc:.3f} ≥ {self.target_accuracy:.2f}. Stopping.")
            self.model.stop_training = True

class EpochTimeHistory(keras.callbacks.Callback):
    """Record the duration of every epoch, in seconds, in `self.epoch_times`.

    The list is reset at the start of each training run, so one instance can be
    reused across several `fit()` calls.
    """

    def __init__(self):
        super().__init__()
        # Defined up front so the attributes exist even before training starts.
        self._epoch_start = None
        self.epoch_times = []

    def on_train_begin(self, logs=None):
        self._epoch_start = None
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs=None):
        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        self._epoch_start = time.perf_counter()

    def on_epoch_end(self, epoch, logs=None):
        if self._epoch_start is None:
            # No matching on_epoch_begin was seen; nothing meaningful to record.
            return
        self.epoch_times.append(time.perf_counter() - self._epoch_start)
        self._epoch_start = None



## 6) Compile Helper


In [10]:

def compile_model(model: keras.Model, lr: float = 1e-3):
    """Compile `model` in place for sparse-label classification.

    Uses the Adam optimizer with learning rate `lr`, sparse categorical
    cross-entropy as the loss, and tracks the "accuracy" metric.

    Returns:
        The same `model` instance, now compiled.
    """
    adam = keras.optimizers.Adam(learning_rate=lr)
    cross_entropy = keras.losses.SparseCategoricalCrossentropy()
    model.compile(optimizer=adam, loss=cross_entropy, metrics=["accuracy"])
    return model



## 7) Train Utility


In [11]:

def train_model(activation: str):
    """Build, compile and fit one CNN that uses `activation`.

    The model is trained on `train_ds` and validated on `val_ds` for at most
    `MAX_EPOCHS` epochs; training ends early once the training accuracy reaches
    `TARGET_TRAIN_ACCURACY`.

    Returns:
        A tuple ``(model, history, epoch_times)``: the trained model, the
        History object returned by `fit()`, and the list of per-epoch durations
        in seconds.
    """
    net = compile_model(build_cnn(activation=activation))

    early_stop = StopOnTrainingAccuracy(target_accuracy=TARGET_TRAIN_ACCURACY)
    timer = EpochTimeHistory()

    fit_options = dict(
        validation_data=val_ds,
        epochs=MAX_EPOCHS,
        callbacks=[early_stop, timer],
        verbose=2,
    )
    history = net.fit(train_ds, **fit_options)

    return net, history, timer.epoch_times



## 8) Run: ReLU vs Tanh


In [None]:

# Per-activation results, keyed by "relu" / "tanh".
histories, epoch_times = {}, {}

# --- ReLU run ---
print("\n=== Training with ReLU activation ===")
model_relu, hist_relu, times_relu = train_model("relu")
histories.update(relu=hist_relu.history)
epoch_times.update(relu=times_relu)

# --- Tanh run ---
print("\n=== Training with Tanh activation ===")
model_tanh, hist_tanh, times_tanh = train_model("tanh")
histories.update(tanh=hist_tanh.history)
epoch_times.update(tanh=times_tanh)

# Number of epochs each run needed before it stopped.
n_relu_epochs = len(histories["relu"]["accuracy"])
n_tanh_epochs = len(histories["tanh"]["accuracy"])
print("ReLU epochs:", n_relu_epochs, "Tanh epochs:", n_tanh_epochs)



=== Training with ReLU activation ===
Epoch 1/100
352/352 - 21s - 58ms/step - accuracy: 0.3688 - loss: 1.7224 - val_accuracy: 0.5208 - val_loss: 1.3479
Epoch 2/100
352/352 - 19s - 53ms/step - accuracy: 0.4984 - loss: 1.3904 - val_accuracy: 0.5986 - val_loss: 1.1474
Epoch 3/100



## 9) Single Figure: Training Error & Time per Epoch
- Left y-axis: Training error (1 - accuracy)
- Right y-axis: Seconds per epoch
- X-axis: Epoch number


In [None]:

# Training error (1 - accuracy) per epoch for each activation.
relu_train_err = 1 - np.array(histories["relu"]["accuracy"], dtype=float)
tanh_train_err = 1 - np.array(histories["tanh"]["accuracy"], dtype=float)

epochs_relu = np.arange(1, len(relu_train_err)+1)
epochs_tanh = np.arange(1, len(tanh_train_err)+1)

fig, ax1 = plt.subplots(figsize=(9, 5))

# Left axis: training error
line1, = ax1.plot(epochs_relu, relu_train_err, label="ReLU — train error")
line2, = ax1.plot(epochs_tanh, tanh_train_err, linestyle="--", label="Tanh — train error")
ax1.set_xlabel("Epochs")
ax1.set_ylabel("Training error rate")
ax1.grid(True)

# Reference line derived from the configured stopping threshold, so the figure
# cannot drift from the value actually used during training. Drawn in gray so it
# is not confused with the ReLU curve, and kept as a handle for the legend.
target_err = 1 - TARGET_TRAIN_ACCURACY
target_line = ax1.axhline(target_err, linestyle=":", linewidth=1, color="gray",
                          label=f"Target error {target_err:.2f}")

# Right axis: time per epoch
ax2 = ax1.twinx()
line3, = ax2.plot(np.arange(1, len(epoch_times["relu"])+1), epoch_times["relu"], marker="o", linewidth=1, label="ReLU — sec/epoch")
line4, = ax2.plot(np.arange(1, len(epoch_times["tanh"])+1), epoch_times["tanh"], marker="x", linewidth=1, label="Tanh — sec/epoch")
ax2.set_ylabel("Time per epoch (sec)")

# Combine the handles from both axes (including the target line) into one legend.
lines = [line1, line2, target_line, line3, line4]
labels = [handle.get_label() for handle in lines]
ax1.legend(lines, labels, loc="best")
ax1.set_title("Training Error (left) and Seconds/Epoch (right) — ReLU vs Tanh")
plt.show()



## 10) Evaluate on Test Set


In [None]:

# evaluate() returns [loss, accuracy] for these models; only the accuracies are
# reported. NOTE: `loss` is rebound by the second call and ends up holding the
# Tanh model's test loss.
loss, acc_relu = model_relu.evaluate(test_ds, verbose=0)
loss, acc_tanh = model_tanh.evaluate(test_ds, verbose=0)

test_accuracy = dict(relu_test_acc=float(acc_relu), tanh_test_acc=float(acc_tanh))
print(test_accuracy)
