
# CIFAR-10: Four-Layer CNN in TensorFlow — ReLU vs Tanh (Early Stop at 25% Training Error)


## 0) Setup & Imports
If TensorFlow isn't installed, install it (e.g., `pip install tensorflow`). Then import the libraries.


In [1]:

# If TensorFlow is not installed in your environment, uncomment the next line:
# !pip install -q tensorflow

import os, random, sys, time, math
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Report the runtime: TensorFlow version first, then how many GPUs TensorFlow can see.
print("TensorFlow version:", tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpu_devices))


ModuleNotFoundError: No module named 'numpy'


## 1) Reproducibility & Global Config


In [None]:

# One fixed seed, applied to each RNG this notebook imports:
# Python's `random`, NumPy, and TensorFlow.
SEED = 42
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)

# Training hyper-parameters.
TARGET_TRAIN_ACCURACY = 0.75  # equivalent to a training error of 0.25
MAX_EPOCHS = 100              # upper bound only; training stops early once training accuracy >= TARGET_TRAIN_ACCURACY
BATCH_SIZE = 128



## 2) Load & Prepare CIFAR-10
- CIFAR-10: 50k train / 10k test images (32×32×3), 10 classes.
- Normalize images to [0,1].
- Keep labels as integers and use SparseCategoricalCrossentropy.


In [None]:

# Load the CIFAR-10 train/test arrays through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Labels come as (N, 1); drop the singleton axis and store them as int32
# so they can be fed to a sparse (integer-label) loss.
y_train = np.squeeze(y_train).astype("int32")
y_test = np.squeeze(y_test).astype("int32")

# Convert pixels to float32 and scale into [0, 1].
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Print the shapes of both splits as a sanity check.
for _split_tag, _images, _labels in (("Train:", x_train, y_train), ("Test :", x_test, y_test)):
    print(_split_tag, _images.shape, _labels.shape)



## 3) Input Pipeline (tf.data) + Augmentation
- Shuffle, batch, prefetch for performance.
- Optional Keras preprocessing layers for augmentation (defined here; applied inside the model by `build_cnn` when `do_augment` is true).


In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Hold out the leading 10% of the training arrays as the validation set;
# the remainder is what the models are actually fitted on.
val_fraction = 0.1
val_size = int(len(x_train) * val_fraction)
x_val, x_train_sub = x_train[:val_size], x_train[val_size:]
y_val, y_train_sub = y_train[:val_size], y_train[val_size:]


def _batch_and_prefetch(dataset):
    # Shared tail of every pipeline: fixed-size batches, then prefetch.
    return dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)


# Only the training stream is shuffled (seeded, reshuffled on each iteration);
# validation and test data keep their original order.
train_ds = _batch_and_prefetch(
    tf.data.Dataset.from_tensor_slices((x_train_sub, y_train_sub)).shuffle(
        10_000, seed=SEED, reshuffle_each_iteration=True
    )
)
val_ds = _batch_and_prefetch(tf.data.Dataset.from_tensor_slices((x_val, y_val)))
test_ds = _batch_and_prefetch(tf.data.Dataset.from_tensor_slices((x_test, y_test)))

# Simple augmentation stack (disable by setting do_augment=False).
# build_cnn reads both names and places the stack at the front of the model.
do_augment = True
_augment_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
]
data_augmentation = keras.Sequential(_augment_layers, name="augment")

print(train_ds, val_ds, test_ds)


## 4) Model Builder: Four Conv Layers
- Two conv blocks, each Conv → Conv → MaxPool → Dropout ⇒ **4 Conv layers total**.
- Classifier head: Flatten → Dense(256) → Dropout → Dense(10, softmax).
- Hidden layers use either **ReLU** or **Tanh** as requested.


In [None]:
def build_cnn(activation: str = "relu", input_shape=(32, 32, 3), n_classes: int = 10) -> keras.Model:
    """Build the four-convolution CNN used for CIFAR-10.

    Layout: optional augmentation, then two blocks of
    [Conv2D, Conv2D, MaxPooling2D, Dropout(0.25)] with 32 and 64 filters,
    then Flatten -> Dense(256) -> Dropout(0.5) -> Dense(n_classes, softmax).

    Args:
        activation: Activation used by every Conv2D layer and the hidden
            Dense layer (the output layer is always softmax).
        input_shape: Shape of a single input image.
        n_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``keras.Model`` named ``cnn_4conv_<activation>``.
    """
    inputs = keras.Input(shape=input_shape, name="images")

    # The augmentation stack and its on/off switch are module-level globals.
    features = data_augmentation(inputs) if do_augment else inputs

    # Two structurally identical conv blocks; only the filter count differs.
    for n_filters in (32, 64):
        for _ in range(2):
            features = layers.Conv2D(
                n_filters, 3, padding="same", activation=activation
            )(features)
        features = layers.MaxPooling2D()(features)
        features = layers.Dropout(0.25)(features)

    # Classifier head.
    features = layers.Flatten()(features)
    features = layers.Dense(256, activation=activation)(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(n_classes, activation="softmax")(features)

    return keras.Model(inputs, outputs, name=f"cnn_4conv_{activation}")

# Quick check: build a ReLU model with the default arguments and call
# summary() on it; the return value of summary() is discarded into `_`.
_ = build_cnn("relu").summary()


## 5) Early Stop on Training Error ≤ 25%
Stop when training accuracy ≥ 0.75 (i.e., error ≤ 25%). We check it at the end of each epoch.


In [None]:
class StopOnTrainingAccuracy(keras.callbacks.Callback):
    """Stop training as soon as the logged training accuracy reaches a target.

    The check runs once per epoch, in ``on_epoch_end``.

    Args:
        target_accuracy: Threshold on the training accuracy. Training is
            stopped after the first epoch whose logged accuracy is greater
            than or equal to this value.
    """

    # Log keys that may hold the training accuracy, in lookup order.
    _ACCURACY_KEYS = ("accuracy", "sparse_categorical_accuracy")

    def __init__(self, target_accuracy: float = 0.75):
        super().__init__()
        self.target_accuracy = target_accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Read the epoch's training accuracy from *logs* and stop if the target is met."""
        logs = logs or {}
        # Take the first key that is actually present. The test is an explicit
        # `is not None` because a logged accuracy of 0.0 is falsy, and the
        # `a or b` idiom would wrongly treat it as "missing".
        train_acc = next(
            (logs[key] for key in self._ACCURACY_KEYS if logs.get(key) is not None),
            None,
        )
        if train_acc is not None and train_acc >= self.target_accuracy:
            print(f"\nReached training accuracy {train_acc:.3f} ≥ {self.target_accuracy:.2f}. Stopping.")
            self.model.stop_training = True


## 6) Compile Helper
Adam optimizer + SparseCategoricalCrossentropy + accuracy metric.


In [None]:

def compile_model(model: keras.Model, lr: float = 1e-3):
    """Compile *model* in place and return it.

    Uses Adam, sparse categorical cross-entropy, and the "accuracy" metric.

    Args:
        model: The Keras model to compile.
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        The same ``model`` object, now compiled.
    """
    adam = keras.optimizers.Adam(learning_rate=lr)
    cross_entropy = keras.losses.SparseCategoricalCrossentropy()
    model.compile(optimizer=adam, loss=cross_entropy, metrics=["accuracy"])
    return model


## 7) Train Function
Trains up to `MAX_EPOCHS` but stops early once training error ≤ 25%.


In [None]:

def train_model(activation: str):
    """Build, compile, and fit one CNN with the given hidden activation.

    Training runs on ``train_ds`` with ``val_ds`` as validation data for at
    most ``MAX_EPOCHS`` epochs, and is cut short by ``StopOnTrainingAccuracy``
    once the training accuracy reaches ``TARGET_TRAIN_ACCURACY``.

    Args:
        activation: Activation name forwarded to ``build_cnn``.

    Returns:
        A ``(model, history)`` tuple: the fitted model and the object
        returned by ``model.fit``.
    """
    # Reset every RNG to the notebook seed before building the model, so a run
    # does not depend on how many random draws earlier cells or earlier
    # experiments consumed. Without this, the second experiment would start
    # from a different random state than the first.
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

    model = build_cnn(activation=activation)
    compile_model(model)
    callbacks = [StopOnTrainingAccuracy(target_accuracy=TARGET_TRAIN_ACCURACY)]
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=MAX_EPOCHS,
        callbacks=callbacks,
        verbose=2,
    )
    return model, history



## 8) Run Two Experiments: ReLU vs Tanh
Only difference is the hidden-layer activation.


In [None]:

# Per-experiment training logs, keyed by activation name ("relu" / "tanh").
histories = {}

print("\n=== Training with ReLU activation ===")
model_relu, hist_relu = train_model("relu")
histories["relu"] = hist_relu.history

print("\n=== Training with Tanh activation ===")
model_tanh, hist_tanh = train_model("tanh")
histories["tanh"] = hist_tanh.history

# The number of epochs each run lasted is read off the length of its "loss" log.
epochs_relu, epochs_tanh = (len(histories[key]["loss"]) for key in ("relu", "tanh"))
for _run_label, _n_epochs in (("ReLU", epochs_relu), ("Tanh", epochs_tanh)):
    print(f"{_run_label} stopped after {_n_epochs} epoch(s).")



## 9) Plots: Accuracy, Loss, and Training Error
We overlay curves for ReLU and Tanh. The target line shows the 25% training error threshold.


In [None]:

def plot_series(ax, y, label):
    """Plot *y* on *ax* against 1-based epoch numbers.

    Args:
        ax: Axes-like object providing ``plot``, ``set_xlabel`` and ``grid``.
        y: Sequence of per-epoch values.
        label: Legend label for the curve.
    """
    # Epochs are numbered from 1, one per entry of y.
    epoch_numbers = np.arange(len(y)) + 1
    ax.plot(epoch_numbers, y, label=label)
    ax.set_xlabel("Epoch")
    ax.grid(True)

# (history key, legend name) for each experiment, in plotting order.
_PLOT_RUNS = (("relu", "ReLU"), ("tanh", "Tanh"))

# Accuracy plot: train and validation accuracy for both activations.
plt.figure(figsize=(8, 5))
for _run_key, _run_name in _PLOT_RUNS:
    plot_series(plt.gca(), histories[_run_key]["accuracy"], f"{_run_name} — train acc")
    plot_series(plt.gca(), histories[_run_key]["val_accuracy"], f"{_run_name} — val acc")
# The threshold line AND its label come from the config constant, so the
# figure stays correct if TARGET_TRAIN_ACCURACY is changed.
plt.axhline(
    TARGET_TRAIN_ACCURACY,
    linestyle="--",
    linewidth=1,
    label=f"Target train acc ({TARGET_TRAIN_ACCURACY:.2f})",
)
plt.title("Accuracy vs Epochs — ReLU vs Tanh")
plt.legend()
plt.show()

# Loss plot: train and validation loss for both activations.
# (plot_series already sets the x-label and the grid on the current axes.)
plt.figure(figsize=(8, 5))
for _run_key, _run_name in _PLOT_RUNS:
    plot_series(plt.gca(), histories[_run_key]["loss"], f"{_run_name} — train loss")
    plot_series(plt.gca(), histories[_run_key]["val_loss"], f"{_run_name} — val loss")
plt.title("Loss vs Epochs — ReLU vs Tanh")
plt.legend()
plt.show()

# Training error plot: error = 1 - training accuracy, per epoch.
plt.figure(figsize=(8, 5))
relu_train_error = 1 - np.array(histories["relu"]["accuracy"])
tanh_train_error = 1 - np.array(histories["tanh"]["accuracy"])
plot_series(plt.gca(), relu_train_error, "ReLU — train error")
plot_series(plt.gca(), tanh_train_error, "Tanh — train error")
# Error threshold is the complement of the accuracy target (0.25 for 0.75).
target_train_error = 1 - TARGET_TRAIN_ACCURACY
plt.axhline(
    target_train_error,
    linestyle="--",
    linewidth=1,
    label=f"Target training error ({target_train_error:.2f})",
)
plt.title("Training Error vs Epochs — ReLU vs Tanh")
plt.legend()
plt.show()



## 10) Evaluate on Test Set
Report final test accuracies.


In [None]:

# Evaluate both trained models on the test pipeline and keep the accuracy
# (second value returned by evaluate) under the activation's name.
test_accs = {}
for _activation, _trained_model in (("relu", model_relu), ("tanh", model_tanh)):
    loss, acc = _trained_model.evaluate(test_ds, verbose=0)
    test_accs[_activation] = acc

# Cast to built-in floats for printing.
print("Test accuracies:", {name: float(value) for name, value in test_accs.items()})



## 11) (Optional) Save Histories for Reporting


In [None]:

# Flatten the nested history dicts into "<activation>_<short metric>" arrays
# (e.g. "relu_val_acc") and write them into a single .npz archive.
_metric_aliases = (
    ("acc", "accuracy"),
    ("val_acc", "val_accuracy"),
    ("loss", "loss"),
    ("val_loss", "val_loss"),
)
_history_arrays = {
    f"{activation}_{alias}": np.array(histories[activation][metric])
    for activation in ("relu", "tanh")
    for alias, metric in _metric_aliases
}
np.savez("histories_relu_tanh.npz", **_history_arrays)
print("Saved histories to histories_relu_tanh.npz")



## 12) Notes
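- If training takes too long to reach the target (or never reaches it), slightly lower `TARGET_TRAIN_ACCURACY` or reduce dropout/augmentation; if it halts sooner than you want, raise the target instead.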
- If you need higher capacity, increase filters or add BatchNorm after Conv2D layers.
- A GPU is highly recommended.
