In [1]:
# -------------------- 1. Imports --------------------
import os, random, numpy as np, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.metrics import precision_recall_fscore_support
import matplotlib.pyplot as plt

In [2]:
# -------------------- 2. Reproducibility --------------------
SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)

In [3]:
# -------------------- 3. Load CIFAR-10 --------------------
(num_classes, input_shape) = (10, (32, 32, 3))
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
y_train = y_train.flatten()
y_test  = y_test.flatten()

In [4]:
# Train/val split
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.1, random_state=SEED, stratify=y_train
)

In [5]:
# Normalize to [0,1]
x_train = x_train.astype("float32") / 255.0
x_val   = x_val.astype("float32") / 255.0
x_test  = x_test.astype("float32") / 255.0

In [6]:
# -------------------- 4. Define CNN --------------------
def make_scratch_cnn(input_shape=(32,32,3), num_classes=10):
    inputs = keras.Input(shape=input_shape)
    x = inputs

    # Block 1
    x = layers.Conv2D(32, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(32, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.25)(x)

    # Block 2
    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.25)(x)

    # Block 3
    x = layers.Conv2D(128, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(128, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.25)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(256)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs, name="scratch_cnn")


In [7]:
model_scratch = make_scratch_cnn(input_shape, num_classes)
model_scratch.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
model_scratch.summary()

In [8]:
# -------------------- 5. Train --------------------
early = keras.callbacks.EarlyStopping(
    patience=5, restore_best_weights=True, monitor="val_accuracy"
)

history = model_scratch.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=30,
    batch_size=128,
    callbacks=[early],
    verbose=2
)

Epoch 1/30
352/352 - 90s - 257ms/step - accuracy: 0.4469 - loss: 1.5555 - val_accuracy: 0.3042 - val_loss: 2.0565
Epoch 2/30
352/352 - 64s - 181ms/step - accuracy: 0.6272 - loss: 1.0462 - val_accuracy: 0.5644 - val_loss: 1.3044
Epoch 3/30
352/352 - 128s - 363ms/step - accuracy: 0.6917 - loss: 0.8724 - val_accuracy: 0.6780 - val_loss: 0.8986
Epoch 4/30
352/352 - 133s - 378ms/step - accuracy: 0.7251 - loss: 0.7759 - val_accuracy: 0.6360 - val_loss: 1.0840
Epoch 5/30
352/352 - 137s - 389ms/step - accuracy: 0.7493 - loss: 0.7084 - val_accuracy: 0.7682 - val_loss: 0.6560
Epoch 6/30
352/352 - 134s - 381ms/step - accuracy: 0.7716 - loss: 0.6559 - val_accuracy: 0.6882 - val_loss: 0.9182
Epoch 7/30
352/352 - 172s - 488ms/step - accuracy: 0.7886 - loss: 0.6101 - val_accuracy: 0.7456 - val_loss: 0.7218
Epoch 8/30
352/352 - 195s - 554ms/step - accuracy: 0.7998 - loss: 0.5717 - val_accuracy: 0.8008 - val_loss: 0.5648
Epoch 9/30
352/352 - 123s - 348ms/step - accuracy: 0.8122 - loss: 0.5353 - val_acc

In [9]:
# -------------------- 6. Evaluate + Metrics --------------------
test_probs = model_scratch.predict(x_test, batch_size=256, verbose=0)
y_pred = test_probs.argmax(axis=1)

acc = (y_pred == y_test).mean()
print(f"[Scratch CNN] Test Accuracy: {acc:.4f}")

print("\nClassification report (per-class precision/recall/F1):")
print(classification_report(y_test, y_pred, digits=4))

cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", cm)

[Scratch CNN] Test Accuracy: 0.8259

Classification report (per-class precision/recall/F1):
              precision    recall  f1-score   support

           0     0.7840    0.8820    0.8301      1000
           1     0.8382    0.9380    0.8853      1000
           2     0.8210    0.6880    0.7486      1000
           3     0.6980    0.6840    0.6909      1000
           4     0.8459    0.7850    0.8143      1000
           5     0.7515    0.7710    0.7611      1000
           6     0.8910    0.8500    0.8700      1000
           7     0.8646    0.8810    0.8727      1000
           8     0.9193    0.8660    0.8919      1000
           9     0.8550    0.9140    0.8835      1000

    accuracy                         0.8259     10000
   macro avg     0.8268    0.8259    0.8249     10000
weighted avg     0.8268    0.8259    0.8249     10000

Confusion matrix:
 [[882  27  13   9   2   2   1   4  23  37]
 [  7 938   0   0   1   1   1   0   1  51]
 [ 83  12 688  44  55  40  40  21   6  11]
 

In [10]:
# Macro/micro precision, recall, f1
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')
print(f"Macro Precision: {prec:.4f} | Macro Recall: {rec:.4f} | Macro F1: {f1:.4f}")
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='micro')
print(f"Micro Precision: {prec:.4f} | Micro Recall: {rec:.4f} | Micro F1: {f1:.4f}")


Macro Precision: 0.8268 | Macro Recall: 0.8259 | Macro F1: 0.8249
Micro Precision: 0.8259 | Micro Recall: 0.8259 | Micro F1: 0.8259


In [11]:
# ROC-AUC (one-vs-rest, needs probabilities)
y_test_oh = keras.utils.to_categorical(y_test, num_classes)
try:
    auc_ovr = roc_auc_score(y_test_oh, test_probs, average="macro", multi_class="ovr")
    auc_ovo = roc_auc_score(y_test_oh, test_probs, average="macro", multi_class="ovo")
    print(f"ROC-AUC (macro, OVR): {auc_ovr:.4f} | ROC-AUC (macro, OVO): {auc_ovo:.4f}")
except Exception as e:
    print("ROC-AUC could not be computed:", e)

ROC-AUC (macro, OVR): 0.9843 | ROC-AUC (macro, OVO): 0.9843
