In [23]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import tensorflow.keras.layers as tfl

# 1. Load CIFAR-100 data
(X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='fine')

# 2. One-hot encode the labels
enc = OneHotEncoder(sparse_output=False)
y_train = enc.fit_transform(y_train)
y_test = enc.transform(y_test)

# 3. Define preprocessing function (resize + normalize)
def preprocess(image, label):
    image = tf.image.resize(image, [128, 128])
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# 4. Shuffle training data before splitting
train_indices = np.arange(len(X_train))
np.random.shuffle(train_indices)

X_train = X_train[train_indices]
y_train = y_train[train_indices]

# 5. Compute split sizes
val_size = int(0.1 * len(X_train))
X_val = X_train[:val_size]
y_val = y_train[:val_size]
X_train = X_train[val_size:]
y_train = y_train[val_size:]

# 6. Create tf.data.Dataset pipelines
batch_size = 64

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.shuffle(buffer_size=1000)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(batch_size)
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)

test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)


In [None]:
def identity_block(X, f, filters):
    X_shortcut = X

    X = tfl.Conv2D(filters=filters[0], kernel_size=1, strides=(1,1), padding='valid')(X)
    X = tfl.BatchNormalization(axis=3)(X, training=True)
    X = tfl.Activation('relu')(X)

    X = tfl.Conv2D(filters=filters[1], kernel_size=f, strides=(1,1), padding='same')(X)
    X = tfl.BatchNormalization(axis=3)(X, training=True)
    X = tfl.Activation('relu')(X)

    X = tfl.Conv2D(filters=filters[2], kernel_size=1, strides=(1,1), padding='valid')(X)
    X = tfl.BatchNormalization(axis=3)(X, training=True)

    X = tfl.Add()([X_shortcut, X])
    X = tfl.Activation('relu')(X)

    return X

def convolutional_block(X, f, filters, s=2):
    X_shortcut = X

    X = tfl.Conv2D(filters=filters[0], kernel_size=1, strides=(s,s), padding='valid')(X)
    X = tfl.BatchNormalization(axis=3)(X, training=True)
    X = tfl.Activation('relu')(X)

    X = tfl.Conv2D(filters=filters[1], kernel_size=f, strides=(1,1), padding='same')(X)
    X = tfl.BatchNormalization(axis=3)(X, training=True)
    X = tfl.Activation('relu')(X)

    X = tfl.Conv2D(filters=filters[2], kernel_size=1, strides=(1,1), padding='valid')(X)
    X = tfl.BatchNormalization(axis=3)(X, training=True)

    X_shortcut = tfl.Conv2D(filters=filters[2], kernel_size=1, strides=(s,s), padding='valid')(X_shortcut)
    X_shortcut = tfl.BatchNormalization(axis=3)(X_shortcut, training=True)

    X = tfl.Add()([X_shortcut, X])
    X = tfl.Activation('relu')(X)

    return X

def resnet(input_shape):
    input_img = tf.keras.Input(shape=input_shape)

    layer = tfl.ZeroPadding2D((3, 3))(input_img)
    layer = tfl.Conv2D(64, (7, 7), strides=(2, 2), padding='valid')(layer)
    layer = tfl.BatchNormalization(axis=3)(layer, training=True)
    layer = tfl.Activation('relu')(layer)
    layer = tfl.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(layer)

    # Stage 1 (3 blocks)
    layer = convolutional_block(layer, 3, [64, 64, 256], s=1)
    layer = identity_block(layer, 3, [64, 64, 256])
    layer = identity_block(layer, 3, [64, 64, 256])

    # Stage 2 (4 blocks)
    layer = convolutional_block(layer, 3, [128, 128, 512], s=2)
    layer = identity_block(layer, 3, [128, 128, 512])
    layer = identity_block(layer, 3, [128, 128, 512])
    layer = identity_block(layer, 3, [128, 128, 512])

    # Stage 3 (6 blocks)
    layer = convolutional_block(layer, 3, [256, 256, 1024], s=2)
    layer = identity_block(layer, 3, [256, 256, 1024])
    layer = identity_block(layer, 3, [256, 256, 1024])
    layer = identity_block(layer, 3, [256, 256, 1024])
    layer = identity_block(layer, 3, [256, 256, 1024])
    layer = identity_block(layer, 3, [256, 256, 1024])

    # Stage 4 (3 blocks)
    layer = convolutional_block(layer, 3, [512, 512, 2048], s=2)
    layer = identity_block(layer, 3, [512, 512, 2048])
    layer = identity_block(layer, 3, [512, 512, 2048])

    # Average pooling and output
    layer = tfl.AveragePooling2D(pool_size=(2, 2), padding='same')(layer)
    layer = tfl.Flatten()(layer)
    outputs = tfl.Dense(100, activation='softmax')(layer)

    model = tf.keras.Model(inputs=input_img, outputs=outputs)
    return model


In [30]:
model = resnet((128, 128, 3))
model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

model.summary()

In [31]:
model.fit(
    train_dataset,
    epochs=50,
    validation_data=val_dataset,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
)

Epoch 1/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 232ms/step - accuracy: 0.0525 - loss: 5.4539 - val_accuracy: 0.1614 - val_loss: 3.4953
Epoch 2/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 203ms/step - accuracy: 0.1822 - loss: 3.4236 - val_accuracy: 0.0230 - val_loss: 50.9870
Epoch 3/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 202ms/step - accuracy: 0.2737 - loss: 2.9195 - val_accuracy: 0.2662 - val_loss: 4.6721
Epoch 4/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 202ms/step - accuracy: 0.3508 - loss: 2.5414 - val_accuracy: 0.2286 - val_loss: 4.1512
Epoch 5/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 202ms/step - accuracy: 0.4309 - loss: 2.1582 - val_accuracy: 0.3276 - val_loss: 6.9304
Epoch 6/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 202ms/step - accuracy: 0.4698 - loss: 1.9914 - val_accuracy: 0.3902 - val_loss: 2.4980
Epo

<keras.src.callbacks.history.History at 0x7add00eb6610>

In [32]:
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.4f}")

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 63ms/step - accuracy: 0.3876 - loss: 2.4841
Test accuracy: 0.3884
