In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
from sklearn.preprocessing import OneHotEncoder

# 1. Load CIFAR-100 with the fine label set and unpack the two splits
train_split, test_split = cifar100.load_data(label_mode='fine')
X_train, y_train = train_split
X_test, y_test = test_split

In [2]:
# 2. One-hot encode the labels.
# The encoder is fitted on the training labels only and then reused for the
# test labels, so both splits share the same category-to-column mapping.
# NOTE: this cell overwrites y_train / y_test; re-running it without first
# re-running the load cell would encode the already-encoded arrays.
enc = OneHotEncoder(sparse_output=False)
enc.fit(y_train)
y_train, y_test = enc.transform(y_train), enc.transform(y_test)

In [3]:
# 3. Define preprocessing function (resize + normalize)
def preprocess(image, label):
    """Resize one image to 128x128 and divide its pixel values by 255.0.

    The label is returned untouched so the function can be passed straight
    to ``Dataset.map`` on a dataset of ``(image, label)`` pairs.
    """
    resized = tf.image.resize(image, [128, 128])
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

In [4]:
# 4. Create tf.data.Dataset pipelines
batch_size = 64

# Training pipeline.
# The shuffle runs *before* the resize map and its buffer spans the whole
# training set: a buffer smaller than the dataset only mixes samples locally,
# and shuffling ahead of the map means the buffer holds the original images
# rather than their 128x128 float copies.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=len(X_train))
train_dataset = train_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Test pipeline: same preprocessing, no shuffling (order is irrelevant for
# evaluation).
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# 1. Load CIFAR-100 with the fine label set and unpack the two splits
train_split, test_split = cifar100.load_data(label_mode='fine')
X_train, y_train = train_split
X_test, y_test = test_split

# 2. One-hot encode the labels.
# The encoder is fitted on the training labels only and reused for the test
# labels, so both splits share the same category-to-column mapping.
enc = OneHotEncoder(sparse_output=False)
enc.fit(y_train)
y_train, y_test = enc.transform(y_train), enc.transform(y_test)

# 3. Define preprocessing function (resize + normalize)
def preprocess(image, label):
    """Return ``(image, label)`` with the image resized and rescaled.

    The image is resized to 128x128, cast to float32 and divided by 255.0;
    the label is passed through unchanged.
    """
    resized = tf.image.resize(image, [128, 128])
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# 4. Shuffle training data before splitting.
# A seeded generator keeps the train/validation partition identical across
# kernel restarts, so validation numbers stay comparable between runs.
split_rng = np.random.default_rng(42)
train_indices = split_rng.permutation(len(X_train))

X_train = X_train[train_indices]
y_train = y_train[train_indices]

# 5. Hold out the first 10% of the shuffled samples for validation.
# Both right-hand sides are evaluated before assignment, so the slices are
# taken from the full shuffled arrays.
val_size = int(0.1 * len(X_train))
X_val, X_train = X_train[:val_size], X_train[val_size:]
y_val, y_train = y_train[:val_size], y_train[val_size:]

# 6. Create tf.data.Dataset pipelines
batch_size = 64

# Training pipeline.
# The shuffle runs *before* the resize map and its buffer spans the whole
# training set: a buffer smaller than the dataset only mixes samples locally,
# and shuffling ahead of the map means the buffer holds the original images
# rather than their 128x128 float copies.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=len(X_train))
train_dataset = train_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Validation pipeline: same preprocessing, no shuffling.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(batch_size)
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)

# Test pipeline: same preprocessing, no shuffling.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)


In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

def conv_bn_act(x, filters, kernel_size, strides=1):
    """Apply Conv2D -> BatchNormalization -> swish to ``x`` and return the result.

    Args:
        x: input tensor.
        filters: number of convolution output channels.
        kernel_size: convolution kernel size.
        strides: convolution stride (default 1).

    The convolution uses 'same' padding and no bias term.
    """
    stages = (
        layers.Conv2D(filters, kernel_size, strides=strides, padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('swish'),  # or ReLU
    )
    for stage in stages:
        x = stage(x)
    return x

def depthwise_separable_block(x, expand_filters, output_filters, strides=1):
    """Inverted-bottleneck block: expand -> depthwise -> SE gate -> project.

    Args:
        x: input tensor.
        expand_filters: channel count after the 1x1 expansion; also the
            width of the depthwise stage and of the squeeze-and-excitation gate.
        output_filters: channel count produced by the 1x1 projection.
        strides: stride of the 3x3 depthwise convolution (default 1).

    Returns:
        The projected tensor, with the block input added back when
        ``strides == 1`` and the input already has ``output_filters`` channels.
    """
    block_input = x

    # 1x1 expansion to `expand_filters` channels
    expanded = layers.Conv2D(expand_filters, 1, padding='same', use_bias=False)(block_input)
    expanded = layers.BatchNormalization()(expanded)
    expanded = layers.Activation('swish')(expanded)

    # 3x3 depthwise convolution (carries the stride)
    spatial = layers.DepthwiseConv2D(3, strides=strides, padding='same', use_bias=False)(expanded)
    spatial = layers.BatchNormalization()(spatial)
    spatial = layers.Activation('swish')(spatial)

    # Simplified squeeze-and-excitation: pool to one value per channel,
    # squeeze to a quarter of the width, then produce sigmoid channel gates.
    gate = layers.GlobalAveragePooling2D()(spatial)
    gate = layers.Reshape((1, 1, expand_filters))(gate)
    gate = layers.Dense(expand_filters // 4, activation='swish')(gate)
    gate = layers.Dense(expand_filters, activation='sigmoid')(gate)
    gated = layers.Multiply()([spatial, gate])

    # 1x1 projection (no activation afterwards)
    projected = layers.Conv2D(output_filters, 1, padding='same', use_bias=False)(gated)
    projected = layers.BatchNormalization()(projected)

    # The residual add is only valid when input and output tensors line up.
    can_skip = strides == 1 and block_input.shape[-1] == output_filters
    if not can_skip:
        return projected
    return layers.Add()([projected, block_input])

def efficientnet_b0(input_shape=(128, 128, 3), num_classes=100):
    """Assemble the EfficientNet-style classifier used in this notebook.

    Layout: a stride-2 3x3 stem with 32 filters, twelve
    ``depthwise_separable_block`` stages, a 1x1 head convolution with 1280
    filters, global average pooling, dropout (0.4) and a softmax layer.

    Args:
        input_shape: shape of one input image, without the batch axis.
        num_classes: number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping images to class probabilities.
    """
    # One row per block: (expand_filters, output_filters, strides)
    block_specs = (
        (32, 16, 1),
        (96, 24, 2),
        (144, 24, 1),
        (144, 40, 2),
        (240, 40, 1),
        (240, 80, 2),
        (480, 80, 1),
        (480, 112, 1),
        (672, 112, 1),
        (672, 192, 2),
        (1152, 192, 1),
        (1152, 320, 1),
    )

    inputs = tf.keras.Input(shape=input_shape)

    # Stem
    features = conv_bn_act(inputs, 32, 3, strides=2)

    # MBConv-style blocks, applied in table order
    for expand_filters, output_filters, stride in block_specs:
        features = depthwise_separable_block(
            features, expand_filters, output_filters, strides=stride
        )

    # Head
    features = conv_bn_act(features, 1280, 1)
    pooled = layers.GlobalAveragePooling2D()(features)
    regularised = layers.Dropout(0.4)(pooled)
    outputs = layers.Dense(num_classes, activation='softmax')(regularised)

    return models.Model(inputs, outputs)


In [6]:
# Build the network with its default input shape and class count, then
# configure training: Adam optimizer, categorical cross-entropy (the labels
# are one-hot encoded) and accuracy as the reported metric.
model = efficientnet_b0()

compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

model.summary()


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 64, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 64, 64, 32)   128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 64, 64, 32)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [None]:
# Train using the tf.data pipelines.
# `train_dataset` already applies the resize/normalise step and batching, so
# no batch_size is passed here. `val_dataset` is created in the cell that
# carves a validation split out of the training data; run that cell first.
# The returned History is kept so per-epoch loss/accuracy can be inspected
# or plotted in later cells.
history = model.fit(
    train_dataset,
    epochs=4,
    validation_data=val_dataset,
)

Epoch 1/4

KeyboardInterrupt: 

In [None]:
# Evaluate on the test pipeline. The model was compiled with a single
# 'accuracy' metric, so evaluate() yields the loss followed by the accuracy.
evaluation = model.evaluate(test_dataset, verbose=1)
test_loss, test_accuracy = evaluation
print(f'Test accuracy: {test_accuracy:.4f}')

Test accuracy: 0.4508
