In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

def get_cifar100_datasets(image_size=(96, 96), batch_size=64, val_split=0.1):
    # Load CIFAR-100 data
    (X_train_full, y_train_full), (X_test, y_test) = cifar100.load_data(label_mode='fine')

    # One-hot encode the labels
    enc = OneHotEncoder(sparse_output=False)
    y_train_full = enc.fit_transform(y_train_full)
    y_test = enc.transform(y_test)

    # Split training into train + val
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_full, y_train_full,
        test_size=val_split,
        random_state=42,
        stratify=y_train_full
    )

    # Preprocessing function
    def preprocess(image, label):
        image = tf.image.resize(image, image_size)
        image = tf.cast(image, tf.float32) / 255.0
        return image, label

    def build_dataset(X, y, shuffle=False):
        ds = tf.data.Dataset.from_tensor_slices((X, y))
        ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        if shuffle:
            ds = ds.shuffle(buffer_size=1000)
        ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
        return ds

    train_dataset = build_dataset(X_train, y_train, shuffle=True)
    val_dataset = build_dataset(X_val, y_val)
    test_dataset = build_dataset(X_test, y_test)

    return train_dataset, val_dataset, test_dataset


In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models

# from preprocess import get_cifar100_datasets

train_ds, val_ds, test_ds = get_cifar100_datasets(image_size=(128, 128), batch_size=32)

In [3]:
def conv_block(x, growth_rate):
    x1 = layers.BatchNormalization()(x)
    x1 = layers.Activation('relu')(x1)
    x1 = layers.Conv2D(4 * growth_rate, (1, 1), use_bias=False)(x1)

    x1 = layers.BatchNormalization()(x1)
    x1 = layers.Activation('relu')(x1)
    x1 = layers.Conv2D(growth_rate, (3, 3), padding='same', use_bias=False)(x1)

    x = layers.Concatenate()([x, x1])
    return x

def dense_block(x, layers_count, growth_rate):
    for _ in range(layers_count):
        x = conv_block(x, growth_rate)
    return x

def transition_layer(x, compression=0.5):
    reduced_filters = int(tf.keras.backend.int_shape(x)[-1] * compression)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(reduced_filters, (1, 1), use_bias=False)(x)
    x = layers.AveragePooling2D((2, 2), strides=2)(x)
    return x

def build_densenet121(input_shape=(128, 128, 3), num_classes=100):
    growth_rate = 32
    block_layers = [6, 12, 24, 16]

    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(64, (7, 7), strides=2, padding='same', use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((3, 3), strides=2, padding='same')(x)

    for i, num_layers in enumerate(block_layers):
        x = dense_block(x, num_layers, growth_rate)
        if i != len(block_layers) - 1:
            x = transition_layer(x)

    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs, name="DenseNet121_Custom")
    return model


In [4]:
model = build_densenet121()

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


Model: "DenseNet121_Custom"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 64, 64)   9408        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 64, 64, 64)   256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 64, 64, 64)   0           batch_normalization[0][0]        
_________________________________________________________________________________

In [5]:
model.fit(
    train_ds,
    epochs=4,
    validation_data=val_ds,
    # callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
    # verbose=1
)

Epoch 1/4


InvalidArgumentError:  logits and labels must have the same first dimension, got logits shape [32,100] and labels shape [3200]
	 [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at C:\Users\chapp\AppData\Local\Temp\ipykernel_2168\2905861612.py:1) ]] [Op:__inference_train_function_22456]

Function call stack:
train_function


In [None]:
test_loss, test_accuracy = model.evaluate(test_ds, verbose=1)
print(f'Test accuracy: {test_accuracy:.4f}')