# Component 08: DenseNet121 (Advanced Training)

✅ AdamW + Weight Decay | ✅ Cosine Annealing + Warmup | ✅ Label Smoothing | ✅ Mixed Precision

In [None]:
import json
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight

# Seed both TensorFlow and NumPy with the same value.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Set the global Keras dtype policy to mixed_float16 before any layers are
# created in later cells.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Make sure the directories that later cells write into exist.
OUTPUT_DIR = '../outputs'
for directory in (f'{OUTPUT_DIR}/models', f'{OUTPUT_DIR}/training_history'):
    os.makedirs(directory, exist_ok=True)

print('✅ Mixed precision enabled')

## Config & Data

In [None]:
# Train / validation manifests. Later cells read the 'filepath' and
# 'class_label' columns from these frames.
train_df = pd.read_csv('../outputs/train_manifest.csv')
val_df = pd.read_csv('../outputs/val_manifest.csv')

# Input pipeline and training-length settings.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 50

# Number of distinct values in the training labels (NaN, if present, is
# counted as its own value, matching len(unique())).
NUM_CLASSES = train_df['class_label'].nunique(dropna=False)

# Optimizer, schedule and regularisation hyperparameters.
INITIAL_LR = 1e-3
WEIGHT_DECAY = 1e-4
WARMUP_EPOCHS = 5
LABEL_SMOOTHING = 0.1
GRADIENT_CLIP_NORM = 1.0

print(f'Train: {len(train_df)}, Val: {len(val_df)}, Classes: {NUM_CLASSES}')
print(f'Hyperparams: LR={INITIAL_LR}, WD={WEIGHT_DECAY}, Warmup={WARMUP_EPOCHS}, LS={LABEL_SMOOTHING}')

## Preprocessing

In [None]:
def preprocess(fp, label):
    """Load one image file and prepare it as DenseNet input.

    Args:
        fp: Path of the image file to read.
        label: Label paired with the image; returned unchanged.

    Returns:
        A ``(image, label)`` pair where ``image`` has been decoded as a
        3-channel JPEG, resized to ``IMG_SIZE`` and passed through the
        DenseNet ``preprocess_input`` transform.
    """
    img = tf.io.read_file(fp)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.keras.applications.densenet.preprocess_input(img)
    return img, label


# Random augmentation applied per image to the training stream only.
aug = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.15),
])


def build_dataset(df, augment=False, shuffle=True):
    """Build a batched, prefetched ``tf.data`` pipeline from a manifest frame.

    Args:
        df: DataFrame with a ``filepath`` column and a ``class_label`` column.
        augment: When true, run every image through ``aug`` in training mode.
        shuffle: When true, shuffle the examples using ``SEED``.

    Returns:
        A dataset yielding ``(images, labels)`` batches of ``BATCH_SIZE``.
    """
    ds = tf.data.Dataset.from_tensor_slices(
        (df['filepath'].values, df['class_label'].values)
    )
    # Decode once and cache, so later epochs skip file reads and decoding.
    ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE).cache()
    if shuffle:
        # A buffer as large as the dataset gives a uniform shuffle; a smaller
        # fixed buffer only mixes neighbouring rows, which matters if the
        # manifest is ordered (e.g. grouped by class). Shuffling happens
        # before augmentation so the buffer holds the already-cached images
        # rather than a second, freshly augmented copy of each one.
        ds = ds.shuffle(max(len(df), 1), seed=SEED)
    if augment:
        # Augmentation must stay after cache() so it is re-drawn every epoch.
        ds = ds.map(
            lambda x, y: (aug(x, training=True), y),
            num_parallel_calls=tf.data.AUTOTUNE,
        )
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


train_ds = build_dataset(train_df, augment=True)
val_ds = build_dataset(val_df, augment=False, shuffle=False)
print('✅ Datasets ready')

## Build DenseNet121

In [None]:
# ImageNet-pretrained DenseNet121 backbone with global average pooling and no
# classification head.
base = tf.keras.applications.DenseNet121(
    include_top=False,
    pooling='avg',
    weights='imagenet',
    input_shape=(*IMG_SIZE, 3),
)

# Freeze everything except the last 30 layers of the backbone.
for layer in base.layers[:-30]:
    layer.trainable = False

inputs = tf.keras.Input(shape=(*IMG_SIZE, 3))
# Do not hard-code training=True here: a training flag passed at graph
# construction is baked into the model, so the unfrozen BatchNormalization
# layers would keep using per-batch statistics during validation and
# prediction as well. Leaving it unset lets Keras pass the real phase
# (training inside fit, inference inside evaluate/predict).
# NOTE(review): the Keras fine-tuning guide goes further and recommends
# training=False so BatchNormalization statistics are never updated; switch to
# that if fine-tuning turns out unstable.
x = base(inputs)

# Classification head: dropout-regularised dense stack.
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.4)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
# The softmax output is pinned to float32 regardless of the global dtype policy.
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', dtype='float32')(x)

model = tf.keras.Model(inputs, outputs)
model.summary()

## Compile

In [None]:
def get_lr_schedule(epoch, lr):
    """Return the learning rate for ``epoch``: linear warmup, then cosine decay.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate; accepted for the scheduler-callback
            signature but not used.

    Returns:
        ``INITIAL_LR * (epoch + 1) / WARMUP_EPOCHS`` while
        ``epoch < WARMUP_EPOCHS``; afterwards a half-cosine decay from
        ``INITIAL_LR`` over the remaining ``EPOCHS - WARMUP_EPOCHS`` epochs.
    """
    if epoch >= WARMUP_EPOCHS:
        # Cosine phase: progress is 0 at the first post-warmup epoch.
        elapsed = epoch - WARMUP_EPOCHS
        span = EPOCHS - WARMUP_EPOCHS
        progress = elapsed / span
        half_peak = INITIAL_LR * 0.5
        return half_peak * (1 + math.cos(math.pi * progress))

    # Warmup phase: ramp linearly, reaching INITIAL_LR on the last warmup epoch.
    ramped = INITIAL_LR * (epoch + 1)
    return ramped / WARMUP_EPOCHS

In [None]:
# Custom Label Smoothing Loss (Compatible with all TF versions)
class LabelSmoothingLoss(tf.keras.losses.Loss):
    """Categorical cross-entropy against label-smoothed targets.

    Integer class labels are one-hot encoded, smoothed as
    ``one_hot * (1 - smoothing) + smoothing / num_classes`` and compared with
    the predicted probabilities.

    Args:
        num_classes: Depth of the one-hot encoding.
        smoothing: Probability mass spread uniformly over all classes.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (e.g. ``name``,
            ``reduction``) so the loss can be rebuilt from its config.
    """

    def __init__(self, num_classes, smoothing=0.1, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.smoothing = smoothing

    def call(self, y_true, y_pred):
        """Return the per-example cross-entropy for integer ``y_true``."""
        y_true = tf.cast(y_true, tf.int32)
        # Labels may arrive as (batch,) or as (batch, 1) depending on how the
        # Keras version in use aligns target rank with the predictions. Drop a
        # trailing singleton axis so one_hot yields (batch, num_classes)
        # rather than (batch, 1, num_classes).
        label_rank = y_true.shape.rank
        if (
            label_rank is not None
            and label_rank == y_pred.shape.rank
            and y_true.shape[-1] == 1
        ):
            y_true = tf.squeeze(y_true, axis=-1)
        y_true_one_hot = tf.one_hot(y_true, self.num_classes)
        y_true_smooth = (
            y_true_one_hot * (1 - self.smoothing)
            + self.smoothing / self.num_classes
        )
        return tf.keras.losses.categorical_crossentropy(y_true_smooth, y_pred)

    def get_config(self):
        """Return the constructor arguments so the loss can be deserialised.

        Loading a saved model still requires passing this class via
        ``custom_objects``.
        """
        config = super().get_config()
        config.update({
            'num_classes': self.num_classes,
            'smoothing': self.smoothing,
        })
        return config


loss_fn = LabelSmoothingLoss(NUM_CLASSES, smoothing=LABEL_SMOOTHING)
print(f'✅ Label Smoothing Loss created (ε={LABEL_SMOOTHING})')

In [None]:
# 'balanced' class weights from the training labels. compute_class_weight
# returns one weight per entry of `classes`, in the same order.
class_labels = np.unique(train_df['class_label'])
class_weights = compute_class_weight(
    'balanced',
    classes=class_labels,
    y=train_df['class_label'],
)
# Key each weight by its actual label value instead of its position, so the
# mapping stays correct even if the labels are not exactly 0..N-1.
# NOTE(review): assumes class_label holds integer class ids — confirm against
# the manifest.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weights)
}

optimizer = tf.keras.optimizers.AdamW(
    learning_rate=INITIAL_LR,
    weight_decay=WEIGHT_DECAY,
    clipnorm=GRADIENT_CLIP_NORM,
)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
print('✅ Compiled with AdamW + Custom Label Smoothing')

## Train

In [None]:
# Keep only the weights with the best validation accuracy seen so far.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    f'{OUTPUT_DIR}/models/densenet121_best.h5',
    monitor='val_accuracy',
    save_best_only=True,
)
# Apply the warmup + cosine schedule at the start of every epoch.
schedule_cb = tf.keras.callbacks.LearningRateScheduler(get_lr_schedule, verbose=1)
# Stream per-epoch metrics to CSV.
csv_cb = tf.keras.callbacks.CSVLogger(
    f'{OUTPUT_DIR}/training_history/densenet121_training.csv'
)
callbacks = [checkpoint_cb, schedule_cb, csv_cb]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks,
    class_weight=class_weight_dict,
)

# Convert every logged value to a plain float so the history is JSON-serialisable.
serializable_history = {}
for metric_name, values in history.history.items():
    serializable_history[metric_name] = [float(v) for v in values]

with open(f'{OUTPUT_DIR}/training_history/densenet121_history.json', 'w') as f:
    json.dump(serializable_history, f)

print(f'✅ Best Val Acc: {max(history.history["val_accuracy"]):.4f}')