In [1]:
import tensorflow as tf
import numpy as np
import keras
from keras import applications, utils, layers ,callbacks 


# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Memory growth has to be configured before the GPUs are initialized;
        # if that is no longer possible, report the error and carry on.
        print(err)



def mixup_data(x, y, alpha=0.2):
    """
    Blend a batch with a randomly permuted copy of itself (mixup).

    A single mixing coefficient ``lam`` is drawn per call and applied to the
    whole batch; inputs and labels are mixed with the same coefficient and the
    same permutation.

    Args:
        x: Batch of input images, shape (batch_size, height, width, channels)
        y: Batch of one-hot encoded labels, shape (batch_size, num_classes)
        alpha: Hyperparameter for the Beta distribution (default 0.2).
            Values <= 0 disable mixing (``lam`` is fixed to 1.0).

    Returns:
        x_mix: Mixed inputs, ``lam * x + (1 - lam) * x[perm]``
        y_mix: Mixed labels, ``lam * y + (1 - lam) * y[perm]``
        lam: The mixing coefficient
    """
    # Beta(alpha, alpha) is only defined for alpha > 0; otherwise keep the
    # original samples by weighting them with 1.0.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0

    # Random permutation of the batch positions, shared by inputs and labels
    # so every mixed image stays paired with its mixed label.
    batch_size = tf.shape(x)[0]
    indices = tf.random.shuffle(tf.range(batch_size))
    x_shuffled = tf.gather(x, indices)
    y_shuffled = tf.gather(y, indices)

    # Convex combination of each sample with its shuffled partner.
    x_mix = lam * x + (1 - lam) * x_shuffled
    y_mix = lam * y + (1 - lam) * y_shuffled
    return x_mix, y_mix, lam


def mixup_generator(x, y, batch_size, alpha):
    """
    A generator that yields mixup-augmented batches indefinitely.

    The sample order is reshuffled at the start of every pass over the data.
    The last batch of a pass is smaller than ``batch_size`` when the number of
    samples is not a multiple of it.

    Args:
        x: Training images as a NumPy array.
        y: One-hot encoded labels as a NumPy array (same first dimension as x).
        batch_size: Batch size (positive integer).
        alpha: Mixup hyperparameter.

    Yields:
        A tuple (x_mix, y_mix) for training.

    Raises:
        ValueError: If x is empty, x and y differ in length, or batch_size is
            not positive. Because this is a generator, the checks run when the
            first batch is requested.
    """
    num_samples = x.shape[0]
    # Without these guards an empty dataset or a negative batch size would
    # make the endless loop below spin forever without yielding anything.
    if num_samples == 0:
        raise ValueError("mixup_generator requires at least one sample")
    if y.shape[0] != num_samples:
        raise ValueError(
            f"x and y must have the same number of samples, got {num_samples} and {y.shape[0]}"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    indices = np.arange(num_samples)
    while True:
        # New random order for each pass over the data.
        np.random.shuffle(indices)
        for start in range(0, num_samples, batch_size):
            batch_indices = indices[start:start + batch_size]
            x_mix, y_mix, _ = mixup_data(x[batch_indices], y[batch_indices], alpha)
            yield x_mix, y_mix

# CIFAR-100 with the fine-grained label set (100 classes)
NUM_CLASSES = 100
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data(
    label_mode="fine"
)

# ResNet preprocessing expects float pixel values in the range [0, 255];
# it converts RGB to BGR and zero-centers each channel using the ImageNet means.
x_train = applications.resnet.preprocess_input(x_train.astype('float32'))
x_test = applications.resnet.preprocess_input(x_test.astype('float32'))

# One-hot encode the integer class labels
y_train = utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = utils.to_categorical(y_test, num_classes=NUM_CLASSES)

2025-03-29 06:42:30.546042: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-29 06:42:30.554642: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743230550.564345  106125 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743230550.567049  106125 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-29 06:42:30.578459: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

1 Physical GPUs, 1 Logical GPUs


In [2]:
# Training configuration
batch_size = 32
alpha = 0.5  # mixup augmentation intensity (parameter of the Beta distribution)
steps_per_epoch = len(x_train) // batch_size  # full batches in one pass over x_train

# Endless generator of mixup-augmented (inputs, targets) batches
train_gen = mixup_generator(x=x_train, y=y_train, batch_size=batch_size, alpha=alpha)

In [3]:
# Pretrained ResNet50 backbone: ImageNet weights, no classification head,
# 224x224 RGB input. The 32x32 CIFAR-100 images are upscaled to this size by
# the Resizing layer in model_builder.
base_model = applications.ResNet50(
    include_top=False,  # author's note: pooling doesn't work
    weights="imagenet",
    input_shape=(224, 224, 3),
)

# Freeze the whole backbone so only the layers stacked on top of it are trained.
base_model.trainable = False
# Optional fine-tuning (currently disabled): unfreeze the last backbone layers.
#for layer in base_model.layers[-7:]:
    #layer.trainable = True



def model_builder(hp):
    """
    Build and compile one candidate classifier for a tuner trial.

    Architecture: 32x32x3 input -> resize to 224x224 -> shared ``base_model``
    -> global average pooling -> two tunable Dense layers -> 100-way softmax.

    Args:
        hp: Hyperparameter container supplied by the tuner; must provide
            ``Choice`` and ``Int``.

    Returns:
        The compiled Keras model (Adam, categorical cross-entropy, accuracy).
    """
    # Search space. The declaration order is kept: activation, Dense_1,
    # Dense_2, learning_rate.
    activation = hp.Choice('activation', values=['relu', 'tanh'])
    units_first = hp.Int('Dense_1', min_value=100, max_value=2048, step=100)
    units_second = hp.Int('Dense_2', min_value=100, max_value=2048, step=100)
    learning_rate = hp.Choice(
        'learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7]
    )

    stack = [
        # Input uses the notebook-level batch_size as a fixed batch dimension.
        keras.Input(shape=[32, 32, 3], batch_size=batch_size),
        layers.Resizing(224, 224, interpolation='bilinear'),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(units=units_first, activation=activation),
        layers.Dense(units=units_second, activation=activation),
        layers.Dense(100, activation='softmax', name='output'),
    ]

    model = keras.Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return model

In [None]:
import keras_tuner as kt

# Hyperband search over the space declared in model_builder, ranking trials
# by validation accuracy.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=50,
)

: 

In [None]:
# Multiply the learning rate by 0.7 after 3 epochs without a better
# validation accuracy, never going below 1e-8.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.7,
    patience=3,
    min_lr=1e-8,
    verbose=1,
)

# Stop after 5 epochs without a lower validation loss and roll back to the
# best weights seen.
early_stopping = callbacks.EarlyStopping(
    monitor="val_loss",
    mode='min',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Run the hyperparameter search with a 20% validation split of the training data.
tuner.search(
    x_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
)


Trial 72 Complete [00h 13m 43s]
val_accuracy: 0.6614000201225281

Best val_accuracy So Far: 0.7475000023841858
Total elapsed time: 06h 48m 15s

Search: Running Trial #73

Value             |Best Value So Far |Hyperparameter
tanh              |tanh              |activation
1900              |1900              |Dense_1
400               |1600              |Dense_2
1e-05             |1e-05             |learning_rate
50                |50                |tuner/epochs
17                |17                |tuner/initial_epoch
2                 |3                 |tuner/bracket
2                 |3                 |tuner/round
0068              |0047              |tuner/trial_id



In [None]:
# Keep the single best hyperparameter set found by the search.
top_hp_sets = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hp_sets[0]

In [None]:
# Rebuild a fresh model from the best hyperparameters found by the tuner.
model = tuner.hypermodel.build(best_hps)

# Train on the preprocessed training arrays with a 20% validation split.
# NOTE: mixup is not applied here. Feeding train_gen instead would also
# require replacing validation_split with explicit validation_data, because
# Keras does not support validation_split for generator inputs.
#
# steps_per_epoch is intentionally left unset: with array inputs Keras derives
# it from the samples that remain after the validation split. A value computed
# from the full training set would ask for more batches per epoch than exist.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=50,
    validation_split=0.2,
    callbacks=[reduce_lr, early_stopping],
)

In [None]:
import matplotlib.pyplot as plt
plt.style.use('dark_background')

# One panel per tracked quantity: (history key, display label).
panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]

plt.figure(figsize=(12, 5))
for position, (metric, label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Training curve and its validation counterpart ('val_' + key).
    plt.plot(history.history[metric], label=f'Train {label}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {label}')
    plt.xlabel('Epochs')
    plt.ylabel(label)
    plt.legend()
    plt.title(f'Training and Validation {label}')

plt.show()

In [None]:
import numpy as np

# Loss and metric values on the test set
y_eval = model.evaluate(x_test, y_test)

# Per-class probabilities for every test image, reduced to the index of the
# most probable class
y_pred_probs = model.predict(x_test)
y_pred = y_pred_probs.argmax(axis=1)

print(y_eval, y_pred, sep='\n')


In [None]:
import numpy as np
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# Bring ground truth and predictions to the same format: 1-D class indices.
# Arrays with more than one dimension (one-hot / per-class scores) are reduced
# with argmax; 1-D arrays are used as they are.
y_true_class = np.argmax(y_test, axis=1) if y_test.ndim > 1 else y_test
y_pred_class = np.argmax(y_pred, axis=1) if y_pred.ndim > 1 else y_pred

# Macro-averaged scores: every class contributes equally.
macro = {'average': 'macro'}
precision_val = precision_score(y_true_class, y_pred_class, **macro)
recall_val = recall_score(y_true_class, y_pred_class, **macro)
f1_val = f1_score(y_true_class, y_pred_class, **macro)

print(f"Precision: {precision_val}")
print(f"Recall: {recall_val}")
print(f"F1 Score: {f1_val}")

# Per-class breakdown
print("\nClassification Report:")
print(classification_report(y_true_class, y_pred_class))
