In [None]:
import tensorflow as tf
import numpy as np
import random
import os

# Single seed shared by every random number generator configured in this cell.
SEED = 123

# Environment flags.
# NOTE: CPython reads PYTHONHASHSEED once, at interpreter start-up. Assigning it
# here does not change string hashing in the already-running kernel; the value
# is only inherited by child processes started after this point.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Requests deterministic TensorFlow op implementations.
# NOTE(review): TensorFlow is already imported when this line runs -- confirm
# the installed version still honours the variable, or export it before the import.
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Seed Python's built-in generator, NumPy's global generator and TensorFlow's
# global seed with the same value.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.metrics import Precision, Recall

# Input pipeline settings
batch_size = 32
img_height = 96  # target image height handed to the loader
img_width = 96   # target image width handed to the loader

# Keyword arguments shared by the training and validation loaders.
loader_kwargs = dict(
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    label_mode='categorical',  # labels come back one-hot encoded
)

# Training split
train_ds = image_dataset_from_directory(
    '/kaggle/input/cifar10/cifar10/train', **loader_kwargs
)

# The test split doubles as the validation set so that every experiment is
# evaluated on the same images.
val_ds = image_dataset_from_directory(
    '/kaggle/input/cifar10/cifar10/test', **loader_kwargs
)


# Random augmentation stack; layers run in the order they are added:
# flip (both axes) -> rotation -> translation.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip(mode="horizontal_and_vertical"))
data_augmentation.add(tf.keras.layers.RandomRotation(factor=0.3))
data_augmentation.add(
    tf.keras.layers.RandomTranslation(height_factor=0.2, width_factor=0.2)
)

# Build the original dataset plus n augmented copies of it
def augment_n_times(dataset, n):
    """Return ``[dataset, copy_1, ..., copy_n]``.

    Each copy is the result of ``dataset.map(...)`` with a function that runs
    the module-level ``data_augmentation`` pipeline (``training=True``) on the
    first element of every ``(x, y)`` pair and passes ``y`` through unchanged.

    Args:
        dataset: Object exposing ``map``; it is always the first list entry.
        n: Number of augmented copies to append. ``range(n)`` is empty for
            ``n <= 0``, so only ``dataset`` is returned in that case.

    Returns:
        A list of length ``1 + max(n, 0)``.
    """
    def _augment(images, labels):
        return data_augmentation(images, training=True), labels

    return [dataset] + [dataset.map(_augment) for _ in range(n)]

# Number of augmented copies to add; 0 means the full training set is used
# without any augmentation.
i = 0

# augmented_datasets[0] is the untouched training set, any further entries are
# augmented copies of it
augmented_datasets = augment_n_times(train_ds, i)

# Chain the original and all copies, in list order, into a single dataset
combined_dataset, *extra_copies = augmented_datasets
for extra_copy in extra_copies:
    combined_dataset = combined_dataset.concatenate(extra_copy)

def normalize_img(image, label):
    """Cast `image` to float32 and divide it by 255; `label` is returned as-is."""
    as_float = tf.cast(image, tf.float32)
    return as_float / 255., label

# Scale the pixel values of both splits
combined_train_ds = combined_dataset.map(normalize_img)
val_ds = val_ds.map(normalize_img)

# Performance tuning: cache the elements, shuffle the training batches with a
# 1000-element buffer, and prefetch so input preparation overlaps with training.
AUTOTUNE = tf.data.experimental.AUTOTUNE
combined_train_ds = (
    combined_train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Report the dataset sizes (counted in batches)
print(f"Number of batches in the original pre-augmentation training dataset: {len(train_ds)}")
print(f"Number of batches in the combined training dataset: {len(combined_train_ds)}")

# Define the CNN model using MobileNetV2
def create_model(num_classes=10):
    """Build and compile a MobileNetV2 classifier that is trained from scratch.

    The network is a randomly initialised MobileNetV2 backbone (no classifier
    head, no pre-trained weights) followed by global average pooling and a
    softmax layer.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        A compiled Keras model whose input shape is
        ``(img_height, img_width, 3)`` (module-level sizes). It is compiled
        with Adam, categorical cross-entropy and the metrics ``accuracy``,
        ``precision`` and ``recall``.
    """
    input_shape = (img_height, img_width, 3)
    base_model = MobileNetV2(input_shape=input_shape,
                             include_top=False,  # drop the final FC classifier
                             weights=None)       # random init, no pre-trained weights
    base_model.trainable = True  # every backbone weight is updated during training

    inputs = tf.keras.Input(shape=input_shape)
    # Do NOT pin `training=True` here: a hard-coded flag keeps the backbone's
    # BatchNormalization layers on per-batch statistics during validation and
    # prediction as well. Leaving it unset lets Keras choose training mode in
    # `fit` and inference mode in `evaluate`/`predict`.
    x = base_model(inputs)
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    model = models.Model(inputs, outputs)

    # Explicit metric names keep the history keys stable ('precision',
    # 'recall') even when this function is called more than once in the same
    # kernel; unnamed metric instances get auto-numbered names on later calls.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy',
                           Precision(name='precision'),
                           Recall(name='recall')])
    return model

if __name__ == '__main__':
    # Train for 100 epochs, validating on `val_ds` after every epoch.
    model = create_model()
    history = model.fit(combined_train_ds, epochs=100, validation_data=val_ds)

    # History keys in the same order as the CSV columns written below.
    # They are looked up before the output file is opened, so a missing key
    # fails without leaving a half-written file behind.
    metric_keys = ('loss', 'val_loss', 'accuracy', 'val_accuracy', 'precision', 'recall')
    per_epoch_rows = zip(*(history.history[key] for key in metric_keys))

    with open('/kaggle/working/training_metrics.txt', 'w') as file:
        file.write('Epoch,Train Loss,Validation Loss,Train Accuracy,Validation Accuracy,Precision,Recall\n')
        # `epoch_number` (not `i`) is used on purpose: at module level a loop
        # variable named `i` would overwrite the augmentation count `i`.
        # Values are written unformatted, one comma-separated line per epoch.
        for epoch_number, row in enumerate(per_epoch_rows, start=1):
            file.write(f"{epoch_number}," + ",".join(str(value) for value in row) + "\n")