# Task: Transfer Learning part :) 

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Set paths to directories
TRAIN_DIR = '/kaggle/input/bttai-ajl-2025/train/train'
TEST_DIR = '/kaggle/input/bttai-ajl-2025/test/test'
TEST_CSV_PATH = '/kaggle/input/bttai-ajl-2025/test.csv'  # test metadata

# Image and training parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Data Augmentation & Generators for Training and Validation

In [2]:
# Use ImageDataGenerator with a validation split (20%)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.2
)

# Training generator: loads images from subdirectories (each subdirectory is a class)
train_generator = train_datagen.flow_from_directory(
    directory=TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    subset='training',
    class_mode='categorical',
    shuffle=True
)

# Validation generator
val_generator = train_datagen.flow_from_directory(
    directory=TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    subset='validation',
    class_mode='categorical',
    shuffle=False
)

Found 2300 images belonging to 21 classes.
Found 560 images belonging to 21 classes.


# Build Model with Transfer Learning using EfficientNetB0

In [3]:
# Load pre-trained EfficientNetB0 (without the top layers)
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False  # Freeze base model for initial training

# Add custom layers on top
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
num_classes = train_generator.num_classes  # Automatically detects the number of classes
predictions = Dense(num_classes, activation='softmax')(x)

# Construct the full model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks for early stopping and learning rate reduction
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


# Train the Model

In [4]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,  # Adjust as needed
    callbacks=[early_stop, reduce_lr]
)

Epoch 1/15


  self._warn_if_super_not_called()


[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 2s/step - accuracy: 0.1419 - loss: 2.8550 - val_accuracy: 0.2732 - val_loss: 2.4146 - learning_rate: 0.0010
Epoch 2/15
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 2s/step - accuracy: 0.3122 - loss: 2.2380 - val_accuracy: 0.3214 - val_loss: 2.1965 - learning_rate: 0.0010
Epoch 3/15
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 2s/step - accuracy: 0.3852 - loss: 2.0078 - val_accuracy: 0.3500 - val_loss: 2.1042 - learning_rate: 0.0010
Epoch 4/15
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 2s/step - accuracy: 0.4257 - loss: 1.9025 - val_accuracy: 0.3768 - val_loss: 2.0264 - learning_rate: 0.0010
Epoch 5/15
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 2s/step - accuracy: 0.4492 - loss: 1.7967 - val_accuracy: 0.3857 - val_loss: 1.9623 - learning_rate: 0.0010
Epoch 6/15
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 2s/step -

# F1 Score Calculations

In [5]:
from sklearn.metrics import f1_score

# Calculate F1 score on the validation set
val_steps = val_generator.n // val_generator.batch_size + 1
y_true = val_generator.classes

# Generate predictions on the validation data
val_preds_probs = model.predict(val_generator, steps=val_steps, verbose=1)
y_pred = np.argmax(val_preds_probs, axis=1)

# Compute the weighted F1 score
f1 = f1_score(y_true, y_pred, average='weighted')
print("Weighted F1 Score on Validation Set:", f1)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 2s/step
Weighted F1 Score on Validation Set: 0.4323484110719362


# Generate Predictions on the Test Set and Create Submission File

In [6]:
# Using test.csv to map file names for the test set
if os.path.exists(TEST_CSV_PATH):
    test_df = pd.read_csv(TEST_CSV_PATH)
    # Build full file paths by appending the .jpg extension to each md5hash
    test_df['filepath'] = TEST_DIR + '/' + test_df['md5hash'] + '.jpg'
    
    # Create a test generator from the dataframe
    test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_df,
        x_col='filepath',
        y_col=None,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=False,
        class_mode=None
    )
    
    # Predict probabilities for test images
    preds = model.predict(test_generator, verbose=1)
    predicted_class_indices = np.argmax(preds, axis=1)
    
    # Map indices back to class labels using the training generator mapping
    labels_map = {v: k for k, v in train_generator.class_indices.items()}
    predicted_labels = [labels_map[idx] for idx in predicted_class_indices]
    
    # Add predictions to the dataframe and create submission file
    test_df['label'] = predicted_labels
    submission = test_df[['md5hash', 'label']]
    submission.to_csv('submission.csv', index=False)
    print("Submission file created and saved as submission.csv")
else:
    print("test.csv not found. Skipping test set predictions.")

Found 1227 validated image filenames.


  self._warn_if_super_not_called()


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step
Submission file created and saved as submission.csv
