In [1]:
# Imports necessary libraries and modules
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score, precision_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Load train and validation data from CSV files
train_data = pd.read_csv('/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-train.csv')
val_data = pd.read_csv('/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-validation.csv')
test_data = pd.read_csv('/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-test.csv')

# Data generators for preprocessing and augmentation
train_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator() 
test_datagen = ImageDataGenerator()

# Sets paths for training and validation data
train_image = '/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train'
test_image = '/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test'
val_image = '/kaggle/input/bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation'

# Generates batches of tensor image data for training
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=train_image,
    x_col='imageFile',
    y_col='classLabel',
    target_size=(224, 224),
    batch_size=32,
    color_mode='rgb',
    class_mode='categorical')

# Generates batches of tensor image data for validation
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory=val_image,
    x_col='imageFile',
    y_col='classLabel',
    target_size=(224, 224),
    batch_size=32,
    color_mode='rgb',
    class_mode='categorical')

# Generates batches of tensor image data for validation
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=test_image,
    x_col='imageFile',
    y_col=None, # No label
    class_mode=None, # No label
    target_size=(224, 224),
    batch_size=32,
    color_mode='rgb',
    shuffle=False)

# Dynamically determine the number of classes based on the training data
num_classes = len(train_generator.class_indices)

# Define a learning rate scheduler
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',   # Monitor the validation loss
    factor=0.1,           # Factor by which the learning rate will be reduced (new_lr = lr * factor)
    patience=3,           # Number of epochs with no improvement after which learning rate will be reduced
    min_lr=1e-6,          # Minimum learning rate
    verbose=1             # Print a message when learning rate is reduced
)

# Load the pre-trained ResNet50 model
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)

# Freeze the pre-trained layers
base_model.trainable = False

# Unfreeze the last few layers for fine-tuning
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Add new layers on top of the pre-trained model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
x = Dropout(0.5)(x)
outputs = Dense(num_classes, activation='softmax')(x)  # Dynamically set the number of output classes

# Create the new model
model = Model(inputs=base_model.input, outputs=outputs)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    epochs=3,  
    validation_data=val_generator,
    callbacks=[lr_scheduler]
)

model.save('/kaggle/working/test_model.keras')

# Predict probabilities for validation set
y_pred_prob = model.predict(test_generator)
y_pred = np.argmax(y_pred_prob, axis=1)

print("Length of y_pred: " + str(len(y_pred)))

submission_df = test_data
submission_df['classID'] = y_pred
submission_df.drop(columns="imageFile", inplace=True)

submission_df.to_csv('submission.csv', index=False)

2024-04-04 16:34:04.621310: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-04 16:34:04.621445: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-04 16:34:04.764461: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 81946 validated image filenames belonging to 10 classes.
Found 10244 validated image filenames belonging to 10 classes.
Found 30690 validated image filenames.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Epoch 1/3


  self._warn_if_super_not_called()


[1m2561/2561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8274s[0m 3s/step - accuracy: 0.8812 - loss: 1.0524 - val_accuracy: 0.9504 - val_loss: 0.2478 - learning_rate: 0.0010
Epoch 2/3
[1m2561/2561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8256s[0m 3s/step - accuracy: 0.9475 - loss: 0.2640 - val_accuracy: 0.9561 - val_loss: 0.2208 - learning_rate: 0.0010
Epoch 3/3
[1m2561/2561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8181s[0m 3s/step - accuracy: 0.9594 - loss: 0.2126 - val_accuracy: 0.9614 - val_loss: 0.2049 - learning_rate: 0.0010
[1m960/960[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2451s[0m 3s/step
Length of y_pred: 30690
