In [None]:
# Download the competition archive with the Kaggle CLI (the saved output shows a ~27.7G file).
# NOTE(review): presumably requires Kaggle API credentials to be configured beforehand — confirm.
! kaggle competitions download -c bttai-nybg-2024

Downloading bttai-nybg-2024.zip to /content
... resuming from 221249536 bytes (29485726023 bytes left) ...
 84% 23.3G/27.7G [19:55<03:33, 22.2MB/s]

In [None]:
# Extract the downloaded archive into ./bttai-nybg-2024, the directory the later cells read from.
! unzip "bttai-nybg-2024" -d "bttai-nybg-2024"

In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3, DenseNet121
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Concatenate, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

In [3]:
# Paths of the CSV manifests that describe each split of the competition data.
filename_train = "./bttai-nybg-2024/BTTAIxNYBG-train.csv"
filename_test = "./bttai-nybg-2024/BTTAIxNYBG-test.csv"
filename_val = "./bttai-nybg-2024/BTTAIxNYBG-validation.csv"

# Read every manifest into its own DataFrame (train, test, validation).
df_train, df_test, df_val = (
    pd.read_csv(manifest_path)
    for manifest_path in (filename_train, filename_test, filename_val)
)

In [7]:
# Folders holding the image files referenced by the validation and training manifests.
# The archive nests each split inside a directory of the same name.
validation_image_directory = (
    "./bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation"
)
train_image_directory = (
    "./bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train"
)


In [4]:
# InceptionV3's `preprocess_input` already maps raw 0-255 pixels to [-1, 1].
# ImageDataGenerator applies `rescale` *after* `preprocessing_function`, so the
# previous `rescale=1./255` squeezed the inputs to roughly [-0.004, 0.004],
# far from the range the ImageNet weights were trained on. Only the
# preprocessing function is kept.
# NOTE: any generator used at inference time must be configured identically.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [20]:
# Stream labelled training batches from disk: file names come from the
# 'imageFile' column, labels from 'classLabel' (one-hot encoded).
train_generator = datagen.flow_from_dataframe(
    df_train,
    directory=train_image_directory,
    x_col='imageFile',
    y_col='classLabel',
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=True,
)

Found 81946 validated image filenames belonging to 10 classes.


In [21]:
# Stream labelled validation batches with the same image size, batch size
# and label encoding as the training generator.
validation_generator = datagen.flow_from_dataframe(
    df_val,
    directory=validation_image_directory,
    x_col='imageFile',
    y_col='classLabel',
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=True,
)

Found 10244 validated image filenames belonging to 10 classes.


In [10]:
# Two ImageNet-pretrained feature extractors without their classification
# heads, both taking 224x224 RGB images.
inception_base = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
densenet_base = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5


In [11]:
# Freeze every layer of both pretrained backbones so only the new head trains.
for backbone in (inception_base, densenet_base):
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

In [12]:
# Collapse each backbone's spatial feature map into a single feature vector.
inception_pool = GlobalAveragePooling2D()
inception_output = inception_pool(inception_base.output)
densenet_pool = GlobalAveragePooling2D()
densenet_output = densenet_pool(densenet_base.output)

In [13]:
# Join the two pooled feature vectors along the feature (last) axis.
pooled_features = [inception_output, densenet_output]
concatenated = Concatenate(axis=-1)(pooled_features)

In [14]:
# Distinct class labels present in the training manifest, in order of first appearance.
classes = list(pd.unique(df_train["classLabel"]))

In [15]:
# Trainable head: one hidden ReLU layer followed by a softmax with one
# output unit per class label.
x = Dense(units=1024, activation='relu')(concatenated)
predictions = Dense(units=len(classes), activation='softmax')(x)

In [16]:
# Assemble the two-input ensemble: one input tensor per backbone, one shared softmax output.
ensemble_inputs = [inception_base.input, densenet_base.input]
model = Model(inputs=ensemble_inputs, outputs=predictions)

# One-hot labels from the generators pair with categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [17]:
# The model has two inputs, so every batch of images has to be supplied twice.
def dual_generator(generator):
    """Wrap a labelled batch iterator so each batch feeds both model inputs.

    Args:
        generator: iterator whose ``next()`` returns an ``(images, labels)`` pair.

    Yields:
        ``([images, images], labels)`` for every pair pulled from ``generator``;
        the loop never terminates on its own.
    """
    while True:
        batch = next(generator)
        images, labels = batch
        yield [images, images], labels

In [22]:
# Wrap both labelled generators so they emit the duplicated-input batches the model expects.
train_dual_generator, validation_dual_generator = map(
    dual_generator, (train_generator, validation_generator)
)

In [None]:
# Stop training once validation loss has failed to improve for two
# consecutive epochs, then roll the model back to its best-epoch weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',                 # lower validation loss is better
    patience=2,                 # epochs without improvement tolerated before stopping
    restore_best_weights=True,  # reload weights from the best monitored epoch
    verbose=1,
)

In [None]:
# Persist the model to 'best_Emodel' whenever validation loss reaches a new minimum.
# NOTE(review): newer Keras releases may require a `.keras`/`.h5` suffix on
# `filepath` — confirm against the installed version.
model_checkpoint = ModelCheckpoint(
    'best_Emodel',
    monitor='val_loss',
    mode='min',
    save_best_only=True,  # skip epochs whose `val_loss` did not improve
    verbose=1,
)

In [None]:
# Train the head for up to 10 epochs. The wrapped generators never end, so
# the number of batches per epoch is taken from the underlying iterators.
history = model.fit(
    train_dual_generator,
    validation_data=validation_dual_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
    callbacks=[early_stopping, model_checkpoint],
)

In [None]:
# Inference must see images preprocessed exactly like the training images.
# Reusing the training-time generator (which performs no augmentation, only
# pixel preprocessing) avoids a second copy of the configuration that could
# silently drift out of sync with it.
test_datagen = datagen

In [None]:
# Unlabelled test images: no label column and no class mode, so each batch
# is just an array of images. Order is kept fixed so predictions line up
# with the rows of `df_test`.
test_generator = test_datagen.flow_from_dataframe(
    df_test,
    directory="./bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test",
    x_col="imageFile",
    y_col=None,
    class_mode=None,
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,  # keep file order; shuffling would misalign the submission
)

In [None]:
# `test_generator` yields unlabelled, single-input batches (class_mode=None),
# so it cannot drive `evaluate` on this two-input model: there are no targets
# to score against. Measure loss/accuracy on the labelled validation split
# instead, duplicating each batch for both branches via `dual_generator`.
# NOTE: the following cell prints these values under a "Test" label; they are
# validation metrics.
validation_generator.reset()  # start at the first batch so one pass covers the split once
loss, accuracy = model.evaluate(
    dual_generator(validation_generator),
    steps=len(validation_generator),  # the wrapped generator is endless; bound it explicitly
)

In [None]:
# Report the metrics returned by `model.evaluate`, one per line.
print(
    f"Test loss: {loss}",
    f"Test accuracy: {accuracy}",
    sep="\n",
)

In [None]:
# The model has two input tensors (one per backbone), but `test_generator`
# yields a single image array per batch. Supply every batch to both inputs,
# wrapped in a 1-tuple so Keras reads it as "inputs only, no targets".
test_generator.reset()  # begin at the first file so rows align with `df_test`
test_dual_batches = (((batch, batch),) for batch in test_generator)
predictions = model.predict(
    test_dual_batches,
    steps=len(test_generator),  # exactly one pass over the test set
)

# Index of the highest softmax score per image.
# NOTE(review): these indices follow `train_generator.class_indices`;
# confirm that ordering matches the class IDs the competition expects.
predicted_classes = np.argmax(predictions, axis=1)

In [None]:
# Pair each test row's identifier with its predicted class index.
# NOTE(review): assumes `predicted_classes` is ordered like `df_test` and that
# the argmax index equals the competition's class ID — TODO confirm.
submission_df = df_test[['uniqueID']].assign(classID=predicted_classes)

In [None]:
# Preview the first five rows of the submission table.
submission_df.head(n=5)

In [None]:
# Write the submission table to disk without the DataFrame index column.
submission_df.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Upload submission.csv to the competition via the Kaggle CLI; -m attaches the submission message.
! kaggle competitions submit -c bttai-nybg-2024 -f submission.csv -m "Starflowers-NYC (Using Emodel)"