In [72]:
# ! kaggle competitions download -c bttai-nybg-2024

In [73]:
# ! unzip "bttai-nybg-2024" -d "bttai-nybg-2024"

In [74]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3, DenseNet121
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Concatenate, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

In [75]:
# Locations of the metadata CSVs for the three competition splits.
filename_train = "./bttai-nybg-2024/BTTAIxNYBG-train.csv"
filename_val = "./bttai-nybg-2024/BTTAIxNYBG-validation.csv"
filename_test = "./bttai-nybg-2024/BTTAIxNYBG-test.csv"

# Read each split into its own DataFrame.
df_train, df_val, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (filename_train, filename_val, filename_test)
)

In [76]:
# Image folders for the labeled splits; each path repeats the split name
# because the folder sits inside a same-named parent folder.
train_image_directory = (
    "./bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train"
)
validation_image_directory = (
    "./bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation"
)


In [77]:
# Generator configuration shared by the training and validation flows below.
# NOTE(review): both `rescale` and `preprocessing_function` are set. Keras
# appears to run the preprocessing function first and then multiply by
# `rescale`, which would shrink the already-normalized pixels by a further
# factor of 255 - confirm. If so, drop `rescale`, but only together with the
# identically configured test-time generator further down, and retrain.
# NOTE(review): `preprocess_input` is the InceptionV3 variant, yet the same
# batches are also fed to the DenseNet121 branch - check whether DenseNet's
# own preprocessing is expected there.
datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)

In [78]:
# Stream shuffled, one-hot-labeled training batches from the image folder,
# using the file names and labels listed in the training DataFrame.
train_flow_options = {
    'x_col': 'imageFile',
    'y_col': 'classLabel',
    'target_size': (224, 224),
    'batch_size': 32,
    'class_mode': 'categorical',
    'shuffle': True,
}
train_generator = datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=train_image_directory,
    **train_flow_options,
)

Found 81946 validated image filenames belonging to 10 classes.


In [79]:
# Validation batches are produced with the same generator settings as the
# training batches, but read from the validation DataFrame and folder.
validation_flow_options = {
    'x_col': 'imageFile',
    'y_col': 'classLabel',
    'target_size': (224, 224),
    'batch_size': 32,
    'class_mode': 'categorical',
    'shuffle': True,
}
validation_generator = datagen.flow_from_dataframe(
    dataframe=df_val,
    directory=validation_image_directory,
    **validation_flow_options,
)

Found 10244 validated image filenames belonging to 10 classes.


In [80]:
# Both feature extractors are built with the same arguments: ImageNet weights,
# no classification head, and 224x224 three-channel inputs.
backbone_options = {
    'weights': 'imagenet',
    'include_top': False,
    'input_shape': (224, 224, 3),
}
inception_base = InceptionV3(**backbone_options)
densenet_base = DenseNet121(**backbone_options)


In [81]:
# Freeze every layer of both pretrained backbones so that only the layers
# added on top of them are updated during training.
for backbone in (inception_base, densenet_base):
    for layer in backbone.layers:
        layer.trainable = False

In [82]:
# Apply global average pooling to each backbone's output so the two results
# can be joined downstream.
inception_pool = GlobalAveragePooling2D()
densenet_pool = GlobalAveragePooling2D()
inception_output = inception_pool(inception_base.output)
densenet_output = densenet_pool(densenet_base.output)

In [83]:
# Join the two pooled outputs (InceptionV3 first, DenseNet121 second).
pooled_branches = [inception_output, densenet_output]
concatenated = Concatenate()(pooled_branches)

In [84]:
# Distinct class labels found in the training metadata, in order of first
# appearance; the length of this list sizes the output layer.
classes = [label for label in df_train["classLabel"].unique()]

In [85]:
# Classification head: one 1024-unit ReLU layer on the fused features,
# followed by a softmax layer with one unit per class label.
hidden_layer = Dense(units=1024, activation='relu')
x = hidden_layer(concatenated)
output_layer = Dense(units=len(classes), activation='softmax')
predictions = output_layer(x)

In [86]:
# Assemble the two-input model (InceptionV3 input first, DenseNet121 input
# second) and compile it for categorical classification.
model = Model(
    inputs=[inception_base.input, densenet_base.input],
    outputs=predictions,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [87]:
# Since we now have two inputs, we need to adjust our data generators accordingly
def dual_generator(generator):
    """Adapt a single-input batch iterator to the two-input model.

    Args:
        generator: Iterator whose ``next()`` returns an ``(inputs, labels)``
            pair.

    Yields:
        ``([inputs, inputs], labels)`` - the same inputs object listed once
        per model input, together with the unchanged labels.
    """
    while True:
        images, labels = next(generator)
        # Both branches receive the very same batch object.
        paired_inputs = [images, images]
        yield paired_inputs, labels

In [88]:
# Wrap the training and validation iterators so every batch is duplicated
# for the model's two inputs.
train_dual_generator = dual_generator(generator=train_generator)
validation_dual_generator = dual_generator(generator=validation_generator)

In [89]:
# Halt training once validation loss has gone three epochs without improving,
# then roll the model back to the weights from its best epoch.
early_stopping_options = {
    'monitor': 'val_loss',          # quantity being watched
    'mode': 'min',                  # lower is better
    'patience': 3,                  # epochs without improvement tolerated
    'restore_best_weights': True,   # return to the best epoch's weights
    'verbose': 1,
}
early_stopping = EarlyStopping(**early_stopping_options)

In [90]:
# Save the model to 'best_Emodel' whenever validation loss reaches a new
# minimum; epochs that do not improve it are not saved.
checkpoint_options = {
    'filepath': 'best_Emodel',
    'monitor': 'val_loss',
    'mode': 'min',
    'save_best_only': True,
    'verbose': 1,
}
model_checkpoint = ModelCheckpoint(**checkpoint_options)

In [None]:
# The wrapped generators never end, so the number of batches per epoch is
# taken from the length of the underlying iterators.
train_steps = len(train_generator)
validation_steps = len(validation_generator)

history = model.fit(
    train_dual_generator,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_data=validation_dual_generator,
    validation_steps=validation_steps,
    callbacks=[early_stopping, model_checkpoint],
)

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.66147, saving model to best_Emodel
Epoch 2/10
Epoch 2: val_loss improved from 0.66147 to 0.58250, saving model to best_Emodel
Epoch 3/10
Epoch 3: val_loss improved from 0.58250 to 0.53149, saving model to best_Emodel
Epoch 4/10
Epoch 4: val_loss did not improve from 0.53149
Epoch 5/10
Epoch 5: val_loss did not improve from 0.53149
Epoch 6/10
Epoch 6: val_loss improved from 0.53149 to 0.47535, saving model to best_Emodel
Epoch 7/10
Epoch 7: val_loss improved from 0.47535 to 0.45687, saving model to best_Emodel
Epoch 8/10
Epoch 8: val_loss did not improve from 0.45687
Epoch 9/10

In [None]:
def dual_generator_forTest(generator):
    """Yield ``[x, x]`` for every image batch drawn from ``generator``.

    The two-input model takes the same image batch on both inputs. The
    iterator may produce either a bare image batch (as an unlabeled iterator
    does) or a tuple/list whose first element is the image batch; in the
    latter case everything after the first element is discarded.

    Args:
        generator: Iterator supporting ``next()`` that produces image batches,
            optionally wrapped in a tuple or list together with labels.

    Yields:
        A two-element list containing the same image batch twice.
    """
    # NOTE(review): some Keras versions may read a yielded two-element list as
    # an (inputs, targets) pair; if predict() reports the wrong number of
    # inputs, yield ``([images, images],)`` instead - confirm against the
    # installed version.
    while True:
        batch = next(generator)
        # Unpacking a bare array as `x, _` splits it along its first axis and
        # raises unless the batch holds exactly two images, so only index
        # into real tuples/lists.
        images = batch[0] if isinstance(batch, (tuple, list)) else batch
        yield [images, images]

In [None]:
# Build the inference-time generator from the training generator's own
# settings instead of repeating them by hand, so test images always go
# through exactly the same rescaling and preprocessing as the training images.
test_datagen = ImageDataGenerator(
    rescale=datagen.rescale,
    preprocessing_function=datagen.preprocessing_function,
)

In [None]:
# Unlabeled test batches, read in DataFrame order (no shuffling) so that the
# predictions line up row-for-row with `df_test`.
test_flow_options = {
    'x_col': "imageFile",
    'y_col': None,
    'target_size': (224, 224),
    'batch_size': 32,
    'class_mode': None,
    'shuffle': False,
}
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory="./bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test",
    **test_flow_options,
)

In [None]:
# Duplicate each test batch for the model's two inputs.
test_dual_generator = dual_generator_forTest(generator=test_generator)

In [None]:
# The wrapped test generator never ends, so prediction is limited to one pass
# over the underlying iterator.
n_test_batches = len(test_generator)
predictions = model.predict(test_dual_generator, steps=n_test_batches)

# Index of the highest-scoring output unit for each test image.
# NOTE(review): these indices follow the training generator's class ordering;
# verify that ordering matches the competition's classID values before
# submitting.
predicted_classes = predictions.argmax(axis=1)

In [None]:
# Two-column submission table: the test row identifier next to its predicted
# class index.
submission_df = df_test[['uniqueID']].assign(classID=predicted_classes)

In [None]:
# Preview the first five rows of the submission table.
submission_df.head(n=5)

In [None]:
# Write the submission to disk without the DataFrame index column.
submission_df.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
! kaggle competitions submit -c bttai-nybg-2024 -f submission.csv -m "Starflowers-NYC (Using Emodel)"