In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam

In [4]:
# Load the preprocessed data
train_df = pd.read_csv('dataset/train.csv')
test_df = pd.read_csv('dataset/test.csv')

# Split the data into training and validation sets
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Set up image data generators with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Create data generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory='dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)


Found 8186 validated image filenames.
Found 2047 validated image filenames.


In [5]:
#def and train model
# Load the EfficientNetB7 base model
efficientnet_base = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Create the model
model = Sequential()
model.add(efficientnet_base)
model.add(GlobalAveragePooling2D())
model.add(Dense(1, activation='linear'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

# Learning rate scheduler
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Train the model with fine-tuning
efficientnet_base.trainable = True  # Unfreeze the base model
fine_tune_epochs = 5
total_epochs = 10 + fine_tune_epochs

model.fit(
    train_generator,
    epochs=total_epochs,
    initial_epoch=15,  # Start fine-tuning from here
    validation_data=val_generator,
    callbacks=[lr_scheduler]
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7_notop.h5


<keras.src.callbacks.History at 0x21c64990d00>

In [6]:
# Prepare the test data generator
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='dataset/preprocessed/test',
    x_col='Name',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

# Make predictions
predictions = model.predict(test_generator)

# Round the predictions to the nearest integer
predicted_head_counts = np.round(predictions.flatten()).astype(int)

# Create a submission DataFrame
submission_df = pd.DataFrame({
    'Name': test_df['Name'],
    'HeadCount': predicted_head_counts
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('submission_efficientnet.csv', index=False)


Found 3963 validated image filenames.
