In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import pandas as pd


In [4]:
# Load the preprocessed data
train_df = pd.read_csv('dataset/train.csv')

# Split the data into training and validation sets
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Set up image data generators
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory='dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)

Found 8186 validated image filenames.
Found 2047 validated image filenames.


In [5]:
# Load a pre-trained VGG16 model without the top layer
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Create a sequential model and add the base model
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='linear'))  # Output layer for regression

# Compile the model
model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5




In [6]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
model.save('models/head_count_model.keras')



In [9]:
from tensorflow.keras.models import load_model

loaded_model = load_model('models/head_count_model.keras')


In [10]:
# Evaluate the model on the training data
train_loss, train_rmse = model.evaluate(train_generator)

print(f"Training Loss: {train_loss}")
print(f"Training RMSE: {train_rmse}")


Training Loss: 4.297650337219238
Training RMSE: 2.073077440261841


In [11]:
test_df = pd.read_csv('dataset/test.csv')

test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='dataset/preprocessed/test',
    x_col='Name',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False  # Important for maintaining order
)


Found 3963 validated image filenames.


In [12]:
predictions = model.predict(test_generator)




In [19]:
import numpy as np

# Round the predictions to the nearest integer
predicted_head_counts = np.round(predictions.flatten()).astype(int)

# Create a DataFrame for the submission
submission_df = pd.DataFrame({
    'Name': test_df['Name'],
    'HeadCount': predicted_head_counts
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('submission_main.csv', index=False)


In [20]:
predicted_head_counts


array([5, 5, 6, ..., 3, 4, 5])

In [21]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the data augmentation for the training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create the data generator for the training set
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)


Found 8186 validated image filenames.


In [22]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the data augmentation for the training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create the data generator for the training set
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)


Found 8186 validated image filenames.


In [28]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dropout

# Load the ResNet50 base model
base_model2 = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Create the model
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))  # Add dropout layer
model.add(Dense(1, activation='linear'))

# Compile the model
model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])




In [29]:
def preprocess_image(image_path, size=(224, 224)):
    image = cv2.imread(image_path)
    image = cv2.resize(image, size)

    # Apply histogram equalization
    img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
    img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
    image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)

    image = image / 255.0  # Normalize pixel values
    return image


In [30]:
# Assume model1 and model2 are two trained models

predictions1 = base_model.predict(test_generator)
predictions2 = base_model2.predict(test_generator)

# Simple averaging ensemble
final_predictions = (predictions1.flatten() + predictions2.flatten()) / 2




NameError: name 'predictions1' is not defined