In [2]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from tqdm import tqdm
import cv2

In [3]:
train_df = pd.read_csv('../input/vista-codefest/dataset/bbox.csv')
bbox_df = pd.read_csv('../input/vista-codefest/dataset/bbox.csv')

In [6]:
def preprocess_images(image_folder, output_folder, df, size=(224, 224)):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for i, row in tqdm(df.iterrows(), total=len(df)):
        image_path = os.path.join(image_folder, row['Name'])
        image = Image.open(image_path)
        image = image.resize(size)
        image = np.array(image) / 255.0  # Normalize pixel values
        image = (image * 255).astype(np.uint8)  # Convert back to 8-bit values

        output_path = os.path.join(output_folder, row['Name'])
        cv2.imwrite(output_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

# Preprocess training images
preprocess_images('../input/vista-codefest/dataset/train', '../output/dataset/preprocessed/train', train_df)




100%|██████████| 62529/62529 [13:21<00:00, 78.05it/s] 


FileNotFoundError: [Errno 2] No such file or directory: 'dataset/test.csv'

In [13]:
# Preprocess testing images (assuming you have a test.csv similar to train.csv)
test_df = pd.read_csv('../input/vista-codefest/dataset/test.csv')
preprocess_images('../input/vista-codefest/dataset/test', 'kaggle/working/dataset/preprocessed/test', test_df)

100%|██████████| 3963/3963 [00:49<00:00, 80.55it/s]


In [14]:
def process_bounding_boxes(df, image_folder):
    processed_boxes = {}
    for _, row in tqdm(df.iterrows(), total=len(df)):
        image_name = row['Name']
        if image_name not in processed_boxes:
            processed_boxes[image_name] = []
        processed_boxes[image_name].append([row['xmin'], row['ymin'], row['xmax'], row['ymax']])

    # Save or use the processed_boxes dictionary as needed
    return processed_boxes

bbox_dict = process_bounding_boxes(bbox_df, '../input/vista-codefest/dataset/train')


100%|██████████| 62529/62529 [00:04<00:00, 13299.19it/s]


In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf  # Add this line to import TensorFlow
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

2024-03-28 20:54:45.480360: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-28 20:54:45.480500: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-28 20:54:45.616683: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
# Load the preprocessed data
train_df = pd.read_csv('../input/vista-codefest/dataset/train.csv')
test_df = pd.read_csv('../input/vista-codefest/dataset/test.csv')

train_df['HeadCount'] = train_df['HeadCount'].astype(np.float32)
test_df['HeadCount'] = test_df['HeadCount'].astype(np.float32)


# Split the data into training and validation sets
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Set up image data generators with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Create data generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='/kaggle/working/dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory='/kaggle/working/dataset/preprocessed/train',
    x_col='Name',
    y_col='HeadCount',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw'
)


Found 0 validated image filenames.
Found 0 validated image filenames.




In [21]:
def create_model(base_model):
    model = Sequential()
    model.add(base_model)
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))  # Reduced number of units in this layer
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
    return model

# VGG16 model
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
vgg_model = create_model(vgg_base)
vgg_model.fit(train_generator, epochs=20, validation_data=val_generator)  # Increased epochs

# ResNet50 model
resnet_base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
resnet_model = create_model(resnet_base)
resnet_model.fit(train_generator, epochs=20, validation_data=val_generator)  # Increased epochs

Epoch 1/20


  self._warn_if_super_not_called()
2024-03-28 18:37:38.689954: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 0: 4.63498, expected 3.85968
2024-03-28 18:37:38.690018: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 3: 6.55784, expected 5.78254
2024-03-28 18:37:38.690028: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 4: 6.61282, expected 5.83752
2024-03-28 18:37:38.690036: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 6: 6.34499, expected 5.56968
2024-03-28 18:37:38.690045: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 7: 6.25159, expected 5.47629
2024-03-28 18:37:38.690053: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 8: 5.43964, expected 4.66434
2024-03-28 18:37:38.690061: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 9: 6.54333, expected 5.76803
2024-03-28 18:37:38.

[1m232/256[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m9s[0m 406ms/step - loss: 18.3082 - rmse: 4.2463 

2024-03-28 18:39:49.882361: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 0: 3.46443, expected 2.59124
2024-03-28 18:39:49.882422: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 1: 4.84232, expected 3.96913
2024-03-28 18:39:49.882437: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 2: 4.91526, expected 4.04207
2024-03-28 18:39:49.882452: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 3: 5.02814, expected 4.15494
2024-03-28 18:39:49.882468: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 4: 4.53562, expected 3.66243
2024-03-28 18:39:49.882479: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 5: 4.79722, expected 3.92403
2024-03-28 18:39:49.882490: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 6: 4.69349, expected 3.82029
2024-03-28 18:39:49.882500: E external/local_xla/xla/se

[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 514ms/step - loss: 18.0295 - rmse: 4.2154

2024-03-28 18:40:31.097546: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100352: 3.62212, expected 2.70127
2024-03-28 18:40:31.097621: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100353: 5.31563, expected 4.39477
2024-03-28 18:40:31.097643: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100354: 5.43093, expected 4.51007
2024-03-28 18:40:31.097662: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100355: 5.35723, expected 4.43638
2024-03-28 18:40:31.097674: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100356: 5.06878, expected 4.14793
2024-03-28 18:40:31.097685: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100357: 5.791, expected 4.87014
2024-03-28 18:40:31.097696: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 100358: 5.23388, expected 4.31303
2024-03-28 18:40:31.09

[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 587ms/step - loss: 18.0188 - rmse: 4.2142 - val_loss: 9.5763 - val_rmse: 3.0947
Epoch 2/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 407ms/step - loss: 12.7579 - rmse: 3.5703 - val_loss: 19.8989 - val_rmse: 4.4611
Epoch 3/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 405ms/step - loss: 13.6136 - rmse: 3.6798 - val_loss: 9.3992 - val_rmse: 3.0653
Epoch 4/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 406ms/step - loss: 12.2261 - rmse: 3.4950 - val_loss: 7.9625 - val_rmse: 2.8221
Epoch 5/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 407ms/step - loss: 11.5180 - rmse: 3.3906 - val_loss: 8.4273 - val_rmse: 2.9028
Epoch 6/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 408ms/step - loss: 10.3881 - rmse: 3.2225 - val_loss: 9.0900 - val_rmse: 3.0148
Epoch 7/20
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7bd7d4714790>

In [25]:
# Prepare the test data generator
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='/kaggle/working/kaggle/working/dataset/preprocessed/test',
    x_col='Name',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

# Make predictions with both models
vgg_predictions = vgg_model.predict(test_generator)
resnet_predictions = resnet_model.predict(test_generator)

# Average the predictions
final_predictions = (vgg_predictions.flatten() + resnet_predictions.flatten()) / 2

# Round the predictions to the nearest integer
final_head_counts = np.round(final_predictions).astype(int)

# Create a submission DataFrame
submission_df = pd.DataFrame({
    'Name': test_df['Name'],
    'HeadCount': final_head_counts
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('/kaggle/working/submission22.csv', index=False)

Found 3963 validated image filenames.
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 60ms/step
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 41ms/step


In [26]:
submission_df = pd.DataFrame({
    'Name': test_df['Name'],
    'HeadCount': final_head_counts
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('/kaggle/working/submission22.csv', index=False)

In [1]:
submission_df = pd.DataFrame({
    'Name': test_df['Name'],
    'HeadCount': vgg_predictions.flatten()
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('/kaggle/working/submission22vggtuned.csv', index=False)
submission_df = pd.DataFrame({
    'Name': test_df['Name'],
    'HeadCount': resnet_predictions.flatten()
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('/kaggle/working/submission22resnettuned.csv', index=False)

NameError: name 'pd' is not defined