In [11]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split

In [12]:
# Paths
data_dir = r"C:\\Users\\alyar\\Downloads\\hackathon-2-insect-species-classification"
train_dir = os.path.join(data_dir, "train", "train")
test_dir = os.path.join(data_dir, "test", "test")
csv_path = os.path.join(data_dir, "train.csv")

# Load CSV file
data = pd.read_csv(csv_path)
data["ID"] = data["ID"].apply(lambda x: os.path.join(train_dir, x))

In [13]:
# Split data into training and validation sets
train_data, val_data = train_test_split(data, test_size=0.2, stratify=data["Species"], random_state=42)

# ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator(rescale=1.0/255)

In [14]:
# Flow from dataframe
train_generator = train_datagen.flow_from_dataframe(
    train_data,
    x_col="ID",
    y_col="Species",
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)

val_generator = val_datagen.flow_from_dataframe(
    val_data,
    x_col="ID",
    y_col="Species",
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)


Found 6872 validated image filenames belonging to 37 classes.
Found 1719 validated image filenames belonging to 37 classes.


In [15]:
# Load ResNet50 with pre-trained weights
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
output = Dense(len(train_generator.class_indices), activation="softmax")(x)


In [37]:
# Create the model
model = Model(inputs=base_model.input, outputs=output)

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Ensure the model uses the GPU
print("Running on GPU: " + str(tf.config.list_physical_devices('GPU')))

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_steps=val_generator.samples // val_generator.batch_size
)

# Unfreeze the base model and fine-tune
for layer in base_model.layers[-10:]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

fine_tune_epochs = 10
total_epochs = 10 + fine_tune_epochs

history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=history.epoch[-1],
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_steps=val_generator.samples // val_generator.batch_size
)

Running on GPU: []
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [38]:
test_files = [os.path.join(test_dir, f) for f in os.listdir(test_dir) if f.endswith(('jpg', 'png', 'jpeg'))]
test_df = pd.DataFrame({"ID": test_files})

test_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2]
)

test_generator = test_datagen.flow_from_dataframe(
    test_df,
    rescale=1.0/255,
    rotation_range=20,  # Add rotation during inference
    zoom_range=0.2,     # Add zoom during inference
    horizontal_flip=True,
    x_col="ID",
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)


Found 2149 validated image filenames.


In [41]:
# Enhanced Test-Time Augmentation (TTA)
def perform_tta(generator, model, num_augments=10):
    augmented_predictions = []
    for _ in range(num_augments):
        # Reset the generator to ensure new augmentations are applied
        generator.reset()
        # Introduce more variety to TTA
        generator.image_data_generator.rotation_range = 30  # Increase rotation range
        generator.image_data_generator.zoom_range = [0.8, 1.2]  # Dynamic zoom range
        generator.image_data_generator.brightness_range = [0.8, 1.2]  # Add brightness variations
        generator.image_data_generator.horizontal_flip = True  # Flip images horizontally
        generator.image_data_generator.shear_range = 20  # Add shear transformations
        augmented_predictions.append(model.predict(generator, verbose=0))
    return tf.reduce_mean(augmented_predictions, axis=0)

# Make predictions with TTA
predictions = perform_tta(test_generator, model, num_augments=5)
predicted_classes = tf.argmax(predictions, axis=1).numpy()
class_indices = {v: k for k, v in train_generator.class_indices.items()}
predicted_species = [class_indices[idx] for idx in predicted_classes]


In [42]:
# Create a DataFrame for predictions
filenames = test_generator.filenames
filenames = [os.path.basename(f) for f in filenames]  # Extract only the image names
results = pd.DataFrame({"ID": filenames, "Species": predicted_species})

# Save predictions to CSV
results.to_csv(os.path.join(data_dir, "predictionss.csv"), index=False)
print("Predictions saved to predictionss.csv")

# Save the model
model.save("insect_species_classifier_resnet.h5")


Predictions saved to predictionss.csv


train 50 epochs in total