In [49]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import EfficientNetB3, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the CSV files
train_data_df = pd.read_csv('Released_Data/archive_augmented/train_data_new.csv')
super_classes_df = pd.read_csv('Released_Data/superclass_mapping.csv')
sub_classes_df = pd.read_csv('Released_Data/subclass_mapping.csv')

# Rename
super_classes_df.rename(columns={'class': 'superclass_name'}, inplace=True)
sub_classes_df.rename(columns={'class': 'subclass_name'}, inplace=True)

# Merge the class names with the training data
train_data_df = train_data_df.merge(super_classes_df, left_on='superclass_index', right_on='index', how='left')
train_data_df = train_data_df.merge(sub_classes_df, left_on='subclass_index', right_on='index', how='left')

# Superclass_name (can replace with subclass_name depending on classification task)
train_data_df['class'] = train_data_df['subclass_name']

# Split the dataset into training and validation sets
train_df, validation_df = train_test_split(train_data_df, test_size=0.2)

# Initialize the ImageDataGenerator with EfficientNet's preprocess_input
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Image folder path
image_folder_path = 'Released_Data/train_shuffle_augmented'

# Load and preprocess images for training
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_folder_path,
    x_col='image',
    y_col='class',
    class_mode='categorical',
    target_size=(300, 300),
    batch_size=32
)

# Load and preprocess images for validation
validation_generator = datagen.flow_from_dataframe(
    dataframe=validation_df,
    directory=image_folder_path,
    x_col='image',
    y_col='class',
    class_mode='categorical',
    target_size=(300, 300),
    batch_size=32
)



Found 5093 validated image filenames belonging to 88 classes.
Found 1274 validated image filenames belonging to 88 classes.


In [50]:
# Load pre-trained EfficientNetB3
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(300, 300, 3))

# Freeze the base model
base_model.trainable = False

# Actual number of subclasses: 
num_subclasses = train_data_df['subclass_index'].nunique()

# Add custom top layers
x = GlobalAveragePooling2D()(base_model.output)
output = Dense(len(sub_classes_df), activation='softmax')(x) # Number of classes
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [51]:
len(train_df)

5093

In [52]:
len(validation_df)

1274

In [53]:
train_data_df.head()

Unnamed: 0,image,superclass_index,subclass_index,index_x,superclass_name,index_y,subclass_name,class
0,0.jpg,1,37,1,dog,37,"Maltese dog, Maltese terrier, Maltese","Maltese dog, Maltese terrier, Maltese"
1,1.jpg,0,42,0,bird,42,"oystercatcher, oyster catcher","oystercatcher, oyster catcher"
2,2.jpg,1,62,1,dog,62,"Afghan hound, Afghan","Afghan hound, Afghan"
3,3.jpg,1,31,1,dog,31,Shih-Tzu,Shih-Tzu
4,4.jpg,0,4,0,bird,4,"great grey owl, great gray owl, Strix nebulosa","great grey owl, great gray owl, Strix nebulosa"


In [54]:
train_data_df.tail()

Unnamed: 0,image,superclass_index,subclass_index,index_x,superclass_name,index_y,subclass_name,class
6362,6362.jpg,3,87,3,novel,87,novel,novel
6363,6363.jpg,3,87,3,novel,87,novel,novel
6364,6364.jpg,3,87,3,novel,87,novel,novel
6365,6365.jpg,3,87,3,novel,87,novel,novel
6366,6366.jpg,3,87,3,novel,87,novel,novel


In [55]:
test_image_folder = 'Released_Data/train_shuffle_augmented'
image_files = [os.path.join(test_image_folder, img) for img in os.listdir(test_image_folder) if img.endswith('.jpg')]
train_df = pd.DataFrame(image_files, columns=['filename'])


In [56]:
train_df.tail()

Unnamed: 0,filename
6362,Released_Data/train_shuffle_augmented/5676.jpg
6363,Released_Data/train_shuffle_augmented/2119.jpg
6364,Released_Data/train_shuffle_augmented/5110.jpg
6365,Released_Data/train_shuffle_augmented/3561.jpg
6366,Released_Data/train_shuffle_augmented/1376.jpg


In [57]:
# Train the model
model.fit(train_generator, validation_data=validation_generator, epochs=10, batch_size=32)

# Unfreeze some layers for fine-tuning
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Re-compile the model with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Continue training
model.fit(train_generator, validation_data=validation_generator, epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1b306bb10>

In [58]:
test_image_folder = 'Released_Data/test_shuffle'   # Correct path to your test images
image_files = [os.path.join(test_image_folder, img) for img in os.listdir(test_image_folder) if img.endswith('.jpg')]
test_df = pd.DataFrame(image_files, columns=['filename'])


In [59]:
test_df.tail()

Unnamed: 0,filename
12372,Released_Data/test_shuffle/3561.jpg
12373,Released_Data/test_shuffle/8434.jpg
12374,Released_Data/test_shuffle/7707.jpg
12375,Released_Data/test_shuffle/6419.jpg
12376,Released_Data/test_shuffle/1376.jpg


In [60]:
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


In [61]:
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col=None,
    target_size=(300, 300),
    batch_size=32,
    class_mode=None,
    shuffle=False
)


Found 12377 validated image filenames.


In [62]:
predictions = model.predict(test_generator)




In [63]:
predicted_subclass_indices = np.argmax(predictions, axis=1)
predicted_subclass_indices

array([22, 23,  0, ...,  6, 62, 38])

In [64]:
image_filenames = [f"{i}.jpg" for i in range(len(predictions))]


In [65]:
results_df = pd.DataFrame({
    'ID': image_filenames,
    'Target': predicted_subclass_indices
})


In [66]:
results_df.to_csv("res/subclass_pred_augmented.csv", index=False)