In [7]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import EfficientNetB3, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the CSV files
train_data_df = pd.read_csv('Released_Data/archive/train_data_new.csv')
super_classes_df = pd.read_csv('Released_Data/superclass_mapping.csv')
sub_classes_df = pd.read_csv('Released_Data/subclass_mapping.csv')

# Rename
super_classes_df.rename(columns={'class': 'superclass_name'}, inplace=True)
sub_classes_df.rename(columns={'class': 'subclass_name'}, inplace=True)

# Merge the class names with the training data
train_data_df = train_data_df.merge(super_classes_df, left_on='superclass_index', right_on='index', how='left')
train_data_df = train_data_df.merge(sub_classes_df, left_on='subclass_index', right_on='index', how='left')

# Superclass_name (can replace with subclass_name depending on classification task)
train_data_df['class'] = train_data_df['subclass_name']

# Split the dataset into training and validation sets
train_df, validation_df = train_test_split(train_data_df, test_size=0.2)

# Initialize the ImageDataGenerator with EfficientNet's preprocess_input
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Image folder path
image_folder_path = 'Released_Data/train_shuffle'

# Load and preprocess images for training
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_folder_path,
    x_col='image',
    y_col='class',
    class_mode='categorical',
    target_size=(300, 300),
    batch_size=32
)

# Load and preprocess images for validation
validation_generator = datagen.flow_from_dataframe(
    dataframe=validation_df,
    directory=image_folder_path,
    x_col='image',
    y_col='class',
    class_mode='categorical',
    target_size=(300, 300),
    batch_size=32
)

# Load pre-trained EfficientNetB3
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(300, 300, 3))

# Freeze the base model
base_model.trainable = False

# Actual number of subclasses: 
num_subclasses = train_data_df['subclass_index'].nunique()

# Add custom top layers
x = GlobalAveragePooling2D()(base_model.output)
output = Dense(num_subclasses , activation='softmax')(x) # Number of classes
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

Found 5057 validated image filenames belonging to 87 classes.
Found 1265 validated image filenames belonging to 87 classes.


In [8]:
# Count the number of unique subclass indices
num_subclasses = train_data_df['subclass_index'].nunique()
print("Number of unique subclasses:", num_subclasses)

Number of unique subclasses: 87


In [9]:
# Train the model
model.fit(train_generator, validation_data=validation_generator, epochs=10, batch_size=32)

# Unfreeze some layers for fine-tuning
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Re-compile the model with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Continue training
model.fit(train_generator, validation_data=validation_generator, epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1b71d45d0>

In [10]:
test_image_folder = 'Released_Data/test_shuffle'   # Correct path to your test images
image_files = [os.path.join(test_image_folder, img) for img in os.listdir(test_image_folder) if img.endswith('.jpg')]
test_df = pd.DataFrame(image_files, columns=['filename'])


In [11]:
test_df.head()

Unnamed: 0,filename
0,Released_Data/test_shuffle/9733.jpg
1,Released_Data/test_shuffle/63.jpg
2,Released_Data/test_shuffle/6400.jpg
3,Released_Data/test_shuffle/823.jpg
4,Released_Data/test_shuffle/4217.jpg


In [12]:
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


In [13]:
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col=None,
    target_size=(300, 300),
    batch_size=32,
    class_mode=None,
    shuffle=False
)


Found 12377 validated image filenames.


In [14]:
predictions = model.predict(test_generator)




In [20]:
predictions

array([[1.51300862e-01, 1.04239192e-07, 6.18650256e-06, ...,
        3.27369307e-05, 1.34536705e-07, 6.04311463e-05],
       [6.70024529e-05, 9.42222050e-06, 2.77952622e-05, ...,
        9.01447274e-05, 5.78850950e-06, 4.74755052e-06],
       [4.50959414e-01, 5.32656813e-05, 4.51473607e-05, ...,
        1.74179638e-03, 9.17537875e-07, 9.90388726e-05],
       ...,
       [1.21575158e-06, 5.71911301e-08, 3.70027533e-06, ...,
        3.24344983e-05, 7.48405280e-03, 5.43535954e-08],
       [3.59533879e-05, 2.87077364e-05, 5.86568092e-08, ...,
        2.39243571e-04, 1.70933290e-05, 5.30179477e-06],
       [1.47966458e-03, 1.17337164e-04, 1.15857727e-03, ...,
        6.07504153e-05, 3.71942148e-02, 8.65577895e-05]], dtype=float32)

In [15]:
np.shape(predictions)

(12377, 87)

In [16]:
predicted_subclass_indices = np.argmax(predictions, axis=1)
predicted_subclass_indices

array([22, 23,  0, ...,  6, 62, 74])

In [17]:
image_filenames = [f"{i}.jpg" for i in range(len(predictions))]


In [18]:
results_df = pd.DataFrame({
    'ID': image_filenames,
    'Target': predicted_subclass_indices
})


In [19]:
results_df.to_csv("res/subclass_pred_new.csv", index=False)

In [None]:
import os
# Check the directory path
print("Directory exists:", os.path.isdir(image_folder_path))

# Check the first few rows of train_df and validation_df
print("Train DataFrame:\n", train_df.head())
print("Validation DataFrame:\n", validation_df.head())

# Check the length of the DataFrames
print("Length of train_df:", len(train_df))
print("Length of validation_df:", len(validation_df))

# Check a few image file paths
sample_images = train_df['image'].head().tolist()
for img in sample_images:
    file_path = os.path.join(image_folder_path, img)
    print(f"{img} exists:", os.path.isfile(file_path))


Directory exists: True
Train DataFrame:
          image  superclass_index  subclass_index  index_x superclass_name  \
4468  4468.jpg                 2              44        2         reptile   
3498  3498.jpg                 0              28        0            bird   
6148  6148.jpg                 0              30        0            bird   
354    354.jpg                 1              31        1             dog   
4655  4655.jpg                 2              69        2         reptile   

      index_y                                      subclass_name    class  
4468       44              hognose snake, puff adder, sand viper  reptile  
3498       28                                       black grouse     bird  
6148       30                                            vulture     bird  
354        31                                           Shih-Tzu      dog  
4655       69  leatherback turtle, leatherback, leathery turt...  reptile  
Validation DataFrame:
          image  s

In [None]:
validation_df.to_csv("/res/superclass_pred_new.csv", index=False)