In [None]:
# Import necessary libraries
import pandas as pd
import os
import zipfile
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras import layers, models, optimizers
import tensorflow as tf

# Locations of the uploaded archive, the extraction target and the files
# that are read from it.
uploaded_zip_path = '/content/clasifica-el-sargazo-24-b.zip'
extract_path = '/content/clasifica-el-sargazo'
images_path = "/content/clasifica-el-sargazo/images_public/images_public"
train_csv_path = os.path.join(extract_path, "train_data.csv")
test_csv_path = os.path.join(extract_path, "test_data.csv")

# Unpack the whole archive into the extraction directory
with zipfile.ZipFile(uploaded_zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Read the train and test tables from the extracted CSV files
train_df, test_df = (
    pd.read_csv(csv_file) for csv_file in (train_csv_path, test_csv_path)
)

# Function to augment images and update them in the DataFrame
def augment_images(class_name, target_count, datagen, train_df, images_path, target_size=(224, 224)):
    """Top up one class with augmented images until it has ``target_count`` rows.

    Source images of ``class_name`` are visited round-robin and exactly one
    augmented variant is produced per visit, so the new samples are spread
    over every available source image instead of all being derived from the
    first one (``datagen.flow`` yields forever, so iterating it to exhaustion
    never advances to the next source image).

    Args:
        class_name: Value of the ``Category`` column to augment.
        target_count: Desired number of rows for that class.
        datagen: Object exposing ``flow(array, batch_size=1)`` that yields
            augmented batches (e.g. a Keras ``ImageDataGenerator``).
        train_df: DataFrame with ``Id`` (file name) and ``Category`` columns.
        images_path: Directory holding the source images; augmented images
            are written to the same directory.
        target_size: Size the source images are loaded at.

    Returns:
        ``train_df`` itself when nothing has to be generated (class already
        large enough, or no source image exists for it); otherwise a new
        DataFrame with one appended row per generated image.
    """
    class_images = train_df[train_df['Category'] == class_name]
    augmentation_needed = target_count - class_images.shape[0]

    # Nothing to do, or nothing to augment from.
    if augmentation_needed <= 0 or class_images.empty:
        return train_df

    source_ids = class_images['Id'].tolist()
    augmented_rows = []
    position = 0

    while augmentation_needed > 0:
        # Round-robin over the source images of the class.
        source_id = source_ids[position % len(source_ids)]
        position += 1

        image_path = os.path.join(images_path, source_id)
        img = tf.keras.utils.load_img(image_path, target_size=target_size)
        img_array = tf.keras.utils.img_to_array(img)  # Convert to array

        # Add the batch dimension expected by the generator
        img_array = img_array.reshape((1,) + img_array.shape)

        # Take a single augmented variant of this source image
        batch = next(iter(datagen.flow(img_array, batch_size=1)))

        # The countdown value keeps file names unique within one call
        augmented_img_name = f"augmented_{class_name}_{augmentation_needed}.jpg"
        augmented_img_path = os.path.join(images_path, augmented_img_name)
        tf.keras.utils.save_img(augmented_img_path, batch[0])
        augmented_rows.append({'Id': augmented_img_name, 'Category': class_name})
        augmentation_needed -= 1

    # Concatenate new data to the original DataFrame
    return pd.concat([train_df, pd.DataFrame(augmented_rows)], ignore_index=True)

# Random-transform settings used to synthesize extra images for the
# minority classes: rotation, shifts, shear, zoom and horizontal flips,
# with uncovered pixels filled using the 'nearest' mode.
augmentation_params = {
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
augmentation_datagen = ImageDataGenerator(**augmentation_params)

# Hold out a stratified validation set BEFORE augmenting. Splitting after
# augmentation (via ``validation_split``) lets augmented copies of a
# validation image end up in the training subset, which inflates/distorts
# the validation metrics, and a purely positional split is not stratified.
val_df = (
    train_df.groupby('Category', group_keys=False)
    .sample(frac=0.2, random_state=42)
)
train_df = train_df.drop(val_df.index).reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

# Augment images for each minority class (training portion only)
minor_classes = ['moderado', 'abundante', 'excesivo']  # Adjust according to your classes
target_count = 500  # Increase to 500 images per class
for class_name in minor_classes:
    train_df = augment_images(class_name, target_count, augmentation_datagen, train_df, images_path)

# Check the total number of images in the DataFrame
print(f"Total images in the DataFrame: {len(train_df)}")
print(f"Validation images held out: {len(val_df)}")

# Both generators must share one label -> index mapping, so the class list
# is fixed explicitly instead of being inferred per DataFrame.
class_names = sorted(train_df['Category'].unique())

# Create data generators (pixel values rescaled to [0, 1])
datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=images_path,
    x_col='Id',
    y_col='Category',
    classes=class_names,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=True
)

val_generator = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=images_path,
    x_col='Id',
    y_col='Category',
    classes=class_names,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # Order is irrelevant for evaluation
)

# 1. Load the base DenseNet201 model and freeze layers
base_model = DenseNet201(include_top=False, input_shape=(224, 224, 3), weights="imagenet")
base_model.trainable = False

# 2. Create the full model
# The output width follows the classes the training generator actually found
# instead of a hard-coded number.
num_classes = len(train_generator.class_indices)

inputs = tf.keras.Input(shape=(224, 224, 3))
# training=False keeps the BatchNormalization layers of the backbone in
# inference mode, also after the backbone is unfrozen for fine-tuning, so
# their moving statistics are not overwritten by the small dataset.
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
for units in (1024, 512, 128):
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(units, activation='relu')(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)
model = models.Model(inputs, outputs)

# 3. Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 4. Train the top layers
# restore_best_weights rolls the model back to the best validation epoch
# instead of keeping the weights of the last (worse) epochs.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=3, restore_best_weights=True
        )
    ]
)

# 5. Unfreeze the base model layers
base_model.trainable = True

# 6. Set a lower learning rate for fine-tuning
# (re-compiling is required for the trainable change to take effect)
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 7. Train with fine-tuning
fine_tune_history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=5, restore_best_weights=True
        )
    ]
)

# Test images get the same 1/255 rescaling as the training images; no
# labels are produced (class_mode=None) and shuffling is disabled.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=images_path,
    x_col='Id',
    class_mode=None,
    shuffle=False,
    target_size=(224, 224),
    batch_size=16
)

# Map class index -> label by inverting the training generator's mapping
class_indices = dict(
    (index, label) for label, index in train_generator.class_indices.items()
)

# Predict class scores and keep the highest-scoring class per image
predictions = model.predict(test_generator)
predicted_indices = predictions.argmax(axis=1)
predicted_classes = [class_indices[index] for index in predicted_indices]

# Store the predicted labels next to the test rows
test_df['prediction'] = predicted_classes

# Write the table with predictions to disk (without the index column)
output_csv_path = "/content/clasifica-el-sargazo-submission.csv"
test_df.to_csv(output_csv_path, index=False)
print(f"Predictions file saved at: {output_csv_path}")


Total de imágenes en el DataFrame: 3395
Found 2716 validated image filenames belonging to 6 classes.
Found 679 validated image filenames belonging to 6 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 977ms/step - accuracy: 0.5039 - loss: 2.7527 - val_accuracy: 0.5670 - val_loss: 1.1486
Epoch 2/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 369ms/step - accuracy: 0.6398 - loss: 0.8967 - val_accuracy: 0.5538 - val_loss: 1.1010
Epoch 3/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 392ms/step - accuracy: 0.6607 - loss: 0.8319 - val_accuracy: 0.5434 - val_loss: 1.1042
Epoch 4/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 364ms/step - accuracy: 0.6678 - loss: 0.8292 - val_accuracy: 0.5449 - val_loss: 1.1556
Epoch 5/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 397ms/step - accuracy: 0.6717 - loss: 0.8431 - val_accuracy: 0.5449 - val_loss: 1.1392
Epoch 1/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m718s[0m 4s/step - accuracy: 0.6152 - loss: 0.9874 - val_accuracy: 0.4845 - val_loss: 1.8159
Epoch 2/50
[1m85/85[0m [32m━━━━

In [None]:
# Show how many rows each label in the 'Category' column has
category_counts = train_df['Category'].value_counts()
print(category_counts)

Category
nada         1167
bajo          703
moderado      225
excesivo      178
abundante     147
excesivo       25
Name: count, dtype: int64
