In [8]:
from keras.src.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

import src.utils as utils
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [9]:
df = utils.build_dataset('data/plantvillage/plantvillage dataset')
df

Unnamed: 0,Format,Species,Healthy,Disease,Folder,FileName,File
0,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,8f558908-aa1b-4a86-855a-5094c2392e5a___RS_HL 1...,data/plantvillage/plantvillage dataset/color/S...
1,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,b8e9ed27-8e37-4214-9206-f8c0ef21cf4d___RS_HL 4...,data/plantvillage/plantvillage dataset/color/S...
2,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,abdd34a0-ab02-41e0-95a3-a014ab863ec2___RS_HL 1...,data/plantvillage/plantvillage dataset/color/S...
3,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,d1aee44a-b6bb-45b9-b7b6-5d553add8fd1___RS_HL 2...,data/plantvillage/plantvillage dataset/color/S...
4,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,3d28c3ea-8419-4e09-addd-211e3828e39f___RS_HL 1...,data/plantvillage/plantvillage dataset/color/S...
...,...,...,...,...,...,...,...
162911,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,f6579a78-e6eb-4a65-82f7-7be30f100a07___RS_HL 5...,data/plantvillage/plantvillage dataset/segment...
162912,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,356eb227-3e6d-4164-b84d-31f590293644___RS_HL 4...,data/plantvillage/plantvillage dataset/segment...
162913,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,5d3def53-fdb2-4106-ad31-c020e75bccea___RS_HL 7...,data/plantvillage/plantvillage dataset/segment...
162914,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,63d474df-5512-4ecc-9cd3-c0649c260668___RS_HL 7...,data/plantvillage/plantvillage dataset/segment...


In [10]:
img_shape = (224, 224, 3)
lr = 1e-4
batch_size = 32

# Usaré solamente las imágenes a color
df = df[df['Format'] == 'color'].copy()

# Encodeamos las enfermedades
le = LabelEncoder()
df['label'] = le.fit_transform(df['Disease']).astype(str)
num_classes = len(le.classes_)

In [11]:
# Split del dataset
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

In [12]:
# Data Augmentation

def data_augmentation():
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=25,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        brightness_range=[0.8, 1.2],
        fill_mode='nearest'
    )
    
    val_datagen = ImageDataGenerator(rescale=1./255)
    
    def dataframe_generator(datagen, dataframe, batch_size, img_size=(224, 224), num_classes=None, shuffle=True):
        while True:
            if shuffle:
                dataframe = dataframe.sample(frac=1).reset_index(drop=True)
    
            for i in range(0, len(dataframe), batch_size):
                batch_df = dataframe.iloc[i:i+batch_size]
                batch_images = []
                batch_labels = []
    
                for _, row in batch_df.iterrows():
                    img_path = os.path.join(row['Folder'], row['FileName'])
                    img = image.load_img(img_path, target_size=img_size)
                    img_array = image.img_to_array(img)
                    img_array = img_array.reshape((1,) + img_array.shape)
                    aug_iter = datagen.flow(img_array, batch_size=1)
                    aug_img = next(aug_iter)[0]
                    batch_images.append(aug_img)
                    batch_labels.append(row['label'])  # Guardamos el índice de clase
    
                batch_labels = to_categorical(batch_labels, num_classes=num_classes)
                yield np.array(batch_images), np.array(batch_labels)
    
    train_gen = dataframe_generator(train_datagen, train_df, batch_size=batch_size, num_classes=num_classes)
    val_gen = dataframe_generator(val_datagen, val_df, batch_size=batch_size, num_classes=num_classes, shuffle=False)
    
    return train_gen, val_gen

In [13]:
def data_augmentationt():
    train_datagen = ImageDataGenerator(rescale=1./255)
    val_datagen = ImageDataGenerator(rescale=1./255)
    
    train_gen = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=None,
        x_col='File',  # debe existir esta columna con la ruta completa
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=True
    )
    
    val_gen = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='File',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )
    
    return train_gen, val_gen

In [16]:
epochs = 10
steps_per_epoch = len(train_df) // batch_size
validation_steps = len(val_df) // batch_size

# === Cargar ResNet50 base (sin la cabeza final)
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=img_shape)
)

for layer in base_model.layers:
    layer.trainable = False

# === Agregar cabeza de clasificación
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# === Compilar el modelo
model.compile(
    optimizer=Adam(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
train_gen, val_gen = data_augmentation()

# Train with data augmentation
history = model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_gen,
    validation_steps=validation_steps
)

Epoch 1/10


Expected: ['keras_tensor_720']
Received: inputs=Tensor(shape=(None, 224, 224, 3))


[1m 280/1357[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m4:07[0m 229ms/step - accuracy: 0.1347 - loss: 3.4605