In [1]:
import src.utils as utils
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam


In [2]:
# Build the image index from the PlantVillage root folder and display it.
# (What build_dataset scans is defined in src.utils; the columns it produces
# are visible in the rendered output of this cell.)
dataset_root = 'data/plantvillage/plantvillage dataset'
df = utils.build_dataset(dataset_root)
df

Unnamed: 0,Format,Species,Healthy,Disease,Folder,FileName
0,color,Apple,False,Apple_scab,data/plantvillage/plantvillage dataset\color\A...,00075aa8-d81a-4184-8541-b692b78d398a___FREC_Sc...
1,color,Apple,False,Apple_scab,data/plantvillage/plantvillage dataset\color\A...,01a66316-0e98-4d3b-a56f-d78752cd043f___FREC_Sc...
2,color,Apple,False,Apple_scab,data/plantvillage/plantvillage dataset\color\A...,01f3deaa-6143-4b6c-9c22-620a46d8be04___FREC_Sc...
3,color,Apple,False,Apple_scab,data/plantvillage/plantvillage dataset\color\A...,0208f4eb-45a4-4399-904e-989ac2c6257c___FREC_Sc...
4,color,Apple,False,Apple_scab,data/plantvillage/plantvillage dataset\color\A...,023123cb-7b69-4c9f-a521-766d7c8543bb___FREC_Sc...
...,...,...,...,...,...,...
162911,segmented,Tomato,False,Tomato_Yellow_Leaf_Curl_Virus,data/plantvillage/plantvillage dataset\segment...,ffb295c9-f14e-4a15-831a-bf905da7fcb6___UF.GRC_...
162912,segmented,Tomato,False,Tomato_Yellow_Leaf_Curl_Virus,data/plantvillage/plantvillage dataset\segment...,ffe08ccc-c55e-4ca2-9234-2906b98b8d05___YLCV_NR...
162913,segmented,Tomato,False,Tomato_Yellow_Leaf_Curl_Virus,data/plantvillage/plantvillage dataset\segment...,ffe996e5-c8dc-47b7-bca2-4fc25e5ac57c___UF.GRC_...
162914,segmented,Tomato,False,Tomato_Yellow_Leaf_Curl_Virus,data/plantvillage/plantvillage dataset\segment...,fff42f1b-7ec4-46e3-9269-45932e63635e___YLCV_GC...


In [3]:
from sklearn.model_selection import train_test_split

# === Keep only the 'color' images and attach the binary target ===
# label: 0 = healthy, 1 = diseased (the negation of the truthiness of `Healthy`).
df = df.loc[df['Format'] == 'color'].copy()
df['label'] = df['Healthy'].apply(lambda healthy: int(not healthy))

# === Train / validation split ===
# 80/20 split, stratified on the binary label, with a fixed seed.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# === Input preprocessing shared by training and validation ===
# The network trained in this notebook is an ImageNet-pretrained ResNet50 whose
# backbone is frozen. Those weights expect the ResNet50 `preprocess_input`
# transform (RGB -> BGR and per-channel ImageNet mean subtraction, no scaling),
# not pixels rescaled to [0, 1]. ImageDataGenerator applies
# `preprocessing_function` after the random augmentation, so the brightness
# shift below still operates on ordinary 0-255 pixel values.
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

# === Training-time augmentation ===
train_datagen = ImageDataGenerator(
    preprocessing_function=resnet_preprocess,
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

# Validation images get the same input preprocessing but no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

# === Custom DataFrame-based Generator ===
def dataframe_generator(datagen, dataframe, batch_size, img_size=(224, 224), shuffle=True, seed=None):
    """Endlessly yield ``(images, labels)`` batches built from a dataframe of image files.

    Each row must provide ``'Folder'`` and ``'FileName'`` (joined into the image
    path) and ``'label'``. Every image is loaded at ``img_size``, passed through
    ``datagen.random_transform`` and then ``datagen.standardize``.

    Args:
        datagen: Object exposing ``random_transform(array)`` and
            ``standardize(array)``, e.g. a Keras ``ImageDataGenerator``.
        dataframe: pandas DataFrame with ``Folder``, ``FileName`` and ``label``
            columns.
        batch_size: Maximum number of rows per batch. The last batch of each
            pass is shorter when ``len(dataframe)`` is not a multiple of it.
        img_size: ``target_size`` forwarded to ``image.load_img``.
        shuffle: When true, the rows are re-shuffled at the start of every pass.
        seed: Optional seed for the row shuffling only. ``None`` keeps pandas'
            default (unseeded) sampling. It does not seed the augmentations
            performed by ``datagen``.

    Yields:
        Tuple ``(images, labels)``: the stacked per-image arrays returned by
        ``datagen`` and the matching values of the ``label`` column.
        NOTE(review): with ``load_img``'s default colour mode the image array is
        presumably ``(n, height, width, 3)`` - confirm.

    Raises:
        ValueError: On the first ``next()`` call, if ``batch_size`` is smaller
            than 1 or ``dataframe`` has no rows. Without this check the
            ``while True`` loop would spin forever without yielding anything.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")
    if len(dataframe) == 0:
        raise ValueError("dataframe is empty: there is nothing to generate batches from")

    # A single RandomState shared across passes gives a different but
    # reproducible permutation on every pass when a seed is supplied.
    shuffle_rng = np.random.RandomState(seed) if seed is not None else None

    while True:
        if shuffle:
            epoch_df = dataframe.sample(frac=1, random_state=shuffle_rng).reset_index(drop=True)
        else:
            epoch_df = dataframe

        for start in range(0, len(epoch_df), batch_size):
            batch_df = epoch_df.iloc[start:start + batch_size]
            batch_images = []

            for folder, file_name in zip(batch_df['Folder'], batch_df['FileName']):
                img = image.load_img(os.path.join(folder, file_name), target_size=img_size)
                img_array = image.img_to_array(img)
                # Transform + standardize directly instead of building a
                # throw-away one-image `datagen.flow(...)` iterator per file.
                augmented = datagen.random_transform(img_array)
                batch_images.append(datagen.standardize(augmented))

            yield np.array(batch_images), np.array(batch_df['label'])

# === Instantiate the batch generators ===
batch_size = 32

# Validation keeps the dataframe order; training shuffles on every pass.
val_gen = dataframe_generator(
    val_datagen,
    val_df,
    batch_size=batch_size,
    shuffle=False,
)
train_gen = dataframe_generator(
    train_datagen,
    train_df,
    batch_size=batch_size,
    shuffle=True,
)

train_gen

<generator object dataframe_generator at 0x000002108E5397E0>

In [4]:
import tensorflow as tf

# Report the installed TensorFlow version and the physical devices it detects.
tf_version = tf.__version__
physical_devices = tf.config.list_physical_devices()

print("Versión de TensorFlow:", tf_version)
print("Dispositivos disponibles:")
print(physical_devices)

Versión de TensorFlow: 2.10.0
Dispositivos disponibles:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
# === Hyper-parameters
img_shape = (224, 224, 3)
lr = 1e-4
epochs = 10

# Ceiling division: dataframe_generator also yields the final, shorter batch of
# each pass, so a pass contains ceil(len / batch_size) batches. Floor division
# would leave that batch to spill into the next epoch (and would give 0 steps
# for a frame smaller than one batch).
steps_per_epoch = (len(train_df) + batch_size - 1) // batch_size
validation_steps = (len(val_df) + batch_size - 1) // batch_size

# === Load the ImageNet-pretrained ResNet50 backbone (without its top classifier)
image_input = Input(shape=img_shape)
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=image_input,
)

# === Freeze every backbone layer so that only the new head is trained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# === Stack the new classification head on top of the backbone output
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # single-unit binary output
]
output = base_model.output
for head_layer in head_layers:
    output = head_layer(output)

model = Model(inputs=base_model.input, outputs=output)

# === Compile: Adam optimizer, binary cross-entropy loss, accuracy metric
optimizer = Adam(learning_rate=lr)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# === Train from the batch generators; step counts bound each epoch because
# the generators never stop on their own
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
