In [2]:
import src.utils as utils
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam


In [3]:
df = utils.build_dataset('data/plantvillage/plantvillage dataset')
df

Unnamed: 0,Format,Species,Healthy,Disease,Folder,FileName
0,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,8f558908-aa1b-4a86-855a-5094c2392e5a___RS_HL 1...
1,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,b8e9ed27-8e37-4214-9206-f8c0ef21cf4d___RS_HL 4...
2,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,abdd34a0-ab02-41e0-95a3-a014ab863ec2___RS_HL 1...
3,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,d1aee44a-b6bb-45b9-b7b6-5d553add8fd1___RS_HL 2...
4,color,Strawberry,True,,data/plantvillage/plantvillage dataset/color/S...,3d28c3ea-8419-4e09-addd-211e3828e39f___RS_HL 1...
...,...,...,...,...,...,...
162911,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,f6579a78-e6eb-4a65-82f7-7be30f100a07___RS_HL 5...
162912,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,356eb227-3e6d-4164-b84d-31f590293644___RS_HL 4...
162913,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,5d3def53-fdb2-4106-ad31-c020e75bccea___RS_HL 7...
162914,segmented,Soybean,True,,data/plantvillage/plantvillage dataset/segment...,63d474df-5512-4ecc-9cd3-c0649c260668___RS_HL 7...


In [4]:
# === Filtramos por formato 'color' y armamos etiquetas ===
df = df[df['Format'] == 'color'].copy()
df['label'] = df['Healthy'].apply(lambda x: 0 if x else 1)  # 0: sana, 1: enferma

# === Split Train / Validation ===
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

# === Aumentos para entrenamiento ===
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

# === Custom DataFrame-based Generator ===
def dataframe_generator(datagen, dataframe, batch_size, img_size=(224, 224), shuffle=True):
    while True:
        if shuffle:
            dataframe = dataframe.sample(frac=1).reset_index(drop=True)

        for i in range(0, len(dataframe), batch_size):
            batch_df = dataframe.iloc[i:i+batch_size]
            batch_images = []
            batch_labels = []

            for _, row in batch_df.iterrows():
                img_path = os.path.join(row['Folder'], row['FileName'])
                img = image.load_img(img_path, target_size=img_size)
                img_array = image.img_to_array(img)
                img_array = img_array.reshape((1,) + img_array.shape)
                aug_iter = datagen.flow(img_array, batch_size=1)
                aug_img = next(aug_iter)[0]
                batch_images.append(aug_img)
                batch_labels.append(row['label'])

            yield np.array(batch_images), np.array(batch_labels)

# === Crear generadores ===
batch_size = 32
train_gen = dataframe_generator(train_datagen, train_df, batch_size=batch_size)
val_gen = dataframe_generator(val_datagen, val_df, batch_size=batch_size, shuffle=False)

train_gen

<generator object dataframe_generator at 0x320691900>

In [5]:
import tensorflow as tf

print("Versión de TensorFlow:", tf.__version__)
print("Dispositivos disponibles:")
print(tf.config.list_physical_devices())

Versión de TensorFlow: 2.16.2
Dispositivos disponibles:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# === Parámetros
img_shape = (224, 224, 3)
lr = 1e-4
epochs = 10
steps_per_epoch = len(train_df) // batch_size
validation_steps = len(val_df) // batch_size

# === Cargar ResNet50 base (sin la cabeza final)
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=img_shape)
)

for layer in base_model.layers:
    layer.trainable = False

# # === Congelar capas del modelo base
# base_model.trainable = False

# === Agregar nuevas capas
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(1, activation='sigmoid')(x)  # salida binaria

model = Model(inputs=base_model.input, outputs=output)

# === Compilar el modelo
model.compile(
    optimizer=Adam(learning_rate=lr),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# === Entrenamiento
history = model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_gen,
    validation_steps=validation_steps
)


2025-07-24 11:46:02.026468: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-07-24 11:46:02.026509: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-07-24 11:46:02.026512: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-07-24 11:46:02.026939: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-07-24 11:46:02.026989: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(None, 224, 224, 3))
2025-07-24 11:46:05.524247: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1357/1357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m403s[0m 291ms/step - accuracy: 0.6585 - loss: 0.6950 - val_accuracy: 0.7223 - val_loss: 0.5561
Epoch 2/10
[1m 479/1357[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m3:29[0m 238ms/step - accuracy: 0.6834 - loss: 0.6522