In [None]:
# -*- coding: utf-8 -*-
"""New_Data.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1op5L6cCskTiujUSKbp50PlXNTxPpftt5
"""

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.utils import class_weight

"""### **Set Constants**"""

# --- Reproducibility -------------------------------------------------------
SEED = 123

# --- Input geometry --------------------------------------------------------
IMAGE_SIZE = (128, 128)
# Height and width from IMAGE_SIZE plus three colour channels.
INPUT_SHAPE = IMAGE_SIZE + (3,)

# --- Training hyper-parameters ---------------------------------------------
EPOCHS = 50
BATCH_SIZE = 32
BUFFER_SIZE = 250        # shuffle buffer, counted in dataset elements
FINE_TUNE_POINT = 100    # number of leading base-model layers kept frozen
LEARNING_RATE = 0.001

# --- Data location ---------------------------------------------------------
# Earlier runs read from "/kaggle/input/sugarcane-leaf-disease-dataset".
# When running on Colab, mount Google Drive first
# (google.colab.drive.mount('/content/drive')) and point DATASET_DIR at it.
DATASET_DIR = '../data/arcgis-survey-images-new'

"""### **Load Image Datasets**"""

# Keyword arguments shared by the training and validation loader calls.
_SPLIT_OPTIONS = dict(
    labels="inferred",
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
    seed=SEED,
    validation_split=0.2,
)

# Training subset (80 % of the files); shuffling is requested explicitly.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR,
    subset="training",
    shuffle=True,
    **_SPLIT_OPTIONS,
)

# Held-out subset (remaining 20 %); `shuffle` is left at the loader default.
validation_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR,
    subset="validation",
    **_SPLIT_OPTIONS,
)

# Compute per-class loss weights ('balanced' mode) from the training labels.
class_names = train_ds.class_names

# One full pass over the training batches collects every integer label.
y_train = np.concatenate([labels for _, labels in train_ds], axis=0)

# Only labels that actually occur in the training subset receive a weight.
present_classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=y_train,
)

# Key every weight by its real label rather than by its position in the
# array: the two only coincide when every class 0..N-1 is present, and a
# positional mapping would silently assign weights to the wrong classes
# otherwise. Plain int/float keep the printed dict readable.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(present_classes, balanced_weights)
}
print("Pesos de clase:", class_weights)

# Random perturbations applied to image batches: flips on both axes,
# rotation, zoom and contrast jitter.
_augmentation_steps = [
    tf.keras.layers.RandomFlip('horizontal_and_vertical'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps)


def _augment_batch(images, labels):
    """Return the batch with augmented images and untouched labels."""
    return data_augmentation(images, training=True), labels


# NOTE(review): `augmented_train_ds` is not referenced again in this script;
# `model.fit` is called with `train_ds`.
augmented_train_ds = train_ds.map(_augment_batch)

# Pull a single batch to inspect the tensor shape delivered by the loader.
example_image, example_label = next(iter(train_ds.take(1)))

print(f"Image Shape: {example_image.shape}")

"""### **Split Validation Dataset into a Validation DS and Test DS**"""

# Split the held-out batches into two disjoint halves: even-indexed batches
# for validation, odd-indexed batches for testing.
#
# Both shards must be taken from the SAME parent dataset. Sharding the test
# set out of the already-sharded validation set would make every test batch
# a validation batch as well, so test metrics would be measured on data that
# already steered early stopping and checkpoint selection.
#
# The parent is cached before sharding so that both halves index into one
# fixed batch sequence. The loader was created with its default shuffling,
# which may otherwise reorder samples between passes and blur the boundary
# between the two halves.
holdout_ds = validation_ds.cache()
validation_ds = holdout_ds.shard(num_shards=2, index=0)
test_ds = holdout_ds.shard(num_shards=2, index=1)

"""### **Visualize a Set of Training Data**"""

# Show the first nine images of one training batch in a 3x3 grid.
plt.figure(figsize=(12, 9))
for images, labels in train_ds.take(1):
    # The figure-level title only needs to be set once per batch.
    plt.suptitle("Sugarcane Leafs (Healthy or Diseased)")
    for tile in range(9):
        ax = plt.subplot(3, 3, tile + 1)
        # Cast to uint8 so imshow treats the values as 0-255 pixels.
        plt.imshow(images[tile].numpy().astype("uint8"))
        plt.title(class_names[labels[tile]])
        plt.axis("off")

# Training pipeline: cache the decoded batches, shuffle them, then apply the
# random augmentation and prefetch.
#
# The augmentation layers are applied here because `model.fit` consumes
# `train_ds`; without this step the `data_augmentation` pipeline built above
# never reaches training. It is placed AFTER `cache()` on purpose: caching
# augmented output would freeze one random draw and replay it every epoch.
train_ds = (
    train_ds
    .cache()
    .shuffle(BUFFER_SIZE)
    .map(
        lambda images, labels: (data_augmentation(images, training=True), labels),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Evaluation pipelines: deterministic content, so cache and prefetch only.
validation_ds = validation_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

"""### **Load Base Model**"""

from tensorflow.keras.applications import MobileNetV2

# Load MobileNetV2 with ImageNet weights and without its classification head.
base_model = MobileNetV2(input_shape=INPUT_SHAPE,
                         include_top=False,
                         weights='imagenet')

# Freeze the first FINE_TUNE_POINT layers of the base model; the remaining
# layers keep their default trainable state and are fine-tuned.
for layer in base_model.layers[:FINE_TUNE_POINT]:
    layer.trainable = False

# Full model: input scaling, convolutional base, pooled features and a small
# regularised dense head ending in one softmax unit per class.
model = tf.keras.models.Sequential([
    # The MobileNetV2 ImageNet weights were trained on pixels scaled to
    # [-1, 1] (what mobilenet_v2.preprocess_input produces), so map the
    # 0-255 inputs to that range rather than to [0, 1].
    tf.keras.layers.Rescaling(1. / 127.5, offset=-1),
    base_model,
    # Global pooling already yields a flat (batch, channels) tensor, so no
    # separate Flatten layer is required before the dense head.
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(class_names), activation='softmax')
])

# Compile with Adam; the sparse loss matches the integer labels produced by
# the dataset loader and the softmax (probability) output of the model.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

"""### **Callbacks para Mejorar el Entrenamiento**"""

# All three callbacks watch the same quantity.
_MONITORED_METRIC = 'val_loss'

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor=_MONITORED_METRIC, patience=10, restore_best_weights=True
)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=_MONITORED_METRIC, factor=0.5, patience=5, min_lr=1e-6
)

# Persist the model to disk only when the monitored value improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_model.keras', monitor=_MONITORED_METRIC, save_best_only=True
)

callbacks = [early_stopping, reduce_lr, checkpoint]

"""### **Fit the Model con Class Weights y Callbacks**"""

# Train with class weighting and the callbacks defined above; the returned
# History object is used afterwards to plot the learning curves.
_fit_options = {
    'validation_data': validation_ds,
    'epochs': EPOCHS,
    'class_weight': class_weights,
    'callbacks': callbacks,
}
history = model.fit(train_ds, **_fit_options)

"""### **Classification Report**"""

# Plot training vs. validation curves side by side: loss on the left,
# accuracy on the right.
metrics = history.history

# The loss panel is scaled to the largest loss seen in either curve.
_loss_ceiling = max(max(metrics['loss']), max(metrics['val_loss']))

# (training key, training label, validation key, validation label,
#  y-limits, y-axis label) for each panel, in left-to-right order.
_panels = [
    ('loss', 'Training Loss', 'val_loss', 'Validation Loss',
     [0, _loss_ceiling], 'Loss'),
    ('accuracy', 'Training Accuracy', 'val_accuracy', 'Validation Accuracy',
     [0, 1], 'Accuracy'),
]

plt.figure(figsize=(16, 6))
for _position, (_train_key, _train_label, _val_key, _val_label,
                _y_limits, _y_label) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.epoch, metrics[_train_key], label=_train_label)
    plt.plot(history.epoch, metrics[_val_key], label=_val_label)
    plt.legend()
    plt.ylim(_y_limits)
    plt.ylabel(_y_label)
    plt.xlabel('Epoch')
plt.show()

# Evaluate the model on the test set and print every reported metric.
test_results = model.evaluate(test_ds, return_dict=True)
print("Resultados de evaluación en test set:")
for metric_name, metric_value in test_results.items():
    print(f"{metric_name}: {metric_value:.4f}")

# Predict class probabilities for the test set and reduce them to the most
# likely class index per sample.
y_pred = model.predict(test_ds)
y_pred_classes = y_pred.argmax(axis=1)

# Ground-truth labels are gathered in a separate pass over `test_ds`.
# NOTE(review): predictions and labels only line up if `test_ds` yields its
# batches in the same order on every pass; this script caches `test_ds`
# before this point.
y_true = np.concatenate([batch_labels for _, batch_labels in test_ds], axis=0)

# Classification report
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize

# Pin the label set to every known class. Without it scikit-learn infers the
# labels from the data, so a class that happens to be absent from the test
# set would make `target_names` mismatch (ValueError) and would shrink the
# confusion matrix so that it no longer lines up with the tick labels.
all_labels = list(range(len(class_names)))

print("Reporte de clasificación:")
print(classification_report(
    y_true,
    y_pred_classes,
    labels=all_labels,
    target_names=class_names,
))

# Confusion matrix (rows: actual class, columns: predicted class).
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=all_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, xticklabels=class_names, yticklabels=class_names, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión')
plt.show()

# One-vs-rest ROC curve and AUC for each class.
y_true_bin = label_binarize(y_true, classes=list(range(len(class_names))))
if y_true_bin.shape[1] == 1:
    # For two-class problems label_binarize returns a single column (the
    # indicator of label 1). Expand it to one column per class so that the
    # per-class indexing below works for any number of classes.
    y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

plt.figure(figsize=(10, 8))

for i, class_name in enumerate(class_names):
    # Column i of y_pred holds the predicted probability of class i.
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_pred[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'Clase {class_name} (AUC = {roc_auc:.2f})')

# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos (FPR)')
plt.ylabel('Tasa de Verdaderos Positivos (TPR)')
plt.title('Curvas ROC')
plt.legend(loc="lower right")
plt.show()


Found 3757 files belonging to 5 classes.
Using 3006 files for training.
Found 3757 files belonging to 5 classes.
Using 751 files for validation.
Pesos de clase: {0: 0.905421686746988, 1: 1.8329268292682928, 2: 0.8906666666666667, 3: 0.6534782608695652, 4: 1.4348448687350834}
Image Shape: (32, 128, 128, 3)
Epoch 1/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 274ms/step - accuracy: 0.4151 - loss: 2.2669 - val_accuracy: 0.5521 - val_loss: 2.4347 - learning_rate: 0.0010
Epoch 2/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 248ms/step - accuracy: 0.6969 - loss: 1.2763 - val_accuracy: 0.5990 - val_loss: 3.1012 - learning_rate: 0.0010
Epoch 3/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 252ms/step - accuracy: 0.7704 - loss: 0.9417 - val_accuracy: 0.7005 - val_loss: 2.2216 - learning_rate: 0.0010
Epoch 4/50
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 248ms/step - accuracy: 0.8429 - loss: 0.7701 - val_accur