In [2]:
import json
import os

import tqdm
import pandas as pd

import src.data.Dataset as dt

import tensorflow as tf
# import tensorboard
from keras.callbacks import TensorBoard
from tensorflow.keras.applications import ResNet50, MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [3]:
# Make the vendored external repositories importable by child processes.
# Only the environment variable is updated here; sys.path of the running
# kernel is not modified by this cell.
required_paths = ["/ai4eutils", "/CameraTraps", "/yolov5"]
root_path = os.getcwd()

# Split on the platform separator and drop empty entries, so an unset or empty
# PYTHONPATH does not produce a value that starts with a stray separator.
path_entries = [
    entry for entry in os.environ.get("PYTHONPATH", "").split(os.pathsep) if entry
]

for path in required_paths:
    # Skip repositories that are already present under any prefix.
    if not any(entry.endswith(path) for entry in path_entries):
        path_entries.append(f"{root_path}/data/external{path}")

python_path = os.pathsep.join(path_entries)
os.environ["PYTHONPATH"] = python_path

!echo "PYTHONPATH: $PYTHONPATH"

PYTHONPATH: :/Users/carlos/WORKSPACE/MegaClassifier/data/external/ai4eutils:/Users/carlos/WORKSPACE/MegaClassifier/data/external/CameraTraps:/Users/carlos/WORKSPACE/MegaClassifier/data/external/yolov5


In [4]:
# Absolute locations of the image folder and of the train/validation/test
# splits, all resolved relative to the current working directory.
DATASET_PATH = os.path.abspath("./dataset/datasetFiltered")

TRAIN_CSV = os.path.abspath("./data/processed/train/10000Train.csv")
VALIDATION_CSV = os.path.abspath("./data/processed/validation/10000Validation.csv")
TEST_CSV = os.path.abspath("./data/processed/test/10000Test.csv")

# NOTE(review): despite the name, this path has no file extension and looks
# like a reports directory rather than a single CSV file - confirm.
RESULT_CSV = os.path.abspath("./reports/model_coberture/")

# Echo the resolved paths; each label matches the variable it prints.
print(f"DATASET_PATH:   {DATASET_PATH}")
print(f"TRAIN_CSV:      {TRAIN_CSV}")
print(f"VALIDATION_CSV: {VALIDATION_CSV}")
print(f"TEST_CSV:       {TEST_CSV}")
print(f"RESULT_CSV:     {RESULT_CSV}")


DATASET_PATH:   /Users/carlos/WORKSPACE/MegaClassifier/dataset/datasetFiltered
TRAIN_CSV:      /Users/carlos/WORKSPACE/MegaClassifier/data/processed/train/10000Train.csv
VALIDATION_CSV: /Users/carlos/WORKSPACE/MegaClassifier/data/processed/validation/10000Validation.csv
TEST_CSV:       /Users/carlos/WORKSPACE/MegaClassifier/data/processed/test/10000Test.csv
RESULT_JSON:    /Users/carlos/WORKSPACE/MegaClassifier/reports/model_coberture


In [5]:
# Read the three splits, in train / validation / test order, with the project
# helper from src.data.Dataset. Later cells iterate the results with
# .iterrows() and read their 'file_name' and 'label' columns.
train_csv, vali_csv, test_csv = (
    dt.load_from_csv(split_path)
    for split_path in (TRAIN_CSV, VALIDATION_CSV, TEST_CSV)
)

The file /Users/carlos/WORKSPACE/MegaClassifier/data/processed/train/10000Train.csv has been successfully opened.
The file /Users/carlos/WORKSPACE/MegaClassifier/data/processed/validation/10000Validation.csv has been successfully opened.
The file /Users/carlos/WORKSPACE/MegaClassifier/data/processed/test/10000Test.csv has been successfully opened.


In [6]:
columns = ['file_name', 'label']


def _build_dataset(csv_frame, dataset_path):
    """Return a two-column frame of absolute image paths and string labels.

    Args:
        csv_frame: Table with at least the 'file_name' and 'label' columns.
        dataset_path: Directory that every 'file_name' entry is joined onto.

    Returns:
        A new DataFrame with a fresh default index and the columns listed in
        ``columns``; 'label' is cast to ``str``.
    """
    dataset = pd.DataFrame(
        {
            # Built column-wise instead of appending one row at a time.
            'file_name': [
                os.path.join(dataset_path, name) for name in csv_frame['file_name']
            ],
            'label': list(csv_frame['label']),
        },
        columns=columns,
    )
    # Labels are stored as strings; the generators below read them as class names.
    dataset['label'] = dataset['label'].astype(str)
    return dataset


# One call per split replaces three copies of the same loop.
train_dataset = _build_dataset(train_csv, DATASET_PATH)
vali_dataset = _build_dataset(vali_csv, DATASET_PATH)
test_dataset = _build_dataset(test_csv, DATASET_PATH)

In [7]:
# Input image size in pixels.
# NOTE(review): `img_weight` looks like a typo for "width"; the name is kept
# because other cells reference it.
img_weight, img_height = 224, 224

# Number of images per batch produced by the generators.
batch_size = 64

# Seed handed to the shuffling generators.
seed = 42

In [8]:
# Training images: multiply pixel values by 1/255 and apply light augmentation
# (random horizontal flips and brightness changes in the 0.8-1.2 range).
datagen_train = ImageDataGenerator(
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    rescale=1.0 / 255,
)
# Validation and test images are only rescaled, never augmented.
datagen_val_test = ImageDataGenerator(rescale=1.0 / 255)

# Arguments shared by the three flows.
flow_options = dict(
    x_col="file_name",
    y_col="label",
    target_size=(img_weight, img_height),
    batch_size=batch_size,
    class_mode="binary",
)

train_generator = datagen_train.flow_from_dataframe(
    dataframe=train_dataset, shuffle=True, seed=seed, **flow_options
)

val_generator = datagen_val_test.flow_from_dataframe(
    dataframe=vali_dataset, shuffle=True, seed=seed, **flow_options
)

# The test flow is not shuffled, so batches follow the order of test_dataset.
test_generator = datagen_val_test.flow_from_dataframe(
    dataframe=test_dataset, shuffle=False, **flow_options
)

Found 20732 validated image filenames belonging to 2 classes.
Found 4442 validated image filenames belonging to 2 classes.
Found 4444 validated image filenames belonging to 2 classes.


In [9]:
# ImageNet-pretrained MobileNetV2 backbone without its classification head.
# NOTE(review): Keras documents MobileNetV2 as expecting inputs scaled to
# [-1, 1] (mobilenet_v2.preprocess_input), while the generators in this
# notebook use rescale=1/255 - confirm this is intended.
mobilenetV2 = MobileNetV2(
    weights="imagenet",
    input_shape=(img_weight, img_height, 3),
    include_top=False,
)

# Freeze the backbone so that only the new head is trained.
mobilenetV2.trainable = False

# Head: global average pooling followed by a single sigmoid unit.
model = Sequential()
model.add(mobilenetV2)
model.add(GlobalAveragePooling2D())
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer and parameter summary.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 1)                 1281      
                                                                 
Total params: 2,259,265
Trainable params: 1,281
Non-trainable params: 2,257,984
_________________________________________________________________


In [None]:

# Keep on disk only the model with the lowest validation loss seen so far;
# verbose=1 prints a message each time the file is written.
checkpoint = ModelCheckpoint(
    filepath='./models/CNNs/original_MobileNetV2_V1.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Write training logs for TensorBoard.
# NOTE(review): TensorBoard is imported from `keras` while the other callbacks
# come from `tensorflow.keras` - confirm both resolve to the same Keras.
tensorBoard = TensorBoard(log_dir='./logs/original/MobileNetV2/version_1')

# Train with both callbacks attached; `history` keeps the per-epoch metrics.
training_callbacks = [checkpoint, tensorBoard]
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=training_callbacks,
)

In [None]:
import matplotlib.pyplot as plt

# Plot accuracy and loss for training and validation.
def plot_training_history(history):
    """Draw side-by-side accuracy and loss curves for a training run.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch lists. ``'accuracy'`` and ``'loss'`` are required;
            ``'val_accuracy'`` and ``'val_loss'`` are drawn only when present.

    Raises:
        KeyError: If ``'accuracy'`` or ``'loss'`` is missing.
    """
    metrics = history.history
    acc = metrics['accuracy']
    loss = metrics['loss']
    # Validation series may be absent, e.g. when no validation data was given.
    val_acc = metrics.get('val_accuracy')
    val_loss = metrics.get('val_loss')
    epochs_range = range(len(acc))

    # Explicit Axes objects instead of the implicit pyplot "current axes".
    _, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(20, 5))

    ax_acc.plot(epochs_range, acc, label='Accuracy')
    if val_acc is not None:
        ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
    ax_acc.set_title('Training and Validation Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend(loc='lower right')

    ax_loss.plot(epochs_range, loss, label='Loss')
    if val_loss is not None:
        ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
    ax_loss.set_title('Training and Validation Loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend(loc='upper right')

    plt.show()

# Plot the curves recorded in `history` by the model.fit call.
plot_training_history(history)

In [10]:
from tensorflow.keras.models import load_model

# Reload the model file that the ModelCheckpoint callback writes during training.
best_model_path = './models/CNNs/original_MobileNetV2_V1.h5'
best_model = load_model(best_model_path)

# Evaluate on the test flow; kept as the last expression so the cell shows
# the returned [loss, accuracy] list.
best_model.evaluate(x=test_generator)

2024-09-28 20:46:44.888813: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




[0.20741039514541626, 0.9120162129402161]