In [2]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Input resolution fed to the backbone: 224x224 pixels with 3 channels.
INPUT_SHAPE = (224, 224, 3)

# ImageNet-pretrained MobileNetV2 backbone, loaded without its classification head.
base_model = MobileNetV2(
    input_shape=INPUT_SHAPE,
    include_top=False,
    weights='imagenet',
)

# Freeze the backbone: it is used as-is and is not meant to be re-trained.
base_model.trainable = False

# Average the backbone's spatial feature map (e.g. 7x7x1280) over its spatial
# axes so that every image is described by a single vector (e.g. 1280 values).
x = GlobalAveragePooling2D()(base_model.output)

# Final extractor: same input as the backbone, pooled feature vector as output.
feature_extractor = Model(inputs=base_model.input, outputs=x)

# Print the layer-by-layer structure of the extractor.
feature_extractor.summary()

2025-10-29 08:53:52.933326: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-10-29 08:53:52.988787: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-29 08:53:54.532912: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-10-29 08:53:56.056265: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import os
# Root of the pre-split dataset; the 'train', 'val' and 'test' sub-directories
# are read from it below.
DATA_DIR = "../dataset_split"
TARGET_SIZE = (224, 224)
BATCH_SIZE = 32

# One generator shared by every split. It applies MobileNetV2's own
# preprocessing function to each image. flow_from_directory is left at its
# default color_mode ('rgb'), so grayscale files are loaded as 3-channel images.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


def _make_split_generator(split):
    """Build the directory iterator for one dataset split.

    Args:
        split: Name of the sub-directory of DATA_DIR to read
            ('train', 'val' or 'test').

    Returns:
        The iterator returned by ``datagen.flow_from_directory`` for that
        sub-directory, configured with TARGET_SIZE, BATCH_SIZE, categorical
        labels and no shuffling.
    """
    return datagen.flow_from_directory(
        os.path.join(DATA_DIR, split),
        target_size=TARGET_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        # Important: keep the file order fixed so that the rows produced by
        # predict() line up with the generator's `.classes` labels.
        shuffle=False,
    )


train_generator = _make_split_generator('train')
val_generator = _make_split_generator('val')
test_generator = _make_split_generator('test')

# Fail fast if the splits disagree on the class-name -> integer mapping (for
# example, a class folder missing from one split). Otherwise the integer
# labels of different splits would silently refer to different classes.
if not (
    train_generator.class_indices
    == val_generator.class_indices
    == test_generator.class_indices
):
    raise ValueError(
        "Class mappings differ between splits: "
        f"train={train_generator.class_indices}, "
        f"val={val_generator.class_indices}, "
        f"test={test_generator.class_indices}"
    )

Found 8783 images belonging to 3 classes.
Found 2509 images belonging to 3 classes.
Found 1258 images belonging to 3 classes.


In [6]:
import numpy as np

def _extract_split(generator, banner):
    """Print ``banner`` and return ``(features, labels)`` for one split.

    Features are the output of ``feature_extractor.predict(generator)`` and
    labels are ``generator.classes``. The two only line up row by row when the
    generator was created with ``shuffle=False``.
    """
    print(banner)
    split_features = feature_extractor.predict(generator)
    return split_features, generator.classes


features_train, labels_train = _extract_split(
    train_generator,
    "Extrayendo características del set de entrenamiento...",
)

features_val, labels_val = _extract_split(
    val_generator,
    "Extrayendo características del set de validación...",
)

features_test, labels_test = _extract_split(
    test_generator,
    "Extrayendo características del set de prueba...",
)

print(f"Forma de las características de train: {features_train.shape}")
# Expected output: (number_of_training_images, 1280)

Extrayendo características del set de entrenamiento...


2025-10-29 08:56:49.971010: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 51380224 exceeds 10% of free system memory.
2025-10-29 08:56:50.018864: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 51380224 exceeds 10% of free system memory.
2025-10-29 08:56:50.059360: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 25690112 exceeds 10% of free system memory.
2025-10-29 08:56:50.078876: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 154140672 exceeds 10% of free system memory.
2025-10-29 08:56:50.180490: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 156905472 exceeds 10% of free system memory.


[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 705ms/step
Extrayendo características del set de validación...
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 688ms/step
Extrayendo características del set de prueba...
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 680ms/step
Forma de las características de train: (8783, 1280)


In [None]:
import pandas as pd
import numpy as np

print("Guardando características en archivos CSV...")

# Class-name -> integer-label mapping of the training generator, printed for
# reference because the CSV files only store the integer labels.
class_indices = train_generator.class_indices
print(f"Mapeo de clases (Etiquetas): {class_indices}")


def _save_features_csv(features, labels, csv_path):
    """Write one split's features and labels to a CSV file.

    Args:
        features: 2-D array of extracted features, one row per image.
        labels: Integer class labels, one per row of ``features``.
        csv_path: Destination path of the CSV file.

    Returns:
        The DataFrame that was written: one column per feature plus a final
        'label' column. It is saved without the index.
    """
    frame = pd.DataFrame(features)
    # Append the numeric labels as the last column.
    frame['label'] = labels
    frame.to_csv(csv_path, index=False)
    # The message is built from csv_path so it always names the file written.
    print(f" - Archivo '{csv_path}' guardado.")
    return frame


# One CSV per split; the DataFrames are kept under their original names.
df_train = _save_features_csv(features_train, labels_train, 'train_features.csv')
df_val = _save_features_csv(features_val, labels_val, 'val_features.csv')
df_test = _save_features_csv(features_test, labels_test, 'test_features.csv')

print("\n¡Archivos CSV generados exitosamente!")




Guardando características en archivos CSV...
Mapeo de clases (Etiquetas): {'Covid': 0, 'Normal': 1, 'Tuberculosis': 2}
 - Archivo 'train_features.csv' guardado.
 - Archivo 'val_features.csv' guardado.
 - Archivo 'test_features.csv' guardado.

¡Archivos CSV generados exitosamente!


In [7]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fixed seed for the stochastic estimator below, so that re-running the
# notebook reproduces the same fitted model and the same reported metrics.
RANDOM_STATE = 42

# 1. Support Vector Machine with an RBF kernel (a reasonable default choice).
print("\nEntrenando SVM...")
svm_model = SVC(kernel='rbf', C=1.0)
svm_model.fit(features_train, labels_train)

# 2. Random Forest. Tree construction is randomized, so without a fixed
#    random_state every run would yield a slightly different model.
print("Entrenando Random Forest...")
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=RANDOM_STATE,
)
rf_model.fit(features_train, labels_train)

# 3. K-Nearest Neighbors.
print("Entrenando KNN...")
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(features_train, labels_train)


Entrenando SVM...
Entrenando Random Forest...
Entrenando KNN...


In [8]:
# Class names and their integer ids, explicitly ordered by id so that
# class_names[i] is always the name of label i (no reliance on dict order).
_classes_by_id = sorted(train_generator.class_indices.items(), key=lambda item: item[1])
class_names = [name for name, _ in _classes_by_id]
class_ids = [class_id for _, class_id in _classes_by_id]


def _evaluate_on_test(model, title, short_name):
    """Print test-set accuracy and a per-class report for a fitted model.

    Args:
        model: Fitted classifier exposing ``predict``.
        title: Model name used in the section header line.
        short_name: Model name used in the accuracy line.

    Returns:
        The model's predictions for ``features_test``.
    """
    print(f"\n--- Resultados {title} ---")
    preds = model.predict(features_test)
    print(f"Accuracy ({short_name}): {accuracy_score(labels_test, preds):.4f}")
    # Passing `labels` pins the report rows to the known classes; without it
    # the report raises when a class is missing from both the ground truth and
    # the predictions, because target_names would no longer match in length.
    print(classification_report(labels_test, preds,
                                labels=class_ids, target_names=class_names))
    return preds


# The predictions are kept under their original names for later cells.
preds_svm = _evaluate_on_test(svm_model, "SVM", "SVM")
preds_rf = _evaluate_on_test(rf_model, "Random Forest", "RF")
preds_knn = _evaluate_on_test(knn_model, "KNN", "KNN")


--- Resultados SVM ---
Accuracy (SVM): 0.9730
              precision    recall  f1-score   support

       Covid       0.95      0.98      0.96       408
      Normal       0.98      0.98      0.98       529
Tuberculosis       0.99      0.97      0.98       321

    accuracy                           0.97      1258
   macro avg       0.97      0.97      0.97      1258
weighted avg       0.97      0.97      0.97      1258


--- Resultados Random Forest ---
Accuracy (RF): 0.9030
              precision    recall  f1-score   support

       Covid       0.86      0.90      0.88       408
      Normal       0.90      0.95      0.92       529
Tuberculosis       0.97      0.83      0.90       321

    accuracy                           0.90      1258
   macro avg       0.91      0.89      0.90      1258
weighted avg       0.91      0.90      0.90      1258


--- Resultados KNN ---
Accuracy (KNN): 0.9348
              precision    recall  f1-score   support

       Covid       0.96      0.87

In [10]:
# --- Overfitting check: training-set accuracy next to test-set accuracy ---

print("Evaluando modelos en el set de ENTRENAMIENTO (para comparar)...")

# SVM: predict on the training features, then score both splits.
# The test predictions (preds_svm) are reused from the evaluation cell.
preds_svm_train = svm_model.predict(features_train)
svm_train_accuracy = accuracy_score(labels_train, preds_svm_train)
svm_test_accuracy = accuracy_score(labels_test, preds_svm)
print(f"\nAccuracy (SVM) en TRAIN: {svm_train_accuracy:.4f}")
print(f"Accuracy (SVM) en TEST:  {svm_test_accuracy:.4f}")

# Random Forest: same comparison, reusing preds_rf for the test split.
preds_rf_train = rf_model.predict(features_train)
rf_train_accuracy = accuracy_score(labels_train, preds_rf_train)
rf_test_accuracy = accuracy_score(labels_test, preds_rf)
print(f"\nAccuracy (RF) en TRAIN: {rf_train_accuracy:.4f}")
print(f"Accuracy (RF) en TEST:  {rf_test_accuracy:.4f}")

Evaluando modelos en el set de ENTRENAMIENTO (para comparar)...

Accuracy (SVM) en TRAIN: 0.9826
Accuracy (SVM) en TEST:  0.9730

Accuracy (RF) en TRAIN: 0.9891
Accuracy (RF) en TEST:  0.9030
