In [None]:
pip install numpy==1.24.4




In [3]:
import numpy as np
import cv2
from hdfs import InsecureClient
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import gc

# Crear sesión de Spark
from pyspark.sql import SparkSession

In [4]:
# Crear sesión de Spark
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("FacialEmotionDetection").getOrCreate()

# Cliente HDFS
client = InsecureClient('http://namenode:9870', user='hadoop')  

# Cargar Datos

In [5]:
# FUNCIONES AUXILIARES
def load_images_from_hdfs(split='train', base_dir='/user/hadoop/datasets_imagenes/facial_expressions', target_size=(48, 48)):
    images = []
    labels = []

    split_path = f"{base_dir}/{split}"
    try:
        classes = client.list(split_path)
        for label in classes:
            class_path = f"{split_path}/{label}"
            files = client.list(class_path)

            for filename in files:
                file_path = f"{class_path}/{filename}"
                try:
                    if client.status(file_path)['type'] == 'FILE':
                        with client.read(file_path) as reader:
                            file_bytes = np.asarray(bytearray(reader.read()), dtype=np.uint8)
                            img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
                            if img is not None:
                                img = cv2.resize(img, target_size)
                                img = img_to_array(img)
                                images.append(img)
                                labels.append(label)
                except Exception as e:
                    print(f"Error leyendo {file_path}: {e}")

        print(f"[{split.upper()}] Total de imágenes cargadas: {len(images)}")
        print(f"[{split.upper()}] Clases detectadas: {set(labels)}")
        return np.array(images), np.array(labels)

    except Exception as e:
        print(f"Error accediendo a {split_path}: {e}")
        return np.array([]), np.array([])


In [6]:
X_train, y_train = load_images_from_hdfs('train')
X_val, y_val = load_images_from_hdfs('val')
X_test, y_test = load_images_from_hdfs('test')

[TRAIN] Total de imágenes cargadas: 3716
[TRAIN] Clases detectadas: {'Surprize', 'Fear', 'Neutral', 'Sad', 'Disgust', 'Happy', 'Angry'}
[VAL] Total de imágenes cargadas: 388
[VAL] Clases detectadas: {'Surprize', 'Fear', 'Neutral', 'Sad', 'Disgust', 'Happy', 'Angry'}
[TEST] Total de imágenes cargadas: 388
[TEST] Clases detectadas: {'Surprize', 'Fear', 'Neutral', 'Sad', 'Disgust', 'Happy', 'Angry'}


## Normalizar para entrenamiento

In [7]:
# Normalización
X_train, X_val, X_test = X_train / 255.0, X_val / 255.0, X_test / 255.0

# Codificación de etiquetas
encoder = LabelEncoder()
y_train_enc = to_categorical(encoder.fit_transform(y_train))
y_val_enc = to_categorical(encoder.transform(y_val))
y_test_enc = to_categorical(encoder.transform(y_test))

# MODELO CNN ROBUSTO

In [8]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(7, activation='softmax')  # 7 clases de emoción
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
print(model.summary())

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None


# Entrenamiento

In [None]:
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
]

history = model.fit(
    X_train, y_train_enc,
    validation_data=(X_val, y_val_enc),
    epochs=30,
    batch_size=32,
    callbacks=callbacks
)

Epoch 1/30
