In [1]:
#Librerias necesarias
import pickle

import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [3]:
# Load the preprocessed dataset: the pickle is iterated as (image, label) pairs.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open('preprocessed_emojis.pkl', 'rb') as f:
    data = pickle.load(f)

# Split the pairs into an image stack and a parallel label vector.
X = np.asarray([img for img, _ in data])
y = np.asarray([label for _, label in data])

print('Shape X:', X.shape)
print('Shape y:', y.shape)

Shape X: (2530, 32, 32)
Shape y: (2530,)


In [4]:
# Si las imágenes no son binarias ni 32x32, procesarlas
import cv2
def preprocess_image(img, size=32):
    """Binarise an image, crop it to its content and centre it on a square canvas.

    Steps: convert to a single channel, Otsu-threshold to {0, 1}, take the
    bounding box of all external contours, resize that crop (nearest
    neighbour, aspect ratio preserved) so its longest side equals ``size``,
    and paste it centred on a ``size`` x ``size`` array of zeros.

    Parameters
    ----------
    img : numpy.ndarray
        2-D grayscale image, or 3-D image with a trailing channel axis.
        Multi-channel input is converted with ``cv2.COLOR_RGB2GRAY``.
        NOTE(review): Otsu thresholding is assumed to need 8-bit data here --
        confirm the dtype of the stored images.
    size : int, optional
        Side length of the square output. Defaults to 32.

    Returns
    -------
    numpy.ndarray
        ``(size, size)`` uint8 array. All zeros when no contour is found.
    """
    # Reduce to a single channel.
    if img.ndim == 3:
        if img.shape[2] == 1:
            # A trailing singleton channel is already grayscale; RGB2GRAY
            # would reject it, so just drop the axis.
            img = img[:, :, 0]
        else:
            img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Binarise to {0, 1}; Otsu picks the threshold, so the 0 passed here is ignored.
    _, img_bin = cv2.threshold(img, 0, 1, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Locate the foreground via its external contours.
    contours, _ = cv2.findContours(
        (img_bin * 255).astype(np.uint8),
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE,
    )

    out = np.zeros((size, size), dtype=np.uint8)
    if len(contours) == 0:
        return out

    # One bounding box around every contour point.
    left, top, w, h = cv2.boundingRect(np.vstack(contours))
    emoji_crop = img_bin[top:top + h, left:left + w]

    # Aspect-preserving target size. Integer arithmetic keeps the longest side
    # at exactly `size` (float truncation could yield size - 1), and the
    # max(1, ...) guard keeps very thin shapes from collapsing to a zero-width
    # target, which cv2.resize rejects.
    longest = max(h, w)
    new_w = max(1, (w * size) // longest)
    new_h = max(1, (h * size) // longest)
    emoji_resized = cv2.resize(emoji_crop, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

    # Centre the resized crop on the blank canvas.
    y_off = (size - new_h) // 2
    x_off = (size - new_w) // 2
    out[y_off:y_off + new_h, x_off:x_off + new_w] = emoji_resized
    return out

# Normalise every sample and stack the results into a single array.
X_bin = np.array(list(map(preprocess_image, X)))
print('Shape X_bin:', X_bin.shape)

Shape X_bin: (2530, 32, 32)


In [5]:
# Hold out 20% of the samples for testing; stratify so both splits keep the
# class proportions, and fix the seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_bin,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print('Train:', X_train.shape, y_train.shape)
print('Test:', X_test.shape, y_test.shape)

Train: (2024, 32, 32) (2024,)
Test: (506, 32, 32) (506,)


In [7]:
# Prepare data for the MLP: flatten each image into a 1-D feature vector.
X_train_flat = X_train.reshape((X_train.shape[0], -1))
X_test_flat = X_test.reshape((X_test.shape[0], -1))

# Convert string labels to integer indices (position in the sorted unique labels).
classes = np.unique(y)
class_to_idx = {cls: idx for idx, cls in enumerate(classes)}
y_train_idx = np.array([class_to_idx[label] for label in y_train])
y_test_idx = np.array([class_to_idx[label] for label in y_test])

# One-hot encode with an explicit width. Without num_classes, to_categorical
# infers the width from the largest index present, so the train and test
# matrices could end up with different numbers of columns.
num_classes = len(classes)
y_train_cat = to_categorical(y_train_idx, num_classes=num_classes)
y_test_cat = to_categorical(y_test_idx, num_classes=num_classes)

# MLP baseline: two hidden ReLU layers and a softmax output, one unit per class.
# The input size is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer triggers a Keras deprecation warning.
mlp = Sequential([
    Input(shape=(X_train_flat.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(y_train_cat.shape[1], activation='softmax')
])
mlp.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# 10% of the training data is held out for per-epoch validation.
# NOTE(review): the recorded log shows val_loss rising after ~3 epochs while
# the training loss keeps falling -- consider early stopping or regularisation.
mlp.fit(X_train_flat, y_train_cat, epochs=20, batch_size=32, validation_split=0.1, verbose=2)

# Evaluate on the held-out test set.
y_pred_mlp = np.argmax(mlp.predict(X_test_flat), axis=1)
print('MLP Accuracy:', accuracy_score(y_test_idx, y_pred_mlp))
# Pass `labels` so target_names always line up with the class indices, even
# when a class is absent from both the test labels and the predictions.
print(classification_report(
    y_test_idx,
    y_pred_mlp,
    labels=np.arange(len(classes)),
    target_names=classes,
))

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


57/57 - 1s - 14ms/step - accuracy: 0.5755 - loss: 1.0164 - val_accuracy: 0.6847 - val_loss: 0.8177
Epoch 2/20
57/57 - 0s - 3ms/step - accuracy: 0.7787 - loss: 0.5992 - val_accuracy: 0.7192 - val_loss: 0.7524
Epoch 3/20
57/57 - 0s - 3ms/step - accuracy: 0.8556 - loss: 0.4023 - val_accuracy: 0.7438 - val_loss: 0.6749
Epoch 4/20
57/57 - 0s - 3ms/step - accuracy: 0.8995 - loss: 0.2885 - val_accuracy: 0.7438 - val_loss: 0.7488
Epoch 5/20
57/57 - 0s - 3ms/step - accuracy: 0.9368 - loss: 0.1979 - val_accuracy: 0.7537 - val_loss: 0.7345
Epoch 6/20
57/57 - 0s - 3ms/step - accuracy: 0.9632 - loss: 0.1390 - val_accuracy: 0.7537 - val_loss: 0.7520
Epoch 7/20
57/57 - 0s - 3ms/step - accuracy: 0.9709 - loss: 0.1076 - val_accuracy: 0.7537 - val_loss: 0.8124
Epoch 8/20
57/57 - 0s - 3ms/step - accuracy: 0.9808 - loss: 0.0789 - val_accuracy: 0.7488 - val_loss: 0.8280
Epoch 9/20
57/57 - 0s - 3ms/step - accuracy: 0.9835 - loss: 0.0669 - val_accuracy: 0.7586 - val_loss: 0.8383
Epoch 10/20
57/57 - 0s - 3ms/

In [9]:
# Prepare data for the CNN: append a trailing channel axis -> (N, 32, 32, 1).
X_train_cnn = np.expand_dims(X_train, axis=-1)
X_test_cnn = np.expand_dims(X_test, axis=-1)

# Small CNN: two conv + max-pool stages, then a dense head with dropout and a
# softmax output, one unit per class.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first Conv2D layer triggers a Keras deprecation warning.
cnn = Sequential([
    Input(shape=X_train_cnn.shape[1:]),
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y_train_cat.shape[1], activation='softmax')
])
cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# 10% of the training data is held out for per-epoch validation.
cnn.fit(X_train_cnn, y_train_cat, epochs=20, batch_size=32, validation_split=0.1, verbose=2)

# Evaluate on the held-out test set.
y_pred_cnn = np.argmax(cnn.predict(X_test_cnn), axis=1)
print('CNN Accuracy:', accuracy_score(y_test_idx, y_pred_cnn))
# Pass `labels` so target_names always line up with the class indices, even
# when a class is absent from both the test labels and the predictions.
print(classification_report(
    y_test_idx,
    y_pred_cnn,
    labels=np.arange(len(classes)),
    target_names=classes,
))

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


57/57 - 1s - 21ms/step - accuracy: 0.5722 - loss: 1.0723 - val_accuracy: 0.7537 - val_loss: 0.6171
Epoch 2/20
57/57 - 0s - 8ms/step - accuracy: 0.7727 - loss: 0.6009 - val_accuracy: 0.8374 - val_loss: 0.4213
Epoch 3/20
57/57 - 1s - 9ms/step - accuracy: 0.8418 - loss: 0.4231 - val_accuracy: 0.8670 - val_loss: 0.3297
Epoch 4/20
57/57 - 1s - 9ms/step - accuracy: 0.8688 - loss: 0.3304 - val_accuracy: 0.8473 - val_loss: 0.3162
Epoch 5/20
57/57 - 0s - 8ms/step - accuracy: 0.8896 - loss: 0.2768 - val_accuracy: 0.8719 - val_loss: 0.3089
Epoch 6/20
57/57 - 0s - 7ms/step - accuracy: 0.9154 - loss: 0.2237 - val_accuracy: 0.8768 - val_loss: 0.2427
Epoch 7/20
57/57 - 0s - 8ms/step - accuracy: 0.9242 - loss: 0.1993 - val_accuracy: 0.8571 - val_loss: 0.3066
Epoch 8/20
57/57 - 1s - 10ms/step - accuracy: 0.9396 - loss: 0.1549 - val_accuracy: 0.8966 - val_loss: 0.3028
Epoch 9/20
57/57 - 0s - 8ms/step - accuracy: 0.9412 - loss: 0.1534 - val_accuracy: 0.8916 - val_loss: 0.2279
Epoch 10/20
57/57 - 0s - 7ms