In [2]:
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [3]:
# Load the dataset
df = pd.read_csv("fer2013_images.csv")
print(df['label'].unique())

['sad' 'neutral' 'fear' 'surprise' 'angry' 'happy' 'disgust']


In [4]:
# Encode emotion names to numbers
label_encoder = LabelEncoder()
df['label_encoded'] = label_encoder.fit_transform(df['label'])

# Optional: print mapping
label_map = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Emotion label map:", label_map)

Emotion label map: {'angry': np.int64(0), 'disgust': np.int64(1), 'fear': np.int64(2), 'happy': np.int64(3), 'neutral': np.int64(4), 'sad': np.int64(5), 'surprise': np.int64(6)}


In [5]:
base_path = "fer2013"
import os
print(os.path.exists(base_path))

True


In [9]:
# Load and preprocess all images
import os
import cv2
import numpy as np

base_path = "fer2013"
IMG_SIZE = 48
X, y = [], []

LIMIT = 200  # ✅ Load only first 200 images for testing

for idx, row in df.iterrows():
    relative_path = row['filepath'].replace("fer2013_dataset/", "")
    img_path = os.path.join(base_path, relative_path)
    label = row['label']

    img = cv2.imread(img_path)
    if img is None:
        print("Missing:", img_path)
        continue

    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0
    X.append(img)
    y.append(label)

    if len(X) >= LIMIT:
        print(f"✅ Loaded {LIMIT} images (testing mode)")
        break

X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)
print("y shape:", y.shape)

✅ Loaded 200 images (testing mode)
X shape: (200, 48, 48, 3)
y shape: (200,)


In [12]:
# One - hot encode the labels
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# ⿡ Encode string labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)   # 'sad' -> 4, 'happy' -> 3, etc.

# ⿢ Convert to one-hot
num_classes = len(label_encoder.classes_)
y_onehot = to_categorical(y_encoded, num_classes=num_classes)

print("Encoded y shape:", y_onehot.shape)
print("Classes:", label_encoder.classes_)

Encoded y shape: (200, 1)
Classes: ['sad']


In [13]:
# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
import numpy as np

# Save the arrays
np.save('X_train.npy', X_train)
np.save('X_test.npy', X_test)
np.save('y_train.npy', y_train)
np.save('y_test.npy', y_test)