In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import cv2

In [2]:
# Pin the NumPy and TensorFlow global RNGs to a fixed seed so repeated runs match
np.random.seed(42)
tf.random.set_seed(42)

# Project Configuration
class Config:
    """Settings shared by the training and inference code.

    Every value is a class attribute, and the class object itself is what gets
    passed around as ``config`` (e.g. ``ModelTrainer.train(Config)``).
    """

    # Data Parameters
    IMAGE_SIZE = (224, 224)  # spatial size the 48x48 faces are upscaled to
    BATCH_SIZE = 32
    # Seven expression classes; the label lists in this notebook name them, by
    # id, as Angry, Disgust, Fear, Happy, Sad, Surprise, Neutral
    NUM_CLASSES = 7

    # Training Parameters
    EPOCHS = 50
    LEARNING_RATE = 1e-4

    # Paths
    DATA_DIR = 'data/fer2013'
    MODEL_SAVE_PATH = 'models/facial_expression_model.h5'

    # GPU Configuration
    @staticmethod
    def configure_gpu():
        """Turn on memory growth for every GPU TensorFlow can see.

        Does nothing when no GPU is listed. A ``RuntimeError`` from TensorFlow
        is printed rather than raised, so the notebook keeps running.

        Declared as a staticmethod so that it can be called through the class
        (``Config.configure_gpu()``) or through an instance alike.
        """
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                for gpu in gpus:
                    tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as e:
                # Best effort: report the problem and carry on
                print(e)

# Configure GPU if available. Called once at configuration time, ahead of the
# cells that build or load a model.
Config.configure_gpu()

In [3]:
class DataProcessor:
    """Loading and batching helpers for the FER2013 CSV dataset."""

    @staticmethod
    def load_fer2013_dataset(path):
        """
        Load FER2013 and split it by the CSV's ``Usage`` column.

        Args:
            path: Directory that contains ``fer2013.csv``. The file must have
                the columns ``emotion``, ``pixels`` and ``Usage``.

        Returns:
            ``(X_train, y_train), (X_val, y_val), (X_test, y_test)`` taken from
            the rows whose ``Usage`` is ``Training``, ``PublicTest`` and
            ``PrivateTest`` respectively. Each ``X`` is a float32 array of
            shape ``(n, 48, 48, 1)`` scaled to ``[0, 1]``; each ``y`` holds the
            matching ``emotion`` values. A split with no rows yields an empty
            ``(0, 48, 48, 1)`` array instead of raising.
        """
        data = pd.read_csv(os.path.join(path, 'fer2013.csv'))

        def parse_data(df):
            labels = df['emotion'].values
            if df.empty:
                # np.vstack raises on an empty sequence, so build the empty
                # result explicitly when a split is missing from the CSV
                return np.empty((0, 48, 48, 1), dtype=np.float32), labels

            # Each 'pixels' cell is one space-separated string of intensities.
            # Parsing straight to float32 halves memory versus the float64
            # default; the resize step downstream works in float32 anyway.
            rows = [np.fromstring(px, dtype=np.float32, sep=' ') for px in df['pixels']]
            images = np.vstack(rows) / 255.0  # Normalize to [0, 1]
            return images.reshape(-1, 48, 48, 1), labels

        # Split data into training, validation, and test
        X_train, y_train = parse_data(data[data['Usage'] == 'Training'])
        X_val, y_val = parse_data(data[data['Usage'] == 'PublicTest'])
        X_test, y_test = parse_data(data[data['Usage'] == 'PrivateTest'])

        return (X_train, y_train), (X_val, y_val), (X_test, y_test)

    @staticmethod
    def _to_model_input(images, image_size):
        """Upscale single-channel images to ``image_size`` and copy the channel three times."""
        resized = tf.image.resize(images, image_size)
        return tf.repeat(resized, repeats=3, axis=-1)

    @staticmethod
    def create_data_generators(X_train, y_train, X_val, y_val, config):
        """
        Create data generators with augmentation

        Args:
            X_train, X_val: Image arrays with a single trailing channel, as
                returned by ``load_fer2013_dataset``.
            y_train, y_val: Matching label arrays.
            config: Object exposing ``IMAGE_SIZE`` and ``BATCH_SIZE``.

        Returns:
            ``(train_generator, val_generator)``. The training generator
            shuffles and applies random rotation/shift/shear/zoom/flip; the
            validation generator applies no augmentation and keeps the order.

        NOTE(review): both splits are upscaled in full before batching, so the
        whole ``n x H x W x 3`` float32 tensor has to fit in memory at once.
        """
        train_datagen = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest'
        )

        val_datagen = ImageDataGenerator()

        # Resize to the transfer learning model's input size and repeat the
        # grayscale channel so the images look like 3-channel input
        X_train_rgb = DataProcessor._to_model_input(X_train, config.IMAGE_SIZE)
        X_val_rgb = DataProcessor._to_model_input(X_val, config.IMAGE_SIZE)

        train_generator = train_datagen.flow(
            X_train_rgb, y_train,
            batch_size=config.BATCH_SIZE,
            shuffle=True
        )

        val_generator = val_datagen.flow(
            X_val_rgb, y_val,
            batch_size=config.BATCH_SIZE,
            shuffle=False
        )

        return train_generator, val_generator

In [4]:
class FacialExpressionModel:
    """Factory for the MobileNetV2-based expression classifier."""

    @staticmethod
    def build_model(config):
        """
        Create transfer learning model using MobileNetV2

        Args:
            config: Object exposing ``IMAGE_SIZE``, ``NUM_CLASSES`` and
                ``LEARNING_RATE``.

        Returns:
            A compiled Keras model that takes ``(*IMAGE_SIZE, 3)`` images with
            pixel values in ``[0, 1]`` and outputs a softmax over
            ``NUM_CLASSES``. It is compiled with Adam and sparse categorical
            cross-entropy, so labels are integer class ids.
        """
        input_shape = (*config.IMAGE_SIZE, 3)

        # Base model
        base_model = MobileNetV2(
            weights='imagenet',
            include_top=False,
            input_shape=input_shape
        )

        # Freeze base model layers
        base_model.trainable = False

        inputs = tf.keras.Input(shape=input_shape)

        # The data loading and inference code in this notebook scale pixels to
        # [0, 1], while the ImageNet MobileNetV2 weights expect [-1, 1].
        # Doing the conversion inside the model keeps every caller consistent.
        x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)

        # training=False keeps the frozen backbone in inference mode
        x = base_model(x, training=False)

        # Add custom classification head
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.3)(x)

        # Output layer
        output = Dense(
            config.NUM_CLASSES,
            activation='softmax',
            name='expression_output'
        )(x)

        model = Model(inputs=inputs, outputs=output)

        # Compile model
        optimizer = tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE)
        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        return model

In [None]:
class ModelTrainer:
    """End-to-end driver: load data, train, evaluate and save the confusion matrix."""

    # Display names indexed by emotion label id (0..6)
    _CLASS_NAMES = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    _CONFUSION_MATRIX_PATH = 'results/confusion_matrix.png'

    @staticmethod
    def train(config):
        """
        Train the expression model and evaluate it on the held-out test split.

        Args:
            config: Object exposing ``DATA_DIR``, ``MODEL_SAVE_PATH``,
                ``IMAGE_SIZE``, ``BATCH_SIZE``, ``EPOCHS`` and the attributes
                read by ``FacialExpressionModel.build_model``.

        Returns:
            ``(model, history)``: the fitted model and the object returned by
            ``model.fit``.

        Side effects:
            Writes the best checkpoint to ``config.MODEL_SAVE_PATH``, saves the
            confusion matrix to ``results/confusion_matrix.png`` and prints the
            test accuracy and classification report.
        """
        class_names = ModelTrainer._CLASS_NAMES
        label_ids = list(range(len(class_names)))

        # Create the output folders before training so that a missing
        # directory cannot fail the run after the expensive fit has finished
        for out_path in (config.MODEL_SAVE_PATH, ModelTrainer._CONFUSION_MATRIX_PATH):
            out_dir = os.path.dirname(out_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

        # Load dataset
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = DataProcessor.load_fer2013_dataset(config.DATA_DIR)

        # Create data generators
        train_generator, val_generator = DataProcessor.create_data_generators(
            X_train, y_train, X_val, y_val, config
        )

        # Build model
        model = FacialExpressionModel.build_model(config)

        # Callbacks
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )

        model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
            config.MODEL_SAVE_PATH,
            save_best_only=True
        )

        # Train model
        history = model.fit(
            train_generator,
            validation_data=val_generator,
            epochs=config.EPOCHS,
            callbacks=[early_stopping, model_checkpoint]
        )

        # Evaluate on test set, using the same resize + channel repeat as training
        X_test_resized = tf.image.resize(X_test, config.IMAGE_SIZE)
        X_test_rgb = tf.repeat(X_test_resized, repeats=3, axis=-1)

        test_loss, test_accuracy = model.evaluate(X_test_rgb, y_test)
        print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

        # Detailed Classification Report
        y_pred = model.predict(X_test_rgb)
        y_pred_classes = np.argmax(y_pred, axis=1)

        # Passing labels explicitly keeps the report and the matrix at one
        # row/column per class even if a class never shows up in the test data
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred_classes,
            labels=label_ids, target_names=class_names))

        # Confusion Matrix
        cm = confusion_matrix(y_test, y_pred_classes, labels=label_ids)
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d',
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax)
        ax.set_title('Confusion Matrix')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        fig.tight_layout()
        fig.savefig(ModelTrainer._CONFUSION_MATRIX_PATH)
        plt.close(fig)

        return model, history

In [None]:
if __name__ == '__main__':
    # Run the whole training pipeline and keep the fitted model and the
    # object returned by model.fit at module level for later cells.
    trained_model, training_history = ModelTrainer.train(Config)

In [None]:
import cv2
import numpy as np
import tensorflow as tf

In [None]:
class FacialExpressionInference:
    """Detects faces in BGR frames and labels each with a predicted expression."""

    def __init__(self, model_path, config):
        """
        Args:
            model_path: Path passed to ``tf.keras.models.load_model``.
            config: Object exposing ``IMAGE_SIZE``, read as (height, width)
                to match how the model's input shape is built from it.

        Raises:
            IOError: If OpenCV's frontal-face Haar cascade could not be loaded.
        """
        self.model = tf.keras.models.load_model(model_path)
        self.config = config
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        if self.face_cascade.empty():
            # Fail here rather than on the first frame inside detectMultiScale
            raise IOError('Could not load haarcascade_frontalface_default.xml')
        # Display names indexed by the model's output class id
        self.expressions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    @staticmethod
    def _to_rgb_batch(face_gray):
        """Turn one 2-D grayscale image (H, W) into a (1, H, W, 3) float32 batch."""
        face_gray = np.asarray(face_gray, dtype=np.float32)
        # Add the channel axis explicitly before repeating: repeating along the
        # last axis of a 2-D image would widen it instead of adding channels
        rgb = np.repeat(face_gray[..., np.newaxis], 3, axis=-1)
        return rgb[np.newaxis, ...]

    def detect_and_predict(self, frame):
        """
        Annotate every detected face in ``frame`` with its predicted expression.

        Args:
            frame: BGR image as delivered by ``cv2.VideoCapture.read``.

        Returns:
            The same ``frame`` object, with a rectangle and an
            ``"<expression>: <confidence>"`` caption drawn per face.
        """
        # Convert to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)

        # IMAGE_SIZE is (height, width); cv2.resize wants (width, height)
        target_h, target_w = self.config.IMAGE_SIZE

        for box in faces:
            x, y, w, h = (int(v) for v in box)

            # Extract face ROI
            roi_gray = gray[y:y+h, x:x+w]

            # Same steps as training: 48x48 crop, scale to [0, 1], then
            # upsample to the network's input size
            roi_small = cv2.resize(roi_gray, (48, 48)).astype(np.float32) / 255.0
            roi_large = cv2.resize(roi_small, (target_w, target_h))
            roi_rgb = self._to_rgb_batch(roi_large)

            # Predict (verbose=0 avoids a progress bar on every frame)
            prediction = self.model.predict(roi_rgb, verbose=0)
            emotion_idx = int(np.argmax(prediction[0]))
            emotion = self.expressions[emotion_idx]
            confidence = prediction[0][emotion_idx]

            # Draw rectangle and text; keep the caption inside the frame when
            # the face touches the top edge
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
            cv2.putText(frame, f'{emotion}: {confidence:.2f}',
                        (x, max(y - 10, 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                        (36,255,12), 2)

        return frame

    def real_time_detection(self):
        """Run detection on the default camera until 'q' is pressed or a read fails.

        The camera and any OpenCV windows are released even when the loop is
        interrupted or a prediction raises.
        """
        cap = cv2.VideoCapture(0)

        try:
            if not cap.isOpened():
                print('Could not open camera 0')
                return

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Flip frame for natural view
                frame = cv2.flip(frame, 1)

                # Detect and predict
                processed_frame = self.detect_and_predict(frame)

                # Display
                cv2.imshow('Facial Expression Recognition', processed_frame)

                # Exit condition
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

In [None]:
if __name__ == '__main__':
    # Load the model stored at Config.MODEL_SAVE_PATH and start the webcam
    # loop, which runs until 'q' is pressed or a frame cannot be read.
    inference = FacialExpressionInference(
        model_path=Config.MODEL_SAVE_PATH, 
        config=Config
    )
    inference.real_time_detection()