In [1]:
import os
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split
import numpy as np

# Define paths
input_base_path = "/kaggle/input/face-expression-recognition-dataset/images"
output_base_path = "data_split"

# Define label mappings
labels_map = {
    "angry": "no_confidence",
    "disgust": "no_confidence",
    "fear": "no_confidence",
    "sad": "no_confidence",
    "happy": "confidence",
    "neutral": "confidence",
    "surprise": "confidence"
}

# Create output directories
def create_output_dirs():
    splits = ["train", "val", "test"]
    categories = {
        "confidence": ["happy", "neutral", "surprise"],
        "no_confidence": ["angry", "disgust", "fear", "sad"]
    }
    
    for split in splits:
        for category, emotions in categories.items():
            for emotion in emotions:
                os.makedirs(os.path.join(output_base_path, split, category, emotion), exist_ok=True)
    print("Output directories created.")

# Load images and labels
def load_images_and_labels():
    image_paths = []
    labels = []
    emotion_labels = []
    
    splits = ["train", "validation"]
    for split in splits:
        split_path = os.path.join(input_base_path, split)
        if not os.path.exists(split_path):
            print(f"Warning: Directory {split_path} does not exist.")
            continue
            
        for label_name in labels_map.keys():
            folder = os.path.join(split_path, label_name)
            if not os.path.exists(folder):
                print(f"Warning: Directory {folder} does not exist.")
                continue
                
            print(f"Processing {split}/{label_name}...")
            img_count = 0
            for img_name in os.listdir(folder):
                img_path = os.path.join(folder, img_name)
                if os.path.isfile(img_path) and img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    image_paths.append(img_path)
                    labels.append(labels_map[label_name])
                    emotion_labels.append(label_name)
                    img_count += 1
                else:
                    print(f"Warning: Skipping {img_path} (not a valid image file)")
            print(f"Loaded {img_count} images from {split}/{label_name}")
    
    return image_paths, labels, emotion_labels

# Copy images to the new structure
def copy_images(image_paths, labels, emotion_labels, split_name):
    for img_path, category, emotion in zip(image_paths, labels, emotion_labels):
        dest_path = os.path.join(output_base_path, split_name, category, emotion, os.path.basename(img_path))
        try:
            shutil.copy2(img_path, dest_path)
        except Exception as e:
            print(f"Error copying {img_path} to {dest_path}: {e}")

# Main execution
def main():
    # Create output directories
    create_output_dirs()
    
    # Load images and labels
    image_paths, labels, emotion_labels = load_images_and_labels()
    
    if not image_paths:
        raise ValueError("No images were loaded. Check the input dataset path and structure.")
    
    print(f"Total images loaded: {len(image_paths)}")
    
    # Perform stratified split
    # First split: 80% (train + val), 20% test
    X_temp, X_test, y_temp, y_test, emo_temp, emo_test = train_test_split(
        image_paths, labels, emotion_labels, test_size=0.15, random_state=42, stratify=labels
    )
    
    # Second split: 70/80 (train), 10/80 (val) of temp
    X_train, X_val, y_train, y_val, emo_train, emo_val = train_test_split(
        X_temp, y_temp, emo_temp, test_size=0.1765, random_state=42, stratify=y_temp
    )  # 0.1765 ≈ 15/(100-15) to get 15% val of total
    
    # Print split sizes
    print(f"Train samples: {len(X_train)}")
    print(f"Validation samples: {len(X_val)}")
    print(f"Test samples: {len(X_test)}")
    
    # Copy images to respective directories
    print("Copying images to train...")
    copy_images(X_train, y_train, emo_train, "train")
    print("Copying images to val...")
    copy_images(X_val, y_val, emo_val, "val")
    print("Copying images to test...")
    copy_images(X_test, y_test, emo_test, "test")
    
    print("Dataset split and organization completed successfully.")

if __name__ == "__main__":
    main()

Output directories created.
Processing train/angry...
Loaded 3993 images from train/angry
Processing train/disgust...
Loaded 436 images from train/disgust
Processing train/fear...
Loaded 4103 images from train/fear
Processing train/sad...
Loaded 4938 images from train/sad
Processing train/happy...
Loaded 7164 images from train/happy
Processing train/neutral...
Loaded 4982 images from train/neutral
Processing train/surprise...
Loaded 3205 images from train/surprise
Processing validation/angry...
Loaded 960 images from validation/angry
Processing validation/disgust...
Loaded 111 images from validation/disgust
Processing validation/fear...
Loaded 1018 images from validation/fear
Processing validation/sad...
Loaded 1139 images from validation/sad
Processing validation/happy...
Loaded 1825 images from validation/happy
Processing validation/neutral...
Loaded 1216 images from validation/neutral
Processing validation/surprise...
Loaded 797 images from validation/surprise
Total images loaded: 3

In [2]:
!ls /kaggle/working/data_split
!ls /kaggle/working/data_split/train/confidence
!ls /kaggle/working/data_split/train/no_confidence

test  train  val
happy  neutral	surprise
angry  disgust	fear  sad


In [3]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Suppress TensorFlow warnings
import tensorflow as tf
tf.get_logger().setLevel("ERROR")  # Suppress additional warnings
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from pathlib import Path

# Define paths
data_path = "/kaggle/working/data_split"

# Step 1: Load dataset
def load_dataset():
    images = []
    labels = []
    categories = ["confidence", "no_confidence"]
    splits = ["train", "val"]
    
    for split in splits:
        for category_idx, category in enumerate(categories):
            for emotion in os.listdir(os.path.join(data_path, split, category)):
                folder = os.path.join(data_path, split, category, emotion)
                if not os.path.isdir(folder):
                    continue
                print(f"Processing {split}/{category}/{emotion}...")
                img_count = 0
                for img_name in os.listdir(folder):
                    img_path = os.path.join(folder, img_name)
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                    if img is not None:
                        img = cv2.resize(img, (48, 48))
                        images.append(img)
                        labels.append(category_idx)
                        img_count += 1
                    else:
                        print(f"Failed to load {img_path}")
                print(f"Loaded {img_count} images")
    
    images = np.array(images) / 255.0
    images = np.expand_dims(images, -1)
    labels = np.array(labels)
    
    if len(images) == 0:
        raise ValueError("No images loaded. Check dataset path.")
    
    print(f"Total images: {len(images)}")
    return images, labels

# Step 2: Train model
def train_model(images, labels):
    X_train, X_temp, y_train, y_temp = train_test_split(
        images, labels, test_size=0.3, random_state=42, stratify=labels
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
    )
    
    y_train = to_categorical(y_train, num_classes=2)
    y_val = to_categorical(y_val, num_classes=2)
    y_test = to_categorical(y_test, num_classes=2)
    
    print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")
    
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 1)),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=32,
        verbose=1
    )
    
    loss, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {acc*100:.2f}%")
    
    model.save("/kaggle/working/emotion_model.h5")
    print("Model saved at /kaggle/working/emotion_model.h5")
    return model

# Main
def main():
    print("Loading dataset...")
    images, labels = load_dataset()
    print("Training model...")
    train_model(images, labels)

if __name__ == "__main__":
    main()

2025-08-11 12:28:28.685860: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754915308.870209      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754915308.921274      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading dataset...
Processing train/confidence/happy...
Loaded 6313 images
Processing train/confidence/surprise...
Loaded 2817 images
Processing train/confidence/neutral...
Loaded 4301 images
Processing train/no_confidence/fear...
Loaded 3613 images
Processing train/no_confidence/sad...
Loaded 4220 images
Processing train/no_confidence/angry...
Loaded 3487 images
Processing train/no_confidence/disgust...
Loaded 368 images
Processing val/confidence/happy...
Loaded 1327 images
Processing val/confidence/surprise...
Loaded 598 images
Processing val/confidence/neutral...
Loaded 954 images
Processing val/no_confidence/fear...
Loaded 749 images
Processing val/no_confidence/sad...
Loaded 937 images
Processing val/no_confidence/angry...
Loaded 725 images
Processing val/no_confidence/disgust...
Loaded 94 images
Total images: 30503
Training model...
Train: 21352, Val: 4575, Test: 4576


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1754915322.782537      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/10


I0000 00:00:1754915326.729086     101 service.cc:148] XLA service 0x791e0400aeb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1754915326.729767     101 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1754915327.000008     101 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 53/668[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 3ms/step - accuracy: 0.5226 - loss: 0.6981

I0000 00:00:1754915329.032691     101 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.5575 - loss: 0.6831 - val_accuracy: 0.6536 - val_loss: 0.6297
Epoch 2/10
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6389 - loss: 0.6341 - val_accuracy: 0.6835 - val_loss: 0.5836
Epoch 3/10
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6906 - loss: 0.5815 - val_accuracy: 0.7121 - val_loss: 0.5566
Epoch 4/10
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7160 - loss: 0.5466 - val_accuracy: 0.7222 - val_loss: 0.5361
Epoch 5/10
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7425 - loss: 0.5113 - val_accuracy: 0.7296 - val_loss: 0.5263
Epoch 6/10
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7583 - loss: 0.4864 - val_accuracy: 0.7421 - val_loss: 0.5143
Epoch 7/10
[1m668/668[0m [32m━━━━━━━

In [4]:
import cv2
import numpy as np
import tensorflow as tf
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
tf.get_logger().setLevel("ERROR")

# Paths
model_path = "/kaggle/working/emotion_model.h5"
video_path = "/kaggle/input/muzammil/muzammil.mp4"  # Apna video path daal
output_video_path = "/kaggle/working/output_video.avi"
haar_cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"

# Load model
model = tf.keras.models.load_model(model_path)

# Analyze video
def analyze_video(video_path, model, output_path):
    face_cascade = cv2.CascadeClassifier(haar_cascade_path)
    if face_cascade.empty():
        raise ValueError("Error loading Haar Cascade.")
    
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Video not found: {video_path}")
    
    predictions = []
    processed_frames = []
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
        
        for (x, y, w, h) in faces:
            face = gray[y:y+h, x:x+w]
            face = cv2.resize(face, (48, 48))
            face = face / 255.0
            face = np.expand_dims(face, axis=(0, -1))
            pred = model.predict(face, verbose=0)
            predictions.append(pred[0])
            
            label = "Confidence" if pred[0][0] > pred[0][1] else "No Confidence"
            confidence = pred[0][0] * 100 if pred[0][0] > pred[0][1] else pred[0][1] * 100
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            cv2.putText(frame, f"{label}: {confidence:.2f}%", (x, y-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        
        processed_frames.append(frame)
    
    cap.release()
    
    # Save output video
    if processed_frames:
        height, width = processed_frames[0].shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, 20.0, (width, height))
        for frame in processed_frames:
            out.write(frame)
        out.release()
    
    # Calculate percentages
    if predictions:
        predictions = np.array(predictions)
        confidence_percent = np.mean(predictions[:, 0]) * 100
        no_confidence_percent = np.mean(predictions[:, 1]) * 100
        print(f"Confidence: {confidence_percent:.2f}%")
        print(f"No Confidence: {no_confidence_percent:.2f}%")
    else:
        print("No faces detected.")
    
    return confidence_percent, no_confidence_percent

# Run analysis
print("Analyzing video...")
confidence_percent, no_confidence_percent = analyze_video(video_path, model, output_video_path)
print(f"Final Results: Confidence: {confidence_percent:.2f}%, No Confidence: {no_confidence_percent:.2f}%")

Analyzing video...
Confidence: 49.98%
No Confidence: 50.02%
Final Results: Confidence: 49.98%, No Confidence: 50.02%


In [None]:
2