In [1]:
import cv2
import torch
import numpy
from PIL import Image
from torchvision import transforms
from model import SwinWithSE, SEBlock, train_one_epoch, evaluate

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Disabled alternative to loading the checkpoint with `weights_only=False`:
# allow-list the custom class for PyTorch's restricted unpickler.
# torch.serialization.add_safe_globals([SwinWithSE])

# Fresh SwinWithSE instance plus the loss / optimizer pair bound to its parameters.
# NOTE(review): 7 is presumably the number of emotion classes - confirm in model.py.
model = SwinWithSE(7)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [17]:
import emotion_detection

In [2]:
# Load the complete pickled model object (architecture + weights) onto `device`.
# SECURITY: weights_only=False goes through the full pickle machinery, which can
# execute arbitrary code stored in the file - only load checkpoints you trust.
model = torch.load('swin_with_se_fer2013_full.pth', map_location=device, weights_only=False)

# Put the network in inference mode. torch.no_grad() only disables gradient
# tracking; layers such as Dropout / StochasticDepth keep whatever training flag
# was pickled with the model unless eval() is called explicitly.
model.eval()
print(model)

SwinWithSE(
  (backbone): SwinTransformer(
    (features): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): Permute()
        (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      )
      (1): Sequential(
        (0): SwinTransformerBlock(
          (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (attn): ShiftedWindowAttention(
            (qkv): Linear(in_features=96, out_features=288, bias=True)
            (proj): Linear(in_features=96, out_features=96, bias=True)
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP(
            (0): Linear(in_features=96, out_features=384, bias=True)
            (1): GELU(approximate='none')
            (2): Dropout(p=0.0, inplace=False)
            (3): Linear(in_features=384, out_features=96, bias=True)
            (4): Dropout(p=0.0, inpla

In [3]:
import mediapipe as mp
# Sanity check: report which NumPy build the kernel picked up, then confirm
# that the module exposes `ndarray`.
print(numpy.__version__, numpy.__file__)
print(hasattr(numpy, 'ndarray'))

1.26.2 c:\Users\Bijaya\anaconda3\envs\emotion_detection\lib\site-packages\numpy\__init__.py
True


In [4]:
# Preprocessing transform (must match the pipeline used during training)
preprocess = transforms.Compose(
    [
        # Collapse to grayscale but keep three identical channels for the backbone.
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Map [0, 1] pixel values to [-1, 1] on every channel.
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# Class names indexed by the model's predicted class id.
# NOTE(review): the order is assumed to match the label encoding used in training - confirm.
emotion_labels = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'neutral',
    'sad',
    'surprise',
]

In [5]:
# Build the MediaPipe face detector that process_frame() queries for every frame.
# NOTE(review): per the MediaPipe docs, model_selection=1 picks the full-range
# model (0 is the short-range one) - confirm this suits a desktop webcam.
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(
    model_selection=1,
    min_detection_confidence=0.5,
)

In [13]:
import time
import matplotlib.pyplot as plt
import numpy as np
# Directory prefix for the emoji images; process_frame() builds paths of the
# form './emoji/<label>.png' for each predicted emotion.
emoji_path = './emoji/'

In [14]:
def _clamp_bbox(x, y, w, h, frame_w, frame_h):
    """Clip the box (x, y, w, h) to a frame_w x frame_h image; return corners (x0, y0, x1, y1)."""
    x0, y0 = max(0, x), max(0, y)
    # Clip the far corner rather than the width/height, so a box that starts
    # off-frame (negative x or y) shrinks instead of being shifted into the frame.
    x1, y1 = min(frame_w, x + w), min(frame_h, y + h)
    return x0, y0, x1, y1


def process_frame(frame):
    """Detect faces in a BGR frame, classify each face's emotion and annotate the frame.

    For every face found by the module-level ``face_detection`` object, the face
    crop is run through ``preprocess`` and ``model``; the predicted label is
    printed together with its emoji path and drawn, with a bounding box, onto
    ``frame``.

    Args:
        frame: BGR image array as returned by ``cv2.VideoCapture.read()``.
            It is modified in place (rectangles and labels are drawn on it).

    Returns:
        The annotated frame encoded as JPEG ``bytes``, or ``None`` if OpenCV
        fails to encode the frame.
    """
    # MediaPipe expects RGB, OpenCV delivers BGR.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_detection.process(frame_rgb)

    # Frame size is the same for every detection, so read it once.
    ih, iw = frame.shape[:2]

    # `detections` is empty/None when no face was found.
    for detection in results.detections or []:
        bbox = detection.location_data.relative_bounding_box
        x0, y0, x1, y1 = _clamp_bbox(
            int(bbox.xmin * iw),
            int(bbox.ymin * ih),
            int(bbox.width * iw),
            int(bbox.height * ih),
            iw,
            ih,
        )
        if x1 <= x0 or y1 <= y0:  # box lies entirely outside the frame
            continue

        # Crop the face from the RGB image and turn it into a 1-image batch.
        face_pil = Image.fromarray(frame_rgb[y0:y1, x0:x1])
        face_tensor = preprocess(face_pil).unsqueeze(0).to(device)

        # Predict emotion (no gradients needed for inference).
        with torch.no_grad():
            output = model(face_tensor)
        emotion = emotion_labels[output.argmax(dim=1).item()]
        emoji = './emoji/' + emotion + '.png'
        print(emotion, emoji)

        # Draw rectangle and label; when the face touches the top edge, put the
        # label just inside the box so it is not rendered off-screen.
        label_y = y0 - 10 if y0 - 10 >= 20 else y0 + 25
        cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2.putText(frame, emotion, (x0, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Encode the annotated frame as JPEG; report failure to the caller as None.
    ok, buffer = cv2.imencode('.jpg', frame)
    if not ok:
        return None
    return buffer.tobytes()
def generate_frames_continuously(camera_index=0):
    """Show a live, annotated webcam feed in an OpenCV window until the user quits.

    Each captured frame is passed through ``process_frame``; the returned JPEG
    bytes are decoded again and displayed. If a read fails, the capture device
    is released and reopened after a one-second pause.

    Args:
        camera_index: Index of the capture device handed to ``cv2.VideoCapture``.
            Defaults to 0 (the default webcam), matching the previous behavior.

    Returns:
        None. The loop ends when 'q' is pressed in the window, on Ctrl+C /
        kernel interrupt, on an unexpected exception (which is printed), or
        when the camera cannot be reopened. The capture device and all OpenCV
        windows are always released on exit.
    """
    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    try:
        print("Webcam opened successfully. Press 'q' to quit.")
        while True:
            success, frame = cap.read()
            if not success:
                print("Error: Could not read frame. Restarting capture...")
                # Release the failed handle first; otherwise the old capture
                # object leaks and may keep the device locked.
                cap.release()
                time.sleep(1)  # Brief pause before retry
                cap = cv2.VideoCapture(camera_index)
                if not cap.isOpened():
                    print("Error: Failed to reopen webcam. Exiting.")
                    break
                continue

            frame_bytes = process_frame(frame)
            if frame_bytes is None:
                print("Warning: process_frame returned None. Skipping frame.")
                continue

            # process_frame hands back JPEG bytes; decode them for display.
            frame_img = cv2.imdecode(np.frombuffer(frame_bytes, np.uint8), cv2.IMREAD_COLOR)
            if frame_img is not None:
                cv2.imshow('Webcam Feed', frame_img)
                # waitKey also pumps the GUI event loop; quit on 'q'.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print("Quitting...")
                    break
            else:
                print("Warning: Failed to decode frame.")

            # Caps the loop at roughly 30 iterations per second (processing
            # time comes on top of this delay).
            time.sleep(1.0 / 30)

    except KeyboardInterrupt:
        print("\nStopped by user (Ctrl+C). Closing webcam...")
    except Exception as e:
        print(f"Error occurred: {e}")
    finally:
        cap.release()
        cv2.destroyAllWindows()  # Close all OpenCV windows
        print("Webcam and windows released.")


In [15]:
# NOTE(review): both assignments repeat, with identical values, definitions made
# in earlier cells (emotion_labels in the preprocessing cell, emoji_path next to
# the imports); consider keeping a single definition.
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
emoji_path = './emoji/'

In [16]:
# Start the live webcam loop. This call blocks until 'q' is pressed in the OpenCV
# window, the kernel is interrupted, or the camera cannot be reopened.
generate_frames_continuously()

Webcam opened successfully. Press 'q' to quit.
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
neutral ./emoji/neutral.png
sad ./emoji/sad.png
neutral ./emoji/neutral.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
neutral ./emoji/neutral.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emoji/sad.png
sad ./emo