In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the model directory and video files
# referenced later in this notebook are addressed under /content/drive/MyDrive.
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
import cv2
from PIL import Image
import torch
from transformers import DetrForObjectDetection, DetrImageProcessor, AutoImageProcessor, AutoModelForImageClassification
from google.colab.patches import cv2_imshow

# Initialize the DETR processor and model used for object detection.
# Both are loaded from the same checkpoint and revision so that the
# preprocessing matches the weights.
object_detection_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
object_detection_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

# Initialize the emotion classification processor and model from the
# fine-tuned checkpoint stored on Google Drive.
# NOTE: pass the `model_path` variable, not the literal string "model_path";
# the literal would be interpreted as a (non-existent) local directory / Hub
# repository id and the Drive checkpoint would never be loaded.
model_path = "/content/drive/MyDrive/CharbelMATTAR_LU_WebDev/Project/Model"
emotion_processor = AutoImageProcessor.from_pretrained(model_path)
emotion_model = AutoModelForImageClassification.from_pretrained(model_path)

# Open the input video and fail fast if it cannot be read; otherwise the
# processing loop would silently do nothing and leave an empty output file.
video_path = "/content/drive/MyDrive/Videos/CustomerService6.mp4"
video_capture = cv2.VideoCapture(video_path)
if not video_capture.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

# Read the stream properties needed to create a matching output writer.
# The frame rate is kept as a float (e.g. 29.97) instead of being truncated to
# an int, so the output plays back at the same speed as the input.
fps = video_capture.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    # Some containers do not report a frame rate; fall back to a sane default
    # rather than handing 0 fps to the writer.
    fps = 30.0
frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create the VideoWriter object for the annotated video.
output_path = '/content/drive/MyDrive/Videos/output_video5.mp4'
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
if not out.isOpened():
    video_capture.release()
    raise IOError(f"Could not open output video for writing: {output_path}")


# Class names indexed by the argmax of the emotion model's logits.
# NOTE(review): assumed to match the class order of the fine-tuned checkpoint;
# confirm against emotion_model.config.id2label.
emotion_labels = ['Ahegao', 'Angry', 'Happy', 'Neutral', 'Sad', 'Surprise']

# Process the video frame by frame: detect objects with DETR, classify the
# emotion of every detected person, annotate the frame, then display it and
# append it to the output video. The try/finally guarantees that the capture
# and writer are released (and the output file finalized) even if a frame
# fails mid-way.
try:
    while video_capture.isOpened():
        # Counters are reset for every frame, so the numbers in the labels
        # are per-frame indices, not identities tracked across frames.
        person_counter = 0
        customer_counter = 0
        sales_counter = 0

        ret, frame = video_capture.read()
        if not ret:
            # End of stream (or read error): stop processing.
            break

        # OpenCV delivers BGR frames; the models are fed an RGB PIL image.
        # This image is never drawn on, so it also serves as the clean source
        # for the per-person crops below.
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        frame_h, frame_w = frame.shape[:2]

        # Inference only: disable autograd to avoid building graphs per frame.
        with torch.no_grad():
            inputs = object_detection_processor(images=frame_pil, return_tensors="pt")
            object_outputs = object_detection_model(**inputs)

        # PIL's size is (width, height); post-processing expects (height, width).
        target_sizes = torch.tensor([frame_pil.size[::-1]])
        object_results = object_detection_processor.post_process_object_detection(
            object_outputs, target_sizes=target_sizes, threshold=0.9
        )[0]

        # People left of the vertical centre line are labelled as sales
        # representatives, everyone else as customers.
        midpoint = frame_width // 2

        # Annotate the frame with the detections of interest.
        for score, label, box in zip(object_results["scores"], object_results["labels"], object_results["boxes"]):
            box = [round(i, 2) for i in box.tolist()]
            x1, y1, x2, y2 = (int(v) for v in box)
            class_name = object_detection_model.config.id2label[label.item()]

            if class_name == "person":
                person_counter += 1

                person_midpoint = (box[0] + box[2]) // 2
                if person_midpoint < midpoint:
                    sales_counter += 1
                    person_label = "Sales Rep"
                    person_index = sales_counter
                else:
                    customer_counter += 1
                    person_label = "Customer"
                    person_index = customer_counter

                # Clamp the box to the frame: predicted boxes can extend
                # slightly past the image border, and negative coordinates
                # would otherwise yield an empty or wrong crop.
                left, top = max(x1, 0), max(y1, 0)
                right, bottom = min(x2, frame_w), min(y2, frame_h)

                if right > left and bottom > top:
                    # Crop from the untouched RGB image rather than the BGR
                    # frame, so colours are correct and annotations drawn for
                    # earlier detections do not leak into this crop.
                    roi = frame_pil.crop((left, top, right, bottom))

                    with torch.no_grad():
                        emotion_inputs = emotion_processor(roi, return_tensors="pt")
                        emotion_outputs = emotion_model(**emotion_inputs)

                    predicted_probabilities = torch.softmax(emotion_outputs.logits, dim=1)
                    predicted_emotion_index = torch.argmax(predicted_probabilities, dim=1)
                    predicted_emotion_class = predicted_emotion_index.item()

                    # Map the predicted index to the corresponding emotion label
                    predicted_emotion = emotion_labels[predicted_emotion_class]
                else:
                    # Degenerate box: nothing to classify.
                    predicted_emotion = "Unknown"

                label_string = f"{person_label} {person_index}: {round(score.item(), 3)}, {predicted_emotion}"

                # Add bounding box and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(frame, label_string, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
            elif class_name == "cell phone":
                # Phones only get a text label (no rectangle).
                label_string = f"{class_name}: {round(score.item(), 3)}"
                cv2.putText(frame, label_string, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the resulting frame inline in the notebook.
        # NOTE: this renders every frame and makes the cell output very
        # large for long videos.
        cv2_imshow(frame)
        cv2.waitKey(1)
        # Write the annotated frame into the output video
        out.write(frame)
finally:
    # Release the video capture object and the output video writer
    video_capture.release()
    out.release()

    # Close all the frames
    cv2.destroyAllWindows()


Output hidden; open in https://colab.research.google.com to view.