In [None]:
import cv2
import numpy as np
import time
from ultralytics import YOLO
import pygame 

pygame.mixer.init()

# playing music
def play_relaxing_music():
    """Load ``music.mp3`` and start playing it on repeat.

    The path is relative, so the file is looked up from the current
    working directory (the original note says it ships with the repo).
    Playback keeps looping until ``pygame.mixer.music.stop()`` is called.
    """
    repeat_forever = -1  # pygame: a loop count of -1 repeats indefinitely
    player = pygame.mixer.music
    player.load('music.mp3')
    player.play(repeat_forever)

# stop music
def stop_relaxing_music():
    """Stop whatever the pygame music channel is currently playing."""
    player = pygame.mixer.music
    player.stop()

# using YOLOv8
model = YOLO("yolov8n.pt")  # detector built from the "yolov8n.pt" weights file

# Open the webcam. The argument is OpenCV's camera device index; 1 is used
# here (presumably an external/second camera -- 0 is typically the built-in
# one). Change the index to switch cameras.
cap = cv2.VideoCapture(1)

# Tracks whether the relaxing music has been started, so the on-screen
# prompt is only shown while no music is playing.
relaxing_music_playing = False

# Main loop: run YOLO on every webcam frame, draw the confident detections,
# offer relaxing music while a person is visible, and react to the keyboard
# ('y' starts the music, Esc quits).
#
# The loop is wrapped in try/finally so the camera, the preview window and
# the music are always released -- including when the camera stops delivering
# frames or an exception is raised mid-loop (important in a notebook, where
# the kernel keeps running and would otherwise keep the camera locked).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera unplugged / stream ended).
            break

        results = model(frame)  # run YOLO on the current frame

        person_detected = False  # set when a confident "person" box is found

        for result in results:
            for box in result.boxes:
                cls = int(box.cls[0].item())  # class index; 0 = person
                conf = box.conf[0].item()

                if conf > 0.5:  # ignore low-confidence detections
                    # Draw the bounding box and its "<class>: <confidence>" label.
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    label = f"{model.names[cls]}: {conf:.2f}"
                    # Keep the label inside the frame: for boxes touching the
                    # top edge, y1 - 10 would be negative and the text would
                    # be drawn off-screen.
                    label_y = max(y1 - 10, 15)
                    cv2.putText(frame, label, (x1, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                    if cls == 0:  # class 0 is "person" in the COCO label set
                        person_detected = True

        # Show the prompt only while a person is visible and no music is playing.
        if person_detected and not relaxing_music_playing:
            cv2.putText(frame, "You look stressed today, would you like some relaxing music? Press 'y' for Yes",
                        (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

        # Display the annotated frame.
        cv2.imshow("YOLOv8 Detection", frame)

        # Poll the keyboard for 1 ms ('y' = yes, Esc = exit).
        key = cv2.waitKey(1) & 0xFF
        if key == ord('y') and not relaxing_music_playing:
            # Start the music only once: without the flag check, a repeated
            # or held 'y' would reload the file and restart the track on
            # every key event.
            print("Playing relaxing music...")
            play_relaxing_music()
            relaxing_music_playing = True
        elif key == 27:  # Esc key
            print("Exiting and stopping music...")
            break  # the music is stopped in the finally block below
finally:
    # Always give back the camera and close the preview window, then stop the
    # music regardless of how the loop ended.
    cap.release()
    cv2.destroyAllWindows()
    stop_relaxing_music()


pygame 2.6.1 (SDL 2.28.4, Python 3.11.4)
Hello from the pygame community. https://www.pygame.org/contribute.html





0: 384x640 (no detections), 72.6ms
Speed: 2.7ms preprocess, 72.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 71.7ms
Speed: 2.0ms preprocess, 71.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 69.9ms
Speed: 2.6ms preprocess, 69.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 66.8ms
Speed: 2.2ms preprocess, 66.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 64.7ms
Speed: 1.8ms preprocess, 64.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.9ms
Speed: 1.7ms preprocess, 62.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)



2025-03-22 13:21:06.459 Python[64347:763068] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-22 13:21:06.459 Python[64347:763068] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 384x640 1 person, 63.6ms
Speed: 1.7ms preprocess, 63.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.8ms
Speed: 1.9ms preprocess, 62.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.5ms
Speed: 1.8ms preprocess, 62.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.2ms
Speed: 1.7ms preprocess, 62.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 63.7ms
Speed: 1.7ms preprocess, 63.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 surfboard, 64.7ms
Speed: 1.8ms preprocess, 64.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 64.0ms
Speed: 1.7ms preprocess, 64.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 63.0ms
Speed: 1.7ms preprocess, 63.0ms inference, 0.5ms postprocess per image at sha

[src/libmpg123/id3.c:process_comment():584] error: No comment text / valid description?



0: 384x640 2 persons, 62.8ms
Speed: 2.2ms preprocess, 62.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 61.5ms
Speed: 1.8ms preprocess, 61.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 63.5ms
Speed: 1.7ms preprocess, 63.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 62.6ms
Speed: 1.7ms preprocess, 62.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 62.4ms
Speed: 1.5ms preprocess, 62.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 63.7ms
Speed: 1.7ms preprocess, 63.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 64.7ms
Speed: 2.0ms preprocess, 64.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 63.7ms
Speed: 1.8ms preprocess, 63.7ms inference, 0.4ms postprocess per image at shape (

: 