In [None]:
import cv2
import cvlib as cv
from cvlib.object_detection import draw_bbox
from gtts import gTTS
import pygame
import os
import time

# Initialize pygame mixer for audio playback
pygame.mixer.init()

def speak(text):
    """Speak *text* aloud and block until playback has finished.

    The text is printed, synthesized to ``output.mp3`` in the current
    working directory with gTTS, and played through pygame's music channel.
    The call returns only once ``pygame.mixer.music`` reports it is no
    longer busy.

    Args:
        text: English text to speak. Empty or whitespace-only text is
            printed but not synthesized, because gTTS rejects empty input.
    """
    print(text)  # Debugging print

    # Nothing to synthesize: skip the TTS request and the audio round-trip.
    if not text or not text.strip():
        return

    audio_path = "output.mp3"

    # Shut the mixer down completely so the previously loaded file is
    # released before it is deleted. music.stop() raises when the mixer is
    # not initialized (e.g. after an earlier failed call), so guard it.
    if pygame.mixer.get_init():
        pygame.mixer.music.stop()
    pygame.mixer.quit()

    # Ensure the previous file is removed
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Reinitialize pygame.mixer after unloading
    pygame.mixer.init()

    # Generate new audio. save() writes and closes the file before it
    # returns, so no settling delay is needed before loading it.
    tts = gTTS(text=text, lang='en', slow=False)
    tts.save(audio_path)

    # Play the new audio file
    pygame.mixer.music.load(audio_path)
    pygame.mixer.music.play()

    # Poll until the audio finishes playing
    while pygame.mixer.music.get_busy():
        time.sleep(0.1)

# Initialize webcam
video = cv2.VideoCapture(0)
detected_objects = set()  # Store unique detected objects

try:
    while True:
        ret, frame = video.read()
        if not ret:
            print("Failed to capture frame")
            break

        # Object detection
        bbox, label, conf = cv.detect_common_objects(frame)

        # Find newly detected objects. A frame can contain the same label
        # several times (e.g. two people), so de-duplicate while keeping the
        # detection order to avoid announcing "person and person".
        new_detections = [
            item for item in dict.fromkeys(label)
            if item not in detected_objects
        ]

        if new_detections:
            detected_objects.update(new_detections)  # Add to detected set
            speech_text = "I found " + " and ".join(new_detections)
            speak(speech_text)  # Speak detected objects (blocks until done)

        # Draw bounding boxes and display output
        output_image = draw_bbox(frame, bbox, label, conf)
        cv2.imshow("Detection", output_image)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even when detection or
    # speech raises or the cell is interrupted.
    video.release()
    cv2.destroyAllWindows()

# Final speech for all detected objects; sorted so the summary order is
# deterministic rather than following arbitrary set ordering.
if detected_objects:
    final_text = "I found " + " and ".join(sorted(detected_objects))
    speak(final_text)

print("Final detected objects:", detected_objects)


I found person
I found cell phone
I found remote
I found cat
I found book
I found umbrella
