In [1]:
import cv2
import torch
import numpy as np
import dask.array as da
import imutils
import threading
import winsound

In [2]:
# Where the downloaded MobileNet-SSD files live: the network definition (prototxt)
# and the pretrained Caffe weights. Raw strings keep the Windows backslashes literal.
prototxt=r"C:\Users\user\Downloads\MobileNetSSD_deploy.prototxt"
model=r"C:\Users\user\Downloads\mobilenet_iter_73000.caffemodel"

In [3]:
# Detections whose confidence does not exceed this value are not drawn or labelled.
minconfidence=0.3
# Label names; the class id reported for a detection is used as an index into this list.
classes=[
    'background',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]
# One row of three random values in [0, 255) per label, used as that label's drawing color.
# Seeding first makes the label-to-color assignment identical on every run.
np.random.seed(1000)
colors=np.random.uniform(low=0,high=255,size=(len(classes),3))
# Build the detector from the prototxt definition and the Caffe weights.
network=cv2.dnn.readNetFromCaffe(prototxt,model)

In [4]:
# Open the webcam and grab one frame to act as the first reference ("start") for motion detection.
# The reference is kept in grayscale because motion is measured from pixel-intensity
# differences between consecutive frames, not from color.
live=cv2.VideoCapture(0,cv2.CAP_DSHOW)
if not live.isOpened():
    raise RuntimeError("Could not open camera 0 (DirectShow backend).")
ret,start=live.read()
if not ret:
    # A failed read leaves no usable image; stop with a clear message instead of
    # letting cvtColor fail on it, and give the device back first.
    live.release()
    raise RuntimeError("Camera 0 was opened but did not deliver a first frame.")
start=cv2.cvtColor(start,cv2.COLOR_BGR2GRAY)
# alarm: True while the beeping thread is running (prevents starting a second one).
alarm=False
# check: True while motion monitoring is switched on.
check=False
# monitor: running count of frames whose motion passed the threshold; goes up on a
# moving frame and back down on a quiet one, and the beep starts once it is high enough.
monitor=0

In [5]:
def soundthealarm():
    """Beep up to five times while motion monitoring is on, then clear the alarm flag.

    Each beep is 1000 Hz for 2000 ms and blocks until it finishes. The global
    ``check`` is re-read before every beep, so switching monitoring off stops
    the sequence early. The global ``alarm`` is always reset to False on exit.
    """
    global alarm
    beeps_left=5
    while beeps_left>0 and check:
        winsound.Beep(1000,2000)
        beeps_left-=1
    alarm=False

In [10]:
# Main loop: run object detection on every webcam frame, optionally monitor motion,
# and react to key presses in the "Cam" window:
#   a - toggle motion monitoring on/off (resets the motion counter)
#   q - quit the loop
#
# Reuse the capture that is already open: opening device 0 a second time without
# releasing the first handle leaks it. Reopen only if it has been released
# (for example when this cell is re-run after the cleanup cell).
if not live.isOpened():
    live=cv2.VideoCapture(0,cv2.CAP_DSHOW)
while True:
    ret,frame=live.read()
    if not ret:
        break
    height,width=frame.shape[:2]
    # NOTE(review): scale 0.007 and mean 100 are kept as they were; MobileNet-SSD
    # examples commonly use 0.007843 and 127.5 - confirm which this model expects.
    inp=cv2.dnn.blobFromImage(cv2.resize(frame,(300,300)),0.007,(300,300),100)
    network.setInput(inp)
    # Keep the NumPy array returned by forward(); wrapping it in a lazy dask array
    # makes every element access a deferred computation and breaks ":.2f" formatting.
    detected=network.forward()
    for i in range(detected.shape[2]):
        # Per detection: index 1 is the class id, index 2 the confidence,
        # indices 3-6 the box corners, which are scaled by frame width/height below.
        confidence=float(detected[0,0,i,2])
        if confidence>minconfidence:
            predictedclass=int(detected[0,0,i,1])
            text=f"{classes[predictedclass]}:{confidence:.2f}"
            upperleftx=int(detected[0,0,i,3]*width)
            upperlefty=int(detected[0,0,i,4]*height)
            lowerrightx=int(detected[0,0,i,5]*width)
            lowerrighty=int(detected[0,0,i,6]*height)
            # Plain Python floats are accepted as a color by both drawing calls.
            color=colors[predictedclass].tolist()
            # cv2.rectangle requires a color argument; draw the box in the class color.
            cv2.rectangle(frame,(upperleftx,upperlefty),(lowerrightx,lowerrighty),color,2)
            cv2.putText(frame,text,(upperleftx,upperlefty+15),cv2.FONT_HERSHEY_SIMPLEX,0.5,color,2)
    if check:
        # Motion = pixels whose intensity changed by more than 25 since the previous frame.
        gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
        evidence=cv2.absdiff(start,gray)
        threshold=cv2.threshold(evidence,25,255,cv2.THRESH_BINARY)[1]
        start=gray
        # Changed pixels are 255 each, so this sum grows with the amount of motion.
        if threshold.sum()>1000000:
            monitor+=1
        elif monitor>0:
            monitor-=1
        cv2.imshow("Cam",threshold)
    else:
        cv2.imshow("Cam",frame)
    # Sustained motion: start the beeping thread unless one is already running.
    if monitor>15 and not alarm:
        alarm=True
        threading.Thread(target=soundthealarm).start()
    # waitKey lets the window process its events (needed for imshow to refresh)
    # and returns the pressed key code, or -1 if none; the mask keeps the low byte.
    key=cv2.waitKey(1)&0xFF
    if key==ord("a"):
        check=not check
        monitor=0
        if check:
            # Start monitoring from the current view rather than from a stale reference frame.
            start=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
    if key==ord("q"):
        # Clearing check also makes a running alarm thread stop after its current beep.
        check=False
        break

In [11]:
# Tear-down: close every OpenCV window and release the webcam handle.
cv2.destroyAllWindows()
live.release()