# In [9]:
import tensorflow as tf # type: ignore #
from tensorflow.keras.models import load_model #type: ignore #
import numpy as np
import cv2

# Location of the saved Keras model (.h5 file) that is applied to each motion
# region in the main loop below.
modelPath = "C:/Users/aahfa/Documents/personalCode/python/ML_AI/ML_NerfGun/relevantModels/humanIdentifyV1.h5"
model = load_model(modelPath)

# Parameters
img_height, img_width = 224, 224  # size each region is resized to before prediction
threshold = 0.5  # cut-off the model's prediction is compared against

# Initialize video capture (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail early with a clear message instead of crashing later inside OpenCV
    # when the first frame turns out to be None.
    cap.release()
    raise RuntimeError("Could not open video capture device 0")

# Optional: request a smaller frame size from the camera.
#cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
#cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)

# Motion detection compares consecutive frames, so two frames are needed
# before the main loop can start.
ret, frame1 = cap.read()
if ret:
    ret, frame2 = cap.read()
if not ret:
    # Release the device so it is not left locked by a failed start-up.
    cap.release()
    raise RuntimeError("Could not read the initial frames from the camera")

# Main loop: find the region that changed between two consecutive frames,
# run the model on that region, draw the result on the frame and show it.
# Runs until 'q' is pressed or the camera stops delivering frames.
# The try/finally guarantees the camera and windows are released even when an
# exception or a keyboard interrupt ends the loop.
try:
    while True:
        # Per-pixel absolute difference between the two frames: pixels that
        # changed between frames get large values.
        diff = cv2.absdiff(frame1, frame2)
        # Work on a single channel from here on.
        gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        # Smooth out noise / small variations before thresholding.
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        # Binarize: pixels above 20 become 255 (white, "moving"), the rest 0.
        _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
        # Grow the white regions so nearby fragments of the same moving
        # object merge before contours are extracted.
        dilated = cv2.dilate(thresh, None, iterations=3)
        contours, _ = cv2.findContours(dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # The region of interest is the union of the bounding boxes of ALL
        # contours (not the single largest contour). With no contours there
        # is no motion and nothing to classify.
        if contours:
            boxes = [cv2.boundingRect(contour) for contour in contours]
            xMin = min(bx for bx, _by, _bw, _bh in boxes)
            yMin = min(by for _bx, by, _bw, _bh in boxes)
            xMax = max(bx + bw for bx, _by, bw, _bh in boxes)
            yMax = max(by + bh for _bx, by, _bw, bh in boxes)

            roi = frame1[yMin:yMax, xMin:xMax]
            # cv2.resize expects dsize as (width, height).
            roi_resized = cv2.resize(roi, (img_width, img_height))
            # NOTE(review): the frame is handled as BGR above (COLOR_BGR2GRAY);
            # confirm the model was trained on the same channel order and on
            # inputs scaled to [0, 1].
            roi_normalized = roi_resized / 255.0
            # Add the leading batch dimension before calling predict.
            roi_expanded = np.expand_dims(roi_normalized, axis=0)

            # Classify the region of interest
            prediction = model.predict(roi_expanded)[0][0]

            # Blue box + label for every detected motion region. Labels are
            # anchored to the drawn box's top-left corner.
            cv2.rectangle(frame1, (xMin, yMin), (xMax, yMax), (255, 0, 0), 2)
            cv2.putText(frame1, "ROI", (xMin, yMin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
            # NOTE(review): a prediction at or below the threshold is treated
            # as "human"; this presumes the model outputs low values for the
            # human class - confirm against the training labels.
            if prediction <= threshold:
                # Redraw in green on top of the blue box.
                cv2.rectangle(frame1, (xMin, yMin), (xMax, yMax), (0, 255, 0), 2)
                cv2.putText(frame1, f"Human: {prediction:.2f}", (xMin, yMin - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow("Frame", frame1)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Slide the two-frame window forward by one frame.
        frame1 = frame2
        ret, frame2 = cap.read()
        if not ret:
            # The camera stopped delivering frames; frame2 is unusable, so
            # stop instead of crashing in absdiff on the next iteration.
            print("Frame capture failed; stopping.")
            break
finally:
    # Release the capture and close windows
    cap.release()
    cv2.destroyAllWindows()

