In [1]:
########################################################################
## This is the main notebook to capture new images and test the model ##
########################################################################

import os
import cv2
import numpy as np
from datetime import datetime, timedelta
from keras.preprocessing import image
from tensorflow.keras.models import load_model

# ROI outline colours, given as BGR tuples ---------------------------------------------------------
ROI_DEFAULT = (0, 255, 0)
ROI_SELECTION = (0, 200, 0)
ROI_CAPTURING = (0, 0, 255)
ROI_SAVING = (255, 255, 255)
ROI_PLAYING = (255, 0, 0)
ROI_BACKGROUND = (200, 200, 200)
ROI_CLASSIFYING = (255, 255, 255)

# Capture window: title and requested webcam resolution --------------------------------------------
window_name = "Rock-Paper-Scissors"
capture_width = 1280
capture_height = 720

# Region of interest -------------------------------------------------------------------------------
default_roi_dim = (10, 10, 300, 300)  # x, y, w, h
settings_path = "settings.txt"  # settings are stored here for the next session, None to disable

# Image capture ------------------------------------------------------------------------------------
# Images are saved as "{folder}/{img_prefix}{img_next_id}.{img_extension}"
folder = "captured"
img_prefix = "rock_"
img_extension = "png"
img_next_id = 0  # first id to try; if it is already taken, the next highest id is assigned
frames_batch = 5  # number of frames captured in one session
capture_delay = 2000  # milliseconds to wait between two captures

# Play mode ----------------------------------------------------------------------------------------
default_threshold = 20  # threshold used to isolate the hand from the background in the ROI
use_threshold = False  # toggled at runtime with the 't' key
model = load_model('models/rps_v04_100epochs_20240209_2137.h5')
class_indices = {'paper': 0, 'rock': 1, 'scissors': 2}
# Reverse lookup: class index -> class name
class_labels = {index: name for name, index in class_indices.items()}
model_input_size = (150, 150)

# On-screen log and help text ----------------------------------------------------------------------
log_color = (255, 255, 255)  # white in BGR
log_font = cv2.FONT_HERSHEY_SIMPLEX
log_scale = 0.5
log_thickness = 1
show_help = False
show_log = True
#---------------------------------------------------------------------------------------------------

# Access the webcam (device index 0)
cap = cv2.VideoCapture(0)

# Abort when the webcam could not be opened. SystemExit is raised explicitly: the exit() helper is
# injected by the `site` module for interactive use, is not guaranteed to exist in every
# interpreter, and would report success (status 0) for what is an error.
if not cap.isOpened():
    print("Error: Unable to open webcam")
    cap.release()
    raise SystemExit(1)

# Request the capture resolution and create the display window
cap.set(cv2.CAP_PROP_FRAME_WIDTH, capture_width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, capture_height)
cv2.namedWindow(window_name)

# Load settings from file or use default values ----------------------------------------------------
# The settings file is expected to hold a single line of five integers: "x y w h threshold".
try:
    if not settings_path:
        # Persistence is disabled (settings_path is None or empty): take the fallback path
        raise FileNotFoundError("settings file disabled")
    with open(settings_path, "r") as f:
        # Raises ValueError on non-integer fields or on a wrong number of fields
        x, y, w, h, threshold_amount = map(int, f.readline().split())
    # Reinit the ROI if it is empty or out of bounds
    if x < 0 or y < 0 or w <= 0 or h <= 0 or x + w > capture_width or y + h > capture_height:
        raise ValueError("saved ROI is out of bounds")
    # Reinit if the threshold is outside the range reachable with the +/- keys
    if not 0 <= threshold_amount <= 255:
        raise ValueError("saved threshold is out of range")

except (OSError, ValueError):
    # A tuple is required here: `except A or B` evaluates to `except A` and never catches B.
    # Use default values
    x, y, w, h = default_roi_dim
    threshold_amount = default_threshold

# Runtime state: ROI ---------------------------------------------------------------------------------
mode = "Default"
square_color = ROI_DEFAULT
is_moving_roi = False
start_x = start_y = 0  # offset of the grab point inside the ROI while it is being dragged
# Runtime state: capture -----------------------------------------------------------------------------
is_capturing = False
image_counter = 0
time_counter = datetime.now()
os.makedirs(folder, exist_ok=True)  # make sure the output folder exists
# Runtime state: play --------------------------------------------------------------------------------
is_playing = False
classify_image = False
roi_background = None
predicted_class_label = None

# Mouse event callback function --------------------------------------------------------------------
def mouse_callback(event, mouseX, mouseY, flags, param):
    """Drag the ROI rectangle with the left mouse button.

    Args:
        event: mouse event code, compared against the cv2.EVENT_* constants.
        mouseX: x coordinate of the pointer reported with the event.
        mouseY: y coordinate of the pointer reported with the event.
        flags: unused.
        param: unused.

    Updates the module-level ROI position (x, y), the drag state (is_moving_roi, start_x,
    start_y) and, when thresholding is active, re-snapshots roi_background from the current
    module-level `roi`.
    """
    global x, y, w, h, is_moving_roi, start_x, start_y, square_color, roi_background

    # Left Mouse Up: the drag is over
    if event == cv2.EVENT_LBUTTONUP:
        is_moving_roi = False
        return

    # Left Mouse Down: start a drag only when the click lands inside the ROI
    if event == cv2.EVENT_LBUTTONDOWN:
        inside_roi = (x <= mouseX <= x + w) and (y <= mouseY <= y + h)
        if inside_roi:
            is_moving_roi = True
            # Remember where inside the ROI it was grabbed
            start_x = mouseX - x
            start_y = mouseY - y
        return

    # Any other event only matters when the mouse moves during a drag
    if event != cv2.EVENT_MOUSEMOVE or not is_moving_roi:
        return

    # Mouse Move: follow the pointer while keeping the ROI inside the current frame
    max_x = frame.shape[1] - w
    max_y = frame.shape[0] - h
    x = max(0, min(mouseX - start_x, max_x))
    y = max(0, min(mouseY - start_y, max_y))
    if use_threshold:
        roi_background = roi.copy()


cv2.setMouseCallback(window_name, mouse_callback)

# MAIN LOOP ----------------------------------------------------------------------------------------
# Each iteration reads one frame, handles at most one key press, optionally applies the background
# threshold to the ROI, runs the active mode (capturing / playing / moving / default) and finally
# draws the overlays and shows the frame. The loop ends on 'q' or when no frame can be read.
while True:

    # Capture frame-by-frame
    ret, frame = cap.read()
    # Check if the frame is read correctly
    if not ret:
        print("Error: Unable to capture frame")
        break
    # Flip the frame horizontally
    frame = cv2.flip(frame, 1)
    # Get the ROI
    roi = frame[y:y+h, x:x+w]

    # HANDLE KEY PRESS EVENTS ----------------------------------------------------------------------
    key = cv2.waitKey(1) & 0xFF
    # toggle capturing mode ------------------------------------------------------------------------
    if key == ord('c') and not is_playing:
        is_capturing = not is_capturing
        if is_capturing:
            mode = "Capturing"
            image_counter = 0
        else:
            mode = "Default"
    # toggle playing mode --------------------------------------------------------------------------
    elif key == ord('p') and not is_capturing:
        is_playing = not is_playing
        if is_playing:
            mode = "Playing"
            if use_threshold:
                roi_background = roi.copy()
        else:
            mode = "Default"
    # Space key while playing: raise the classify_image flag ---------------------------------------
    elif key == 32 and is_playing:
        classify_image = True
    # Escape key, exit all modes -------------------------------------------------------------------
    elif key == 27:
        is_capturing = False
        is_playing = False
        is_moving_roi = False
        use_threshold = False
        # Keep the logged mode in sync with the flags that were just cleared
        mode = "Default"
    # toggle threshold use -------------------------------------------------------------------------
    elif key == ord('t'):
        use_threshold = not use_threshold
        if use_threshold:
            roi_background = roi.copy()
    # increase threshold ---------------------------------------------------------------------------
    elif key == ord('+') and threshold_amount < 255:
        threshold_amount += 1
    # decrease threshold ---------------------------------------------------------------------------
    elif key == ord('-') and threshold_amount > 0:
        threshold_amount -= 1
    # toggle log -----------------------------------------------------------------------------------
    elif key == ord('l'):
        show_log = not show_log
    # toggle help ----------------------------------------------------------------------------------
    elif key == ord('h'):
        show_help = not show_help
    # Quit the program -----------------------------------------------------------------------------
    elif key == ord('q'):
        break

    # Activate Threshold in ROI --------------------------------------------------------------------
    if use_threshold:
        # absdiff needs a background of the same shape as the ROI; take a fresh snapshot when
        # none is available yet (e.g. use_threshold enabled in the settings above)
        if roi_background is None or roi_background.shape != roi.shape:
            roi_background = roi.copy()
        # Compare ROI and background image; pixels whose difference does not exceed the threshold
        # are replaced by the background color
        diff = cv2.absdiff(roi, roi_background)
        mask = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        _, mask = cv2.threshold(mask, threshold_amount, 255, cv2.THRESH_BINARY)
        mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
        new_bkg = np.ones_like(roi) * ROI_BACKGROUND
        frame[y:y+h, x:x+w] = np.where(mask == 255, roi, new_bkg)
        roi = frame[y:y+h, x:x+w]

    # Capturing Mode -------------------------------------------------------------------------------
    if is_capturing:
        if image_counter < frames_batch:
            if datetime.now() - time_counter > timedelta(milliseconds=capture_delay):
                # Collect the numeric ids already used in the folder. Files that match the
                # prefix/extension but have a non-numeric id are skipped instead of crashing.
                suffix = f".{img_extension}"
                id_start, id_end = len(img_prefix), -len(suffix)
                image_ids = [int(img[id_start:id_end])
                             for img in os.listdir(folder)
                             if img.startswith(img_prefix) and img.endswith(suffix)
                             and img[id_start:id_end].isdecimal()]
                if img_next_id in image_ids:
                    # id already taken: assign the next highest id
                    img_next_id = max(image_ids) + 1
                filepath = os.path.join(folder, f"{img_prefix}{img_next_id}.{img_extension}")
                # Save the ROI image
                cv2.imwrite(filepath, roi)
                image_counter += 1
                time_counter = datetime.now()
                square_color = ROI_SAVING
            else:
                square_color = ROI_CAPTURING
        else:
            # Batch complete: leave capturing mode and reinit the counter for the next capture
            is_capturing = False
            image_counter = 0
            mode = "Default"

    # Playing Mode ---------------------------------------------------------------------------------
    elif is_playing:
        square_color = ROI_PLAYING

        # Convert and resize ROI as used by the keras model
        roi_rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        roi_resized = cv2.resize(roi_rgb, model_input_size, interpolation=cv2.INTER_AREA)
        roi_normalized = roi_resized / 255.0
        roi_expanded = np.expand_dims(roi_normalized, axis=0)

        # Classify the ROI image
        prediction_prob = model.predict(roi_expanded, verbose=0)
        predicted_class_index = int(np.argmax(prediction_prob))
        predicted_class_label = class_labels[predicted_class_index]
        predicted_class_prob = prediction_prob[0][predicted_class_index]

        # Display prediction
        print(prediction_prob, end='\r')
        if predicted_class_prob < 0.75:
            label = 'Undefined'
        else:
            label = f'{predicted_class_label} ({predicted_class_prob:.2f})'
        # The label is drawn exactly once, below the ROI, and only while playing. Drawing the raw
        # class name a second time at the same position overlapped the 'Undefined' label and left
        # a stale prediction on screen after play mode ended.
        text_height = 20
        position = x, y + h + text_height
        cv2.putText(frame, label, position, log_font, log_scale, ROI_PLAYING, log_thickness, cv2.LINE_AA)

    # Moving ROI -----------------------------------------------------------------------------------
    elif is_moving_roi:
        square_color = ROI_SELECTION

    # Default Mode ---------------------------------------------------------------------------------
    else:
        square_color = ROI_DEFAULT

    # Draw square around the ROI -------------------------------------------------------------------
    cv2.rectangle(frame, (x, y), (x + w, y + h), square_color, 2)

    # Show help at the top left --------------------------------------------------------------------
    if show_help:
        help_lines = ['LMB: Move ROI', 'C: Toggle Capture Frames', 'P: Toggle Play Mode',
                      'T: Toggle Threshold Use', '+/-: Adjust Threshold Amount', 'L: Toggle Log',
                      'H: Toggle Help', 'ESC: Exit current mode', 'Q: Quit the program']
        # A dedicated row counter is used so the ROI's own `y` coordinate is never overwritten
        for row, help_line in enumerate(help_lines, start=1):
            cv2.putText(frame, help_line, (10, 20 * row), log_font, log_scale, log_color,
                        log_thickness, cv2.LINE_AA)

    # Show log at the bottom left ------------------------------------------------------------------
    if show_log:
        text = f"ROI: {x}, {y}, {x+w}, {y+h} | Mode: {mode} | Threshold: {'On' if use_threshold else 'Off'} / {threshold_amount} | 'H': Toggle Help"
        position = 10, frame.shape[0] - 10
        cv2.putText(frame, text, position, log_font, log_scale, log_color, log_thickness, cv2.LINE_AA)

    # Display the frame
    cv2.imshow(window_name, frame)

# Persist the ROI geometry and threshold as "x y w h threshold" (skipped when settings_path is
# None or empty). Saving is best-effort: a failure is reported but does not stop the cleanup below.
if settings_path:
    try:
        with open(settings_path, "w") as f:
            f.write(" ".join(str(value) for value in (x, y, w, h, threshold_amount)))
    except Exception as e:
        print(f"Unable to save settings to file: {e}")

# Release the webcam, then close all OpenCV windows
cap.release()
cv2.destroyAllWindows()




[[0.02090589 0.9201723  0.05892182]]854e-05]]5]]