In [None]:
import tensorflow as tf
import numpy as np
import time
from cursor import Cursor
from pynput.mouse import Listener
import os
import pandas as pd
import cv2

# File name of the saved Keras model; passed to tf.keras.models.load_model()
# further down to build `model`.
model_name = "comfy-lion.h5"

class Webcam:
    """Small wrapper around ``cv2.VideoCapture`` that saves single frames as JPEG files."""

    def __init__(self, directory="", device_index=0):
        """Open the capture device.

        Args:
            directory (str): Directory the captured images are written to. With
                the default empty string, ``os.path.join`` yields the bare file
                name, i.e. a path relative to the current working directory.
            device_index (int): Index handed to ``cv2.VideoCapture``.
        """
        self.directory = directory
        self.cap = cv2.VideoCapture(device_index)

    def capture(self, id):
        """Take picture with the webcam and save the image under the directory stored in the attributes of the class with the passed id.

        Args:
            id (int | str): id of the picture; the file is named ``<id>.jpg``.

        Returns:
            bool: True when a frame was read and ``cv2.imwrite`` reported
            success, False otherwise (nothing new was written in that case).
        """
        ret, frame = self.cap.read()

        # Report a failed read to the caller instead of silently doing
        # nothing, so a stale or missing image file can be detected.
        if not ret:
            return False

        filename = os.path.join(self.directory, f"{id}.jpg")
        return bool(cv2.imwrite(filename, frame))

    def release(self):
        """Release the webcam and destroy all windows."""
        self.cap.release()
        cv2.destroyAllWindows()

# Screen width and height; the predicted (x, y) pair is multiplied by these
# values before it is handed to cursor.set_position().
screen_x, screen_y = 3840, 2200

# Helpers used by the prediction loop: cursor control and frame grabbing.
cursor = Cursor()
cam = Webcam()

# Load the trained network from the file named by `model_name`.
model = tf.keras.models.load_model(model_name)

# def preprocess_image(image_path):
#     image = tf.io.read_file(image_path)
#     image = tf.image.decode_jpeg(image, channels=3)
#     image = tf.image.resize(image, (256, 256)) / 255.0
#     return tf.expand_dims(image, axis=0)  # Add batch dimension

# while True:
#     image_id = "42"
#     cam.capture(image_id)

#     image_path = f"{image_id}.jpg"
#     image = cv2.imread(image_path)
#     ... (preprocess here)
#     prediction = model.predict(image)
#     prediction = np.clip(prediction, 0.01, 0.99)

#     print("Predicted coordinates:", prediction[0])
#     cursor.set_position(prediction[0][0]*screen_x, prediction[0][1]*screen_y)
#     time.sleep(0.01)
#     # print("Prediction set")

# Capture -> predict -> move-cursor loop. Runs until interrupted (Ctrl+C /
# kernel interrupt); the webcam is released on the way out.

# The capture target never changes, so build its id and path once. The path
# is derived from cam.directory because that is where Webcam.capture() writes
# the file.
image_id = "42"
image_path = os.path.join(cam.directory, f"{image_id}.jpg")

try:
    while True:
        cam.capture(image_id)

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None when the file is missing or unreadable
            # (e.g. the very first capture failed); skip this iteration
            # instead of crashing on `None / 255.0`.
            time.sleep(0.01)
            continue

        # Preprocessing (intended to match training -- TODO confirm against
        # the training pipeline).
        # NOTE: resizing is currently disabled; the frame is used as captured.
        # image = cv2.resize(image, (350, 350)) # Resize to 350x350
        image = image / 255.0  # normalization to [0, 1]

        # (H, W) -> (1, H, W, 1): add batch and channel dimensions.
        image_expanded = np.expand_dims(image, axis=(0, 3))

        # Prediction, clipped to [0.01, 0.99] before it is scaled to pixels.
        prediction = model.predict(image_expanded)
        prediction = np.clip(prediction, 0.01, 0.99)

        print("Predicted coordinates:", prediction[0])
        cursor.set_position(prediction[0][0] * screen_x, prediction[0][1] * screen_y)
        time.sleep(0.01)
except KeyboardInterrupt:
    # Interrupting is the intended way to stop the loop; exit quietly.
    pass
finally:
    # Always free the capture device, however the loop ends.
    cam.release()

Predicted coordinates: [0.99 0.01]
Predicted coordinates: [0.99    0.01514]
Predicted coordinates: [0.99   0.1648]
Predicted coordinates: [0.6357 0.285 ]
Predicted coordinates: [0.5454 0.3403]
Predicted coordinates: [0.5415 0.3577]
Predicted coordinates: [0.5444 0.3506]
Predicted coordinates: [0.5537 0.3323]
Predicted coordinates: [0.558  0.3247]
Predicted coordinates: [0.56   0.3213]
Predicted coordinates: [0.5625 0.32  ]
Predicted coordinates: [0.5635 0.3184]
Predicted coordinates: [0.5635 0.3176]
Predicted coordinates: [0.5645 0.3162]
Predicted coordinates: [0.5645 0.3154]
Predicted coordinates: [0.57   0.3086]
Predicted coordinates: [0.5894 0.2952]
Predicted coordinates: [0.6113 0.2905]
Predicted coordinates: [0.6436 0.2832]
Predicted coordinates: [0.664  0.2795]
Predicted coordinates: [0.6797 0.2788]
Predicted coordinates: [0.6978 0.274 ]
Predicted coordinates: [0.709 0.27 ]
Predicted coordinates: [0.726  0.2625]
Predicted coordinates: [0.7334 0.259 ]
Predicted coordinates: [0.743

KeyboardInterrupt: 