In [None]:
#how to run the model (pytorch)
# Loads the raw checkpoint to list its top-level keys, then runs the YOLO
# wrapper on one image and shows the first result.
import torch
from ultralytics import YOLO
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python
# objects, so only use it on checkpoint files from a trusted source.
checkpoint = torch.load("yolov8n-seg.pt", map_location=torch.device("cpu"), weights_only=False)
print(checkpoint.keys())
model = YOLO("yolov8n-seg.pt")
results = model("download.jpg")
# The call result is indexed; entry 0 belongs to the single input image.
results[0].show()

In [None]:
#edit the model to add the layers
# Attaches an extra Linear layer as an attribute of the wrapped PyTorch
# module and writes the resulting state_dict to disk.
import torch
import torch.nn as nn
from ultralytics import YOLO
model = YOLO("yolov8n-seg.pt")
pytorch_model = model.model
# NOTE(review): new_layer is only attached as an attribute; nothing in this
# cell calls it, so presumably the forward pass is unchanged -- confirm.
pytorch_model.new_layer = nn.Linear(100, 10)
# Only the state_dict is saved here, not the model object itself.
torch.save(pytorch_model.state_dict(), "modified_model.pt")
print("Modified model saved!")

In [None]:
#see the model
# Prints the repr of the PyTorch module wrapped by the YOLO object.
from ultralytics import YOLO

model = YOLO("yolov8n-seg.pt")
pytorch_model = model.model
print(pytorch_model)

In [None]:
# Load the TFLite face model and print its input/output tensor metadata.
import tensorflow as tf
interpreter = tf.lite.Interpreter(model_path="yolov8-face.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Input Details:", input_details)
print("Output Details:", output_details)

In [None]:
#run face detection model (pytorch)
# Runs the face checkpoint on a single image, shows the first result and
# saves it to "resultpytroch.jpg".
import torch
from ultralytics import YOLO

model = YOLO("yolov8-face.pt")
results = model("mon.jpg")
results[0].show()

results[0].save("resultpytroch.jpg")

In [None]:
# Live face detection: run the YOLO face model on every webcam frame and show
# the annotated frame until 'q' is pressed or the preview window is closed.
import cv2
from ultralytics import YOLO

model = YOLO("yolov8-face.pt")
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    # Report and fall through instead of calling exit(): inside a notebook
    # exit() asks the kernel to shut down rather than just ending this cell.
    print("Error: Could not open webcam.")
    cap.release()
else:
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture image.")
                break

            results = model(frame)
            annotated_frame = results[0].plot()
            cv2.imshow("YOLOv8 Face Detection", annotated_frame)

            # Stop on 'q' or once the preview window is no longer visible.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q') or cv2.getWindowProperty("YOLOv8 Face Detection", cv2.WND_PROP_VISIBLE) < 1:
                break

    finally:
        # Always give the camera back and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

In [None]:
#run face detection model (tflite)
# Runs the TFLite face model on one image and keeps the candidate boxes whose
# score exceeds the confidence threshold (NMS and drawing follow below).
import tensorflow as tf
import numpy as np
from PIL import Image
import cv2

image_path = "Picture1.png"
image = Image.open(image_path).convert("RGB")
# Resize to 640x640, add a leading batch axis and scale pixel values by 1/255.
input_data = np.expand_dims(image.resize((640, 640)), axis=0) / 255.0
input_data = input_data.astype(np.float32)

model_path = "yolov8-face.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
outputs = interpreter.get_tensor(output_details[0]['index'])

# Drop size-1 axes; rows 0-3 are used as box values and row 4 as scores.
detections = np.squeeze(outputs)
boxes = detections[:4, :]
scores = detections[4, :]

# Rows are read as centre-x, centre-y, width, height and converted to corners.
# NOTE(review): multiplying by the original image size assumes the model
# emits coordinates normalized to [0, 1] -- confirm for this export.
x_center, y_center, width, height = boxes
x1 = (x_center - width / 2) * image.width
y1 = (y_center - height / 2) * image.height
x2 = (x_center + width / 2) * image.width
y2 = (y_center + height / 2) * image.height

# Keep only candidates scoring above the threshold.
confidence_threshold = 0.5
filtered_indices = scores > confidence_threshold
x1, y1, x2, y2 = x1[filtered_indices], y1[filtered_indices], x2[filtered_indices], y2[filtered_indices]
scores = scores[filtered_indices]

def nms(boxes, scores, threshold):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Args:
        boxes: array of shape (N, 4) with one ``x1, y1, x2, y2`` row per box.
        scores: array of N scores, one per box.
        threshold: a box is dropped when its IoU with an already selected
            box is greater than this value.

    Returns:
        List of indices into ``boxes`` for the selected boxes, ordered from
        highest to lowest score (an empty list when there are no boxes).
    """
    if len(boxes) == 0:
        return []

    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    box_areas = (right - left) * (bottom - top)

    # Candidate indices sorted by descending score.
    remaining = scores.argsort()[::-1]
    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Overlap rectangle between the best box and every other candidate.
        overlap_left = np.maximum(left[best], left[rest])
        overlap_top = np.maximum(top[best], top[rest])
        overlap_right = np.minimum(right[best], right[rest])
        overlap_bottom = np.minimum(bottom[best], bottom[rest])
        overlap_w = np.maximum(0.0, overlap_right - overlap_left)
        overlap_h = np.maximum(0.0, overlap_bottom - overlap_top)
        overlap = overlap_w * overlap_h

        iou = overlap / (box_areas[best] + box_areas[rest] - overlap)
        # Candidates that do not overlap the best box too much survive.
        remaining = rest[iou <= threshold]
    return selected

# Stack the corner coordinates into (N, 4) rows, run NMS and keep survivors.
boxes_nms = np.stack([x1, y1, x2, y2], axis=1)
keep_indices = nms(boxes_nms, scores, 0.5)
boxes_nms = boxes_nms[keep_indices]
scores_nms = scores[keep_indices]

# Convert the PIL RGB image to a BGR array for the OpenCV drawing calls.
image_np = np.array(image)
image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
for i in range(len(boxes_nms)):
    # Note: this rebinds x1, y1, x2, y2 from arrays to per-box scalars.
    x1, y1, x2, y2 = boxes_nms[i]
    cv2.rectangle(image_np, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.putText(image_np, f"Face {scores_nms[i]:.2f}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Save the annotated image, then show it until a key is pressed.
cv2.imwrite("tflite_image.jpg", image_np)
cv2.imshow("Result", image_np)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [12]:
# Dump the .tflite file as a C byte-array definition, one hex literal per byte.
# NOTE(review): the input file is "TinyTracker.tflite" while the emitted array
# is named TinyTrackerS_tflite -- confirm which model is intended.
with open("TinyTracker.tflite", "rb") as model_file:
    hex_data = model_file.read().hex()
# Every two hex digits form one byte of the model.
byte_literals = ["0x" + hex_data[pos:pos + 2] for pos in range(0, len(hex_data), 2)]
c_array = "const unsigned char TinyTrackerS_tflite[] = {" + ", ".join(byte_literals) + "};"
with open("model_data.cc", "w") as source_file:
    source_file.write(c_array)

In [1]:
# Export the model as a C++ source file: a 16-byte-aligned byte array plus
# an integer constant holding its length.
with open("TinyTrackerS.tflite", "rb") as model_file:
    model_data = model_file.read()

hex_array = ", ".join("0x%02x" % byte for byte in model_data)
# Assemble the file line by line; the trailing "" yields the final newline.
c_array = "\n".join([
    "#include <cstddef>",
    "alignas(16) const unsigned char TinyTrackerS_tflite[] = {",
    "    " + hex_array,
    "};",
    "const int TinyTrackerS_tflite_len = " + str(len(model_data)) + ";",
    "",
])

with open("model_data2.cc", "w") as source_file:
    source_file.write(c_array)


In [3]:
import tensorflow.lite as tflite

# Collect the distinct operator names used by the model.
interpreter = tflite.Interpreter(model_path="TinyTrackerS.tflite")
interpreter.allocate_tensors()

# _get_ops_details() is an underscore-prefixed (private) interpreter method;
# each entry it yields is read through its "op_name" key.
ops = {details["op_name"] for details in interpreter._get_ops_details()}

print(f"Required Ops: {ops}")
print(f"Total Ops Needed: {len(ops)}")


Required Ops: {'TANH', 'DEQUANTIZE', 'CONV_2D', 'DELEGATE', 'FULLY_CONNECTED', 'DEPTHWISE_CONV_2D', 'CONCATENATION', 'MEAN'}
Total Ops Needed: 8


In [4]:
import tensorflow as tf
import numpy as np

# Open the TFLite model and allocate its tensors
model_path = "TinyTrackerS.tflite"  # Update with your actual model path
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Tensor metadata for the model's inputs and outputs
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Report shape, dtype and quantization (scale, zero point) of each input
for i, inp in enumerate(input_details):
    report = [
        f"Input {i}:",
        f"  Shape: {inp['shape']}",
        f"  Data Type: {inp['dtype']}",
        f"  Scale: {inp['quantization'][0]}",
        f"  Zero Point: {inp['quantization'][1]}",
    ]
    print("\n".join(report))


Input 0:
  Shape: [ 1 96 96  1]
  Data Type: <class 'numpy.int8'>
  Scale: 0.007048431318253279
  Zero Point: -5


In [5]:
# Read the channel count from the last axis of a 4-D input shape;
# any other rank is treated as a single channel.
input_shape = input_details[0]['shape']
if len(input_shape) == 4:
    channels = input_shape[-1]
else:
    channels = 1

print(channels)
if channels == 1:
    print("Model expects grayscale images (1 channel).")
elif channels == 3:
    print("Model expects RGB/BGR images (3 channels).")
else:
    print("Unexpected input format:", input_shape)


1
Model expects grayscale images (1 channel).


In [6]:
# Report shape, dtype and quantization (scale, zero point) of each output
for i, out in enumerate(output_details):
    report = [
        f"Output {i}:",
        f"  Shape: {out['shape']}",
        f"  Data Type: {out['dtype']}",
        f"  Scale: {out['quantization'][0]}",
        f"  Zero Point: {out['quantization'][1]}",
    ]
    print("\n".join(report))


Output 0:
  Shape: [1 2]
  Data Type: <class 'numpy.float32'>
  Scale: 0.0
  Zero Point: 0


In [7]:
import numpy as np

# Create a test input (black image, grayscale)
test_input = np.zeros(input_details[0]['shape'], dtype=np.float32)  # Start as float32

# Quantize the input: q = round(real_value / scale + zero_point), then clamp
# to the int8 range so out-of-range values saturate instead of wrapping
# around in the integer cast.
scale = input_details[0]['quantization'][0]
zero_point = input_details[0]['quantization'][1]
int8_limits = np.iinfo(np.int8)
test_input = np.clip(
    np.round(test_input / scale + zero_point), int8_limits.min, int8_limits.max
).astype(np.int8)  # Convert to int8

# Set input and run inference
interpreter.set_tensor(input_details[0]['index'], test_input)
interpreter.invoke()

# Get and print output
output = interpreter.get_tensor(output_details[0]['index'])
print("Raw output:", output)


Raw output: [[-0.109375  0.140625]]


In [None]:
import cv2  # Install with: pip install opencv-python
import numpy as np

def preprocess_image(image_path, scale=0.007048, zero_point=-5):
    """Load an image and convert it to the model's int8 input tensor.

    The image is read as grayscale, resized to 96x96, normalized to [0, 1],
    quantized with ``q = round(x / scale + zero_point)`` and clamped to the
    int8 range.

    Args:
        image_path: Path of the image file to read.
        scale: Quantization scale of the model input (defaults to the value
            that used to be hard-coded in this function).
        zero_point: Quantization zero point of the model input.

    Returns:
        int8 array of shape (1, 96, 96, 1).

    Raises:
        FileNotFoundError: If OpenCV cannot read the image.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, (96, 96))  # Resize to model's input size

    # With a scale of ~0.007 the int8 range only spans roughly [-0.87, 0.93],
    # so raw 0-255 pixels cannot be the intended real-valued input.
    # NOTE(review): [0, 1] normalization is assumed -- confirm against the
    # preprocessing used when the model was trained.
    img = img.astype(np.float32) / 255.0

    # Quantize, then saturate so out-of-range values do not wrap in the cast.
    quantized = np.round(img / scale + zero_point)
    quantized = np.clip(quantized, -128, 127).astype(np.int8)

    # Add batch and channel dimensions (1, 96, 96, 1)
    return np.expand_dims(quantized, axis=(0, -1))

# Example usage
image_path = "egp.jpg"  # Replace with an actual test image
processed_img = preprocess_image(image_path)

# Run inference
# Relies on `interpreter`, `input_details` and `output_details` created by an
# earlier cell; run that cell first on a fresh kernel.
interpreter.set_tensor(input_details[0]['index'], processed_img)
interpreter.invoke()
output = interpreter.get_tensor(output_details[0]['index'])
print("Inference Output:", output)


In [None]:
import sys
!{sys.executable} -m pip install pyserial pyautogui screeninfo

In [None]:
# Sanity check that the packages imported by the gaze-mouse cell are available.
try:
    import serial
    import pyautogui
    from screeninfo import get_monitors
    print("✅ All packages loaded successfully!")
except ImportError as e:
    # e.name is the name of the module that failed to import.
    print(f"❌ Missing package: {e.name}")

In [None]:
import sys
!{sys.executable} -m pip install keyboard

In [2]:
import sys
import time
import threading
import serial
import pyautogui
import keyboard
from screeninfo import get_monitors

# Flag to control the loop
running = True

# Thread to listen for any key press
def listen_for_keypress():
    """Wait for one keyboard event, then clear the module-level ``running`` flag.

    Intended as a thread target: the main loop polls ``running`` and stops
    once this function has set it to False.
    """
    global running
    _ = keyboard.read_event()  # returns once any keyboard event is received
    print("\nKey pressed. Stopping...")
    running = False

# Start the key listening thread
threading.Thread(target=listen_for_keypress, daemon=True).start()

# Serial setup
# Open the port with a 0.1 s read timeout. If opening fails, print the error
# and the available port names, then exit with status 1.
try:
    ser = serial.Serial(
        port='COM6',
        baudrate=115200,
        timeout=0.1
    )
except serial.SerialException as e:
    print(f"Serial error: {e}")
    print("Available ports:")
    # Imported here because it is only needed on the failure path.
    import serial.tools.list_ports
    print([p.device for p in serial.tools.list_ports.comports()])
    sys.exit(1)

screen = get_monitors()[0]
print(f"Screen: {screen.width}x{screen.height}")

def map_gaze(x, y):
    """Map a gaze estimate to pixel coordinates on ``screen``.

    ``x`` in [-1, 1] maps left-to-right across the screen width and ``y`` in
    [-1, 1] maps bottom-to-top across the screen height (y = 1 is the top
    row). The result is clamped to the valid pixel range, so x = 1, y = -1
    and out-of-range estimates land on the screen edge instead of one pixel
    (or more) outside it.

    Args:
        x: Horizontal gaze value.
        y: Vertical gaze value.

    Returns:
        Tuple ``(pixel_x, pixel_y)`` with 0 <= pixel_x <= screen.width - 1 and
        0 <= pixel_y <= screen.height - 1.
    """
    pixel_x = int((x + 1) * screen.width / 2)
    pixel_y = int((1 - y) * screen.height / 2)
    return (
        min(max(pixel_x, 0), screen.width - 1),
        min(max(pixel_y, 0), screen.height - 1)
    )

print("Running gaze mouse... (Press any key or Ctrl+C to stop)")

# Main loop: read lines from the serial port, echo each one, and move the
# mouse for every line that starts with "Gaze:". Runs until `running` is
# cleared, Ctrl+C is pressed, or an unexpected error occurs.
try:
    while running:
        try:
            line = ser.readline().decode('utf-8').strip()
        except UnicodeDecodeError:
            continue  # skip bad data
        # An empty line (e.g. nothing received before the read timeout) is skipped.
        if not line:
            continue

        print(f"Output: {line}")
        if line.startswith("Gaze:"):
            try:
                # Expected form: "Gaze: (x, y)". A line with more than one
                # ':' or non-numeric fields raises ValueError below.
                _, data = line.split(':')
                data = data.strip().strip('()')
                x, y = map(float, data.split(','))
                mouse_x, mouse_y = map_gaze(x, y)
                pyautogui.moveTo(mouse_x, mouse_y)
            except ValueError:
                print(f"Parse error: {line}")


except KeyboardInterrupt:
    print("\nCtrl+C detected. Stopping...")
    running = False

except Exception as e:
    # Any other error ends the loop; only its message is printed.
    print(f"Error: {e}")

finally:
    # The port is closed on every exit path.
    ser.close()
    print("Serial connection closed")

Screen: 1920x1080
Running gaze mouse... (Press any key or Ctrl+C to stop)
Output: DRAM: 71468/323148 | PSRAM: 141524/4194304
Output: FPS: 2.5 | Capture: 15µs | Quant: 2ms | Inference: 402ms | Loop: 405ms
Output: Gaze: (0.39, 0.27)
Output: DRAM: 71468/323148 | PSRAM: 141524/4194304
Output: FPS: 2.5 | Capture: 15µs | Quant: 2ms | Inference: 401ms | Loop: 406ms
Output: Gaze: (0.40, 0.20)
Output: DRAM: 71468/323148 | PSRAM: 141524/4194304
Output: FPS: 2.5 | Capture: 16µs | Quant: 2ms | Inference: 402ms | Loop: 405ms
Output: Gaze: (0.38, 0.20)
Output: DRAM: 71468/323148 | PSRAM: 141524/4194304
Output: FPS: 2.5 | Capture: 15µs | Quant: 2ms | Inference: 402ms | Loop: 405ms
Output: Gaze: (0.39, 0.28)
Output: DRAM: 71468/323148 | PSRAM: 141524/4194304
Output: FPS: 2.5 | Capture: 16µs | Quant: 2ms | Inference: 402ms | Loop: 405ms
Output: Gaze: (0.32, 0.24)
Output: DRAM: 71468/323148 | PSRAM: 141524/4194304
Output: FPS: 2.5 | Capture: 15µs | Quant: 2ms | Inference: 402ms | Loop: 405ms
Output: Gaz

In [1]:
import keyboard
import os
import time

def open_virtual_keyboard():
    """Launch ``osk.exe`` through the system shell."""
    # NOTE(review): os.system waits for the command to finish, so this may
    # block the caller until the launched program exits -- confirm.
    os.system("osk.exe")


# Poll the keyboard every 0.1 s: 'a' opens the virtual keyboard, 'q' quits.
# NOTE(review): keyboard_opened is never set to True anywhere in this cell,
# so the `not keyboard_opened` guard below is always satisfied -- confirm
# whether the flag was meant to be updated after opening.
keyboard_opened = False 
while True:
    if keyboard.is_pressed('a') and not keyboard_opened:
        print("Key 'a' pressed, opening virtual keyboard...")
        open_virtual_keyboard()
        # Pause so a held key does not immediately trigger another launch.
        time.sleep(1)

    if keyboard.is_pressed('q'):
        print("Key 'q' pressed, stopping program...")
        break

    time.sleep(0.1)

Key 'q' pressed, stopping program...


In [None]:
a