In [1]:
from ultralytics import YOLO
import cv2
import os
import numpy as np
import torch

## Professor Masking

In [None]:
# Load the YOLO11 medium segmentation checkpoint and place it on the GPU when
# CUDA is available, otherwise on the CPU.
model = YOLO('yolo11m-seg.pt')
if torch.cuda.is_available():
    model.to('cuda')
else:
    model.to('cpu')
print(f"Model loaded on {model.device}")


# Every artefact of this cell is written below 'output/'; create it up front.
os.makedirs('output', exist_ok=True)

# Frame rate and destination files for the two videos produced by this cell.
video_fps = 5
output_path = 'output/annotated_output.mp4'
board_output_path = 'output/cleaned_board_output.mp4'

# Placeholders: both writers and the running board image are initialised later.
video_writer = None
board_video_writer = None
old_frame = None

# Define the board update function
def update_board(old_frame, new_frame, bbox):
    """Composite a board image that ignores the area covered by ``bbox``.

    Pixels inside the bounding box are taken from ``old_frame``; every other
    pixel is taken from ``new_frame``. Neither input is modified.

    Args:
        old_frame: Image array whose content is kept inside the box.
        new_frame: Image array of the same shape that supplies all pixels
            outside the box.
        bbox: ``(x1, y1, x2, y2)`` box corners; values are truncated to int
            and clamped to the frame, so boxes that extend past the image
            border (including negative coordinates) are handled correctly.

    Returns:
        A new ``uint8`` array with the same shape as ``new_frame``.
    """
    height, width = old_frame.shape[:2]
    x1, y1, x2, y2 = map(int, bbox)

    # Clamp to the frame: a negative index would otherwise wrap around in the
    # slice below and silently select the wrong (usually empty) region.
    x1, x2 = (min(max(v, 0), width) for v in (x1, x2))
    y1, y2 = (min(max(v, 0), height) for v in (y1, y2))

    # astype() returns a fresh copy, so the caller's new_frame stays untouched.
    updated_board = new_frame.astype(np.uint8)
    updated_board[y1:y2, x1:x2] = old_frame[y1:y2, x1:x2]
    return updated_board

# Start tracking
# classes=[0] restricts the tracker to class id 0 (logged as "person" in the
# run output); stream=True makes model.track() yield results frame by frame.
results = model.track(source='right.mp4', show=False, conf=0.3, iou=0.5, classes=[0], stream=True)

try:
    for i, result in enumerate(results):
        boxes = []
        if result.boxes is not None:
            boxes = result.boxes.xyxy.cpu().numpy()
            confidences = result.boxes.conf.cpu().numpy()
            class_ids = result.boxes.cls.cpu().numpy()

            # Save annotations: one per-frame file plus a cumulative dataset file.
            # NOTE: 'dataset.txt' is opened in append mode, so re-running this
            # cell adds a second copy of every line; delete the file first if
            # a clean dataset is needed.
            with open('dataset.txt', 'a') as file, open(f'output/frame_{i:05d}.txt', 'w') as f:
                for cls_id, conf, (x1, y1, x2, y2) in zip(class_ids, confidences, boxes):
                    line = f"{i} {cls_id} {conf:.4f} {x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f}\n"
                    file.write(line)
                    f.write(line)

        # Get annotated and raw frames
        annotated_frame = result.plot()
        raw_frame = result.orig_img

        # Initialize video writers and board on the first frame, once the
        # frame size is known.
        if video_writer is None:
            height, width = annotated_frame.shape[:2]
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_writer = cv2.VideoWriter(output_path, fourcc, video_fps, (width, height))
            board_video_writer = cv2.VideoWriter(board_output_path, fourcc, video_fps, (width, height))
            # VideoWriter fails silently (e.g. missing codec); surface it now
            # instead of producing empty files after the whole video has run.
            if not video_writer.isOpened():
                raise IOError(f"Cannot open video writer: {output_path}")
            if not board_video_writer.isOpened():
                raise IOError(f"Cannot open video writer: {board_output_path}")
            old_frame = raw_frame.copy()

        # Update board using box-free raw frame.
        if len(boxes) > 0:
            # Start from the live frame and, for every detection, paste the
            # previous board back inside its box. Always pasting from the
            # untouched previous board keeps *all* boxes masked; chaining the
            # partially updated board as the "old" image would fill every box
            # after the first with live (person-containing) pixels.
            updated_board = raw_frame.copy()
            for bbox in boxes:
                updated_board = update_board(old_frame, updated_board, bbox)
        else:
            # No detection on this frame: hold the previous board unchanged.
            updated_board = old_frame.copy()

        old_frame = updated_board

        # Write both videos
        video_writer.write(annotated_frame)
        board_video_writer.write(updated_board)
finally:
    # Release resources even if tracking or writing failed part-way through,
    # so the partially written videos are closed properly.
    if video_writer is not None:
        video_writer.release()
    if board_video_writer is not None:
        board_video_writer.release()

print(f"\nAnnotated video saved at: {output_path}")
print(f"Board video saved at: {board_output_path}")


video 1/1 (frame 1/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 202.5ms
video 1/1 (frame 2/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 161.1ms
video 1/1 (frame 3/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 160.3ms
video 1/1 (frame 4/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 161.9ms
video 1/1 (frame 5/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 151.6ms
video 1/1 (frame 6/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 164.9ms
video 1/1 (frame 7/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 159.4ms
video 1/1 (frame 8/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 162.0ms
video 1/1 (frame 9/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x640 1 person, 160.7ms
video 1/1 (frame 10/5845) c:\Users\Yassin\Desktop\CV\prof_masking\right.mp4: 384x

## Whiteboard to Smart Board Construction

In [3]:
def warp_board(frame, board_contour, output_size=(800, 600)):
    """Rectify the quadrilateral ``board_contour`` of ``frame`` to a flat image.

    Args:
        frame: Image passed to ``cv2.warpPerspective``.
        board_contour: Array holding exactly four (x, y) points in any order;
            it is reshaped to ``(4, 2)``.
        output_size: ``(width, height)`` of the rectified image.

    Returns:
        The perspective-warped image produced by ``cv2.warpPerspective``.
    """
    corners = board_contour.reshape(4, 2)

    # x + y is smallest at the top-left corner and largest at the bottom-right;
    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)

    # Source corners in the order: top-left, top-right, bottom-right, bottom-left.
    ordered = np.zeros((4, 2), dtype="float32")
    for slot, index in (
        (0, np.argmin(coord_sum)),
        (1, np.argmin(coord_diff)),
        (2, np.argmax(coord_sum)),
        (3, np.argmax(coord_diff)),
    ):
        ordered[slot] = corners[index]

    # Matching destination corners of the output rectangle.
    right = output_size[0] - 1
    bottom = output_size[1] - 1
    target = np.array([[0, 0], [right, 0], [right, bottom], [0, bottom]], dtype="float32")

    transform = cv2.getPerspectiveTransform(ordered, target)
    return cv2.warpPerspective(frame, transform, output_size)


In [36]:
import cv2
import numpy as np

def _line_intersection(line1, line2):
    """Intersect the infinite lines through two segments.

    Each line is ``(x1, y1, x2, y2)``. Coordinates are converted to Python
    floats first so the products below cannot overflow the int32 values that
    ``cv2.HoughLinesP`` returns.

    Returns:
        ``np.float32`` array ``[x, y]``, or ``None`` when the lines are parallel.
    """
    x1, y1, x2, y2 = (float(v) for v in line1)
    x3, y3, x4, y4 = (float(v) for v in line2)

    # Each line in the form A*x + B*y = C.
    a1, b1 = y2 - y1, x1 - x2
    c1 = a1 * x1 + b1 * y1
    a2, b2 = y4 - y3, x3 - x4
    c2 = a2 * x3 + b2 * y3

    determinant = a1 * b2 - a2 * b1
    if determinant == 0:
        return None  # lines are parallel

    x = (b2 * c1 - b1 * c2) / determinant
    y = (a1 * c2 - a2 * c1) / determinant
    return np.array([x, y], dtype=np.float32)


def find_board_hough_lines(frame):
    """Estimate the four board corners of ``frame`` from Hough line segments.

    The frame is converted to grayscale, blurred and edge-detected; the
    probabilistic Hough transform then yields line segments. Near-horizontal
    segments (within 20 degrees of the x axis) and near-vertical segments
    (within 20 degrees of the y axis) whose midpoints lie in the central 80%
    of the frame are kept, the outermost segment on each side is selected, and
    the corners are the intersections of those four lines.

    Args:
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        ``float32`` array of shape ``(4, 2)`` ordered top-left, top-right,
        bottom-right, bottom-left, or ``None`` (after printing the reason)
        when no usable set of lines is found.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150, apertureSize=3)

    height, width = gray.shape

    lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=80,
        minLineLength=100,
        maxLineGap=10
    )

    if lines is None:
        print("No lines detected")
        return None

    horizontals = []
    verticals = []

    # Work in float64: avoids int32 overflow and lets one loop handle the
    # (N, 1, 4) layout returned by HoughLinesP.
    for x1, y1, x2, y2 in np.asarray(lines, dtype=np.float64).reshape(-1, 4):
        # Fold the direction into [0, 90] degrees so the classification does
        # not depend on which endpoint is listed first; a horizontal segment
        # given right-to-left would otherwise measure ~180 degrees and be
        # mistaken for a vertical one.
        angle = np.degrees(np.arctan2(abs(y2 - y1), abs(x2 - x1)))
        if angle < 20:  # horizontal-ish
            mid_y = (y1 + y2) / 2
            if 0.1 * height < mid_y < 0.9 * height:  # Keep only mid-height lines
                horizontals.append((x1, y1, x2, y2))
        elif angle > 70:  # vertical-ish
            mid_x = (x1 + x2) / 2
            if 0.1 * width < mid_x < 0.9 * width:  # Keep only mid-width lines
                verticals.append((x1, y1, x2, y2))

    if not horizontals or not verticals:
        print("Not enough horizontal or vertical lines after filtering")
        return None

    # Find extreme lines among filtered ones
    top_line = min(horizontals, key=lambda l: min(l[1], l[3]))  # smallest y
    bottom_line = max(horizontals, key=lambda l: max(l[1], l[3]))  # largest y
    left_line = min(verticals, key=lambda l: min(l[0], l[2]))  # smallest x
    right_line = max(verticals, key=lambda l: max(l[0], l[2]))  # largest x

    # Compute four corners
    tl = _line_intersection(top_line, left_line)
    tr = _line_intersection(top_line, right_line)
    br = _line_intersection(bottom_line, right_line)
    bl = _line_intersection(bottom_line, left_line)

    if any(p is None for p in (tl, tr, br, bl)):
        print("Failed to compute corners")
        return None

    return np.array([tl, tr, br, bl], dtype="float32")


In [None]:
# Paths
# The input is the cleaned board video written by the "Professor Masking"
# cell ('output/cleaned_board_output.mp4'); nothing in this notebook produces
# './updated_board_output.mp4', so a fresh Restart & Run All could not find it.
input_video_path = 'output/cleaned_board_output.mp4'
output_video_path = 'smartboard_output/smartboard_output.mp4'
detected_board_image_path = 'smartboard_output/frame_with_detected_board.jpg'
output_size = (800, 600)  # (width, height) of the rectified board
video_fps = 5

# cv2.imwrite and cv2.VideoWriter fail silently when the target folder is
# missing, so create it before anything is written.
os.makedirs(os.path.dirname(output_video_path), exist_ok=True)

# Open the cleaned video
cap = cv2.VideoCapture(input_video_path)

if not cap.isOpened():
    raise IOError(f"Cannot open video: {input_video_path}")

smartboard_writer = None
try:
    # Scan frames from the start until one yields a board detection.
    board_contour = None

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        board_contour = find_board_hough_lines(frame)
        if board_contour is not None:
            first_frame = frame.copy()
            break

    if board_contour is None:
        raise ValueError("Whiteboard not detected in any frame")

    # Visualize detected board
    frame_copy = first_frame.copy()
    cv2.polylines(frame_copy, [np.int32(board_contour)], isClosed=True, color=(0, 255, 0), thickness=3)
    if cv2.imwrite(detected_board_image_path, frame_copy):
        print(f"Frame with detected board saved at: {detected_board_image_path}")
    else:
        print(f"Warning: could not save frame with detected board to: {detected_board_image_path}")

    # Compute perspective transform. board_contour is already ordered
    # top-left, top-right, bottom-right, bottom-left, matching dst.
    dst = np.array([
        [0, 0],
        [output_size[0] - 1, 0],
        [output_size[0] - 1, output_size[1] - 1],
        [0, output_size[1] - 1]
    ], dtype="float32")

    M = cv2.getPerspectiveTransform(board_contour, dst)

    # Prepare output video
    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # Rewind to start
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    smartboard_writer = cv2.VideoWriter(output_video_path, fourcc, video_fps, output_size)
    if not smartboard_writer.isOpened():
        raise IOError(f"Cannot open video writer: {output_video_path}")

    # Process all frames with the single transform estimated above.
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        warped_frame = cv2.warpPerspective(frame, M, output_size)
        smartboard_writer.write(warped_frame)
finally:
    # Release resources on success and on every error path above.
    cap.release()
    if smartboard_writer is not None:
        smartboard_writer.release()

print(f"Smartboard video saved at: {output_video_path}")


Frame with detected board saved at: smartboard_output/frame_with_detected_board.jpg
Smartboard video saved at: output/smartboard_output.mp4
