# Maze Detection

In [2]:
import cv2
import time
import numpy as np

In [3]:
# def display_video(cap):
#     prev_t = time.time()
#     win_name = 'Camera Stream + Maze Detection'

#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             print("Can't receive frame. Exiting...")
#             break

#         # Image processing pipeline
#         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#         blur = cv2.GaussianBlur(gray, (5,5), 0)
#         thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
#                                        cv2.THRESH_BINARY_INV, 11, 2)

#         # Show threshold image for debugging
#         cv2.imshow("Threshold", thresh)

#         # Find and draw all contours first
#         contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#         # Draw all contours in blue
#         cv2.drawContours(frame, contours, -1, (255, 0, 0), 1)

#         if contours:
#             # Find the largest contour
#             maze_contour = max(contours, key=cv2.contourArea)
#             # Draw the largest contour in red
#             cv2.drawContours(frame, [maze_contour], -1, (0, 0, 255), 2)

#             epsilon = 0.02 * cv2.arcLength(maze_contour, True)
#             approx = cv2.approxPolyDP(maze_contour, epsilon, True)

#             # If we find a quadrilateral
#             if len(approx) == 4:
#                 # Draw the approximated quadrilateral in green
#                 cv2.drawContours(frame, [approx], -1, (0, 255, 0), 3)

#                 # Draw corner points
#                 for point in approx:
#                     x, y = point[0]
#                     cv2.circle(frame, (x, y), 5, (255, 255, 0), -1)

#                 # Perform perspective transform
#                 pts = np.float32([p[0] for p in approx])
#                 dst = np.float32([[0,0], [500,0], [500,500], [0,500]])
#                 M = cv2.getPerspectiveTransform(pts, dst)
#                 warp = cv2.warpPerspective(thresh, M, (500,500))
#                 imgShow("Maze (Warped)", warp)

#                 # Add text to indicate detection
#                 cv2.putText(frame, "Maze Detected", (10, 30),
#                             cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
#             else:
#                 # Add text to show number of corners
#                 cv2.putText(frame, f"Corners: {len(approx)}", (10, 30),
#                             cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

#         # Show the processed frame
#         imgShow(win_name, frame)

#         # Break loop on 'q' press
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

#     cap.release()
#     cv2.destroyAllWindows()

In [2]:


def detect_maze_structure(warp):
    """Detect and visualize internal maze walls"""
    # Create a color version for visualization
    warp_color = cv2.cvtColor(warp, cv2.COLOR_GRAY2BGR)
    
    # Find all contours in the warped image (internal walls)
    contours, hierarchy = cv2.findContours(warp, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    # Draw all detected contours
    cv2.drawContours(warp_color, contours, -1, (0, 255, 0), 2)
    
    # Detect corners (potential intersections)
    corners = cv2.goodFeaturesToTrack(warp, maxCorners=100, qualityLevel=0.01,
                                      minDistance=10, blockSize=5)
    
    if corners is not None:
        corners = corners.astype(int)
        for corner in corners:
            x, y = corner.ravel()
            cv2.circle(warp_color, (x, y), 5, (255, 0, 0), -1)
    
    # Add text showing number of detected features
    cv2.putText(warp_color, f"Walls: {len(contours)}", (10, 30),
               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    if corners is not None:
        cv2.putText(warp_color, f"Corners: {len(corners)}", (10, 60),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    
    return warp_color


def sort_corners(pts):
    """Sort corners to be in order: top-left, top-right, bottom-right, bottom-left"""
    # Calculate center point
    center = pts.mean(axis=0)
    
    # Sort by angle from center
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    sorted_indices = np.argsort(angles)
    sorted_pts = pts[sorted_indices]
    
    # Find top-left (smallest x+y sum)
    sums = sorted_pts[:, 0] + sorted_pts[:, 1]
    top_left_idx = np.argmin(sums)
    
    # Rotate array so top-left is first
    sorted_pts = np.roll(sorted_pts, -top_left_idx, axis=0)
    
    return sorted_pts


def convert_maze_to_grid(warp, cell_size=25):
    """Convert the warped maze image to a grid representation for pathfinding
    Returns: 2D numpy array where 0 = walkable path, 1 = wall
    """
    h, w = warp.shape
    grid_h = h // cell_size
    grid_w = w // cell_size
    
    maze_grid = np.zeros((grid_h, grid_w), dtype=int)
    
    for i in range(grid_h):
        for j in range(grid_w):
            # Sample the center of each cell
            y_start = i * cell_size
            y_end = min((i + 1) * cell_size, h)
            x_start = j * cell_size
            x_end = min((j + 1) * cell_size, w)
            
            # Get the cell region
            cell_region = warp[y_start:y_end, x_start:x_end]
            
            # If the cell is mostly white (wall in binary image), mark as 1
            if cell_region.size > 0:
                white_percentage = np.sum(cell_region > 127) / cell_region.size
                if white_percentage > 0.3:  # 30% threshold
                    maze_grid[i, j] = 1  # Wall
                else:
                    maze_grid[i, j] = 0  # Path
    
    return maze_grid


def visualize_grid(maze_grid, original_warp):
    """Visualize the grid representation overlaid on the original image"""
    h, w = original_warp.shape
    grid_h, grid_w = maze_grid.shape
    cell_size = h // grid_h
    
    # Create color version
    visual = cv2.cvtColor(original_warp, cv2.COLOR_GRAY2BGR)
    
    # Draw grid lines
    for i in range(grid_h + 1):
        y = i * cell_size
        cv2.line(visual, (0, y), (w, y), (100, 100, 100), 1)
    
    for j in range(grid_w + 1):
        x = j * cell_size
        cv2.line(visual, (x, 0), (x, h), (100, 100, 100), 1)
    
    # Color code the cells
    for i in range(grid_h):
        for j in range(grid_w):
            y_center = i * cell_size + cell_size // 2
            x_center = j * cell_size + cell_size // 2
            
            if maze_grid[i, j] == 1:
                # Wall - red dot
                cv2.circle(visual, (x_center, y_center), 5, (0, 0, 255), -1)
            else:
                # Path - green dot
                cv2.circle(visual, (x_center, y_center), 5, (0, 255, 0), -1)
    
    return visual

## Display Video

In [3]:
def display_video(cap):
    prev_t = time.time()
    win_name = 'Camera Stream + Maze Detection'
    
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame. Exiting...")
            break

        # Create a copy for visualization
        display_frame = frame.copy()
        
        # Convert to grayscale and apply preprocessing
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        
        # Use adaptive threshold to detect dark maze walls
        thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY_INV, 15, 5)
        
        # Apply morphological operations to clean up the image
        kernel = np.ones((5, 5), np.uint8)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
        
        # Remove small noise
        kernel_small = np.ones((3, 3), np.uint8)
        thresh = cv2.erode(thresh, kernel_small, iterations=1)
        thresh = cv2.dilate(thresh, kernel_small, iterations=1)

        # Find contours - get all wall segments
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        
        # Filter contours by area and aspect ratio to remove noise
        min_area = 200
        max_area = frame.shape[0] * frame.shape[1] * 0.8  # Max 80% of frame
        valid_contours = []
        for c in contours:
            area = cv2.contourArea(c)
            if min_area < area < max_area:
                # Filter by aspect ratio to remove very thin noise
                x, y, w, h = cv2.boundingRect(c)
                aspect_ratio = max(w, h) / (min(w, h) + 1)
                if aspect_ratio < 20:  # Remove very elongated contours
                    valid_contours.append(c)
        
        # Draw all valid wall contours
        cv2.drawContours(display_frame, valid_contours, -1, (0, 255, 255), 2)
        
        # Try to find the largest rectangular boundary that contains the maze
        if valid_contours:
            # Get bounding rectangle of all contours combined
            all_points = np.vstack(valid_contours)
            rect = cv2.minAreaRect(all_points)
            box = cv2.boxPoints(rect)
            box = box.astype(int)
            
            # Draw the detected maze boundary
            cv2.drawContours(display_frame, [box], 0, (0, 255, 0), 3)
            
            # Draw corner points
            for point in box:
                cv2.circle(display_frame, tuple(point), 8, (0, 255, 255), -1)
            
            # Add status text
            cv2.putText(display_frame, "Maze Detected", (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(display_frame, f"Walls: {len(valid_contours)}", (10, 70),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
            
            # Perform perspective transform on the detected region
            width = int(rect[1][0])
            height = int(rect[1][1])
            
            # Ensure width > height for proper orientation
            if width < height:
                width, height = height, width
            
            # Sort corners for perspective transform
            pts = sort_corners(box.astype(np.float32))
            dst = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
            
            M = cv2.getPerspectiveTransform(pts, dst)
            warp = cv2.warpPerspective(thresh, M, (width, height))
            
            # Resize warped image for consistent processing
            warp_resized = cv2.resize(warp, (500, 500))
            
            # Apply additional cleaning to warped image
            warp_cleaned = cv2.morphologyEx(warp_resized, cv2.MORPH_OPEN, 
                                           np.ones((3, 3), np.uint8), iterations=1)
            
            # Convert to grid for pathfinding
            maze_grid = convert_maze_to_grid(warp_cleaned, cell_size=25)
            
            # Visualize the grid
            grid_visual = visualize_grid(maze_grid, warp_cleaned)
            
            # Display the warped maze and grid
            cv2.imshow("Warped Maze", warp_cleaned)
            cv2.imshow("Pathfinding Grid", grid_visual)
        
        # Calculate and display FPS
        curr_t = time.time()
        fps = 1 / (curr_t - prev_t)
        prev_t = curr_t
        cv2.putText(display_frame, f"FPS: {fps:.1f}", (10, 110),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
        
        # Show threshold for debugging
        cv2.imshow("Threshold", thresh)
        
        cv2.imshow(win_name, display_frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [5]:
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 640)
display_video(cap)

qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/ibraheem/ras545/.venv/lib/python3.13/site-packages/cv2/qt/plugins"


KeyboardInterrupt: 

# Get Grid

In [4]:
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 640)

True

In [5]:
import matplotlib.pyplot as plt

def imgShow(title, img):
    """Display image in Jupyter notebook with proper color handling"""
    plt.figure(figsize=(10, 8))
    
    # Check if image is grayscale or color
    if len(img.shape) == 2:
        # Grayscale image
        plt.imshow(img, cmap='gray')
    else:
        # Color image - convert BGR to RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.imshow(img_rgb)
    
    plt.title(title)
    plt.axis("off")
    plt.show()


In [None]:
win_name = 'Camera Stream + Maze Detection'
ret, frame = cap.read()

# Create a copy for visualization
display_frame = frame.copy()

# Convert to grayscale and apply preprocessing
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)

# Use adaptive threshold to detect dark maze walls
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                cv2.THRESH_BINARY_INV, 15, 5)

# Apply morphological operations to clean up the image
kernel = np.ones((5, 5), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

# Remove small noise
kernel_small = np.ones((3, 3), np.uint8)
thresh = cv2.erode(thresh, kernel_small, iterations=1)
thresh = cv2.dilate(thresh, kernel_small, iterations=1)

# Find contours - get all wall segments
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

print(contours)
print(hierarchy)

# Filter contours by area and aspect ratio to remove noise
min_area = 200
max_area = frame.shape[0] * frame.shape[1] * 0.8  # Max 80% of frame
valid_contours = []
for c in contours:
    area = cv2.contourArea(c)
    if min_area < area < max_area:
        # Filter by aspect ratio to remove very thin noise
        x, y, w, h = cv2.boundingRect(c)
        aspect_ratio = max(w, h) / (min(w, h) + 1)
        if aspect_ratio < 20:  # Remove very elongated contours
            valid_contours.append(c)

# Draw all valid wall contours
cv2.drawContours(display_frame, valid_contours, -1, (0, 255, 255), 2)

# Try to find the largest rectangular boundary that contains the maze
if valid_contours:
    # Get bounding rectangle of all contours combined
    all_points = np.vstack(valid_contours)
    rect = cv2.minAreaRect(all_points)
    box = cv2.boxPoints(rect)
    box = box.astype(int)
    
    # Draw the detected maze boundary
    cv2.drawContours(display_frame, [box], 0, (0, 255, 0), 3)
    
    # Draw corner points
    for point in box:
        cv2.circle(display_frame, tuple(point), 8, (0, 255, 255), -1)
    
    # Add status text
    cv2.putText(display_frame, "Maze Detected", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(display_frame, f"Walls: {len(valid_contours)}", (10, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    
    # Perform perspective transform on the detected region
    width = int(rect[1][0])
    height = int(rect[1][1])
    
    # Ensure width > height for proper orientation
    if width < height:
        width, height = height, width
    
    # Sort corners for perspective transform
    pts = sort_corners(box.astype(np.float32))
    dst = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
    
    M = cv2.getPerspectiveTransform(pts, dst)
    warp = cv2.warpPerspective(thresh, M, (width, height))
    
    # Resize warped image for consistent processing
    warp_resized = cv2.resize(warp, (500, 500))
    
    # Apply additional cleaning to warped image
    warp_cleaned = cv2.morphologyEx(warp_resized, cv2.MORPH_OPEN, 
                                    np.ones((3, 3), np.uint8), iterations=1)
    
    # Convert to grid for pathfinding
    maze_grid = convert_maze_to_grid(warp_cleaned, cell_size=25)
    
    # Visualize the grid
    grid_visual = visualize_grid(maze_grid, warp_cleaned)
    
    # Display the warped maze and grid
    imgShow("Warped Maze", warp_cleaned)
    imgShow("Pathfinding Grid", grid_visual)


# Show threshold for debugging
imgShow("Threshold", thresh)

imgShow(win_name, display_frame)

(array([[[342, 465]],

       [[342, 479]],

       [[356, 479]],

       [[356, 465]]], dtype=int32), array([[[312, 463]],

       [[312, 471]],

       [[313, 472]],

       [[314, 472]],

       [[315, 473]],

       [[315, 479]],

       [[328, 479]],

       [[328, 475]],

       [[327, 475]],

       [[326, 474]],

       [[326, 473]],

       [[325, 473]],

       [[324, 472]],

       [[323, 472]],

       [[322, 471]],

       [[322, 463]]], dtype=int32), array([[[576, 448]],

       [[576, 462]],

       [[575, 463]],

       [[572, 463]],

       [[572, 471]],

       [[571, 472]],

       [[558, 472]],

       [[558, 479]],

       [[615, 479]],

       [[615, 475]],

       [[614, 474]],

       [[614, 466]],

       [[603, 466]],

       [[598, 461]],

       [[598, 460]],

       [[592, 454]],

       [[591, 454]],

       [[588, 451]],

       [[588, 450]],

       [[587, 449]],

       [[586, 449]],

       [[585, 448]]], dtype=int32), array([[[ 36, 412]],

       [[ 3

NameError: name 'sort_corners' is not defined

# Maze Solving

## Path Finding

# Maze Detection Using LLM

In [1]:
def just_display_video(cap):
    prev_t = time.time()
    win_name = 'Camera Stream'
    
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame. Exiting...")
            break

        # Show the processed frame
        cv2.imshow(win_name, frame)

        # Break loop on 'q' press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [2]:
def capture_image(cap):
    ret, frame = cap.read()
    if not ret:
        raise Exception("Failed to capture image from camera.")
    # Convert numpy array to JPEG bytes
    success, encoded_image = cv2.imencode('.jpg', frame)
    if not success:
        return None
    
    return encoded_image.tobytes(), frame

In [3]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import base64
import threading
import cv2
import time

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")

print("Using API Key:", api_key)

client = OpenAI(api_key=api_key)

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 640)

threading.Thread(target=just_display_video, args=(cap,)).start()

Using API Key: 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


qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/ibraheem/ras545/.venv/lib/python3.13/site-packages/cv2/qt/plugins"


In [14]:
image_bytes, frame = capture_image(cap)

In [18]:
input_prompt = """
Task: 
Analyze the following Image of a 5x5 maze. 
And produce an output as a graph.
The graph should represent walls and paths.


Important Information:
- The maze will always be a square.
- The maze will have multiple possible paths.
- Treat each square as a node
- Squares connected by open paths should have edges between them.

Instructions:
1. Determine the maze structure
2. Determine the entry/exit points
3. Determine the size of the maze in terms of number of cells
4. Determine the structure of the graph representing walls and paths.

"""
response = client.responses.create(
    model="gpt-4o", 
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": input_prompt},
                {"type": "input_image", "image_url": "data:image/jpeg;base64," + base64.b64encode(image_bytes).decode('utf-8')}
            ]
        }
    ]
)

print(response.output_text)

To analyze the 5x5 maze image, let's follow the instructions:

1. **Determine the Maze Structure:**
   - The maze consists of walls (thick lines) and open paths (spaces between the lines).
   - It's a 5x5 grid, with each cell either being part of the path or a wall enclosing paths.

2. **Determine the Entry/Exit Points:**
   - Entry points appear to be at open spots on the outer boundary of the maze. This maze seems to have entry/exit points for a potential path.

3. **Determine the Size of the Maze:**
   - The maze is 5x5, meaning there are 25 cells in total.

4. **Determine the Structure of the Graph:**
   - Each cell in the grid can be a node in the graph.
   - Nodes (cells) are connected if there is no wall between them.
   
Here is the representation of the graph structure:

- Create nodes for each cell, labeled by their row and column (e.g., node (0,0) for the top-left cell).
- For each node, establish edges with adjacent nodes if no wall exists between them.
  
Graph Edges (Conn

### Test OpenAI Client

In [8]:
response = client.responses.create(
    model="gpt-4o", 
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "Ay yo just checking in"}
            ]
        }
    ]
)

print(response)

Response(id='resp_0c9e5614bc13d9f50068f9ca36a6e4819385e953577e4a6ac0', created_at=1761200694.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-2024-08-06', object='response', output=[ResponseOutputMessage(id='msg_0c9e5614bc13d9f50068f9ca3869408193ab2fb695332868db', content=[ResponseOutputText(annotations=[], text="Hey! What's up? How's everything going?", type='output_text', logprobs=[])], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, background=False, conversation=None, max_output_tokens=None, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, reasoning=Reasoning(effort=None, generate_summary=None, summary=None), safety_identifier=None, service_tier='default', status='completed', text=ResponseTextConfig(format=ResponseFormatText(type='text'), verbosity='medium'), top_logprobs=0, truncation='disabled', usage=ResponseUsage(inp

# Move Dobot

In [1]:
import pydobot

In [2]:
device = pydobot.Dobot(port='/dev/ttyACM0')

Clearing alarms: 0.


In [3]:
device.home()

6