UI Element Detection Script

# To install all required dependencies, run
          !pip install -r requirements.txt

In [8]:
import cv2
import os
import numpy as np
import pytesseract

In [17]:
import pytesseract


# Point pytesseract at the Tesseract executable. The path must match the
# binary's location exactly: pytesseract hands this string to subprocess as
# the program name, so stray whitespace makes the lookup fail.
pytesseract.pytesseract.tesseract_cmd = "/usr/local/bin/tesseract"


 ## Features

- Detects UI elements (buttons, icons, toolbars) in frames extracted from videos.
- Uses edge detection, contour extraction, and text-region filtering to identify UI elements.
- Saves the processed frames with the detected UI elements highlighted (bounding boxes and labels).

## The detect_ui_elements(image) function:

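1. Converts the image to grayscale and applies a Gaussian blur to reduce noise.
2. Applies adaptive thresholding to produce a binary image.
3. Uses Canny edge detection to identify edges in the thresholded image.
4. Applies morphological transformations (dilation followed by erosion) to refine the edges.
5. Extracts the external contours (shapes) from the processed image.
6. Skips contours that are small (area under 500 pixels), that overlap a text region reported by Tesseract, or that have an extreme aspect ratio (below 0.2 or above 5.0), and draws a labelled bounding box around the remaining ones.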


In [None]:
import os
import cv2
import numpy as np
import pytesseract


# Root folder that is scanned for one sub-folder of frames per video.
video_directory = "/Users/nvaishnavi/Documents/Instructional_Video_analysis/7_extracted_frames"
# Root folder that receives the annotated frames.
# NOTE(review): this path spells "Instructional_video_analysis" with a
# lower-case "v" while video_directory uses "Instructional_Video_analysis".
# On a case-sensitive filesystem these are different folders - confirm which
# spelling is intended.
output_directory = "/Users/nvaishnavi/Documents/Instructional_video_analysis/10_final_ui_elements_detection"


# Create the output root up front; exist_ok=True makes re-running harmless.
os.makedirs(output_directory, exist_ok=True)


def detect_ui_elements(image, min_area=500, min_aspect_ratio=0.2, max_aspect_ratio=5.0):
    """Return a copy of ``image`` with candidate UI elements outlined and labelled.

    Candidates are the external contours of the image's edge map. A contour
    is kept only if it passes every filter below; each kept contour gets a
    green bounding box and a "UI Element" label.

    Args:
        image: Colour image as loaded by ``cv2.imread`` (converted with
            ``cv2.COLOR_BGR2GRAY``, so it must have colour channels).
        min_area: Contours whose area is below this value are ignored.
        min_aspect_ratio: Bounding boxes with width/height below this value
            are ignored.
        max_aspect_ratio: Bounding boxes with width/height above this value
            are ignored.

    Returns:
        A new image of the same size as ``image`` with the annotations drawn
        on it. The input image is not modified.
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0)  # Reduce noise

    # Binarize first, then run Canny on the binary image.
    thresh = cv2.adaptiveThreshold(
        blurred_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    edges = cv2.Canny(thresh, 50, 150)

    # Dilation followed by erosion with the same kernel.
    kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(edges, kernel, iterations=1)
    eroded = cv2.erode(dilated, kernel, iterations=1)

    contours, _ = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # OCR runs once per image; every contour is checked against the result.
    text_regions = detect_text_regions(image)

    detected = image.copy()

    for contour in contours:
        if cv2.contourArea(contour) < min_area:
            continue

        x, y, w, h = cv2.boundingRect(contour)

        # Cheap geometric filter first; the filters are independent, so the
        # order does not change which contours survive.
        aspect_ratio = float(w) / h
        if aspect_ratio < min_aspect_ratio or aspect_ratio > max_aspect_ratio:
            continue

        if is_contour_in_text_region(x, y, w, h, text_regions):
            continue

        cv2.rectangle(detected, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Keep the label baseline inside the frame: for boxes touching the top
        # edge, y - 10 would be negative and the text would be drawn off-image.
        label_y = max(y - 10, 15)
        cv2.putText(detected, "UI Element", (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return detected

def detect_text_regions(image):
    """Return the bounding boxes of characters Tesseract recognises in ``image``.

    ``pytesseract.image_to_boxes`` reports one line per character in the form
    ``char left bottom right top page``, with the origin at the BOTTOM-left
    corner of the image. OpenCV rectangles use a TOP-left origin, so the
    vertical coordinates are flipped here before being returned.

    Args:
        image: Image array; only ``image.shape[0]`` (the height) is read
            here, so both colour and single-channel arrays are accepted.

    Returns:
        A list of ``(x1, y1, x2, y2)`` tuples in top-left-origin pixel
        coordinates, with ``(x1, y1)`` the top-left and ``(x2, y2)`` the
        bottom-right corner of each character box.
    """
    # shape[0] works for both (H, W, C) and (H, W) arrays.
    image_height = image.shape[0]
    # NOTE(review): pytesseract documents RGB channel order for array input,
    # while cv2.imread yields BGR - confirm whether a conversion is wanted.
    text_boxes = pytesseract.image_to_boxes(image)

    text_regions = []
    for box in text_boxes.splitlines():
        fields = box.split()
        if len(fields) < 5:
            # Not a complete "char left bottom right top" record.
            continue
        try:
            left, bottom, right, top = (int(value) for value in fields[1:5])
        except ValueError:
            # Skip a malformed line instead of aborting the whole frame.
            continue
        # Flip the y axis: a distance measured from the bottom edge becomes a
        # distance measured from the top edge.
        text_regions.append((left, image_height - top, right, image_height - bottom))

    return text_regions

def is_contour_in_text_region(x, y, w, h, text_regions):
    """Report whether the box ``(x, y, w, h)`` overlaps any text region.

    The check is for overlap, not containment: a box counts as soon as it
    intersects a region with non-zero area. Boxes that merely share an edge
    with a region do not count.

    Args:
        x, y: Top-left corner of the box.
        w, h: Width and height of the box.
        text_regions: Iterable of ``(x1, y1, x2, y2)`` corner tuples.

    Returns:
        True if the box overlaps at least one region, otherwise False.
    """
    right = x + w
    bottom = y + h
    return any(
        right > rx1 and x < rx2 and bottom > ry1 and y < ry2
        for (rx1, ry1, rx2, ry2) in text_regions
    )


# Annotate every frame of every video: each sub-folder of video_directory is
# treated as one video, and its annotated frames are written to a sub-folder
# of the same name under output_directory.
failed_writes = 0

for video_folder in os.listdir(video_directory):
    video_path = os.path.join(video_directory, video_folder)

    # Only directories hold frames; skip any other entry.
    if not os.path.isdir(video_path):
        continue

    output_subfolder = os.path.join(output_directory, video_folder)
    os.makedirs(output_subfolder, exist_ok=True)

    for frame_name in os.listdir(video_path):
        frame_path = os.path.join(video_path, frame_name)

        # cv2.imread returns None for anything it cannot decode, so entries
        # that are not readable images are skipped here.
        image = cv2.imread(frame_path)
        if image is None:
            continue

        processed_image = detect_ui_elements(image)

        output_frame_path = os.path.join(output_subfolder, frame_name)
        # cv2.imwrite signals failure through its return value; surface it so
        # a missing output file does not go unnoticed.
        if not cv2.imwrite(output_frame_path, processed_image):
            failed_writes += 1
            print("Warning: could not write", output_frame_path)

print("UI element detection (buttons, icons, toolbars) completed.")
if failed_writes:
    print("Frames that could not be written:", failed_writes)

