Install required packages

In [None]:
!pip install easyocr
!pip install promptlib
!pip install progress

Import necessary libraries

In [None]:
import cv2  # OpenCV for image processing
import easyocr  # EasyOCR for text detection
import numpy as np  # NumPy for numerical operations
import pandas as pd  # Pandas for data manipulation
import glob  # Glob for file handling
import re  # Regular expressions for string processing
from collections import OrderedDict  # OrderedDict for maintaining order in dictionary
import string  # String module for string operations

Initialize EasyOCR Reader

In [None]:
# Module-level EasyOCR reader shared by OCR_header and OCR_values;
# ['en'] is the language list passed to the Reader constructor.
reader = easyocr.Reader(['en'])

Framing Function

In [None]:
def getFrame(sec, file_name, count):
    """
    Extract one frame from a video and save it as ``frame{count}.jpg``.

    The frame is written to the current working directory.

    Args:
        sec (float): Timestamp, in seconds, of the frame to extract.
        file_name (str): Path to the video file.
        count (int): Number used in the output file name.

    Returns:
        bool: True if a frame could be read at the requested position,
        False otherwise.
    """
    vidcap = cv2.VideoCapture(file_name)
    try:
        # The seek position is given in milliseconds.
        vidcap.set(cv2.CAP_PROP_POS_MSEC, sec * 1000)
        hasFrames, image = vidcap.read()
    finally:
        # A new capture is opened on every call, so it must be released on
        # every call as well; otherwise file handles pile up across frames.
        vidcap.release()

    if hasFrames:
        cv2.imwrite(f"frame{count}.jpg", image)  # Save frame as JPG file
    return hasFrames


Preprocessing Functions

In [None]:
def hsv_conversion(image):
    """
    Return the HSV representation of a BGR image.

    Args:
        image (numpy.ndarray): Input image in BGR channel order.

    Returns:
        numpy.ndarray: The image converted with ``cv2.COLOR_BGR2HSV``.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

def get_green_mask(hsv_frame, image):
    """
    Keep only the green regions of an image.

    Args:
        hsv_frame (numpy.ndarray): HSV version of ``image``, used for thresholding.
        image (numpy.ndarray): Image the mask is applied to.

    Returns:
        numpy.ndarray: ``image`` with every pixel outside the green HSV
        range blacked out.
    """
    # Lower and upper HSV bounds of the green band.
    green_bounds = (np.array([50, 25, 25]), np.array([70, 255, 255]))
    in_range = cv2.inRange(hsv_frame, *green_bounds)
    return cv2.bitwise_and(image, image, mask=in_range)

def get_yellow_mask(hsv_frame, image):
    """
    Keep only the yellow regions of an image.

    Args:
        hsv_frame (numpy.ndarray): HSV version of ``image``, used for thresholding.
        image (numpy.ndarray): Image the mask is applied to.

    Returns:
        numpy.ndarray: ``image`` with every pixel outside the yellow HSV
        range blacked out.
    """
    # Lower and upper HSV bounds of the yellow band.
    yellow_bounds = (np.array([25, 80, 80]), np.array([40, 255, 255]))
    in_range = cv2.inRange(hsv_frame, *yellow_bounds)
    return cv2.bitwise_and(image, image, mask=in_range)

def get_red_mask(hsv_frame, image):
    """
    Keep only the red regions of an image.

    Two HSV ranges are thresholded separately (one at the low end and one
    at the high end of the hue channel) and their masks are OR-ed together.

    Args:
        hsv_frame (numpy.ndarray): HSV version of ``image``, used for thresholding.
        image (numpy.ndarray): Image the mask is applied to.

    Returns:
        numpy.ndarray: ``image`` with every pixel outside both red HSV
        ranges blacked out.
    """
    red_bounds = (
        (np.array([0, 70, 50]), np.array([10, 255, 255])),
        (np.array([170, 70, 50]), np.array([180, 255, 255])),
    )
    low_band, high_band = (
        cv2.inRange(hsv_frame, lower, upper) for lower, upper in red_bounds
    )
    combined = np.bitwise_or(low_band, high_band)
    return cv2.bitwise_and(image, image, mask=combined)

def get_white_mask(hsv_frame, image):
    """
    Keep only the white regions of an image.

    Args:
        hsv_frame (numpy.ndarray): HSV version of ``image``, used for thresholding.
        image (numpy.ndarray): Image the mask is applied to.

    Returns:
        numpy.ndarray: ``image`` with every pixel outside the white HSV
        range blacked out.
    """
    # Lower and upper HSV bounds treated as "white".
    white_bounds = (np.array([0, 0, 168]), np.array([172, 111, 255]))
    in_range = cv2.inRange(hsv_frame, *white_bounds)
    return cv2.bitwise_and(image, image, mask=in_range)

def numericalSort(value):
    """
    Build a sort key that orders file names by their embedded numbers.

    The name is split on runs of digits; the digit runs are converted to
    ``int`` so that, for example, ``frame10.jpg`` sorts after ``frame2.jpg``.

    Args:
        value (str): File name.

    Returns:
        list: Alternating text and integer pieces of ``value``, suitable
        as a ``key`` for ``sorted``.
    """
    pieces = re.split(r'(\d+)', value)
    # With a capturing group, the odd positions hold the digit runs.
    return [int(piece) if index % 2 else piece
            for index, piece in enumerate(pieces)]

OCR Implementation for Header Detection

In [None]:
def get_mix_strings(item):
    """
    Check that a string consists only of letters and/or digits.

    Args:
        item (str): Input string.

    Returns:
        bool: True if every character is alphabetic or a digit (this
        includes the empty string), False as soon as any other character
        is present.
    """
    return all(ch.isalpha() or ch.isdigit() for ch in item)

def get_all_num(item):
    """
    Check that a string consists only of digits.

    Args:
        item (str): Input string.

    Returns:
        bool: True if every character is a digit (this includes the empty
        string), False otherwise.
    """
    for ch in item:
        if not ch.isdigit():
            return False
    return True

def get_all_alpha(item):
    """
    Check that a string consists only of alphabetic characters.

    Args:
        item (str): Input string.

    Returns:
        bool: True if every character is alphabetic (this includes the
        empty string), False otherwise.
    """
    for ch in item:
        if not ch.isalpha():
            return False
    return True

def OCR_header(color):
    """
    Run OCR on a color-masked image and keep the header-like strings.

    A detected string is kept when it is made up solely of letters and/or
    digits and is not purely numeric (purely numeric strings are treated as
    values, not headers).

    Args:
        color (numpy.ndarray): Color-masked input image.

    Returns:
        list: Detected header candidates, in detection order.
    """
    # detail=0 is expected to yield plain text strings rather than
    # (bbox, text, confidence) tuples -- see the EasyOCR readtext docs.
    result = reader.readtext(color, min_size=25, detail=0, text_threshold=0.90)

    # An all-alphabetic string already satisfies get_mix_strings, so no
    # separate get_all_alpha branch is needed (it could never be reached).
    return [
        item for item in result
        if not get_all_num(item) and get_mix_strings(item)
    ]


OCR Implementation for Value Detection

In [None]:
def OCR_values(color):
    """
    Run OCR on a color-masked image and keep the purely numeric strings.

    Args:
        color (numpy.ndarray): Color-masked input image.

    Returns:
        list: Detected strings made up solely of digits, in detection order.
    """
    detections = reader.readtext(
        color, min_size=80, detail=0, text_threshold=0.90
    )
    digits_only = []
    for text in detections:
        if all(ch.isdigit() for ch in text):
            digits_only.append(text)
    return digits_only

# Build the empty DataFrame for the headers detected in one color channel
def get_headers_dataframe(header_color):
    """
    Create an empty dataframe whose columns are the recognized headers.

    Only strings that exactly match one of the known monitor labels are
    kept; duplicates are dropped while preserving first-seen order.

    Args:
        header_color (list): Header candidates detected by OCR.

    Returns:
        pandas.DataFrame: Empty dataframe with one column per kept header.
        list: The kept headers, in the same order as the columns.
    """
    known_headers = (
        'HR', 'Tperi', 'Tblood', 'Pulse', 'SpO2', 'ABP',
        'PAP', 'etCO2', 'awRR', 'NBP', 'mCO2',
    )
    matched = [text for text in header_color if text in known_headers]

    # OrderedDict.fromkeys removes repeats without disturbing the order.
    header = list(OrderedDict.fromkeys(matched))
    df = pd.DataFrame(columns=header)
    return df, header

# Append one row of detected numeric values to a channel's DataFrame
def df_values(df, header, value):
    """
    Append one row of numeric values to the dataframe.

    The row is fitted to the number of headers: missing entries are padded
    with the string ``'0'`` and surplus entries are dropped. The ``value``
    list passed in is left untouched.

    Args:
        df (pandas.DataFrame): Dataframe to be updated (modified in place).
        header (list): List of headers, one per dataframe column.
        value (list): Numeric values detected for the current frame.

    Returns:
        pandas.DataFrame: The same dataframe with the new row appended.
    """
    width = len(header)

    # Work on a copy so the caller's list is never extended behind its back.
    row = list(value[:width])
    row.extend(['0'] * (width - len(row)))

    df.loc[len(df)] = row
    return df


Main Function

In [None]:
def _color_masks(image):
    """Return the green, yellow, red and white masked copies of a BGR frame, keyed by color name."""
    hsv_frame = hsv_conversion(image)
    return {
        'green': get_green_mask(hsv_frame, image),
        'yellow': get_yellow_mask(hsv_frame, image),
        'red': get_red_mask(hsv_frame, image),
        'white': get_white_mask(hsv_frame, image),
    }


def main(video_file_path, header_frame=200):
    """
    Orchestrates the overall process.

    Steps:
        1. Extract one frame per second of video into ``frame{n}.jpg``
           files in the current working directory.
        2. Detect the column headers of each color channel on a single
           reference frame.
        3. Run value OCR on every ``frame*.jpg`` file and append one row
           per frame to each color channel's dataframe.
        4. Concatenate the channels side by side and write ``Output.csv``.

    Args:
        video_file_path (str): Path to the video file.
        header_frame (int): Number of the extracted frame used for header
            detection. Defaults to 200.

    Returns:
        pandas.DataFrame: Concatenated dataframe (green, yellow, red,
        white columns, in that order).

    Raises:
        FileNotFoundError: If the header reference frame cannot be read,
            e.g. because the video produced fewer frames than
            ``header_frame``.
    """
    # Column order of the final output.
    color_order = ('green', 'yellow', 'red', 'white')

    # --- 1. Framing: one frame per `frameRate` seconds until reading fails.
    sec = 0
    frameRate = 1
    count = 1
    while getFrame(sec, video_file_path, count):
        count += 1
        sec = round(sec + frameRate, 2)

    print("Framing done")

    # --- 2. Header detection on the reference frame.
    header_path = f"frame{header_frame}.jpg"
    image_header = cv2.imread(header_path)
    if image_header is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside cvtColor.
        raise FileNotFoundError(
            f"Header frame '{header_path}' could not be read; the video may "
            f"have produced fewer than {header_frame} frames."
        )

    header_masks = _color_masks(image_header)
    # color name -> (dataframe, header list)
    tables = {
        color: get_headers_dataframe(OCR_header(color=header_masks[color]))
        for color in color_order
    }

    # --- 3. Value detection on every extracted frame, in numeric order.
    for infile in sorted(glob.glob("frame*.jpg"), key=numericalSort):
        frame_masks = _color_masks(cv2.imread(infile))
        for color in color_order:
            df_color, header = tables[color]
            values = OCR_values(frame_masks[color])
            tables[color] = (df_values(df_color, header, values), header)

    # --- 4. Combine the channels and persist the result.
    df = pd.concat([tables[color][0] for color in color_order], axis=1)
    df.to_csv('Output.csv', index=False)

    return df

# Timing uses wall-clock seconds from the time module
import time

# Location of the input video
video_file_path = '/content/SimCap01_180222_0843_C106_cbba98135b2b4d198c6af4944b4454c1 (online-video-cutter.com).mp4'

# Run the full pipeline and time it
start = time.time()
df = main(video_file_path)
end = time.time()

# Report the elapsed time in seconds
print(end - start)

Optional: Displaying Bounding Boxes

In [None]:
# Visual check: draw the OCR bounding boxes found on the white-masked
# version of one extracted frame and show the result in an OpenCV window.
image = cv2.imread("frame58.jpg")
if image is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise FileNotFoundError("frame58.jpg could not be read")

hsv_frame = hsv_conversion(image)
white = get_white_mask(hsv_frame, image)

# Default detail level: each result is a (bounding box, text, confidence) triple.
res = reader.readtext(white, min_size=80, text_threshold=0.90)

for (bbox, text, prob) in res:
    # Only the top-left and bottom-right corners are needed for a rectangle.
    (tl, _tr, br, _bl) = bbox
    tl = (int(tl[0]), int(tl[1]))
    br = (int(br[0]), int(br[1]))
    cv2.rectangle(white, tl, br, (0, 255, 0), 2)

# The former `plt.rcParams[...]` line was removed: `plt` is never imported in
# this file (NameError), and the setting has no effect on a cv2.imshow window.
imS = cv2.resize(white, (1000, 600))
cv2.imshow('Bounding Rectangle', imS)
cv2.waitKey(0)
# Close the preview window once a key has been pressed.
cv2.destroyAllWindows()