# Libraries

Import libraries needed in this project.

In [1]:
import cv2  #For image and video processing and visualization
import os   #To interact with operating system and files
import numpy as np  #For matrix operations
import random   #To generate random numbers
from sklearn.mixture import GaussianMixture  #For clustering
from sklearn.cluster import DBSCAN   #For clustering
from sklearn.cluster import MeanShift, estimate_bandwidth  #For clustering

# Open Video

This function receives a video path and returns a capture stream.

In [2]:
def open_vid(input_file):
    """Open a video and hand back its capture stream.

    Args:
        input_file: Path of the video passed to ``cv2.VideoCapture``.

    Returns:
        The ``cv2.VideoCapture`` object. It is returned even when the video
        could not be opened; in that case an error message is printed first.
    """
    stream = cv2.VideoCapture(input_file)
    opened = stream.isOpened()
    if opened:
        return stream
    # Best effort: report the failure but still return the (unopened) stream.
    print("Error: Could not open video.")
    return stream

# Get Video Properties

This function gets the video's properties for its width and height in pixels, frames per second (fps) and frame count.

In [3]:
def get_props(cap, display=1):
    """Query the basic properties of a video capture stream.

    Args:
        cap: Capture stream exposing ``get(prop_id)``.
        display: When equal to 1, the properties are also printed.

    Returns:
        Tuple ``(width, height, fps, count)``; width, height and count are
        truncated to ``int``, fps is returned as reported by the stream.
    """
    query = cap.get
    width = int(query(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(query(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = query(cv2.CAP_PROP_FPS)
    count = int(query(cv2.CAP_PROP_FRAME_COUNT))
    props = (width, height, fps, count)
    if display == 1:
        labels = ("Width: ", "Height: ", "FPS: ", "Frame Count: ")
        for label, value in zip(labels, props):
            print(label, value)
    return props

# Get Frames from Video as a List 

This function takes the capture stream of a video and saves its frames in a list.

In [4]:
def get_frames(cap):
    """Read the frames of a capture stream into a list.

    The number of reads is bounded by the frame count reported by the
    stream. Reading stops early, after printing an error, at the first frame
    that cannot be read; the frames collected so far are still returned.

    Args:
        cap: Capture stream exposing ``get`` and ``read``.

    Returns:
        List of the frames that were read (possibly empty).
    """
    collected = []
    remaining = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    while remaining > 0:
        ok, image = cap.read()
        if not ok:
            print("Error: Could not read frame.")
            return collected
        collected.append(image)
        remaining -= 1
    if len(collected) == 0:
        print("No frames were saved.")
    return collected

# Delete PNG Files in Directory

This function deletes all PNG image files in the specified directory path.

In [5]:
def delete_png_files(directory_path):
    """Remove every regular ``.png`` file (case-insensitive) in a directory.

    Sub-directories and other files are left alone. A failure on one file is
    printed and does not stop the remaining files from being processed.

    Args:
        directory_path: Directory whose PNG files are deleted.
    """
    for entry in os.listdir(directory_path):
        file_path = os.path.join(directory_path, entry)
        try:
            # Skip anything that is not a regular file with a .png extension.
            if not os.path.isfile(file_path) or not entry.lower().endswith('.png'):
                continue
            os.remove(file_path)
        except Exception as e:
            print(f'Failed to delete {file_path}. Reason: {e}')

# Save Frames as Images

This function saves a list of frames as images in the given directory path.

In [6]:
def save_frames(frames, frame_folder):
    """Write each frame to ``frame_folder`` as ``frame_NNN.png``.

    The folder is created when it does not exist yet. Files are numbered
    from 000 in list order.

    Args:
        frames: Sequence of images accepted by ``cv2.imwrite``.
        frame_folder: Destination directory.
    """
    folder_missing = not os.path.exists(frame_folder)
    if folder_missing:
        os.makedirs(frame_folder)
    for frame_count, image in enumerate(frames):
        target = os.path.join(frame_folder, f'frame_{frame_count:03d}.png')
        cv2.imwrite(target, image)

# Create Video with List of Frames

This function creates and saves the frames in a list into a video file in the specified directory path.

In [7]:
def save_vid(frames,output_file,fps):
    """Encode a list of frames into an ``mp4v`` video file.

    Args:
        frames: Non-empty sequence of equally sized frames; the first frame
            determines the video's width and height.
        output_file: Path of the video file to create.
        fps: Frames per second of the output video.

    Raises:
        IndexError: If ``frames`` is empty.
    """
    # Only the first two dimensions are needed, so 2-D frames no longer fail
    # on unpacking. NOTE(review): the writer is created with its default
    # colour setting; confirm that single-channel frames are encoded as expected.
    height, width = frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
    try:
        if not out.isOpened():
            # Same reporting style as open_vid: print and give up.
            print("Error: Could not open video writer.")
            return
        for frame in frames:
            out.write(frame)
    finally:
        # Always release the writer, even when writing a frame fails.
        out.release()

# Display Image

This function displays a given image in a window.

In [16]:
def display_frame(I):
    """Show image ``I`` in a window until a key is pressed, then close all windows."""
    window_title = 'I'
    cv2.imshow(window_title, I)
    cv2.waitKey(0)  # 0 = block until any key press
    cv2.destroyAllWindows()

# Play Frames in a Window

This function displays a video made out of a list of frames with the specified FPS.

In [8]:
def play_frames(frames,fps):
    """Play a list of frames in a window at roughly ``fps`` frames per second.

    Playback stops early when 'q' is pressed. All OpenCV windows are closed
    when playback ends, including when an error interrupts it.

    Args:
        frames: Iterable of images to show in order.
        fps: Frames per second; must be non-zero.

    Raises:
        ZeroDivisionError: If ``fps`` is 0.
    """
    # A delay of 0 tells cv2.waitKey to block until a key is pressed, which
    # would freeze playback for fps > 1000, so the delay is clamped to 1 ms.
    delay = max(1, int(1000 / fps))
    print("Delay: ",delay)
    try:
        for frame in frames:
            cv2.imshow('Video Playback', frame)
            if cv2.waitKey(delay) & 0xFF == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()

# Optical Flow

Optical Flow can be seen as a vector field that describes the movement between two consecutive images or frames in a video. There are many ways to calculate the optical flow. Some of the methods used to compute it are:
- Farneback
- Lucas - Kanade
- Phase Correlation

## Farneback Magnitude

This function takes a list of frames, returns a list of the optical flow's vector fields and displays the magnitude of these vectors.

In [9]:
def OFM(frames): #List of frames
    """Browse frames interactively, showing dense Farneback optical flow.

    For the current frame ``i`` the flow towards frame ``(i + 1) % len(frames)``
    is computed (so the last frame is paired with the first one) and four
    windows are shown: both frames, an HSV rendering of the flow (hue =
    direction, value = magnitude) and the flow magnitude as a grayscale image.

    Keys: 'q' quits, the left/right arrow keys move one frame back/forward
    (clamped to the valid range); any other key recomputes the same pair.

    Args:
        frames: Non-empty list of BGR frames.

    Returns:
        List with one flow field per displayed step, in viewing order. A pair
        that is shown several times therefore appears several times.
    """
    # NOTE(review): these arrow-key codes returned by cv2.waitKeyEx are
    # platform-specific — confirm on the target system.
    LEFT_ARROW = 2424832
    RIGHT_ARROW = 2555904

    OF = []  # Flow fields, one per displayed step
    i = 0    # Index of the current ("previous") frame
    while True:
        prev_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
        next_index = (i + 1) % len(frames)
        print(i, end='\r')  # Show the current frame index on one line
        next_gray = cv2.cvtColor(frames[next_index], cv2.COLOR_BGR2GRAY)

        # Arguments after the two images: flow=None, pyr_scale=0.5, levels=3,
        # winsize=10, iterations=10, poly_n=5, poly_sigma=1.2, flags=0
        flow = cv2.calcOpticalFlowFarneback(prev_gray, next_gray, None, 0.5, 3, 10, 10, 5, 1.2, 0)
        OF.append(flow)

        # Polar form of the flow; the magnitude is scaled to 0..255 once and
        # reused for both visualisations.
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        mag_img = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

        hsv = np.zeros(frames[i].shape[:2] + (3,), dtype=np.uint8)
        hsv[..., 0] = ang * 180 / np.pi / 2  # Radians -> halved degrees for the 8-bit hue channel
        hsv[..., 1] = 255                    # Full saturation
        hsv[..., 2] = mag_img                # Brightness encodes the magnitude
        flow_rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

        cv2.imshow('Prev Frame', frames[i])
        cv2.imshow('Next Frame', frames[next_index])
        cv2.imshow('Optical Flow', flow_rgb)
        # The magnitude is shown as an 8-bit image. The previous version
        # displayed fx + fy divided by its maximum, which is not a magnitude
        # and breaks when that maximum is zero or negative.
        cv2.imshow('Optical Flow Mag', mag_img)

        key = cv2.waitKeyEx(0)  # Block until a key is pressed
        if key == ord('q'):
            break
        if key == LEFT_ARROW:
            i = max(i - 1, 0)
        if key == RIGHT_ARROW:
            i = min(i + 1, len(frames) - 1)
    cv2.destroyAllWindows()
    return OF

## Farneback with Vectors

The function OFV takes a list of frames, then returns and displays the optical flow's vector fields as well as the sum of all vectors.

In [10]:
def draw_optical_flow_vectors(flow, frame, step):
    """Draw a sparse grid of flow vectors on top of a frame.

    The flow is sampled at the centres of ``step`` x ``step`` windows. For
    each sample a green line joins the sample point and its displaced
    position, and a small filled circle marks the displaced position.

    Args:
        flow: Dense flow field indexed as ``flow[y, x] -> (fx, fy)``.
        frame: Image to annotate; it is not modified.
        step: Spacing of the sampling grid in pixels.

    Returns:
        A new image: the annotated copy of ``frame`` with the line layer added.
    """
    h, w = frame.shape[:2]
    # Centres of the sampling windows, as integer pixel coordinates
    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    # One (start, end) point pair per sample; +0.5 rounds before truncation
    lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
    lines = np.int32(lines + 0.5)

    canvas = frame.copy()         # Work on a copy so the caller's frame is left untouched
    mask = np.zeros_like(frame)   # Separate layer holding the lines
    # tolist() yields plain Python ints for the OpenCV drawing calls
    for (x1, y1), (x2, y2) in lines.tolist():
        cv2.line(mask, (x2, y2), (x1, y1), (0, 255, 0), 1)
        cv2.circle(canvas, (x2, y2), 1, (0, 255, 0), -1)
    return cv2.add(canvas, mask)

def draw_sum_vector(flow, frame, scale=0.001):
    """Draw the sum of all flow vectors as a red arrow from the frame centre.

    Args:
        flow: Dense flow field whose last axis holds ``(fx, fy)``.
        frame: Image to annotate; it is copied, not modified.
        scale: Factor applied to the summed vector before drawing. The
            default keeps the previously hard-coded value.

    Returns:
        Copy of ``frame`` with the arrow drawn on it.
    """
    h, w = frame.shape[:2]
    sum_fx = np.sum(flow[..., 0])
    sum_fy = np.sum(flow[..., 1])
    center_x, center_y = w // 2, h // 2

    # The guard keeps a zero (or NaN) sum from reaching int(); the arrow then
    # collapses onto the centre point.
    if np.sqrt(sum_fx**2 + sum_fy**2) > 0:
        end_x = int(center_x + sum_fx * scale)
        end_y = int(center_y + sum_fy * scale)
    else:
        end_x, end_y = center_x, center_y

    frame_with_vector = np.copy(frame)
    cv2.arrowedLine(frame_with_vector, (center_x, center_y), (end_x, end_y), (0, 0, 255), 2, tipLength=0.2)
    return frame_with_vector

def OFV(frames, step):
    """Browse consecutive frame pairs, showing Farneback flow as vectors.

    For the pair ``(frames[i], frames[i + 1])`` the dense flow is computed,
    drawn as a vector grid on a 50/50 blend of both frames, and the sum of
    all vectors is added as a red arrow.

    Keys: 'q' quits, left arrow moves to the previous pair, right arrow to
    the next pair (clamped so that ``i + 1`` stays a valid index); any other
    key recomputes the same pair.

    Args:
        frames: List of BGR frames (at least two).
        step: Grid spacing handed to ``draw_optical_flow_vectors``.

    Returns:
        List with one flow field per displayed step, in viewing order.
    """
    flows = []
    idx = 0
    while True:
        nxt = idx + 1
        gray_a = cv2.cvtColor(np.copy(frames[idx]), cv2.COLOR_BGR2GRAY)
        gray_b = cv2.cvtColor(np.copy(frames[nxt]), cv2.COLOR_BGR2GRAY)
        # Dense Farneback flow between the two grayscale frames
        flow = cv2.calcOpticalFlowFarneback(gray_a, gray_b, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        blended = cv2.addWeighted(frames[idx], 0.5, frames[nxt], 0.5, 0)
        annotated = draw_optical_flow_vectors(flow, blended, step)
        flows.append(flow)
        with_sum = draw_sum_vector(flow, annotated)

        cv2.imshow('Prev Frame', frames[idx])
        cv2.imshow('Next Frame', frames[nxt])
        cv2.imshow('Sum Vector', with_sum)

        pressed = cv2.waitKeyEx(0)
        if pressed == ord('q'):
            break
        if pressed == 2424832:      # Left arrow: previous pair
            idx = max(idx - 1, 0)
        elif pressed == 2555904:    # Right arrow: next pair
            idx = min(idx + 1, len(frames) - 2)
    cv2.destroyAllWindows()
    return flows

## Lucas-Kanade

In [11]:
def draw_sum_vector_lk(good_new, good_old, frame, scale=0.1):
    """Draw the summed Lucas-Kanade displacement as a red arrow from the centre.

    Args:
        good_new: Array of tracked point positions, indexed ``[:, 0]`` for x
            and ``[:, 1]`` for y.
        good_old: Array of the matching original positions, same layout.
        frame: Image to annotate; it is copied, not modified.
        scale: Factor applied to the summed displacement before drawing. The
            default keeps the previously hard-coded value.

    Returns:
        Copy of ``frame`` with the arrow drawn on it.
    """
    h, w = frame.shape[:2]

    # Sum of all per-point displacements, per component
    sum_fx = np.sum(good_new[:, 0] - good_old[:, 0])
    sum_fy = np.sum(good_new[:, 1] - good_old[:, 1])

    center_x, center_y = w // 2, h // 2

    # The guard keeps a zero (or NaN) sum from reaching int(); the arrow then
    # collapses onto the centre point.
    if np.sqrt(sum_fx**2 + sum_fy**2) > 0:
        end_x = int(center_x + sum_fx * scale)
        end_y = int(center_y + sum_fy * scale)
    else:
        end_x, end_y = center_x, center_y

    frame_with_vector = np.copy(frame)
    cv2.arrowedLine(frame_with_vector, (center_x, center_y), (end_x, end_y), (0, 0, 255), 2, tipLength=0.2)

    return frame_with_vector

def OFLK(frames, step_size):
    """Browse consecutive frame pairs, showing Lucas-Kanade flow on a grid.

    A regular grid of points (spacing ``step_size``) is tracked from
    ``frames[i]`` to ``frames[i + 1]``. Every successfully tracked point is
    drawn as a fixed-length green arrow showing its direction on a 50/50
    blend of the two frames, and the summed displacement is added as a red
    arrow.

    Keys: 'q' quits, left arrow moves to the previous pair, right arrow to
    the next pair (clamped so that ``i + 1`` stays a valid index).

    Args:
        frames: List of BGR frames (at least two).
        step_size: Grid spacing; also used as the tracker's window size.

    Returns:
        List of the annotated blended frames (without the sum arrow), one
        per displayed step, in viewing order.
    """
    OF = []
    lk_params = {
        'winSize': (step_size, step_size),
        'maxLevel': 10,
        'criteria': (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01),
    }

    # Grid of (x, y) points to track, in the (N, 1, 2) float32 layout used by the tracker
    rows, cols = frames[0].shape[0], frames[0].shape[1]
    grid_y, grid_x = np.mgrid[0:rows:step_size, 0:cols:step_size]
    p0 = np.vstack((grid_x.ravel(), grid_y.ravel())).T.astype(np.float32).reshape(-1, 1, 2)

    arrow_len = 10  # Length in pixels of every drawn direction arrow
    idx = 0
    while True:
        nxt = idx + 1
        old_gray = cv2.cvtColor(np.copy(frames[idx]), cv2.COLOR_BGR2GRAY)
        frame_gray = cv2.cvtColor(np.copy(frames[nxt]), cv2.COLOR_BGR2GRAY)

        p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)

        # Keep only the points whose status flag is 1
        tracked = st == 1
        good_new = p1[tracked]
        good_old = p0[tracked]

        canvas = cv2.addWeighted(frames[idx], 0.5, frames[nxt], 0.5, 0)
        for new_pt, old_pt in zip(good_new, good_old):
            a, b = new_pt.ravel()
            c, d = old_pt.ravel()
            dx, dy = a - c, b - d
            magnitude = np.sqrt(dx**2 + dy**2)
            if magnitude > 0:
                # Unit direction; stationary points keep a zero vector
                dx, dy = dx / magnitude, dy / magnitude
            tip = (int(c + dx * arrow_len), int(d + dy * arrow_len))
            canvas = cv2.arrowedLine(canvas, (int(c), int(d)), tip, (0, 255, 0), 1, tipLength=0.3)

        with_sum = draw_sum_vector_lk(good_new, good_old, canvas)

        OF.append(canvas)
        cv2.imshow('Prev', frames[idx])
        cv2.imshow('Next', frames[nxt])
        cv2.imshow('Sum Vector', with_sum)

        pressed = cv2.waitKeyEx(0)
        if pressed == ord('q'):
            break
        if pressed == 2424832:      # Left arrow: previous pair
            idx = max(idx - 1, 0)
        elif pressed == 2555904:    # Right arrow: next pair
            idx = min(idx + 1, len(frames) - 2)

    cv2.destroyAllWindows()
    return OF


## Phase Correlation

In [12]:
def draw_sum_vector_phase_c(sum_dx, sum_dy, frame):
    """Draw a summed shift as a red arrow of fixed length from the frame centre.

    The arrow is rescaled so that its length equals half of the smaller
    frame dimension; only its direction depends on the input. A zero-length
    sum collapses the arrow onto the centre point.

    Args:
        sum_dx: Summed horizontal shift.
        sum_dy: Summed vertical shift.
        frame: Image to annotate; it is copied, not modified.

    Returns:
        Copy of ``frame`` with the arrow drawn on it.
    """
    rows, cols = frame.shape[:2]
    origin = (cols // 2, rows // 2)
    half_extent = min(cols, rows) // 2

    norm = np.sqrt(sum_dx**2 + sum_dy**2)
    tip = origin
    if norm > 0:
        factor = half_extent / norm
        tip = (int(origin[0] + sum_dx * factor), int(origin[1] + sum_dy * factor))

    annotated = np.copy(frame)
    cv2.arrowedLine(annotated, origin, tip, (0, 0, 255), 2, tipLength=0.2)
    return annotated

def PhaseC(frames, block_size=20, grid_step=20):
    """Browse consecutive frame pairs, showing block-wise phase-correlation shifts.

    Both frames are split into ``block_size`` squares on a grid with spacing
    ``grid_step``. For each block the shift between the two frames is
    estimated with ``cv2.phaseCorrelate`` and drawn as a green arrow on a
    50/50 blend of the frames; the sum of all shifts is added as a red arrow.

    Keys: 'q' quits, left arrow moves to the previous pair, right arrow to
    the next pair (clamped so that ``i + 1`` stays a valid index).

    Args:
        frames: List of BGR frames (at least two).
        block_size: Side length of each correlated block in pixels.
        grid_step: Distance between block origins in pixels.

    Returns:
        List with one image per displayed step. NOTE(review): the stored
        image is the grayscale previous frame converted back to BGR, without
        any arrows — confirm this is intended.
    """
    OF = []
    half = block_size // 2
    scale = 1          # Arrow length multiplier
    tip_length = 0.2   # Arrow tip size relative to the arrow length
    idx = 0
    while True:
        nxt = idx + 1
        prev_frame = np.copy(frames[idx])
        next_frame = np.copy(frames[nxt])

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        next_gray = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY)

        flow_img = cv2.cvtColor(prev_gray, cv2.COLOR_GRAY2BGR)
        canvas = cv2.addWeighted(frames[idx], 0.5, frames[nxt], 0.5, 0)

        sum_dx = sum_dy = 0
        n_rows, n_cols = prev_gray.shape[0], prev_gray.shape[1]
        for y in range(0, n_rows - block_size, grid_step):
            for x in range(0, n_cols - block_size, grid_step):
                window = (slice(y, y + block_size), slice(x, x + block_size))
                shift, _ = cv2.phaseCorrelate(prev_gray[window].astype(np.float32),
                                              next_gray[window].astype(np.float32))
                dx, dy = shift
                sum_dx += dx
                sum_dy += dy

                # Arrow from the block centre to the shifted centre
                start = (x + half, y + half)
                end = (int(x + half + dx * scale), int(y + half + dy * scale))
                cv2.arrowedLine(canvas, start, end, (0, 255, 0), 1, tipLength=tip_length)

        with_sum = draw_sum_vector_phase_c(sum_dx, sum_dy, canvas)

        OF.append(flow_img)

        cv2.imshow('Previous Frame', prev_frame)
        cv2.imshow('Next Frame', next_frame)
        cv2.imshow('Sum Vector', with_sum)

        pressed = cv2.waitKeyEx(0)
        if pressed == ord('q'):
            break
        if pressed == 2424832:      # Left arrow: previous pair
            idx = max(idx - 1, 0)
        elif pressed == 2555904:    # Right arrow: next pair
            idx = min(idx + 1, len(frames) - 2)

    cv2.destroyAllWindows()
    return OF

# Frames Differences

In [13]:
def frame_dif(frames, threshold=30):
    """Browse consecutive frame pairs, showing where they differ most.

    For the pair ``(frames[i], frames[i + 1])`` the absolute grayscale
    difference is thresholded into a mask (255 where the difference exceeds
    ``threshold``, 0 elsewhere) and shown next to both frames.

    Keys: 'q' quits, left arrow moves to the previous pair, right arrow to
    the next pair (clamped so that ``i + 1`` stays a valid index).

    Args:
        frames: List of BGR frames (at least two).
        threshold: Minimum grayscale difference counted as a change.

    Returns:
        List with one mask per displayed step, in viewing order.
    """
    D = []
    idx = 0
    while True:
        nxt = idx + 1
        gray_a = cv2.cvtColor(np.copy(frames[idx]), cv2.COLOR_BGR2GRAY)
        gray_b = cv2.cvtColor(np.copy(frames[nxt]), cv2.COLOR_BGR2GRAY)
        delta = cv2.absdiff(gray_a, gray_b)

        # Binary mask of the pixels that changed by more than the threshold
        mask = np.where(delta > threshold, 255, 0).astype(delta.dtype)
        D.append(mask)

        cv2.imshow('Prev Frame', frames[idx])
        cv2.imshow('Next Frame', frames[nxt])
        cv2.imshow('Pixels with Most Changes', mask)

        pressed = cv2.waitKeyEx(0)
        if pressed == ord('q'):
            break
        if pressed == 2424832:      # Left arrow: previous pair
            idx = max(idx - 1, 0)
        elif pressed == 2555904:    # Right arrow: next pair
            idx = min(idx + 1, len(frames) - 2)

    cv2.destroyAllWindows()
    return D

# Change Color Channel's Ranges

In [14]:
def change_range_colors(image, min_vals=(0, 0, 0), max_vals=(255, 255, 255)):
    """Clip the first three channels of an image to per-channel ranges.

    Channel ``c`` (for ``c`` in 0..2, i.e. B, G, R for OpenCV images) is
    clipped to ``[min_vals[c], max_vals[c]]``. Any further channel, such as
    the alpha channel that ``read_images`` may keep, is copied unchanged.

    Args:
        image: Array of shape ``(H, W, C)`` with ``C >= 3``; not modified.
        min_vals: Lower bound for each of the first three channels.
        max_vals: Upper bound for each of the first three channels.

    Returns:
        New array with the same shape and dtype as ``image``.

    Raises:
        ValueError: If ``image`` does not have at least three channels.
    """
    if image.ndim != 3 or image.shape[2] < 3:
        raise ValueError("image must have shape (H, W, C) with at least 3 channels")

    new_image = image.copy()
    for channel in range(3):
        # Assigning into the copy keeps the original dtype for every channel
        new_image[..., channel] = np.clip(image[..., channel], min_vals[channel], max_vals[channel])
    return new_image

# Add Occlusions

In [15]:
def occlusions(image, num_occlusions=1, loc=(), sizes=(), shapes=('rectangle', 'circle'), colors=(-1, -1, -1)):
    """Draw filled rectangles and/or circles on a copy of an image.

    Args:
        image: Image to occlude; it is copied, not modified.
        num_occlusions: Number of shapes to draw. Ignored when ``loc`` is
            given, in which case one shape is drawn per location.
        loc: Optional sequence of ``(x, y)`` positions: the top-left corner
            for rectangles, the centre for circles. Random when empty.
        sizes: Optional sequence of ``(width, height)`` pairs, indexed like
            the shapes, used for rectangles. Random when empty.
        shapes: Shape names to pick from at random for every occlusion.
        colors: Fill colour for all shapes; the default ``(-1, -1, -1)``
            means a random colour per shape.

    Returns:
        Copy of ``image`` with the occlusions drawn on it.
    """
    output_image = np.copy(image)

    height, width = image.shape[:2]
    random_loc = len(loc) == 0
    random_size = len(sizes) == 0
    if not random_loc:
        num_occlusions = len(loc)
    circle_radius = 50  # Circles use a fixed radius

    for i in range(num_occlusions):
        shape_type = random.choice(shapes)
        if colors == (-1, -1, -1):
            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        else:
            color = colors

        if shape_type == 'rectangle':
            x = random.randint(0, width - 1) if random_loc else loc[i][0]
            y = random.randint(0, height - 1) if random_loc else loc[i][1]
            # The rectangle size comes from `sizes` (the position list was
            # read here by mistake before).
            width_rect = random.randint(5, width // 2) if random_size else sizes[i][0]
            height_rect = random.randint(5, height // 2) if random_size else sizes[i][1]
            cv2.rectangle(output_image, (x, y), (x + width_rect, y + height_rect), color, -1)

        elif shape_type == 'circle':
            if random_loc:
                center = (random.randint(0, width - 1), random.randint(0, height - 1))
            else:
                center = (loc[i][0], loc[i][1])
            cv2.circle(output_image, center, circle_radius, color, -1)

    return output_image

# Read Images in a Directory

In [17]:
def read_images(directory_path):
    """Load every ``.png`` file (case-insensitive) of a directory.

    Files are read in sorted file-name order so that zero-padded sequences
    such as ``frame_000.png``, ``frame_001.png`` come back in order; the
    order of ``os.listdir`` itself is arbitrary. Files that cannot be decoded
    are reported and skipped.

    Args:
        directory_path: Directory to scan (not recursive).

    Returns:
        List of images read with ``cv2.IMREAD_UNCHANGED``, which keeps the
        alpha channel if present.
    """
    images = []
    for filename in sorted(os.listdir(directory_path)):
        if not filename.lower().endswith('.png'):
            continue
        file_path = os.path.join(directory_path, filename)
        image = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
        if image is None:
            print(f"Failed to read image: {file_path}")
            continue
        images.append(image)
    return images

# Draw Random Lines

In [18]:
def rnd_lines(image, num_lines):
    """Draw randomly placed, coloured and sized lines on a copy of an image.

    Each line starts at a random pixel, points in a random direction, is at
    most half of the smaller image dimension long, and has its end point
    clamped to the image bounds. Thickness is random between 1 and 10.

    Args:
        image: Image to draw on; it is copied, not modified.
        num_lines: Number of lines to draw.

    Returns:
        Copy of ``image`` with the lines drawn on it.
    """
    rows, cols = image.shape[:2]
    canvas = image.copy()

    for _ in range(num_lines):
        x0 = random.randint(0, cols - 1)
        y0 = random.randint(0, rows - 1)

        theta = random.uniform(0, 2 * np.pi)
        reach = random.randint(1, min(cols, rows) // 2)

        # End point from the polar offset, clamped to the image bounds
        x1 = int(x0 + reach * np.cos(theta))
        y1 = int(y0 + reach * np.sin(theta))
        x1 = max(0, min(x1, cols - 1))
        y1 = max(0, min(y1, rows - 1))

        bgr = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        stroke = random.randint(1, 10)

        cv2.line(canvas, (x0, y0), (x1, y1), bgr, stroke)

    return canvas

# Random Region's Color Change

In [19]:
def rnd_regions(image, num_regions):
    """Clip the colours inside random rectangular or elliptical regions.

    For every region a shape is picked at random. Then, for each of the
    first three channels, a random ``[low, high]`` range is drawn and the
    pixels inside the region are clipped to it.

    Args:
        image: Image with at least three channels; it is copied, not
            modified. Width and height must be at least 30 pixels so that
            the random size ranges are non-empty.
        num_regions: Number of regions to alter.

    Returns:
        Copy of ``image`` with the regions altered.
    """
    height, width = image.shape[:2]
    output_image = image.copy()

    for _ in range(num_regions):
        region_shape = random.choice(['rectangle', 'ellipse'])
        is_rectangle = region_shape == 'rectangle'

        if is_rectangle:
            region_width = random.randint(10, width // 3)
            region_height = random.randint(10, height // 3)
            top_left_x = random.randint(0, width - region_width)
            top_left_y = random.randint(0, height - region_height)
            # A slice is a view: writing to it edits output_image directly,
            # so no write-back is needed afterwards.
            region = output_image[top_left_y:top_left_y + region_height,
                                  top_left_x:top_left_x + region_width]
        else:
            center_x = random.randint(width // 3, width - width // 3)
            center_y = random.randint(height // 3, height - height // 3)
            axis_length = (random.randint(10, width // 3), random.randint(10, height // 3))
            angle = random.randint(0, 360)

            # Filled, rotated ellipse rendered into a mask; only the boolean
            # selector is needed below.
            mask = np.zeros((height, width), dtype=np.uint8)
            cv2.ellipse(mask, (center_x, center_y), axis_length, angle, 0, 360, 255, -1)
            inside = mask == 255

        for channel in range(3):  # Assuming BGR format
            low = random.randint(0, 255)
            high = random.randint(low, 255)
            if is_rectangle:
                region[..., channel] = np.clip(region[..., channel], low, high)
            else:
                plane = output_image[..., channel]  # View of one channel
                plane[inside] = np.clip(plane[inside], low, high)

    return output_image

In [20]:
def cartoonize_image(img, k=50, it=5, t1=150, t2=255, ks=1):
    """Cartoonize an image: k-means colour quantisation masked by Canny edges.

    Args:
        img: 3-channel colour image.
        k: Number of colour clusters.
        it: Maximum number of k-means iterations.
        t1: First Canny threshold.
        t2: Second Canny threshold.
        ks: Side length of the square kernel used to thicken the edges.

    Returns:
        Quantised image in which edge pixels are black.
    """
    # Edge-preserving smoothing; colours are quantised from this image
    smoothed = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

    # Edge layer, computed from the unsmoothed input.
    # NOTE(review): RGB2GRAY is used although the rest of the file treats
    # frames as BGR — confirm the intended channel order.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (1, 1), 0)
    outline = cv2.Canny(blurred, threshold1=t1, threshold2=t2)
    outline = cv2.dilate(outline, np.ones((ks, ks), np.uint8), iterations=1)
    # Inverted so that edges are 0 and everything else is 255, then expanded
    # to three channels so it can mask the colour image
    outline_mask = cv2.cvtColor(cv2.bitwise_not(outline), cv2.COLOR_GRAY2RGB)

    # K-means over all pixels, 10 attempts with random initial centres
    samples = np.float32(smoothed).reshape((-1, 3))
    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, it, 0.2)
    _, labels, palette = cv2.kmeans(samples, k, None, stop, 10, cv2.KMEANS_RANDOM_CENTERS)

    # Replace every pixel by its cluster centre
    quantized = np.uint8(palette)[labels.flatten()].reshape(smoothed.shape)

    return cv2.bitwise_and(quantized, outline_mask)

In [21]:
def cartoonize_image1(img, k=8, it=10):
    """Cartoonize an image, detecting the edges on the quantised colours.

    Unlike ``cartoonize_image``, the Canny edges are computed from the
    k-means result rather than from the input image.

    Args:
        img: 3-channel colour image.
        k: Number of colour clusters.
        it: Maximum number of k-means iterations.

    Returns:
        Quantised image in which edge pixels are black.
    """
    smoothed = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

    # Colour quantisation: k-means over all pixels, 10 random restarts
    samples = np.float32(smoothed).reshape((-1, 3))
    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, it, 0.2)
    _, labels, palette = cv2.kmeans(samples, k, None, stop, 10, cv2.KMEANS_RANDOM_CENTERS)
    quantized = np.uint8(palette)[labels.flatten()].reshape(smoothed.shape)

    # Edges of the quantised image: grayscale -> blur -> Canny -> dilate
    gray = cv2.cvtColor(quantized, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    outline = cv2.Canny(blurred, threshold1=0, threshold2=255)
    outline = cv2.dilate(outline, np.ones((3, 3), np.uint8), iterations=1)

    # Inverted (edges = 0) three-channel mask
    outline_mask = cv2.cvtColor(cv2.bitwise_not(outline), cv2.COLOR_GRAY2RGB)

    return cv2.bitwise_and(quantized, outline_mask)

In [22]:
def cartoonize_image2(img):
    """Cartoonize an image with adaptive-threshold edges and bilateral smoothing.

    Args:
        img: BGR colour image.

    Returns:
        Smoothed image in which pixels outside the edge mask are black.
    """
    # Median-blurred grayscale copy used only for edge detection
    gray = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 5)

    # Binary edge mask from a mean adaptive threshold (block size 9, C = 9)
    edge_mask = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)

    # Strong edge-preserving smoothing of the colours
    smooth = cv2.bilateralFilter(img, 9, 300, 300)

    # Keep the smoothed colours only where the mask is set
    return cv2.bitwise_and(smooth, smooth, mask=edge_mask)



In [23]:
def cartoonize_image3(img, k=8, it=10):
    """Cartoonize an image using a Gaussian mixture model for the colours.

    Args:
        img: 3-channel colour image.
        k: Number of mixture components (colour clusters).
        it: Maximum number of fitting iterations.

    Returns:
        Image whose pixels are replaced by their component mean, with edge
        pixels set to black.
    """
    smoothed = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

    # Edge layer from the unsmoothed input: grayscale -> blur -> Canny -> dilate
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (1, 1), 0)
    outline = cv2.Canny(blurred, threshold1=200, threshold2=200)
    outline = cv2.dilate(outline, np.ones((3, 3), np.uint8), iterations=1)
    # Inverted (edges = 0) three-channel mask
    outline_mask = cv2.cvtColor(cv2.bitwise_not(outline), cv2.COLOR_GRAY2RGB)

    # Fit the mixture on all pixels; the fixed seed makes the fit repeatable
    samples = np.float32(smoothed).reshape((-1, 3))
    mixture = GaussianMixture(n_components=k, max_iter=it, random_state=0)
    assignment = mixture.fit_predict(samples)

    # Replace every pixel by the mean of its component
    palette = np.uint8(mixture.means_)
    quantized = palette[assignment].reshape(smoothed.shape)

    return cv2.bitwise_and(quantized, outline_mask)


In [24]:
def cartoonize_image4(img, bandwidth=None):
    """Cartoonize an image using Mean Shift clustering for the colours.

    Args:
        img: 3-channel colour image.
        bandwidth: Mean Shift bandwidth; when ``None`` it is estimated from
            the pixel data (quantile 0.2, 500 samples).

    Returns:
        Image whose pixels are replaced by their cluster centre, with edge
        pixels set to black.
    """
    smoothed = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

    # Edge layer from the unsmoothed input: grayscale -> blur -> Canny -> dilate
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (1, 1), 0)
    outline = cv2.Canny(blurred, threshold1=200, threshold2=200)
    outline = cv2.dilate(outline, np.ones((3, 3), np.uint8), iterations=1)
    # Inverted (edges = 0) three-channel mask
    outline_mask = cv2.cvtColor(cv2.bitwise_not(outline), cv2.COLOR_GRAY2RGB)

    samples = np.float32(smoothed.reshape(-1, 3))

    if bandwidth is None:
        bandwidth = estimate_bandwidth(samples, quantile=0.2, n_samples=500)

    clusterer = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    clusterer.fit(samples)

    # Replace every pixel by the centre of its cluster
    palette = np.uint8(clusterer.cluster_centers_)
    quantized = palette[clusterer.labels_].reshape(smoothed.shape)

    return cv2.bitwise_and(quantized, outline_mask)



In [25]:
def cartoonize_image5(img, eps=30, min_samples=100):
    """Cartoonize an image using DBSCAN clustering for the colours.

    Every pixel that DBSCAN assigns to a cluster is replaced by the mean
    colour of that cluster; pixels labelled as noise (-1) become black.
    Edges are then set to black as well.

    Args:
        img: 3-channel colour image.
        eps: DBSCAN neighbourhood radius in colour space.
        min_samples: Minimum number of neighbours for a core point.

    Returns:
        Clustered image with black edge pixels.
    """
    # Edge-preserving smoothing; colours are clustered from this image
    img_color = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

    # Edge layer from the unsmoothed input: grayscale -> blur -> Canny -> dilate
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (1, 1), 0)
    edges = cv2.Canny(img_blur, threshold1=200, threshold2=200)
    edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
    # Inverted (edges = 0) three-channel mask
    edges_colored = cv2.cvtColor(cv2.bitwise_not(edges), cv2.COLOR_GRAY2RGB)

    # One row per pixel
    img_data = np.float32(img_color.reshape(-1, 3))
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(img_data)

    # Paint each cluster with its mean colour. The work is done on the flat
    # (pixels, 3) layout because `labels` has one entry per pixel; the result
    # is reshaped to the image afterwards. Clusters are addressed by their
    # label value, so the presence of the noise label does not shift indices.
    flat_clustered = np.zeros(img_data.shape, dtype=np.uint8)
    for label in np.unique(labels):
        if label == -1:
            continue  # Noise points stay black
        members = labels == label
        flat_clustered[members] = np.mean(img_data[members], axis=0)
    img_clustered = flat_clustered.reshape(img_color.shape)

    # Combine edge and clustered image
    return cv2.bitwise_and(img_clustered, edges_colored)
