In [2]:
%pip install opencv-python numpy scipy scikit-image 

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


## Import Dependencies

In [1]:
import cv2
import numpy as np
import time
from scipy.ndimage import label, find_objects, median_filter, shift, distance_transform_edt
from scipy.sparse import csr_matrix, diags
from scipy.sparse.linalg import spsolve
from skimage import morphology, measure
from skimage.segmentation import slic
from skimage.color import rgb2gray
from tkinter import filedialog, Tk, simpledialog, messagebox

print(cv2.__version__)

4.10.0


## Choose Video and Frame

In [3]:
root = Tk()
root.withdraw()
root.attributes("-topmost", True)

def browse_video():
    root = Tk()
    root.withdraw()
    root.attributes("-topmost", True)
    filename = filedialog.askopenfilename()
    root.destroy()
    return filename

# Choose Frame
def browse_frame(cap, total_frames):
    root = Tk()
    root.withdraw()
    root.attributes("-topmost", True)
    frame_number = simpledialog.askinteger("Choose Frame", f"Enter frame number (1 to {total_frames}):")
    root.destroy()
    if frame_number is not None and 1 <= frame_number <= total_frames:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number - 1)
        ret, frame = cap.read()
        if not ret:
            print("Error: Unable to read frame.")
            return None
        return frame
    else:
        messagebox.showwarning("Invalid Frame", "Please enter a valid frame number.")
        return None
    
# Input RGB Video
RGB_video_path = browse_video()
if RGB_video_path:
    cap_rgb = cv2.VideoCapture(RGB_video_path)
    if not cap_rgb.isOpened():
        print("Error: Unable to open RGB video.")
    else:
        total_frames_rgb = int(cap_rgb.get(cv2.CAP_PROP_FRAME_COUNT))
        print("RGB Video selected:", RGB_video_path)
        rgb_frame = browse_frame(cap_rgb, total_frames_rgb)  # Choose frame from video RGB
        if rgb_frame is not None:
            cv2.imshow('Original RGB Frame', rgb_frame)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
            
# Input Depth Video
Depth_video_path = browse_video()
if Depth_video_path:
    cap_depth = cv2.VideoCapture(Depth_video_path)
    if not cap_depth.isOpened():
        print("Error: Unable to open Depth video.")
    else:
        total_frames_depth = int(cap_depth.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Depth Video selected:", Depth_video_path)
        depth_frame = browse_frame(cap_depth, total_frames_depth)  # Choose frame from video Depth
        if depth_frame is not None:
            cv2.imshow('Original Depth Frame', depth_frame)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

RGB Video selected: C:/BRIN/BRIN/Dataset/Sample 1/Sample 1/7_RGB.avi
Depth Video selected: C:/BRIN/BRIN/Dataset/Sample 1/Sample 1/7_CALDP.avi


## Pre-segmentation of Human Region Using Depth Analysis

In [4]:
# Depth Processing
def depth_processing(depth_frame):
    if depth_frame is None:
        print("Error: Depth frame is None.")
        return None
    
    # Thresholding operations
    lower_threshold = 45
    upper_threshold = 85
    processed_depth = np.zeros_like(depth_frame)
    processed_depth[(depth_frame > lower_threshold) & (depth_frame < upper_threshold)] = 255
    return processed_depth

processed_depth = depth_processing(depth_frame)

if processed_depth is not None:
    cv2.imshow('Depth Processing', processed_depth)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Error in depth processing.")

In [5]:
# RGB processing
def depth_processing(depth_frame):
    if depth_frame is None:
        print("Error: Depth frame is None.")
        return None

    depth_values = depth_frame.ravel()
    depth_values = depth_values[depth_values != 0]
    lower_threshold = np.percentile(depth_values, 5)
    upper_threshold = np.percentile(depth_values, 68)

    depth_enh = np.zeros_like(depth_frame, dtype=np.uint8)
    depth_enh[(depth_frame > lower_threshold) & (depth_frame < upper_threshold)] = 255

    # Morphological operations to refine the mask
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    depth_enh = cv2.erode(depth_enh, kernel, iterations=2)
    depth_enh = cv2.dilate(depth_enh, kernel, iterations=2)

    return depth_enh

def rgb_processing(rgb_frame, depth_enh):
    if rgb_frame is None or depth_enh is None:
        print("Error: RGB frame or depth enhancement is None.")
        return None

    human_region_mask = depth_enh.astype(bool)
    human_region = rgb_frame.copy()
    human_region[~human_region_mask] = 0

    return human_region

processed_depth = depth_processing(depth_frame)
human_region = rgb_processing(rgb_frame, processed_depth)

if human_region is not None:
    cv2.imshow('Human Region', human_region)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Error in processing.")

## Enhancing the Pre-segmented region by Restoring The Hair Region

In [6]:
def detect_faces(image):
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    return faces

# Function for hair region restoration using GrabCut
def restore_hair_region(rgb_frame, human_region):
    faces = detect_faces(human_region)
    if len(faces) > 0:
        x, y, w, h = faces[0]

        x1 = max(0, x - int(0.2 * w))
        y1 = max(0, y - int(0.2 * h))
        x2 = min(rgb_frame.shape[1], x + int(1.2 * w))
        y2 = min(rgb_frame.shape[0], y + int(1.2 * h))

        mask = np.zeros((rgb_frame.shape[0], rgb_frame.shape[1]), dtype=np.uint8)
        rect = (x1, y1, x2 - x1, y2 - y1)
        bgdModel = np.zeros((1, 65), np.float64)
        fgdModel = np.zeros((1, 65), np.float64)
        cv2.grabCut(rgb_frame, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

        hair_mask = np.where((mask == 1) + (mask == 3), 255, 0).astype('uint8')

        enhanced_region = np.zeros_like(rgb_frame)
        enhanced_region[human_region > 0] = rgb_frame[human_region > 0]
        enhanced_region[hair_mask > 0] = rgb_frame[hair_mask > 0]

        return enhanced_region
    else:
        return human_region

processed_depth = depth_processing(depth_frame)
human_region = rgb_processing(rgb_frame, processed_depth)
enhanced_region = restore_hair_region(rgb_frame, human_region)

# Display result
if enhanced_region is not None:
    cv2.imshow('Enhanced Human Region', enhanced_region)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Error in processing.")

## Grow-cut

In [7]:
def detect_faces(image):
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    return faces

def get_hair_region(x, y, w, h, image_shape):
    # Define the hair area above the face
    x1 = x
    y1 = max(0, y - int(0.5 * h))
    x2 = x + w
    y2 = y
    return x1, y1, x2, y2

def downsample_image(image, scale):
    width = int(image.shape[1] * scale)
    height = int(image.shape[0] * scale)
    return cv2.resize(image, (width, height))

def growcut(image, labels, max_iterations=100):
    height, width = labels.shape
    strength = np.zeros((height, width), dtype=np.float32)
    strength[labels != 0] = 1.0

    for iteration in range(max_iterations):
        old_labels = labels.copy()
        for y in range(1, height-1):
            for x in range(1, width-1):
                if labels[y, x] == 0:
                    continue
                neighborhood = [(y-1, x), (y+1, x), (y, x-1), (y, x+1), (y-1, x-1), (y-1, x+1), (y+1, x-1), (y+1, x+1)]
                for ny, nx in neighborhood:
                    if ny < 0 or ny >= height or nx < 0 or nx >= width:
                        continue
                    if labels[ny, nx] == 0:
                        continue
                    dist = np.linalg.norm(image[y, x] - image[ny, nx])
                    strength_factor = 1 - dist / 255.0
                    if strength_factor * strength[ny, nx] > strength[y, x]:
                        strength[y, x] = strength_factor * strength[ny, nx]
                        labels[y, x] = labels[ny, nx]
        if np.array_equal(labels, old_labels):
            break

    return labels

def growcut_segmentation(enhanced_region, processed_depth):
    gray_depth = cv2.cvtColor(processed_depth, cv2.COLOR_BGR2GRAY)
    _, depth_mask = cv2.threshold(gray_depth, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    faces = detect_faces(enhanced_region)  
    
    hair_mask = np.zeros_like(gray_depth, dtype=np.uint8)
    for x, y, w, h in faces:
        x1, y1, x2, y2 = get_hair_region(x, y, w, h, enhanced_region.shape)
        cv2.rectangle(hair_mask, (x1, y1), (x2, y2), 255, -1)

    fg_mask_init = cv2.bitwise_or(depth_mask, hair_mask)

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    fg_mask = cv2.morphologyEx(fg_mask_init, cv2.MORPH_CLOSE, kernel, iterations=3)
    fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_DILATE, kernel, iterations=2)

    height, width = gray_depth.shape
    labels = np.zeros((height, width), dtype=np.int32)
    labels[fg_mask == 255] = 1
    labels[depth_mask == 0] = -1

    small_region = downsample_image(enhanced_region, 0.5)
    small_depth = downsample_image(processed_depth, 0.5)
    small_labels = cv2.resize(labels, (labels.shape[1] // 2, labels.shape[0] // 2), interpolation=cv2.INTER_NEAREST)

    print("Performing GrowCut segmentation...")
    labels_out = growcut(small_region, small_labels)
    labels_out = cv2.medianBlur(labels_out.astype(np.float32), 3)
    labels_out = cv2.resize(labels_out, (labels.shape[1], labels.shape[0]), interpolation=cv2.INTER_NEAREST)
    grow_cut_rgb = np.repeat(labels_out[:, :, np.newaxis], 3, axis=2).astype(np.uint8) * enhanced_region
    
    # Obtain mask for hair
    hair_mask = labels_out.copy()
    hair_mask[hair_mask != 1] = 0  # Set all values other than 1 to 0

    # Obtain mask for the body (excluding hair)
    body_mask = labels_out.copy()
    body_mask[body_mask == 1] = 0  # Set all values of 1 to 0
    body_mask[body_mask == -1] = 1  # Set all values of -1 to 1

    # Combine GrowCut segmentation result with original color for hair
    grow_cut_rgb = np.zeros_like(enhanced_region)
    grow_cut_rgb[body_mask.astype(bool)] = enhanced_region[body_mask.astype(bool)]
    grow_cut_rgb[hair_mask.astype(bool)] = enhanced_region[hair_mask.astype(bool)]

    return labels_out, grow_cut_rgb

start_time = time.time()
labels_out, grow_cut_rgb = growcut_segmentation(enhanced_region, processed_depth)
end_time = time.time()

print(f"GrowCut Segmentation Time: {end_time - start_time} seconds")

# Tampilkan hasil segmentasi
cv2.imshow("The result of GrowCut segmentation", grow_cut_rgb)
cv2.waitKey(0)
cv2.destroyAllWindows()

Performing GrowCut segmentation...
GrowCut Segmentation Time: 16.667538166046143 seconds


## Trimap Generation

In [8]:
def generate_trimap(labels, kernel_size=5, erosion_iterations=1, dilation_iterations=2):
    """
    Generate a trimap from the segmentation labels.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    
    # Define the foreground and background
    foreground = (labels == 1).astype(np.uint8)
    background = (labels == -1).astype(np.uint8)

    # Erode the foreground to get the definite foreground
    eroded_foreground = cv2.erode(foreground, kernel, iterations=erosion_iterations)

    # Dilate the foreground to get the uncertain region
    dilated_foreground = cv2.dilate(foreground, kernel, iterations=dilation_iterations)

    # Initialize the trimap with background (0)
    trimap = np.zeros_like(labels, dtype=np.uint8)
    
    # Assign definite foreground (255)
    trimap[eroded_foreground == 1] = 255
    
    # Assign uncertain region (128)
    trimap[(dilated_foreground == 1) & (eroded_foreground == 0)] = 128
    
    return trimap

start_time = time.time()
labels_out, grow_cut_rgb = growcut_segmentation(enhanced_region, processed_depth)
trimap = generate_trimap(labels_out)
end_time = time.time()

print(f"Trimap Generation Time: {end_time - start_time} seconds")

# Display segmentation result and trimap
cv2.imshow("Trimap", trimap)
cv2.waitKey(0)
cv2.destroyAllWindows()

Performing GrowCut segmentation...
Trimap Generation Time: 10.726121425628662 seconds


In [9]:
def generate_new_trimap(alpha, erosion_size=3, dilation_size=3):
    kernel_erosion = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (erosion_size, erosion_size))
    kernel_dilation = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilation_size, dilation_size))

    foreground = (alpha > 0.8).astype(np.uint8)
    background = (alpha < 0.2).astype(np.uint8)

    eroded_foreground = cv2.erode(foreground, kernel_erosion)
    dilated_background = cv2.dilate(background, kernel_dilation)

    new_trimap = np.full(alpha.shape, 128, dtype=np.uint8)
    new_trimap[eroded_foreground == 1] = 255
    new_trimap[dilated_background == 1] = 0

    return new_trimap

## Closed Form Matting

In [10]:
def closed_form_matting(image, trimap, epsilon=1e-7, win_size=1):
    # Convert image to float and normalize
    image = image.astype(np.float64) / 255.0
    
    # Get trimap regions
    is_fg = (trimap == 255)
    is_bg = (trimap == 0)
    is_unknown = (trimap == 128)

    # Compute Matting Laplacian
    h, w = trimap.shape
    win_radius = win_size // 2
    win_area = (win_size * 2 + 1) ** 2
    
    indices = np.arange(h * w).reshape(h, w)
    L_data, L_rows, L_cols = [], [], []

    for y in range(h):
        for x in range(w):
            if is_unknown[y, x]:
                window = image[max(0, y-win_radius):min(h, y+win_radius+1),
                               max(0, x-win_radius):min(w, x+win_radius+1)]
                win_indices = indices[max(0, y-win_radius):min(h, y+win_radius+1),
                                      max(0, x-win_radius):min(w, x+win_radius+1)]
                
                mean = np.mean(window, axis=(0, 1))
                cov = np.cov(window.reshape(-1, 3).T)
                
                inv_cov = np.linalg.inv(cov + (epsilon / win_area) * np.eye(3))
                
                for wy in range(window.shape[0]):
                    for wx in range(window.shape[1]):
                        w_idx = win_indices[wy, wx]
                        if w_idx != indices[y, x]:
                            diff = window[wy, wx] - mean
                            L_data.append(1 + diff.dot(inv_cov).dot(diff))
                            L_rows.append(indices[y, x])
                            L_cols.append(w_idx)
    
    L = csr_matrix((L_data, (L_rows, L_cols)), shape=(h*w, h*w))
    L = L + L.T - diags(L.sum(axis=1).A.ravel())
    
    # Solve for alpha
    b = np.zeros(h * w)
    b[is_fg.ravel()] = 1
    b[is_bg.ravel()] = 0

    x = spsolve(L + diags([1e-5] * (h*w)), b)
    alpha = np.clip(x.reshape(h, w), 0, 1)
    
    return alpha

start_time = time.time()

# Initial trimap generation
initial_trimap = generate_trimap(labels_out)

# Closed Form Matting
alpha_matte = closed_form_matting(grow_cut_rgb, initial_trimap)

# Generate new trimap based on refined alpha matte
new_trimap = generate_new_trimap(alpha_matte)

end_time = time.time()

print(f"Total Processing Time: {end_time - start_time} seconds")

# Display results
cv2.imshow("Original Image", grow_cut_rgb)
cv2.imshow("Initial Trimap", initial_trimap)
cv2.imshow("Alpha Matte", alpha_matte)
cv2.imshow("New Trimap", new_trimap)
cv2.waitKey(0)
cv2.destroyAllWindows()

# # Save results for further analysis
# cv2.imwrite("initial_trimap.png", initial_trimap)
# cv2.imwrite("alpha_matte.png", (alpha_matte * 255).astype(np.uint8))
# cv2.imwrite("new_trimap.png", new_trimap)

  cov = np.cov(window.reshape(-1, 3).T)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


Total Processing Time: 4.899698734283447 seconds


## Learning Based Matting

In [11]:
def learned_based_matting(image, trimap, alpha_initial, window_size=7, c=1.0):
    h, w = trimap.shape
    n = h * w
    
    # Create neighborhood matrix
    def get_neighbors(i, j):
        neighbors = []
        for di in range(-window_size//2, window_size//2 + 1):
            for dj in range(-window_size//2, window_size//2 + 1):
                if 0 <= i + di < h and 0 <= j + dj < w:
                    neighbors.append((i + di) * w + (j + dj))
        return neighbors

    # Construct F matrix
    F_data, F_row, F_col = [], [], []
    for i in range(h):
        for j in range(w):
            if trimap[i, j] == 128:  # Unknown region
                neighbors = get_neighbors(i, j)
                weights = np.ones(len(neighbors)) / len(neighbors)  # Simple average weights
                for k, neighbor in enumerate(neighbors):
                    F_data.append(weights[k])
                    F_row.append(i * w + j)
                    F_col.append(neighbor)

    F = csr_matrix((F_data, (F_row, F_col)), shape=(n, n))

    # Construct identity matrix for known pixels
    I_known = diags([1 if trimap.flatten()[i] != 128 else 0 for i in range(n)])

    # Solve the quadratic cost equation
    A = diags([1] * n) - F.T
    b = c * I_known * alpha_initial.flatten()

    alpha_refined = spsolve(A.T @ A + c * I_known, A.T @ b)
    alpha_refined = np.clip(alpha_refined.reshape(h, w), 0, 1)

    return alpha_refined

# Usage
start_time = time.time()

learned_alpha = learned_based_matting(grow_cut_rgb, initial_trimap, alpha_matte)

end_time = time.time()
print(f"Learned-Based Matting Time: {end_time - start_time} seconds")

# Display results
cv2.imshow("Original Image", grow_cut_rgb)
cv2.imshow("Initial Alpha Matte", alpha_matte)
cv2.imshow("Learned-Based Alpha Matte", learned_alpha)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save results
# cv2.imwrite("learned_alpha_matte.png", (learned_alpha * 255).astype(np.uint8))

Learned-Based Matting Time: 22.084094047546387 seconds


## Alpha Matting

In [12]:
def increase_resolution(image, scale_factor=2):
    return cv2.resize(image, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_CUBIC)

rgb_frame = increase_resolution(rgb_frame)
depth_frame = increase_resolution(depth_frame)

In [14]:
def detect_face(image):
    # Load the pre-trained face detector
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Detect faces
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    
    # Create a mask
    face_mask = np.zeros(image.shape[:2], dtype=np.uint8)
    
    # Draw rectangles around the faces on the mask
    for (x, y, w, h) in faces:
        cv2.rectangle(face_mask, (x, y), (x+w, y+h), 255, -1)
    
    return face_mask

In [15]:
def refine_trimap_and_segment(alpha_matte, original_trimap, face_mask, threshold_low=0.1, threshold_high=0.9, face_threshold_low=0.05, face_threshold_high=0.8):
    refined_trimap = np.zeros_like(original_trimap)
    
    # Apply different thresholds for face area
    refined_trimap[face_mask & (alpha_matte <= face_threshold_low)] = 0
    refined_trimap[face_mask & (alpha_matte >= face_threshold_high)] = 255
    refined_trimap[face_mask & (alpha_matte > face_threshold_low) & (alpha_matte < face_threshold_high)] = 128
    
    # Apply normal thresholds for the rest of the body
    body_mask = ~face_mask.astype(bool)
    refined_trimap[body_mask & (alpha_matte <= threshold_low)] = 0
    refined_trimap[body_mask & (alpha_matte >= threshold_high)] = 255
    refined_trimap[body_mask & (alpha_matte > threshold_low) & (alpha_matte < threshold_high)] = 128

    # Create final segmentation map
    segmentation_map = np.zeros_like(alpha_matte)
    segmentation_map[alpha_matte >= 0.5] = 1
    
    return refined_trimap, segmentation_map

def expand_head_region(segmentation_map, face_mask, expansion_factor=1.2):
    # Calculate bounding box of face region
    y, x = np.where(face_mask)
    if len(y) == 0 or len(x) == 0:  # No face detected
        return segmentation_map
    
    top, bottom, left, right = y.min(), y.max(), x.min(), x.max()
    
    # Expand bounding box
    height = bottom - top
    width = right - left
    center_y, center_x = (top + bottom) // 2, (left + right) // 2
    new_height = int(height * expansion_factor)
    new_width = int(width * expansion_factor)
    
    new_top = max(0, center_y - new_height // 2)
    new_bottom = min(segmentation_map.shape[0], center_y + new_height // 2)
    new_left = max(0, center_x - new_width // 2)
    new_right = min(segmentation_map.shape[1], center_x + new_width // 2)
    
    # Expand segmentation in the new bounding box
    segmentation_map[new_top:new_bottom, new_left:new_right] = 1
    
    # # Tambahkan operasi dilasi
    # kernel = np.ones((5,5), np.uint8)
    # segmentation_map = cv2.dilate(segmentation_map.astype(np.uint8), kernel, iterations=2)
   
   
    return segmentation_map

def apply_segmentation(image, segmentation_map):
    segmented_image = image.copy()
    segmented_image[segmentation_map == 0] = [0, 0, 0]  # Set background to black
    return segmented_image

# Create face mask
face_mask = detect_face(grow_cut_rgb)

# Refine trimap and create segmentation
refined_trimap, final_segmentation = refine_trimap_and_segment(learned_alpha, initial_trimap, face_mask)

# Expand head region
final_segmentation = expand_head_region(final_segmentation, face_mask)

# Apply the final segmentation to the original image
final_segmented_image = apply_segmentation(grow_cut_rgb, final_segmentation)

# Display results
cv2.imshow("Refined Trimap", refined_trimap)
cv2.imshow("Final Segmentation Map", final_segmentation.astype(np.float32))
cv2.imshow("Final Segmented Image", final_segmented_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Print some statistics
unknown_pixels_original = np.sum(initial_trimap == 128)
unknown_pixels_refined = np.sum(refined_trimap == 128)
print(f"Unknown pixels in original trimap: {unknown_pixels_original}")
print(f"Unknown pixels in refined trimap: {unknown_pixels_refined}")
print(f"Reduction in unknown pixels: {unknown_pixels_original - unknown_pixels_refined}")

Unknown pixels in original trimap: 6586
Unknown pixels in refined trimap: 42738
Reduction in unknown pixels: -36152


## Refinement

In [16]:
def refine_segmentation(segmentation_map, original_image, body_mask):
    segmentation_map = body_mask.astype(np.float32)
    kernel = np.ones((3,3), np.uint8)
    dilated_map = cv2.dilate(segmentation_map, kernel, iterations=2)
    _, sharp_map = cv2.threshold(dilated_map, 0.4, 1, cv2.THRESH_BINARY)
    cleaned_map = cv2.morphologyEx(sharp_map, cv2.MORPH_CLOSE, kernel)
    feather_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
    feathered_map = cv2.normalize(cv2.filter2D(cleaned_map, -1, feather_kernel), None, 0, 1, cv2.NORM_MINMAX)
    sharpened = cv2.addWeighted(original_image, 1.5, cv2.GaussianBlur(original_image, (0, 0), 10), -0.45, 0)
    foreground = cv2.multiply(sharpened.astype(np.float32)/255, np.repeat(feathered_map[:,:,np.newaxis], 3, axis=2))
    hsv = cv2.cvtColor(foreground.astype(np.uint8), cv2.COLOR_BGR2HSV)
    blue_mask = cv2.inRange(hsv, np.array([100,50,50]), np.array([140,255,255]))
    foreground[blue_mask > 0] = 0
    return (foreground * 255).astype(np.uint8)

def expand_head_region_and_get_body(segmentation_map, face_mask, expansion_factor=1.2):
    y, x = np.where(face_mask)
    if len(y) == 0 or len(x) == 0:
        return segmentation_map, np.zeros_like(segmentation_map)
    top, bottom, left, right = y.min(), y.max(), x.min(), x.max()
    center_y, center_x = (top + bottom) // 2, (left + right) // 2
    new_height, new_width = int((bottom - top) * expansion_factor), int((right - left) * expansion_factor)
    new_top = max(0, center_y - new_height // 2)
    new_bottom = min(segmentation_map.shape[0], center_y + new_height // 2)
    new_left = max(0, center_x - new_width // 2)
    new_right = min(segmentation_map.shape[1], center_x + new_width // 2)
    head_mask = np.zeros_like(segmentation_map)
    head_mask[new_top:new_bottom, new_left:new_right] = 1
    expanded_head = segmentation_map.copy()
    expanded_head[new_top:new_bottom, new_left:new_right] = 1
    body_mask = np.logical_and(segmentation_map == 1, head_mask == 0)
    return expanded_head, body_mask.astype(np.float32), head_mask.astype(np.float32)

def detect_face(image):
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    face_mask = np.zeros(image.shape[:2], dtype=np.uint8)
    for (x, y, w, h) in faces:
        cv2.rectangle(face_mask, (x, y), (x+w, y+h), 255, -1)
    return face_mask

def initial_segmentation(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    kernel = np.ones((5,5), np.uint8)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
    return cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

def post_process_segmentation(segmented_image, segmentation_map, face_mask):
    _, binary_mask = cv2.threshold(cv2.cvtColor(segmented_image, cv2.COLOR_BGR2GRAY), 1, 255, cv2.THRESH_BINARY)
    kernel = np.ones((5,5), np.uint8)
    binary_mask = cv2.morphologyEx(cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel, iterations=3), cv2.MORPH_OPEN, kernel, iterations=1)
    segmentation_map = segmentation_map * (binary_mask > 0)
    segmentation_map = cv2.morphologyEx(segmentation_map, cv2.MORPH_CLOSE, kernel, iterations=2)
    y, x = np.where(face_mask > 0)
    if len(y) > 0 and len(x) > 0:
        top, bottom, left, right = y.min(), y.max(), x.min(), x.max()
        eye_y, eye_height = top + (bottom - top) // 3, (bottom - top) // 4
        eye_mask = np.zeros_like(segmentation_map)
        eye_mask[eye_y:eye_y+eye_height, left:right] = 255
        segmentation_map[eye_mask > 0] = cv2.dilate(segmentation_map, kernel, iterations=2)[eye_mask > 0]
    segmented_image = segmented_image.copy()
    segmented_image[segmentation_map == 0] = [0, 0, 0]
    return segmented_image, segmentation_map

# Main process
face_mask = detect_face(grow_cut_rgb)
initial_seg = initial_segmentation(grow_cut_rgb)
final_segmentation = cv2.morphologyEx(cv2.morphologyEx(initial_seg, cv2.MORPH_CLOSE, np.ones((5,5), np.uint8), iterations=2), cv2.MORPH_OPEN, np.ones((5,5), np.uint8), iterations=1)
final_segmented_image, final_segmentation = post_process_segmentation(grow_cut_rgb, final_segmentation, face_mask)

feet_height = int(final_segmentation.shape[0] * 0.1)
feet_mask = np.zeros_like(final_segmentation)
feet_mask[-feet_height:, :] = final_segmentation[-feet_height:, :]
feet_image = grow_cut_rgb.copy()
feet_image[feet_mask == 0] = [0, 0, 0]

refined_trimap, final_segmentation = refine_trimap_and_segment(learned_alpha, initial_trimap, face_mask)
final_segmentation, body_mask, head_mask = expand_head_region_and_get_body(final_segmentation, face_mask)
refined_body_segmentation = refine_segmentation(final_segmentation, grow_cut_rgb, body_mask)
refined_full_segmentation = refined_body_segmentation.copy()
refined_full_segmentation[head_mask == 1] = grow_cut_rgb[head_mask == 1]

combined_foreground = refined_full_segmentation.copy()
combined_foreground[-feet_height:, :] = feet_image[-feet_height:, :]

final_segmentation_map = np.zeros((combined_foreground.shape[0], combined_foreground.shape[1]), dtype=np.uint8)
final_segmentation_map[np.any(combined_foreground != [0, 0, 0], axis=-1)] = 255

# Display results
cv2.imshow("Final Segmentation Map", final_segmentation_map)
cv2.imshow("Combined Foreground", combined_foreground)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [17]:
# fix holes in segmentation map
kernel = np.ones((5,5), np.uint8)
closed_map = cv2.morphologyEx(final_segmentation_map, cv2.MORPH_CLOSE, kernel)

# Flood fill to fill holes
flood_map = closed_map.copy()
cv2.floodFill(flood_map, None, (0,0), 255)
holes_filled = cv2.bitwise_not(flood_map)
fixed_map = cv2.bitwise_or(closed_map, holes_filled)

# Fix top head region
top_expansion = 5
fixed_map[:top_expansion, :] = 255

# Implement fixed map to combined foreground
mask = cv2.cvtColor(fixed_map, cv2.COLOR_GRAY2BGR)
fixed_foreground = cv2.bitwise_and(combined_foreground, mask)

# Display result
cv2.imshow("Fixed Segmentation Map", fixed_map)
cv2.imshow("Fixed Combined Foreground", fixed_foreground)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save Result
cv2.imwrite("fixed_segmentation_map.png", fixed_map)
cv2.imwrite("fixed_combined_foreground.png", fixed_foreground)

True

## Input Background

In [18]:
# Choose image
def choose_image():
    root = Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename()
    return file_path

# Read fixed foreground and segmentation map
fixed_foreground = cv2.imread("fixed_combined_foreground.png")
fixed_map = cv2.imread("fixed_segmentation_map.png", cv2.IMREAD_GRAYSCALE)

# Choose new background
print("Choose an image for the new background:")
background_path = choose_image()
new_background = cv2.imread(background_path)

# Ensure the background size matches the foreground
if new_background.shape[:2] != fixed_foreground.shape[:2]:
    new_background = cv2.resize(new_background, (fixed_foreground.shape[1], fixed_foreground.shape[0]))

# Create mask from segmentation map
mask = fixed_map
mask_inv = cv2.bitwise_not(mask)

# Apply the mask to the foreground and background
fg = cv2.bitwise_and(fixed_foreground, fixed_foreground, mask=mask)
bg = cv2.bitwise_and(new_background, new_background, mask=mask_inv)

# Combine foreground and background
result = cv2.add(fg, bg)

# Display result
cv2.imshow("Original Foreground", fixed_foreground)
cv2.imshow("New Background", new_background)
cv2.imshow("Result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Simpan hasil
cv2.imwrite("result_with_new_background.png", result)

Choose an image for the new background:


True

In [19]:
import nbformat

# Memuat file notebook
file_path = r'C:\BRIN\BRIN\Program\Program 2\Notebook\Program2.1.ipynb'
with open(file_path, 'r', encoding='utf-8') as f:
    notebook_content = nbformat.read(f, as_version=4)

# Mengekstrak sel-sel yang berisi kode
code_cells = [cell['source'] for cell in notebook_content['cells'] if cell['cell_type'] == 'code']

# Menampilkan semua kode yang ada dalam notebook
for i, code in enumerate(code_cells):
    print(f"Code cell {i+1}:\n{code}\n{'-'*80}\n")


Code cell 1:
%pip install opencv-python numpy scipy scikit-image matplotlib
%pip install matplotlib
%pip install tensorflow
%pip install torch torchvision
%pip install mediapipe
%pip install nbformat

--------------------------------------------------------------------------------

Code cell 2:
import cv2
import numpy as np
import time
from scipy.ndimage import label, find_objects, median_filter, shift, distance_transform_edt
from scipy.sparse import csr_matrix, diags
from scipy.sparse.linalg import spsolve
from skimage import morphology, measure
from skimage.segmentation import slic
from skimage.color import rgb2gray
from pathlib import Path
import matplotlib.pyplot as plt
from tkinter import filedialog, Tk, simpledialog, messagebox

print(cv2.__version__)
--------------------------------------------------------------------------------

Code cell 3:
root = Tk()
root.withdraw()

def browse_video():
    Tk().withdraw() 
    filename = filedialog.askopenfilename() 
    return filename

#