# In [1]:  (Jupyter notebook cell prompt carried over by the export)
import cv2
import numpy as np
import os
from tabulate import tabulate
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from ultralytics import YOLO

# Load the YOLOv9 segmentation model
def load_yolo_segmentation_model(model_path):
    """Build an Ultralytics ``YOLO`` object from ``model_path``.

    Args:
        model_path: Value handed unchanged to the ``YOLO`` constructor
            (the script passes a ``.pt`` weights file name).

    Returns:
        The constructed ``YOLO`` instance.
    """
    segmentation_model = YOLO(model_path)
    return segmentation_model

# Perform Object Detection and Segmentation
def detect_objects_with_masks(image, model, output_dir, target_class_id, conf_threshold=0.5):
    """Run segmentation on ``image`` and collect the confident instance masks.

    Every kept mask is binarised at 0.5, written to ``output_dir`` as a
    0/255 PNG and appended to the returned lists.

    Args:
        image: Input handed unchanged to ``model``.
        model: Callable returning an iterable of results that expose
            ``masks.data``, ``boxes.conf`` and ``boxes.cls``.
        output_dir: Directory receiving the mask PNGs; created if missing.
        target_class_id: When not ``None``, only detections whose integer
            class id equals this value are kept. ``None`` keeps all classes.
        conf_threshold: A detection is kept only if its confidence is
            strictly greater than this value.

    Returns:
        A tuple ``(masks, confidences, class_ids)`` of parallel lists:
        ``uint8`` 0/1 mask arrays, the confidence values as yielded by
        ``boxes.conf``, and integer class ids.
    """
    # cv2.imwrite does not create missing directories; without this the
    # mask files are silently dropped on a fresh run.
    os.makedirs(output_dir, exist_ok=True)

    masks, confidences, class_ids = [], [], []

    for result_idx, result in enumerate(model(image)):
        # A result without any detection carries no mask container at all.
        if result.masks is None:
            continue

        detections = zip(result.masks.data, result.boxes.conf, result.boxes.cls)
        for obj_idx, (mask, conf, cls_id) in enumerate(detections):
            class_id = int(cls_id)
            if target_class_id is not None and class_id != target_class_id:
                continue
            if not (conf > conf_threshold):
                continue

            # Threshold the soft mask and convert it to a 0/1 uint8 array.
            binary_mask = (mask > 0.5).cpu().numpy().astype(np.uint8)
            mask_path = os.path.join(
                output_dir,
                f"mask_result{result_idx}_obj{obj_idx}_class{class_id}.png",
            )
            cv2.imwrite(mask_path, binary_mask * 255)

            masks.append(binary_mask)
            confidences.append(conf)
            class_ids.append(class_id)

    return masks, confidences, class_ids

# Apply Segmentation Masks to Image and Save
import cv2
import numpy as np
import os

def apply_masks_to_image(image, masks, output_directory, imageside):
    """Cut one object per mask out of ``image`` and save each cut-out.

    For every mask a black canvas shaped like ``image`` is created and
    only the pixels where the (resized) mask is non-zero are copied over.

    Args:
        image: Source image array; its first two dimensions give the
            target height and width for the masks.
        masks: Iterable of mask arrays; non-zero means "keep".
        output_directory: Folder for the PNG files; created if missing.
        imageside: Prefix placed in front of ``image_with_mask_<n>.png``
            (numbering starts at 1).

    Returns:
        List with one masked image per input mask, in input order.
    """
    img_h, img_w = image.shape[:2]
    os.makedirs(output_directory, exist_ok=True)

    cutouts = []
    for number, mask in enumerate(masks, start=1):
        # Nearest-neighbour keeps the mask values crisp when rescaling.
        scaled = cv2.resize(mask, (img_w, img_h), interpolation=cv2.INTER_NEAREST)
        keep = scaled > 0

        cutout = np.zeros_like(image)
        cutout[keep] = image[keep]

        target = os.path.join(output_directory, f"{imageside}image_with_mask_{number}.png")
        cv2.imwrite(target, cutout)
        cutouts.append(cutout)

    return cutouts

# Save the Image with Masks Applied
def save_image_with_masks(image_with_masks, output_path):
    """Write ``image_with_masks`` to ``output_path`` via ``cv2.imwrite``.

    Args:
        image_with_masks: Image array to store.
        output_path: Destination file path.

    Returns:
        None. The result of ``cv2.imwrite`` is not inspected.
    """
    destination = output_path
    cv2.imwrite(destination, image_with_masks)

# Save Segmentation Masks as Separate Files (if needed)
def save_segmentation_masks(masks, output_dir, filename_prefix="mask"):
    """Store each mask as ``<filename_prefix>_<index>.png`` in ``output_dir``.

    Masks are multiplied by 255 before writing, so a 0/1 mask becomes a
    black/white image.

    Args:
        masks: Iterable of mask arrays.
        output_dir: Folder the files are written into.
        filename_prefix: Leading part of every file name.

    Returns:
        List of the file paths, in the order of ``masks``.
    """
    jobs = [
        (os.path.join(output_dir, f"{filename_prefix}_{index}.png"), mask)
        for index, mask in enumerate(masks)
    ]
    for target, mask in jobs:
        cv2.imwrite(target, mask * 255)
    return [target for target, _ in jobs]

# Stereo Image Disparity Calculation (optional)
def calculate_disparity_map(left_image, right_image):
    """Compute a block-matching disparity map scaled to 0-255.

    Both images are converted from BGR to grayscale, matched with
    ``cv2.StereoBM_create(numDisparities=16, blockSize=15)`` and the raw
    disparity is min-max normalised for visualisation.

    Args:
        left_image: Left BGR image.
        right_image: Right BGR image.

    Returns:
        The normalised disparity map as a ``uint8`` array.
    """
    gray_left, gray_right = (
        cv2.cvtColor(view, cv2.COLOR_BGR2GRAY) for view in (left_image, right_image)
    )

    matcher = cv2.StereoBM_create(numDisparities=16, blockSize=15)
    raw_disparity = matcher.compute(gray_left, gray_right)

    scaled = cv2.normalize(raw_disparity, None, 0, 255, cv2.NORM_MINMAX)
    return np.uint8(scaled)

# Draw YOLO Bounding Boxes
def draw_bounding_boxes(image, boxes, output_path):
    """Draw labelled green rectangles on ``image`` and save the result.

    The drawing happens directly on ``image`` (no copy is made).

    Args:
        image: Image array to draw on.
        boxes: Iterable of ``(x, y, w, h)`` boxes.
        output_path: File the annotated image is written to.
    """
    green = (0, 255, 0)
    for index, box in enumerate(boxes):
        left, top, box_w, box_h = box
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(image, top_left, bottom_right, green, 2)
        # Label sits 10 pixels above the box's top-left corner.
        cv2.putText(image, f"Object {index}", (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
    cv2.imwrite(output_path, image)

# Detect and Show Keypoints Using SIFT
def detect_and_show_keypoints(image, detector, output_dir, filename_prefix="keypoints"):
    """Detect keypoints on ``image`` and save a visualisation of them.

    Args:
        image: BGR image array; ``None`` or an empty array is rejected.
        detector: Object providing ``detectAndCompute(gray, mask)``.
        output_dir: Folder for the visualisation file.
        filename_prefix: File name (without ``.jpg``) of the visualisation.

    Returns:
        ``(keypoints, descriptors)`` from the detector, or ``([], None)``
        when the image is missing or empty.
    """
    image_missing = image is None or image.size == 0
    if image_missing:
        print("Error: Empty image passed to keypoint detection.")
        return [], None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    detection = detector.detectAndCompute(gray, None)
    keypoints, descriptors = detection

    annotated = cv2.drawKeypoints(
        image,
        keypoints,
        None,
        flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
    )
    cv2.imwrite(os.path.join(output_dir, f"{filename_prefix}.jpg"), annotated)

    return keypoints, descriptors


# Match Keypoints with BFMatcher
def match_keypoints_sift_with_bf(image1, image2, keypoints1, descriptors1, keypoints2, descriptors2, distance_threshold=50.0):
    """Match descriptors with a cross-checked brute-force L2 matcher.

    A match survives two filters:
      1. its descriptor distance is below 0.8 times the mean distance of
         all matches, and
      2. the two keypoint positions are less than ``distance_threshold``
         apart (Euclidean distance between their ``.pt`` coordinates).

    Args:
        image1: Unused; kept so existing callers keep working.
        image2: Unused; kept so existing callers keep working.
        keypoints1: Keypoints of the first image (indexed by ``queryIdx``).
        descriptors1: Descriptors of the first image, or ``None``.
        keypoints2: Keypoints of the second image (indexed by ``trainIdx``).
        descriptors2: Descriptors of the second image, or ``None``.
        distance_threshold: Maximum allowed distance between the matched
            keypoint positions.

    Returns:
        List of the matches that pass both filters; empty when either
        descriptor set is missing or nothing matches.
    """
    # Missing or empty descriptor sets cannot be matched; return early
    # instead of handing them to the matcher.
    for descriptors in (descriptors1, descriptors2):
        if descriptors is None or len(descriptors) == 0:
            return []

    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
    matches = bf.match(descriptors1, descriptors2)
    if len(matches) == 0:
        return []

    # Computed once; recomputing the mean per match made this quadratic.
    distance_cutoff = 0.8 * np.mean([m.distance for m in matches])

    filtered_matches = []
    for match in matches:
        if not (match.distance < distance_cutoff):
            continue
        pt1 = keypoints1[match.queryIdx].pt
        pt2 = keypoints2[match.trainIdx].pt
        if np.linalg.norm(np.array(pt1) - np.array(pt2)) < distance_threshold:
            filtered_matches.append(match)

    return filtered_matches


# Process Stereo Images and Match Sub-Images with Duplicate Check
def process_stereo_images_with_masked_objects(left_images, right_images, output_dir):
    """Pair up masked objects from the left and right views via SIFT matches.

    Every left image is compared with every still-unmatched right image.
    The first right image that yields more than 4 filtered matches is
    taken as its partner: the match visualisation is saved as
    ``matches_<i>_<j>.jpg``, the matched coordinates are passed to
    ``build_3D_cloud`` and both indices are retired from further matching.

    Args:
        left_images: Sequence of masked left-view images (numpy arrays).
        right_images: Sequence of masked right-view images (numpy arrays).
        output_dir: Folder for keypoint and match visualisations; created
            if missing.

    Returns:
        List of tuples ``(match_count, left_index, right_index, matches,
        left_keypoints, right_keypoints)``, one per accepted pair.
    """
    os.makedirs(output_dir, exist_ok=True)

    sift = cv2.SIFT_create(nfeatures=12800)
    all_matches = []
    matched_right_indices = set()

    # A single pass over all (left, right) combinations. The previous
    # extra zip() loop around this repeated the whole search once per pair.
    for i, left_image in enumerate(left_images):
        if not isinstance(left_image, np.ndarray):
            print("Error: Expected numpy.ndarray images")
            continue

        for j, right_image in enumerate(right_images):
            if j in matched_right_indices:
                continue
            if not isinstance(right_image, np.ndarray):
                print("Error: Expected numpy.ndarray images")
                continue

            keypoints1, descriptors1 = detect_and_show_keypoints(left_image, sift, output_dir, f"keypoints_left_{i}_{j}")
            keypoints2, descriptors2 = detect_and_show_keypoints(right_image, sift, output_dir, f"keypoints_right_{i}_{j}")
            if not keypoints1 or not keypoints2:
                continue

            filtered_matches = match_keypoints_sift_with_bf(left_image, right_image, keypoints1, descriptors1, keypoints2, descriptors2)
            if len(filtered_matches) <= 4:
                continue

            # Save the matching keypoints visualisation.
            img_matches = cv2.drawMatches(left_image, keypoints1, right_image, keypoints2, filtered_matches, None, flags=2)
            match_path = os.path.join(output_dir, f"matches_{i}_{j}.jpg")
            cv2.imwrite(match_path, img_matches)

            # Collect the matched (u, v) coordinates of this pair only.
            left_points = [keypoints1[m.queryIdx].pt for m in filtered_matches]
            right_points = [keypoints2[m.trainIdx].pt for m in filtered_matches]
            img1u = [pt[0] for pt in left_points]
            img1v = [pt[1] for pt in left_points]
            img2u = [pt[0] for pt in right_points]
            img2v = [pt[1] for pt in right_points]
            build_3D_cloud(img1u, img1v, img2u, img2v)

            all_matches.append((len(filtered_matches), i, j, filtered_matches, keypoints1, keypoints2))

            # Right image j is taken; left image i needs no further search.
            matched_right_indices.add(j)
            break

    return all_matches

def build_3D_cloud(img1u, img1v, img2u, img2v):
    """Reconstruct and plot a 3D point cloud from matched image points.

    Steps: estimate the essential matrix with RANSAC, recover the relative
    pose, triangulate the correspondences, then show a 2D scatter of the
    matches and a 3D scatter coloured by Z.

    All OpenCV calls receive pixel coordinates together with ``K``.
    Previously the points were first normalised with ``K`` and then ``K``
    was applied a second time, which made the geometry inconsistent.

    Args:
        img1u: x coordinates of the matched points in image 1.
        img1v: y coordinates of the matched points in image 1.
        img2u: x coordinates of the matched points in image 2.
        img2v: y coordinates of the matched points in image 2.

    Returns:
        Array of shape ``(N, 3)`` with the triangulated points, or ``None``
        when reconstruction is not possible. The translation from
        ``recoverPose`` has no metric scale, so the cloud is defined only
        up to a global scale factor.
    """
    if len(img1u) == 0 or len(img2u) == 0:
        print("No points provided for reconstruction.")
        return None

    if not (len(img1u) == len(img1v) == len(img2u) == len(img2v)):
        print("Mismatched coordinate lists; skipping reconstruction.")
        return None

    # The essential-matrix solver is a five-point algorithm.
    if len(img1u) < 5:
        print("At least 5 matched points are required for reconstruction.")
        return None

    # Intrinsic calibration matrix.
    # NOTE(review): fy is roughly 9x fx here, which is unusual for a real
    # camera — confirm against the calibration output.
    K = np.array([
        [9.25692841e+03, 0, 4.58239711e+02],
        [0, 8.37883743e+04, 3.59148084e+02],
        [0, 0, 1]
    ])

    # Matched pixel coordinates from the two views, one row per point.
    img1_points = np.column_stack((img1u, img1v)).astype(np.float32)
    img2_points = np.column_stack((img2u, img2v)).astype(np.float32)

    print(img1_points.shape)  # (n, 2)
    print(img2_points.shape)  # must match the line above

    # Essential matrix from pixel coordinates; threshold is in pixels.
    E, _ = cv2.findEssentialMat(img1_points, img2_points, K, method=cv2.RANSAC, prob=0.999, threshold=1.0)
    if E is None or E.shape != (3, 3):
        # No solution, or several stacked candidates that recoverPose
        # cannot consume directly.
        print("Essential matrix estimation failed; skipping reconstruction.")
        return None

    # Recover pose (R: rotation matrix, t: translation vector).
    _, R, t, _ = cv2.recoverPose(E, img1_points, img2_points, K)
    print("Rotation matrix R:\n", R)
    print("Translation vector t:\n", t)
    plt.scatter(img1u, img1v, label='Image 1 Points')
    plt.scatter(img2u, img2v, label='Image 2 Points')
    plt.legend()
    plt.title("Matched Points")
    plt.show()

    # Projection matrices in pixel space, matching the pixel-space points.
    P1 = K @ np.hstack((np.eye(3), np.zeros((3, 1))))
    P2 = K @ np.hstack((R, t))

    points_4d_homogeneous = cv2.triangulatePoints(P1, P2, img1_points.T, img2_points.T)

    # Drop points at infinity (zero homogeneous scale) before dividing.
    scale = points_4d_homogeneous[3]
    finite = scale != 0
    points_3d = (points_4d_homogeneous[:3, finite] / scale[finite]).T

    # Plot the resulting 3D point cloud.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    sc = ax.scatter(points_3d[:, 0], points_3d[:, 1], points_3d[:, 2], c=points_3d[:, 2], cmap='viridis')
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    plt.colorbar(sc, label='Depth (Z)')
    plt.title("3D Point Cloud with Depth")
    plt.show()

    print(f"Generated 3D points:\n{points_3d[:5]} (showing first 5 points)")

    return points_3d


# Main Execution
if __name__ == "__main__":
    # Script entry point: segment both stereo images, save the masks and
    # masked cut-outs, compute a disparity map, then match the cut-outs
    # between the two views and reconstruct 3D points.

    target_class_id = None
    output_dir = "ultralyticsoutput"  # Directory for outputs
    model_path = "yolov9e-seg.pt"  # Replace with your model path
    left_image_path = "IMG_7998.jpg"
    right_image_path = "IMG_7999.jpg"

    # Create the output directory before anything writes into it;
    # cv2.imwrite does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Load stereo images (left and right). cv2.imread returns None rather
    # than raising when a file cannot be read, so check explicitly.
    left_image = cv2.imread(left_image_path)
    right_image = cv2.imread(right_image_path)
    if left_image is None:
        raise FileNotFoundError(f"Could not read image: {left_image_path}")
    if right_image is None:
        raise FileNotFoundError(f"Could not read image: {right_image_path}")

    model = load_yolo_segmentation_model(model_path)

    # Perform segmentation on both stereo images
    left_masks, left_confidences, left_class_ids = detect_objects_with_masks(left_image, model, output_dir, target_class_id, conf_threshold=0.5)
    right_masks, right_confidences, right_class_ids = detect_objects_with_masks(right_image, model, output_dir, target_class_id, conf_threshold=0.5)

    # Apply masks to both images (one cut-out image per mask)
    left_image_with_masks = apply_masks_to_image(left_image, left_masks, output_dir, imageside="left")
    right_image_with_masks = apply_masks_to_image(right_image, right_masks, output_dir, imageside="right")

    # Save each mask separately as <side>_mask_<idx>.png
    for idx, mask_path in enumerate(save_segmentation_masks(left_masks, output_dir, filename_prefix="left_mask")):
        print(f"Saved left mask {idx} to {mask_path}")

    for idx, mask_path in enumerate(save_segmentation_masks(right_masks, output_dir, filename_prefix="right_mask")):
        print(f"Saved right mask {idx} to {mask_path}")

    # Optional: Calculate disparity map (depth estimation)
    disparity_map = calculate_disparity_map(left_image, right_image)

    # Save the disparity map
    disparity_map_output_path = os.path.join(output_dir, "disparity_map.jpg")
    cv2.imwrite(disparity_map_output_path, disparity_map)
    print(f"Saved disparity map to {disparity_map_output_path}")

    process_stereo_images_with_masked_objects(left_image_with_masks, right_image_with_masks, output_dir)





# Notebook cell output: KeyboardInterrupt (the run was interrupted)