In [None]:
import cv2
import numpy as np
import os


# Load YOLO Model
def load_yolo_model(cfg_path, weights_path, names_path):
    """Load a YOLO network together with its output layer names and class labels.

    Args:
        cfg_path: Path to the network configuration file.
        weights_path: Path to the trained weights file.
        names_path: Path to a text file holding one class label per line.

    Returns:
        A ``(net, output_layers, classes)`` tuple: the ``cv2.dnn`` network,
        the list of names of its unconnected output layers, and the list of
        class labels in file order.
    """
    net = cv2.dnn.readNet(weights_path, cfg_path)
    layer_names = net.getLayerNames()

    # getUnconnectedOutLayers() yields 1-based layer ids. Depending on the
    # OpenCV version they arrive as a flat (N,) array or as an (N, 1) array;
    # flattening first makes the lookup work for both layouts.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

    # Every line is kept (even blank ones) so that a label's position in the
    # list stays equal to its line number in the file.
    with open(names_path, 'r') as f:
        classes = [line.strip() for line in f]

    return net, output_layers, classes


# Perform Object Detection with YOLO
def detect_objects(image, net, output_layers, conf_threshold=0.5):
    """Run the YOLO network on an image and return the boxes that survive NMS.

    Args:
        image: Image array; its first two dimensions are used as height and
            width.
        net: ``cv2.dnn`` network to run.
        output_layers: Names of the layers whose outputs are read.
        conf_threshold: Minimum best-class score for a detection to be kept;
            also passed to NMS as its score threshold.

    Returns:
        A list of ``[x, y, w, h]`` boxes in pixel coordinates, clipped to the
        image so that they can be used directly as array slices. Boxes that
        have no area left after clipping are dropped. The list is empty when
        nothing is detected.
    """
    height, width = image.shape[:2]
    # Positional arguments: scale factor 0.00392 (about 1/255), 416x416 input,
    # zero mean, swapRB=True.
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    detections = net.forward(output_layers)

    boxes, confidences = [], []
    for detection in detections:
        for obj in detection:
            # Entries 0-3 are the normalised centre/size; entries from index 5
            # onwards are treated as the per-class scores.
            scores = obj[5:]
            confidence = scores[np.argmax(scores)]
            if confidence > conf_threshold:
                center_x = int(obj[0] * width)
                center_y = int(obj[1] * height)
                w = int(obj[2] * width)
                h = int(obj[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))

    # Perform Non-Maximum Suppression on the raw (unclipped) boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, 0.4)
    print("Indices:", indices)

    # NMSBoxes may hand back an empty tuple, a flat array or an (N, 1) array;
    # normalising to a flat integer array covers all three.
    kept = np.asarray(indices, dtype=int).reshape(-1)

    filtered_boxes = []
    for i in kept:
        x, y, w, h = boxes[i]
        # A predicted box can start at a negative coordinate or run past the
        # image edge. Used as a slice, a negative start wraps around and
        # yields an empty crop, so clip to the image here.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, width), min(y + h, height)
        if x1 > x0 and y1 > y0:
            filtered_boxes.append([x0, y0, x1 - x0, y1 - y0])

    return filtered_boxes


# Draw YOLO Bounding Boxes
def draw_bounding_boxes(image, boxes, output_path):
    """Draw every box with an "Object <index>" label and write the result to disk.

    The rectangles and labels are drawn directly onto ``image``; callers that
    need the original pixels should pass a copy.

    Args:
        image: Image array to draw on (modified in place).
        boxes: Iterable of ``(x, y, w, h)`` boxes.
        output_path: Destination file for the annotated image.
    """
    green = (0, 255, 0)
    for index, box in enumerate(boxes):
        left, top, box_w, box_h = box
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(image, top_left, bottom_right, green, 2)
        # The label sits 10 pixels above the box's top-left corner
        label_origin = (left, top - 10)
        cv2.putText(image, f"Object {index}", label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
    cv2.imwrite(output_path, image)


# Crop Images to Smaller Size
def crop_to_smaller(image1, image2):
    """Trim both images to the largest top-left region they have in common.

    Args:
        image1: First image array (at least two dimensions).
        image2: Second image array (at least two dimensions).

    Returns:
        ``(image1_cropped, image2_cropped)``, both cut to the smaller of the
        two heights and the smaller of the two widths, anchored at the
        top-left corner.
    """
    # Pairwise minimum of (height, width) across the two images
    rows, cols = map(min, image1.shape[:2], image2.shape[:2])
    shared_region = (slice(None, rows), slice(None, cols))
    return image1[shared_region], image2[shared_region]


# Detect and Show Keypoints Using SIFT
def detect_and_show_keypoints(image, detector):
    """Compute keypoints and descriptors for an image with the given detector.

    Args:
        image: Image array, or ``None``.
        detector: Object exposing ``detectAndCompute(image, mask)``.

    Returns:
        ``(keypoints, descriptors)`` as returned by the detector, or
        ``([], None)`` when the image is ``None`` or has no pixels.
    """
    has_pixels = image is not None and image.size != 0
    if not has_pixels:
        print("Error: Empty image passed to keypoint detection.")
        return [], None

    # A three-dimensional array carries a channel axis; reduce it to grayscale
    # before handing it to the detector. Two-dimensional input is used as is.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    found_keypoints, found_descriptors = detector.detectAndCompute(gray, None)
    return found_keypoints, found_descriptors


# Match Keypoints with BFMatcher
def match_keypoints_sift_with_bf(image1, image2, keypoints1, descriptors1, keypoints2, descriptors2, distance_threshold=50.0):
    """Brute-force match two descriptor sets and keep only consistent matches.

    A match is kept when (a) its descriptor distance is below 0.8 times the
    mean descriptor distance of all cross-checked matches and (b) the two
    matched keypoints lie less than ``distance_threshold`` apart when their
    coordinates are compared directly.

    Args:
        image1: Unused; kept so existing callers keep working.
        image2: Unused; kept so existing callers keep working.
        keypoints1: Keypoints of the first image, indexed by ``queryIdx``.
        descriptors1: Descriptors of the first image, or ``None``.
        keypoints2: Keypoints of the second image, indexed by ``trainIdx``.
        descriptors2: Descriptors of the second image, or ``None``.
        distance_threshold: Maximum allowed distance between the coordinates
            of two matched keypoints.

    Returns:
        List of the matches that pass both filters; empty when either
        descriptor set is missing or empty, or when nothing matches.
    """
    # A detector reports "no features" as None (or an empty array); there is
    # nothing to match in that case and the matcher would raise.
    if descriptors1 is None or descriptors2 is None:
        return []
    if len(descriptors1) == 0 or len(descriptors2) == 0:
        return []

    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
    matches = bf.match(descriptors1, descriptors2)
    if not matches:
        return []

    # Compute the cut-off once. Evaluating the mean inside the filter would
    # redo it for every match and make the filter quadratic.
    max_descriptor_distance = 0.8 * np.mean([m.distance for m in matches])

    filtered_matches = []
    for match in matches:
        if not match.distance < max_descriptor_distance:
            continue
        pt1 = np.array(keypoints1[match.queryIdx].pt)
        pt2 = np.array(keypoints2[match.trainIdx].pt)
        if np.linalg.norm(pt1 - pt2) < distance_threshold:
            filtered_matches.append(match)

    return filtered_matches


# Process Stereo Images and Match Sub-Images with Duplicate Check
def _crop_box(image, box):
    """Return the part of ``image`` covered by an ``(x, y, w, h)`` box, clipped to the image."""
    x, y, w, h = box
    height, width = image.shape[:2]
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, width), min(y + h, height)
    # max(...) keeps the slice empty (instead of wrapping around) when the box
    # lies completely outside the image.
    return image[y0:max(y1, y0), x0:max(x1, x0)]


def process_stereo_images(left_image_path, right_image_path, yolo_model, output_dir, target_class_name="pottedplant"):
    """Detect objects in a stereo pair and match them across views with SIFT.

    Both images are run through the detector and the detected boxes are
    written to ``left_boxes.jpg`` / ``right_boxes.jpg`` in ``output_dir``.
    Every left box is then compared with every not-yet-matched right box. The
    first pair with more than 8 filtered SIFT matches is saved as
    ``matches_<i>_<j>.jpg`` and both boxes are excluded from further pairing.

    Args:
        left_image_path: Path of the left image.
        right_image_path: Path of the right image.
        yolo_model: ``(net, output_layers, classes)`` tuple.
        output_dir: Existing directory that receives the output images.
        target_class_name: Currently unused. The detector returns boxes
            without class information, so no class filtering takes place.

    Returns:
        None. Prints a message and returns early when either image cannot be
        loaded.
    """
    net, output_layers, _classes = yolo_model

    # Load images
    left_image = cv2.imread(left_image_path)
    right_image = cv2.imread(right_image_path)

    if left_image is None or right_image is None:
        print("Error loading images.")
        return

    # Detect objects in both images
    left_boxes = detect_objects(left_image, net, output_layers)
    right_boxes = detect_objects(right_image, net, output_layers)

    # Save YOLO bounding box visualizations (drawn on copies so the crops
    # taken below stay free of rectangles and labels)
    draw_bounding_boxes(left_image.copy(), left_boxes, os.path.join(output_dir, "left_boxes.jpg"))
    draw_bounding_boxes(right_image.copy(), right_boxes, os.path.join(output_dir, "right_boxes.jpg"))

    # Right boxes that already found a partner; a left box stops searching as
    # soon as it is matched, so no separate set is needed for the left side.
    matched_right_indices = set()

    sift = cv2.SIFT_create(nfeatures=12800)
    for i, left_box in enumerate(left_boxes):
        # Detector boxes may start at negative coordinates or extend past the
        # image; slicing with them directly produces empty or wrapped crops.
        left_region = _crop_box(left_image, left_box)

        for j, right_box in enumerate(right_boxes):
            if j in matched_right_indices:
                continue

            right_region = _crop_box(right_image, right_box)

            # Crop and normalize sizes
            left_crop, right_crop = crop_to_smaller(left_region, right_region)

            # Detect keypoints and descriptors
            keypoints1, descriptors1 = detect_and_show_keypoints(left_crop, sift)
            keypoints2, descriptors2 = detect_and_show_keypoints(right_crop, sift)

            if not keypoints1 or not keypoints2:  # If keypoints are empty, skip matching
                continue

            # Match keypoints
            filtered_matches = match_keypoints_sift_with_bf(left_crop, right_crop, keypoints1, descriptors1, keypoints2, descriptors2)

            # If there are good matches, draw and save
            if len(filtered_matches) > 8:
                # flags=2 is passed through unchanged from the original call
                img_matches = cv2.drawMatches(left_crop, keypoints1, right_crop, keypoints2, filtered_matches, None, flags=2)
                match_path = os.path.join(output_dir, f"matches_{i}_{j}.jpg")
                # Report success only after the file has actually been written
                if cv2.imwrite(match_path, img_matches):
                    print(f"matches_{i}_{j}.jpg saved")
                else:
                    print(f"Failed to write {match_path}")

                # Mark the right box as used and move on to the next left box
                matched_right_indices.add(j)
                break


# Main Execution
if __name__ == "__main__":
    # Stereo pair to process and the folder that receives all output images
    left_path, right_path = "IMG_7998.jpg", "IMG_7999.jpg"
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    # YOLO model files
    yolo_cfg, yolo_weights, yolo_names = "yolov4.cfg", "yolov4.weights", "coco.names"
    yolo_model = load_yolo_model(yolo_cfg, yolo_weights, yolo_names)

    process_stereo_images(left_path, right_path, yolo_model, output_dir)


Indices: [6 2]
Indices: [4 8]
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.
Error: Empty image passed to keypoint detection.


In [None]:
import numpy as np
from tabulate import tabulate

# Example input: Increase the number of points as needed
# Four parallel lists: entry i of each belongs to the same correspondence,
# pairing (img1u[i], img1v[i]) with (img2u[i], img2v[i]).
# Presumably pixel coordinates of matched keypoints in the two images — TODO confirm provenance.
# NOTE(review): entries 13 and 14 are identical in all four lists, i.e. one
# correspondence is present twice.
img1u = [6.407395839691162, 23.99433135986328, 36.878849029541016, 39.815406799316406, 57.57866668701172, 80.62291717529297, 85.26073455810547, 88.87594604492188, 113.35128021240234, 115.13031005859375, 120.58953094482422, 123.75055694580078, 125.09960174560547, 152.86256408691406, 152.86256408691406, 152.93055725097656, 153.6582794189453, 156.66094970703125, 161.39122009277344, 167.87033081054688, 174.47418212890625, 182.8557586669922, 183.5654754638672, 184.22274780273438, 196.97976684570312, 198.85235595703125, 227.01231384277344, 231.24362182617188, 246.11671447753906]
img1v = [462.1249084472656, 454.4391174316406, 432.43865966796875, 103.53443908691406, 373.8476257324219, 236.81967163085938, 239.8443145751953, 390.337890625, 235.67897033691406, 231.36708068847656, 238.24728393554688, 492.7323913574219, 103.16875457763672, 471.3157958984375, 471.3157958984375, 434.3181457519531, 466.6029357910156, 285.0070495605469, 450.04693603515625, 400.6963806152344, 419.75213623046875, 493.987548828125, 480.3666076660156, 422.72467041015625, 50.67970657348633, 408.5749816894531, 68.35357666015625, 73.46319580078125, 111.0586166381836]
img2u = [3.691209077835083, 14.697043418884277, 24.386083602905273, 61.63813018798828, 37.69493865966797, 95.7635726928711, 101.94422912597656, 93.55023956298828, 137.67079162597656, 138.65699768066406, 145.7137908935547, 136.86985778808594, 91.52183532714844, 169.7540740966797, 169.7540740966797, 169.84263610839844, 171.1566925048828, 188.53741455078125, 172.08990478515625, 178.7653350830078, 187.25868225097656, 194.6011962890625, 195.2294464111328, 198.1847381591797, 197.99534606933594, 208.58331298828125, 224.09152221679688, 230.73944091796875, 219.5247802734375]
img2v = [484.1200866699219, 477.37652587890625, 452.7850646972656, 103.78766632080078, 388.2860107421875, 242.76962280273438, 246.01145935058594, 396.0755310058594, 240.02403259277344, 234.33604431152344, 242.39105224609375, 503.11505126953125, 101.1902084350586, 476.84912109375, 476.84912109375, 439.535400390625, 472.33221435546875, 286.5150146484375, 454.1869812011719, 406.70880126953125, 422.5718688964844, 498.4800720214844, 484.67431640625, 424.1497802734375, 44.91103744506836, 408.0655822753906, 58.360595703125, 68.58309173583984, 104.48462677001953]

# Number of correspondences; the other three coordinate lists must match it
n = len(img1u)
assert all(len(coords) == n for coords in (img1v, img2u, img2v)), "Mismatch in number of points."

# Build the n x 9 matrix A column by column. Row i holds
# [u1*u2, v1*u2, u2, u1*v2, v1*v2, v2, u1, v1, 1] for correspondence i, where
# (u1, v1) comes from the img1 lists and (u2, v2) from the img2 lists.
u1, v1, u2, v2 = (
    np.asarray(coords, dtype=np.float64)
    for coords in (img1u, img1v, img2u, img2v)
)
A = np.column_stack((
    u1 * u2,
    v1 * u2,
    u2,
    u1 * v2,
    v1 * v2,
    v2,
    u1,
    v1,
    np.ones(n, dtype=np.float64),
))

# Display matrix A
print("Matrix A:")
print(tabulate(A))

# Decompose A; the last row of VT is the right singular vector belonging to
# the smallest singular value and is taken as the solution vector f.
U, Sigma, VT = np.linalg.svd(A)
f = VT[-1, :]
print("\nf (last row of VT):", f)

# Arrange the nine entries of f row by row into the 3x3 matrix F
F = np.reshape(f, (3, 3))
print("\nFundamental Matrix F:", F, sep="\n")

# Enforce rank 2: decompose F, zero its smallest (last) singular value and
# multiply the factors back together.
U, Sigma, VT = np.linalg.svd(F)
Sigma[-1] = 0
F2 = U @ (np.diag(Sigma) @ VT)
print("\nRank-2 Forced Fundamental Matrix F2:", F2, sep="\n")

# Use calibration matrix K (as in the original code)
# NOTE(review): the diagonal entries K[0, 0] and K[1, 1] differ by roughly a
# factor of 9 — confirm the calibration values.
K = np.array([
    [9.25692841e+03, 0, 4.58239711e+02],
    [0, 8.37883743e+04, 3.59148084e+02],
    [0, 0, 1]
])

# K^T K is still computed and printed as before, but it is no longer used to
# form E (see below).
KTK = np.matmul(K.T, K)
print("\nKT * K:")
print(KTK)

# Compute the essential matrix.
# With pixel coordinates x = K x_n and the constraint x2^T F x1 = 0, the
# normalised constraint is x_n2^T (K^T F K) x_n1 = 0, so E = K^T F K when both
# views share the same K. Multiplying (K^T K) by F, as done previously, does
# not give an essential matrix.
E = np.matmul(K.T, np.matmul(F2, K))
print("\nEssential Matrix E:")
print(E)

# Verify ranks
F2Rank = np.linalg.matrix_rank(F2)
ERank = np.linalg.matrix_rank(E)
print("\nRanks:")
print("Rank of F2:", F2Rank)
print("Rank of E:", ERank)

# Recover the relative pose (R, t) from E and triangulate the correspondences.
#
# For E = [t]x R with SVD E = U diag(s, s, 0) V^T, the translation is the last
# column of U (up to sign) and the rotation is U W V^T or U W^T V^T. That gives
# four candidate poses; the valid one is the pose that places the triangulated
# points in front of both cameras. The first camera is fixed at P1 = K [I | 0].
U, Sigma, VT = np.linalg.svd(E)

# E is only defined up to sign, so the factors may be flipped freely; forcing
# det = +1 on both makes every candidate below a proper rotation.
if np.linalg.det(U) < 0:
    U = -U
if np.linalg.det(VT) < 0:
    VT = -VT
W = np.array([
    [0.0, -1.0, 0.0],
    [1.0, 0.0, 0.0],
    [0.0, 0.0, 1.0],
])


def _triangulate(P_a, P_b, points_a, points_b):
    """Triangulate each correspondence linearly (DLT) from two 3x4 projection matrices.

    Each view contributes two equations per point, so the 4x4 system pins the
    homogeneous 3D point down up to scale. A single view would leave it
    underdetermined.
    """
    points_3d = []
    for (ua, va), (ub, vb) in zip(points_a, points_b):
        system = np.array([
            ua * P_a[2, :] - P_a[0, :],
            va * P_a[2, :] - P_a[1, :],
            ub * P_b[2, :] - P_b[0, :],
            vb * P_b[2, :] - P_b[1, :],
        ])
        X_h = np.linalg.svd(system)[2][-1]
        # A point at infinity has a zero last coordinate; let it become
        # inf/nan quietly instead of emitting a warning per point.
        with np.errstate(divide="ignore", invalid="ignore"):
            points_3d.append(X_h[:3] / X_h[3])
    return np.array(points_3d, dtype=np.float64).reshape(-1, 3)


points1 = list(zip(img1u, img1v))
points2 = list(zip(img2u, img2v))
P1 = np.matmul(K, np.hstack((np.eye(3), np.zeros((3, 1)))))

# Cheirality test: keep the candidate with the most points at positive depth
# in both cameras.
best_pose = None
for R_candidate in (U @ W @ VT, U @ W.T @ VT):
    for t_candidate in (U[:, 2], -U[:, 2]):
        P2 = np.matmul(K, np.hstack((R_candidate, t_candidate.reshape(3, 1))))
        candidate_points = _triangulate(P1, P2, points1, points2)
        depth_cam1 = candidate_points[:, 2]
        depth_cam2 = (candidate_points @ R_candidate.T + t_candidate)[:, 2]
        in_front = int(np.sum((depth_cam1 > 0) & (depth_cam2 > 0)))
        if best_pose is None or in_front > best_pose[0]:
            best_pose = (in_front, R_candidate, t_candidate, candidate_points)

_, R, t, points_3d = best_pose

print("\nTranslation Vector t:")
print(t)

# Construct skew-symmetric matrix for t
tX = np.array([
    [0, -t[2], t[1]],
    [t[2], 0, -t[0]],
    [-t[1], t[0], 0]
])
print("\n[t]x (Skew-Symmetric Matrix):")
print(tX)

print("\nRotation Matrix R:")
print(R)

# Verify R is a valid rotation matrix (determinant +1, R R^T = I)
print("\nDeterminant of R:", np.linalg.det(R))
print("R * R^T:")
print(np.matmul(R, R.T))

# The recovered pose is used as is; Rnew/tnew are kept as names for
# compatibility but no longer carry a sign flip or a hand-tuned offset.
Rnew = R
tnew = t.reshape(3, 1)
Rt = np.hstack((Rnew, tnew))
print("Extrinsic Matrix:\n",Rt)

P = np.matmul(K, Rt)
print("Paramter Matrix:\n",P)

imagepoints = np.vstack((img2u, img2v, np.ones_like(img2u)))
print(imagepoints)

# One 3-vector per correspondence, expressed in the first camera's frame
Xlist = list(points_3d)

print(Xlist)

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Stack the triangulated points into one array with a row per point
Xlist2 = list(map(np.array, Xlist))
Xlist2array = np.array(Xlist2)

# Scatter the three coordinate columns on a 3D axis
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(*(Xlist2array[:, axis] for axis in range(3)), c='b', marker='o')
for set_label, axis_name in ((ax.set_xlabel, 'X'), (ax.set_ylabel, 'Y'), (ax.set_zlabel, 'Z')):
    set_label(axis_name)
plt.show()