In [None]:
import os
import sys
from copy import deepcopy

project_dir = os.path.dirname(os.getcwd())
print(project_dir)
sys.path.append(project_dir)

import matplotlib.pyplot as plt
import numpy as np
from omegaconf import OmegaConf


import torch
from torchvision.transforms import ToPILImage, ToTensor, Normalize
from training.dataset.transforms import ComposeAPI, NormalizeAPI
from tqdm import tqdm

from helpers.configurations import TRACK_TO_METAINFO
from dataset.collate_fn import collate_fn
from dataset.mini_dataset import MiniDataset
from debugging.show import *
from custom_model_builder import build_sam2_predict

In [None]:
# Dataset configuration
object_labels = [10]
len_video = 1
input_image_size = 512
batch_size = 1
shuffle = False

# Per-channel statistics used by the normalisation transform of the dataset.
mean = [0.3551, 0.3500, 0.3469]
std = [0.2921, 0.2716, 0.2742]
transforms = [ComposeAPI([NormalizeAPI(mean=mean, std=std, v2=True)])]

# Inverse of the normalisation above, derived from `mean` / `std` so the two can
# never drift apart: Normalize(-m/s, 1/s) maps (x - m) / s back to x.
revert_mean = [-m / s for m, s in zip(mean, std)]
revert_std = [1 / s for s in std]
revert_transform = Normalize(mean=revert_mean, std=revert_std)

test_dataset = MiniDataset('over_train',
                           num_frames=len_video,
                           input_image_size=input_image_size,
                           object_labels=object_labels,
                           transforms=transforms,
                           collate_fn=collate_fn,
                           batch_size=batch_size,
                           shuffle=shuffle,
                           get_seg_mask=True)
print(f'Length of the dataset: {len(test_dataset)}')

In [None]:
# Seed the torch and NumPy global random generators so the cells below are repeatable.
seed = 123  # Check seed 123 index 19966
torch.manual_seed(seed)
np.random.seed(seed)

# Number of requested object labels and a tensor -> PIL converter.
# NOTE(review): neither name is used by the cells visible in this notebook.
len_objects = len(object_labels)
toPILimage = ToPILImage()
# Dataset index of the first of the three consecutive samples inspected below.
iid = 10000

# Earlier single-sample access, kept for reference:
# frame_obj_list, frames_segmentation_mask = test_dataset[idx]
# image = frame_obj_list.frames[0].data

In [None]:
# Report the dataset size and the first three entries of `test_dataset.images`.
len_data = len(test_dataset)
print(f'Length of the dataset: {len_data}')

# OLD SLICE (inspected the entries around the first third of the dataset)
# camera_switch = (len_data//3, 2*len_data//3)
# print(test_dataset.images[0], '\n', test_dataset.images[camera_switch[0]-1], '\n', test_dataset.images[camera_switch[0]])

# NEW SLICE (inspect three consecutive entries)
# NOTE(review): presumably consecutive entries are the different camera views of
# one timestep — confirm against the printed values.
print(test_dataset.images[0], '\n',test_dataset.images[1], '\n',test_dataset.images[2])

In [None]:
# Fetch the three consecutive samples starting at index `iid`; they are treated
# below as the cam1 / cam4 / cam5 views.
# NOTE(review): assumes consecutive dataset entries are the three camera views of
# the same timestep — confirm against the listing printed in the previous cell.

# __getitem__ returns a pair: 0) VideoDataBatch 1) Segmentation mask.
# Only the video batch is used here.
cam1_vid = test_dataset[iid][0]
cam4_vid = test_dataset[iid + 1][0]
cam5_vid = test_dataset[iid + 2][0]

# Image tensor of the first frame of every clip (still normalised by the dataset transform).
cam1 = cam1_vid.frames[0].data
cam4 = cam4_vid.frames[0].data
cam5 = cam5_vid.frames[0].data

# Segmentation of the first object in the first frame of every clip.
gt_mask1 = cam1_vid.frames[0].objects[0].segment
gt_mask4 = cam4_vid.frames[0].objects[0].segment
gt_mask5 = cam5_vid.frames[0].objects[0].segment

print(gt_mask1.shape)
print(gt_mask1.dtype)

In [None]:
import matplotlib.pyplot as plt

# Create figure and subplots: top row RGB views, bottom row ground-truth masks.
fig, axes = plt.subplots(2, 3, figsize=(10, 6))

# Undo the dataset normalisation and move the channel axis last for imshow.
# The images are always rebuilt from the untouched frame tensors, so re-running
# this cell does not de-normalise / permute an already converted image.
cam1 = revert_transform(cam1_vid.frames[0].data).permute(1, 2, 0)
cam4 = revert_transform(cam4_vid.frames[0].data).permute(1, 2, 0)
cam5 = revert_transform(cam5_vid.frames[0].data).permute(1, 2, 0)

# One column per camera: (image, image title, mask, mask title).
panels = [
    (cam1, "CAM 1", gt_mask1, "GT MASK 1"),
    (cam4, "CAM 4", gt_mask4, "GT MASK 4"),
    (cam5, "CAM 5", gt_mask5, "GT MASK 5"),
]
for col, (image, image_title, mask, mask_title) in enumerate(panels):
    axes[0, col].imshow(image)
    axes[0, col].set_title(image_title)
    axes[1, col].imshow(mask, cmap='gray')
    axes[1, col].set_title(mask_title)

# Remove axes for a cleaner look
for ax in axes.ravel():
    ax.axis('off')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
import os
import json

import cv2
import numpy as np
from scipy.spatial.transform import Rotation

def skew(t):
    """Build the 3x3 skew-symmetric (cross-product) matrix of a 3-vector.

    Args:
        t: indexable with at least three entries; only ``t[0]``, ``t[1]`` and
            ``t[2]`` are read.

    Returns:
        ``np.ndarray`` of shape (3, 3) such that ``skew(t) @ v`` equals the
        cross product ``t x v`` for a 3-vector ``v``.
    """
    tx, ty, tz = t[0], t[1], t[2]
    rows = [
        [0, -tz, ty],
        [tz, 0, -tx],
        [-ty, tx, 0],
    ]
    return np.array(rows)

In [None]:
def compute_epipolar_lines(F, pts1):
    """Map 2-D points through a 3x3 matrix ``F`` to line coefficients.

    Args:
        F: 3x3 matrix applied to the homogeneous points.
        pts1: array of shape (N, 2) holding one ``(x, y)`` point per row.

    Returns:
        Array of shape (N, 3); row ``i`` is ``F @ [x_i, y_i, 1]``, i.e. the
        coefficients ``[a, b, c]`` of the line ``a*x + b*y + c = 0``.
    """
    ones_column = np.ones((pts1.shape[0], 1))
    homogeneous = np.concatenate([pts1, ones_column], axis=1)
    return (F @ homogeneous.T).T

def compute_epipolar_lines_with_E(E, pts1, K, K_inv):
    """Map 2-D points through ``E @ K_inv`` to line coefficients.

    Args:
        E: 3x3 matrix applied after ``K_inv``.
        pts1: array of shape (N, 2) holding one ``(x, y)`` point per row.
        K: accepted for interface compatibility; not used by this function.
        K_inv: 3x3 matrix applied to the homogeneous points first.

    Returns:
        Array of shape (N, 3); row ``i`` is ``E @ K_inv @ [x_i, y_i, 1]``.
        No further ``K``-dependent mapping is applied to the result, so if
        ``E`` is an essential matrix and ``K_inv`` an inverse intrinsics
        matrix the lines are not expressed in pixel coordinates.
    """
    ones_column = np.ones((pts1.shape[0], 1))
    homogeneous = np.concatenate([pts1, ones_column], axis=1)
    return (E @ K_inv @ homogeneous.T).T

In [None]:
def draw_epilines(img2, lines, K=None, color=(255, 0, 0)):
    """Draw the lines ``a*x + b*y + c = 0`` onto ``img2`` and return the image.

    Args:
        img2: image array handed to ``cv2.line``; ``img2.shape[:2]`` is read as
            (height, width).
        lines: rows of ``[a, b, c]`` line coefficients.
        K: optional 3x3 intrinsics matrix. When ``None`` the lines are taken to
            be in pixel coordinates. When given, the lines are taken to be in
            normalised camera coordinates and are first mapped to pixel
            coordinates with ``K^-T``.
        color: colour passed to ``cv2.line``.

    Returns:
        The image returned by the last ``cv2.line`` call, or ``img2`` unchanged
        when no line could be drawn.
    """
    h, w = img2.shape[:2]
    lines = np.asarray(lines, dtype=float).reshape(-1, 3)

    if K is not None:
        # x_pix = K @ x_norm, hence l_norm . x_norm = 0  <=>  (K^-T l_norm) . x_pix = 0.
        # Row-wise this is l^T @ K^-1.
        lines = lines @ np.linalg.inv(K)

    for a, b, c in lines:
        # Skip coefficients that do not describe a line.
        if not np.all(np.isfinite((a, b, c))) or (a == 0 and b == 0):
            continue
        if abs(b) >= abs(a):
            # Mostly horizontal: intersect with the left and right image borders.
            p0 = (0, int(round(float(-c / b))))
            p1 = (int(w), int(round(float(-(c + a * w) / b))))
        else:
            # Mostly vertical (including b == 0): intersect with top and bottom borders.
            p0 = (int(round(float(-c / a))), 0)
            p1 = (int(round(float(-(c + b * h) / a))), int(h))
        img2 = cv2.line(img2, p0, p1, color, 2)
    return img2

In [None]:
def load_camera_data(camera_data):
    """Extract the intrinsics matrix and a 4x4 pose matrix from a calibration dict.

    Args:
        camera_data: parsed calibration JSON. Reads
            ``['value0']['color_parameters']['intrinsics_matrix']`` (keys
            ``m00, m10, m20, m01, m11, m21``) and ``['value0']['camera_pose']``
            (``translation`` with keys ``m00, m10, m20`` and ``rotation`` with
            quaternion keys ``x, y, z, w``).

    Returns:
        ``(K, extrinsics)``: ``K`` is the 3x3 matrix
        ``[[m00, m10, m20], [m01, m11, m21], [0, 0, 1]]``; ``extrinsics`` is a
        float32 4x4 matrix with the quaternion's rotation matrix in the
        top-left 3x3 block and ``[m00, m10, m20, 1]`` in the last column.
    """
    intr = camera_data['value0']['color_parameters']['intrinsics_matrix']
    top_row = [intr['m00'], intr['m10'], intr['m20']]
    mid_row = [intr['m01'], intr['m11'], intr['m21']]
    K = np.asarray([top_row, mid_row, [0, 0, 1]])

    pose = camera_data['value0']['camera_pose']
    shift = pose['translation']
    quat = pose['rotation']

    extrinsics = np.zeros((4, 4), dtype=np.float32)
    # scipy expects the scalar-last (x, y, z, w) quaternion order.
    quat_xyzw = [quat['x'], quat['y'], quat['z'], quat['w']]
    extrinsics[:3, :3] = Rotation.from_quat(quat_xyzw).as_matrix()
    extrinsics[:, 3] = [shift['m00'], shift['m10'], shift['m20'], 1]

    return K, extrinsics

In [None]:
# Load the calibration of cameras 01, 04 and 05 for one take.
# NOTE(review): `take_path` is an absolute, machine-specific path; the notebook
# only runs where this directory exists.
take_path = '/home/polyaxon-data/data1/MM-OR_processed/001_PKA/'
camera_files = ['camera01.json', 'camera04.json', 'camera05.json']
# One (intrinsics, extrinsics) tuple per entry of `camera_files`, in the same order.
camera_int_ext = []
for json_file in camera_files:
    with open(os.path.join(take_path, json_file), 'r') as f:
        camera_data = json.load(f)
    intr, ext = load_camera_data(camera_data)
    camera_int_ext.append((intr, ext))

In [None]:
# Relative pose between the first two loaded cameras (camera01 and camera04).
K0, ext0 = camera_int_ext[0]
K1, ext1 = camera_int_ext[1]

# Rotation / translation parts of each 4x4 matrix (only referenced by the
# commented-out sanity check below).
R0, t0 = ext0[:3, :3], ext0[:3, 3]
R1, t1 = ext1[:3, :3], ext1[:3, 3]

# Main computation
# NOTE(review): ext1 @ inv(ext0) maps camera-0 coordinates to camera-1
# coordinates only if each matrix maps world -> camera; if `camera_pose` stores
# camera -> world the product must be inv(ext1) @ ext0 — confirm the convention.
ext = ext1 @ np.linalg.inv(ext0)
R, t = ext[:3, :3], ext[:3, 3]
# print('R:', R)
# print('t:', t)

# Sanity Check (works fine): closed form of the product above
# R = R1 @ R0.T
# t = t1 - R @ t0
# print('R:', R)
# print('t:', t)

# Check Calibration Matrices
# print('K0:', K0)
# print('K1:', K1)
# Only the first camera's intrinsics are kept under the name `K`.
K = K0

In [None]:
# Pixel coordinates (x, y) of the query points in the first view.
pts1 = np.array([[100, 100], [200, 200], [300, 300], [400, 400], [500, 500]])

# Essential matrix of the relative pose (R, t): E = [t]_x @ R.
E = skew(t) @ R

# Fundamental matrix mapping first-view pixels to second-view epipolar lines:
#   F = K1^-T @ E @ K0^-1
# `K` is the first camera's intrinsics (K0); the second view needs its own
# intrinsics K1 on the left-hand side.
inv_K = np.linalg.inv(K)
inv_K1 = np.linalg.inv(K1)
F = inv_K1.T @ E @ inv_K
lines2 = compute_epipolar_lines(F, pts1)

# Same lines via the essential matrix (expressed before the second view's
# intrinsics are applied); draw_epilines applies the intrinsics when drawing.
lines2_E = compute_epipolar_lines_with_E(E, pts1, K, inv_K)
# lines2 = cv2.computeCorrespondEpilines(pts1.reshape(-1, 1, 2), 1, F).squeeze(1)
# lines2 = lines2_self

# NOTE(review): assumes the intrinsics match the resolution of the displayed
# image — confirm, since the dataset is built with input_image_size=512.
img2_with_lines = draw_epilines(cam4.numpy().copy(), lines2_E, K=K1)

In [None]:
# Print both sets of line coefficients (one [a, b, c] row per point in pts1)
# for a side-by-side comparison of the F-based and E-based results.
print(lines2)
print(lines2_E)
# print(lines2_self)
# Debug helper: draw a diagonal to check that cv2.line works on the image copy.
# img2 = cam4.numpy().copy()
# img2_with_lines = cv2.line(img2, (0, 0), (512, 512), (255, 0, 0), 2)

In [None]:
# Display the image with feature points
# `cam1` must already be the de-normalised, channel-last tensor produced by the
# plotting cell above; this cell depends on that cell having been run.
img1_with_points = cam1.numpy().copy()
plt.imshow(img1_with_points)
# pts1 columns are used as (x, y) pixel positions.
plt.scatter(pts1[:, 0], pts1[:, 1], c='r', s=10)
plt.title("Feature Points")
plt.axis('off')
plt.show()

# Display the image with epipolar lines
plt.imshow(img2_with_lines)
plt.title("Epipolar Lines")
plt.axis('off')
plt.show()


# Alternative display through an OpenCV window (disabled):
# cv2.imshow('Epipolar Lines', img2_with_lines)
# cv2.waitKey(10)
# cv2.destroyAllWindows()