In [None]:
#!pip install torch torchvision torchaudio
#!pip install scikit-image

In [None]:
import zipfile
import os
import tempfile
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image 
from tabulate import tabulate
from io import BytesIO
from zipfile import ZipFile

import torch
import torchvision.transforms as T
from torchvision.models.segmentation import deeplabv3_resnet101

In [None]:
def extract_depthmap(zip_fpath, extract_dir):
    """
    Extracts the zip archive and returns the path of its first file member.

    Args:
    - zip_fpath (str): Path to the zip archive holding the depth map.
    - extract_dir (str): Directory the archive members are extracted into.

    Returns:
    - str: Path, inside `extract_dir`, of the first non-directory member of
      the archive (in archive order).

    Raises:
    - ValueError: If the archive contains no file members.
    """
    first_file = None
    with ZipFile(zip_fpath, 'r') as zip_ref:
        # Take the result from the archive listing instead of os.listdir():
        # listdir order is arbitrary and it would also return files that
        # were already present in extract_dir before extraction.
        for member in zip_ref.infolist():
            extracted_path = zip_ref.extract(member, extract_dir)
            if first_file is None and not member.is_dir():
                first_file = extracted_path
    if first_file is None:
        raise ValueError(f'No files found in archive: {zip_fpath}')
    return first_file

def parse_header(header_line):
    """
    Splits a depth-map header line into its typed fields.

    The header is a single '_'-separated line: '<width>x<height>', the depth
    scale, the maximum confidence, and then any number of device-pose values.

    Args:
    - header_line (str): Header line, e.g. '180x135_0.001_7_0.57_-0.005'.

    Returns:
    - tuple: (width, height, depth_scale, max_confidence, device_pose), where
      width, height and max_confidence are ints, depth_scale is a float and
      device_pose is a tuple of floats (empty if no pose values are present).
    """
    fields = header_line.split('_')

    # First field carries the resolution as '<width>x<height>'.
    resolution = fields[0].split('x')
    width, height = (int(value) for value in resolution)

    depth_scale = float(fields[1])
    max_confidence = int(fields[2])

    # Everything after the third field is treated as the device pose.
    device_pose = tuple(float(value) for value in fields[3:])

    return width, height, depth_scale, max_confidence, device_pose

def read_depthmap_data(depthmap_fpath):
    """
    Reads a zipped depth map: parses its header line and returns the raw payload.

    The archive is extracted into a temporary directory that is removed again
    before this function returns; only in-memory values are handed back.

    Args:
    - depthmap_fpath (str): Path to the zipped depth map file.

    Returns:
    - tuple: (width, height, data, depth_scale, max_confidence, device_pose,
      header_line), where `data` is every byte following the header line and
      `header_line` is the decoded, stripped first line of the file.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        path = extract_depthmap(depthmap_fpath, tmpdirname)
        # The `with` block closes the file on exit; no explicit close() needed.
        with open(path, 'rb') as f:
            header_line = f.readline().decode().strip()
            data = f.read()
    width, height, depth_scale, max_confidence, device_pose = parse_header(header_line)
    return width, height, data, depth_scale, max_confidence, device_pose, header_line

def parse_depth_data(data, width, height, depth_scale) -> np.ndarray:
    """
    Decodes the raw depth payload into a (height, width) array of scaled depths.

    Each pixel occupies 3 bytes; the first two form a big-endian 16-bit raw
    depth and the third byte is not used here. Rows are flipped vertically
    (output row y comes from payload row height - y - 1), and the first output
    row and first output column are left at 0, matching parse_depth().

    The decode is vectorised with NumPy instead of making one Python call per
    pixel.

    Args:
    - data (bytes): Raw payload following the header line.
    - width (int): Depth map width in pixels.
    - height (int): Depth map height in pixels.
    - depth_scale (float): Factor applied to the raw 16-bit depth values.

    Returns:
    - numpy.ndarray: float64 array of shape (height, width).

    Raises:
    - ValueError: If `data` holds fewer than width * height * 3 bytes.
    """
    output = np.zeros((height, width))
    # Row 0 and column 0 are always zero, so tiny maps have nothing to decode.
    if height < 2 or width < 2:
        return output

    raw = np.frombuffer(data, dtype=np.uint8, count=height * width * 3)
    raw = raw.reshape(height, width, 3)

    # Widen before shifting so the high byte does not overflow uint8.
    raw_depth = (raw[:, :, 0].astype(np.int64) << 8) + raw[:, :, 1]

    # [::-1] applies the vertical flip; [1:, 1:] keeps the zero border.
    output[1:, 1:] = raw_depth[::-1][1:, 1:] * depth_scale
    return output

def parse_depth(data: bytes, tx: int, ty: int, width, height, depth_scale) -> float:
    """Return the scaled depth at pixel (tx, ty); meters per the original docstring.

    Coordinates with tx < 1, ty < 1, tx >= width or ty >= height yield 0.
    Otherwise the pixel is looked up in the vertically flipped row
    (height - ty - 1), its first two bytes are combined as a big-endian
    16-bit value, and the result is multiplied by depth_scale.
    """
    out_of_bounds = tx < 1 or ty < 1 or tx >= width or ty >= height
    if out_of_bounds:
        return 0.

    flipped_row = height - int(ty) - 1
    # Each pixel is 3 bytes wide; only the first two carry the depth.
    offset = (flipped_row * width + int(tx)) * 3
    raw_depth = (data[offset] << 8) + data[offset + 1]
    return raw_depth * depth_scale

def read_rgb_data(rgb_fpath):
    """
    Loads an image, rotates it by -90 degrees and returns it as a NumPy array.

    Args:
    - rgb_fpath (str): Path to the image file. A falsy value (None or '')
      means there is no image to load.

    Returns:
    - numpy.ndarray or None: The rotated image converted with np.asarray, or
      None when `rgb_fpath` is falsy.
    """
    if not rgb_fpath:
        return None

    # The context manager releases the underlying file handle as soon as the
    # rotated copy has been produced.
    with Image.open(rgb_fpath) as pil_im:
        # expand=True lets the output canvas grow to hold the rotated image.
        rotated = pil_im.rotate(-90, expand=True)
    return np.asarray(rotated)

In [None]:
def plot_depth_and_rgb(depth_img, rgb_img, bmask):
    """
    Show the depth map, the RGB image and the binary mask in one row.

    Panels from left to right: depth map ('jet' colormap, color range fixed
    to 0..3), RGB image, binary mask (grayscale).

    Args:
    - depth_img (numpy.ndarray): The depth map.
    - rgb_img (numpy.ndarray): The RGB image.
    - bmask (numpy.ndarray): The binary mask.

    Returns:
    - None
    """
    fig, (depth_ax, rgb_ax, mask_ax) = plt.subplots(1, 3, figsize=(15, 8))

    # Draw the three images.
    rgb_ax.imshow(rgb_img)
    depth_ax.imshow(depth_img, cmap='jet', vmin=0, vmax=3)
    mask_ax.imshow(bmask, cmap='gray')

    # All panels share the same axis labels; only the title differs.
    titled_axes = (
        (depth_ax, 'Depth Map'),
        (rgb_ax, 'RGB Image'),
        (mask_ax, 'Binary Mask'),
    )
    for ax, title in titled_axes:
        ax.set_title(title)
        ax.set_xlabel('Width (pixels)')
        ax.set_ylabel('Height (pixels)')

    plt.tight_layout()
    plt.show()

In [None]:
# Lazily created DeepLabV3 model shared by every segment_rgb_image() call, so
# the network is not rebuilt and its weights are not reloaded per image.
_SEGMENTATION_MODEL = None


def _get_segmentation_model():
    """Return the pre-trained DeepLabV3 model in eval mode, building it on first use."""
    global _SEGMENTATION_MODEL
    if _SEGMENTATION_MODEL is None:
        # NOTE: `pretrained=True` is the legacy torchvision argument; newer
        # releases prefer `weights=...`. Kept so older installs keep working.
        model = deeplabv3_resnet101(pretrained=True)
        model.eval()
        _SEGMENTATION_MODEL = model
    return _SEGMENTATION_MODEL


def segment_rgb_image(rgb_image, threshold=0.5):
    """
    Segment an RGB image with a pre-trained DeepLabV3 (ResNet-101) model.

    Args:
    - rgb_image: Image accepted by torchvision's ToTensor transform
      (the notebook passes the array returned by read_rgb_data).
    - threshold (float): Compared against the per-pixel argmax *class index*,
      not against a probability. With the default 0.5 a pixel is True
      whenever its top-scoring class index is >= 1 (presumably anything
      other than background; confirm against the model's label set).

    Returns:
    - numpy.ndarray: Boolean mask, one value per pixel of the model output.
    """
    model = _get_segmentation_model()

    # Preprocess the image: tensor conversion plus per-channel normalisation.
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_image = transform(rgb_image).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        # 'out' holds the per-class scores; [0] drops the batch dimension.
        predictions = model(input_image)['out'][0]

    # Pick the best class per pixel, then threshold the class index.
    binary_mask = (predictions.argmax(0) > threshold).cpu().numpy()

    return binary_mask

def plot_segmented_rgb_and_depth(rgb_img, depth_img, bmask):
    """
    Mask the RGB image and the depth map, plot both, and return the results.

    Every pixel where `bmask` equals 0 is set to 0 in copies of the inputs;
    the inputs themselves are not modified.

    Args:
    - rgb_img (numpy.ndarray): The RGB image.
    - depth_img (numpy.ndarray): The depth map.
    - bmask (numpy.ndarray): The binary mask.

    Returns:
    - segmented_rgb_img (numpy.ndarray): Segmented RGB image with the mask applied.
    - segmented_depth_img (numpy.ndarray): Segmented depth map with the mask applied.
    """
    # Work on copies so the caller's arrays stay untouched.
    segmented_rgb_img = rgb_img.copy()
    segmented_depth_img = depth_img.copy()

    background = bmask == 0
    segmented_rgb_img[background] = 0
    segmented_depth_img[background] = 0

    fig, (rgb_ax, depth_ax) = plt.subplots(1, 2, figsize=(12, 6))

    rgb_ax.imshow(segmented_rgb_img)
    depth_im = depth_ax.imshow(segmented_depth_img, cmap='jet', vmin=0, vmax=3)

    # Both panels share the same axis labels; only the title differs.
    for ax, title in ((rgb_ax, 'Segmented RGB Image'), (depth_ax, 'Segmented Depth Map')):
        ax.set_title(title)
        ax.set_xlabel('Width (pixels)')
        ax.set_ylabel('Height (pixels)')

    plt.colorbar(depth_im, ax=depth_ax, label='Depth (meters)')

    plt.tight_layout()
    plt.show()

    return segmented_rgb_img, segmented_depth_img

In [None]:
local_zip_path = 'scans_realsence_direct_new/a58f8226-347a-11ef-bdc2-ff2dff52e82b/depth/2'
rgb_file_path = 'scans_realsence_direct_new/a58f8226-347a-11ef-bdc2-ff2dff52e82b/rgb/2'

# Open the local zip file. The handle is named `zip_ref` (not `zipfile`) so it
# does not rebind the `zipfile` module imported at the top of the notebook.
with ZipFile(local_zip_path, 'r') as zip_ref:
    with zip_ref.open('data') as f:
        # Example for a first_line: '180x135_0.001_7_0.57045287_-0.0057296_0.0022602521_0.82130724_-0.059177425_0.0024800065_0.030834956'
        first_line = f.readline().decode().strip()

        file_header = first_line.split("_")

        # file_header[0] example: 180x135
        width, height = file_header[0].split("x")
        width, height = int(width), int(height)
        depth_scale = float(file_header[1])
        max_confidence = float(file_header[2])

        # Everything after the header line is the raw depth payload.
        depth_data = f.read()

# Convert the depth data from bytes to a numpy array and reshape it:
# one native-endian uint16 per pixel, rotated and then scaled by depth_scale.
depth = np.frombuffer(depth_data, dtype=np.uint16).reshape(height, width)
depth = np.rot90(depth, k=-1)
depth = depth * depth_scale
rgb = read_rgb_data(rgb_file_path)

print(f"Width: {width}, Height: {height}")
print(f"Depth Scale: {depth_scale}")
print(f"Max Confidence: {max_confidence}")
print(f"Depth Data Array:\n{depth}")

# Segment the RGB image and turn the result into a 0/1 uint8 mask.
seg_origin = segment_rgb_image(rgb)
bmask_origin = (seg_origin != 0).astype(np.uint8)

In [None]:
# Show depth map, RGB image and mask side by side, then plot the masked
# RGB/depth pair and keep the masked arrays for later use.
plot_depth_and_rgb(depth, rgb, bmask_origin)
segmented_rgb, segmented_depth = plot_segmented_rgb_and_depth(rgb, depth, bmask_origin)

In [None]:
depth_file_path = 'scans_realsence_align/348bc7e0-2982-11ef-a860-fbf9155790cc/depth/4'
rgb_file_path = 'scans_realsence_align/348bc7e0-2982-11ef-a860-fbf9155790cc/rgb/4'

# Load the zipped depth map (header fields + raw payload) and the RGB image.
(width, height, data, depth_scale,
 max_confidence, device_pose, header_line) = read_depthmap_data(depth_file_path)
rgb_lubo = read_rgb_data(rgb_file_path)

# Report the parsed header fields.
print(f"Width: {width}")
print(f"Height: {height}")
print(f"Depth Scale: {depth_scale}")
print(f"Max Confidence: {max_confidence}")
print(f"Device Pose: {device_pose}")
print(f"Header Line: {header_line}")

# Decode the payload, rotate it, then mirror it left-to-right.
depth_lubo = np.rot90(parse_depth_data(data, width, height, depth_scale), k=-1)[:, ::-1]

# Segment the RGB image and turn the result into a 0/1 uint8 mask.
seg_lubo = segment_rgb_image(rgb_lubo)
bmask_lubo = (seg_lubo != 0).astype(np.uint8)

In [None]:
# Show depth map, RGB image and mask side by side, then plot the masked
# RGB/depth pair and keep the masked arrays for later use.
plot_depth_and_rgb(depth_lubo, rgb_lubo, bmask_lubo)
segmented_rgb_lubo, segmented_depth_lubo = plot_segmented_rgb_and_depth(rgb_lubo, depth_lubo, bmask_lubo)