In [1]:
# Load image splits
from random import shuffle

# --- Configuration -----------------------------------------------------------
IMAGE_SPLIT_FILES = ["train", "test", "val"]
DOWNSIZE_SIZE = 3
SHUFFLE_BEFORE_SAVING = True
BATCH_SIZE = 128
IMAGE_USED_LABELS = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 17, 18, 19, 23, 27, 31, 33, 34]
IMAGE_MAX_LABELS = 35
IMG_WIDTH = 1920
IMG_HEIGHT = 1200

DEPTH_MAX = 20


def _read_split_entries(subset_name):
    """Parse ``Rellis-3D/<subset_name>.lst`` into ``(key, image, label)`` tuples.

    Every non-blank line is expected to hold two space-separated relative
    paths.  ``key`` is the integer parsed from the text before the first
    ``/`` of the line; both paths are prefixed with ``Rellis-3D/``.

    Blank lines (e.g. a trailing newline at the end of the file) are skipped
    instead of crashing on ``int("")``.
    """
    entries = []
    with open("Rellis-3D/%s.lst" % subset_name) as f:
        for line in f:
            if not line.strip():
                continue
            key = int(line.split("/")[0].strip())
            files = ["Rellis-3D/%s" % part for part in line.split(" ")]
            entries.append((key, files[0].strip(), files[1].strip()))
    return entries


# splits[subset_name][key] -> list of (image_path, label_path) tuples.
# NOTE: the shuffle below is unseeded, so the per-key ordering differs between runs.
splits = {}

for subset_name in IMAGE_SPLIT_FILES:
    entries = _read_split_entries(subset_name)

    # Create the buckets *before* shuffling so that the key order follows the
    # order of first appearance in the list file.  Key -1 is always present and
    # stays empty here; later cells use -1 for the combined output files.
    grouped = {key: [] for key, _, _ in entries}
    grouped[-1] = []

    if SHUFFLE_BEFORE_SAVING:
        shuffle(entries)

    for key, img_path, label_path in entries:
        grouped[key].append((img_path, label_path))

    splits[subset_name] = grouped

In [2]:
# Add ply data
import re

# Matches the "Rellis-3D/<5 digits>" prefix of a dataset-relative path.
search_1 = r"Rellis-3D/\d\d\d\d\d"
# Matches a run of six digits that is immediately followed by a "-".
search_2 = r"\d\d\d\d\d\d(?=[-])"
os_folder = "/os1_cloud_node_kitti_bin/"
vel_folder = "/vel_cloud_node_kitti_bin/"


def _cloud_bin_name(img_name, cloud_folder):
    """Build ``<sequence prefix><cloud_folder><frame digits>.bin`` from an image path.

    Raises IndexError when either pattern does not occur in ``img_name``.
    """
    sequence_prefix = re.findall(search_1, img_name)[0]
    frame_digits = re.findall(search_2, img_name)[-1]
    return "".join((sequence_prefix, cloud_folder, frame_digits, ".bin"))


def os_ply_name(img_name):
    """Return the path of the ``os1_cloud_node_kitti_bin`` file that matches ``img_name``."""
    return _cloud_bin_name(img_name, os_folder)


def vel_ply_name(img_name):
    """Return the path of the ``vel_cloud_node_kitti_bin`` file that matches ``img_name``."""
    return _cloud_bin_name(img_name, vel_folder)

# Directory that every dataset-relative path is resolved against.
# Change this single constant when the dataset lives somewhere else.
RELLIS_ROOT = "/home/ian/Rellis/"


def _build_sample_record(index, files, root=RELLIS_ROOT):
    """Expand one ``(image_path, label_path)`` pair into the per-sample path record.

    Parameters
    ----------
    index : int
        Position of the sample inside its list; stored under ``"i"``.
    files : sequence
        ``files[0]`` is the dataset-relative image path and ``files[1]`` the
        dataset-relative label path.
    root : str
        Prefix prepended to every generated path.

    Returns
    -------
    dict
        Keys: ``i``, ``img``, ``img_segmented``, ``cam_intrinsic``,
        ``os_transform``, ``os_ply_file``, ``vel_transform``, ``vel_ply_file``.
    """
    img_rel, label_rel = files[0], files[1]
    # The sequence prefix is shared by the three calibration files, so it is
    # extracted once instead of once per key.
    sequence_dir = root + re.findall(search_1, img_rel)[0]
    return {
        "i": index,
        "img": root + img_rel,
        "img_segmented": root + label_rel,
        "cam_intrinsic": sequence_dir + "/camera_info.txt",
        "os_transform": sequence_dir + "/transforms.yaml",
        "os_ply_file": root + os_ply_name(img_rel),
        "vel_transform": sequence_dir + "/vel2os1.yaml",
        "vel_ply_file": root + vel_ply_name(img_rel),
    }


for subset_name in IMAGE_SPLIT_FILES:
    for split_name, split in splits[subset_name].items():
        # Entries that are already dicts were expanded by an earlier run of this
        # cell; keeping them as-is makes the cell safe to re-execute.
        splits[subset_name][split_name] = [
            entry if isinstance(entry, dict) else _build_sample_record(i, entry)
            for i, entry in enumerate(split)
        ]

In [3]:
# Sanity check: display the first record stored under key 0 of the "train" subset.
splits["train"][0][0]

{'i': 0,
 'img': '/home/ian/Rellis/Rellis-3D/00000/pylon_camera_node/frame000941-1581624746_850.jpg',
 'img_segmented': '/home/ian/Rellis/Rellis-3D/00000/pylon_camera_node_label_id/frame000941-1581624746_850.png',
 'cam_intrinsic': '/home/ian/Rellis/Rellis-3D/00000/camera_info.txt',
 'os_transform': '/home/ian/Rellis/Rellis-3D/00000/transforms.yaml',
 'os_ply_file': '/home/ian/Rellis/Rellis-3D/00000/os1_cloud_node_kitti_bin/000941.bin',
 'vel_transform': '/home/ian/Rellis/Rellis-3D/00000/vel2os1.yaml',
 'vel_ply_file': '/home/ian/Rellis/Rellis-3D/00000/vel_cloud_node_kitti_bin/000941.bin'}

In [4]:
# Functions for projections
# From: https://github.com/unmannedlab/RELLIS-3D/blob/main/utils/lidar2img.ipynb
import yaml
import cv2
import numpy as np
from scipy.spatial.transform import Rotation


def load_from_bin(bin_path):
    """Read a binary file of float32 values and return the first three columns.

    The raw values are viewed as rows of four floats; the fourth column is
    dropped (the original comment describes it as reflectivity info).

    Returns an array of shape (N, 3) and dtype float32.
    """
    raw_values = np.fromfile(bin_path, dtype=np.float32)
    rows = raw_values.reshape(-1, 4)
    # ignore reflectivity info
    return rows[:, 0:3]


def points_filter(points,img_width,img_height,P,RT):
    """Keep the points that fall inside the (padded) field of view after applying RT.

    Parameters
    ----------
    points : ndarray of shape (N, 3)
        xyz coordinates.
    img_width, img_height : number
        Image extent used together with ``P[0,0]`` / ``P[1,1]`` to derive the
        horizontal / vertical field of view.
    P : ndarray
        Only ``P[0,0]`` and ``P[1,1]`` are read here (presumably the focal
        lengths in pixels -- confirm against the camera matrix convention).
    RT : array-like of shape (4, 4)
        Homogeneous transform applied to the points before the angle test.

    Returns
    -------
    (kept, dist)
        ``kept`` holds the *untransformed* xyz of the surviving points and
        ``dist`` their Euclidean norm in that same untransformed frame.
    """
    transform = np.array(RT)

    # Field of view in degrees, widened by a 10 degree margin per axis.
    fov_x = 2*np.arctan2(img_width, 2*P[0,0])*180/3.1415926+10
    fov_y = 2*np.arctan2(img_height, 2*P[1,1])*180/3.1415926+10

    # Homogeneous copy of the input: one extra column filled with ones.
    homogeneous = np.ones((points.shape[0], points.shape[1] + 1))
    homogeneous[:, :3] = points

    transformed = np.matmul(transform, homogeneous.T).T
    depth_axis = transformed[:, 2]
    xangle = np.arctan2(transformed[:, 0], depth_axis)*180/np.pi
    yangle = np.arctan2(transformed[:, 1], depth_axis)*180/np.pi

    # |angle| < fov/2 on both axes.
    inside = (np.abs(xangle) < fov_x/2) & (np.abs(yangle) < fov_y/2)

    kept = np.array(homogeneous[inside, :3])
    dist = np.sqrt(kept[:, 0] ** 2 + kept[:, 1] ** 2 + kept[:, 2] ** 2)
    return kept, dist


def get_cam_mtx(filepath):
    """Load a 3x3 camera matrix from a whitespace-separated text file.

    The first four values of the file are placed at ``[0,0]``, ``[1,1]``,
    ``[0,2]`` and ``[1,2]`` respectively (presumably fx, fy, cx, cy -- confirm
    against the dataset's ``camera_info.txt`` format); ``[2,2]`` is 1 and
    every other entry is 0.
    """
    values = np.loadtxt(filepath)
    first, second, third, fourth = values[0], values[1], values[2], values[3]
    return np.array([
        [first, 0.0, third],
        [0.0, second, fourth],
        [0.0, 0.0, 1.0],
    ])


def get_mtx_from_yaml(filepath, key='os1_cloud_node-pylon_camera_node'):
    """Read a quaternion + translation from a YAML file and return the inverted 4x4 pose.

    ``data[key]['q']`` must provide ``x``, ``y``, ``z``, ``w`` and
    ``data[key]['t']`` must provide ``x``, ``y``, ``z``.  The rotation and
    translation are assembled into a homogeneous matrix and the *inverse* of
    that matrix is returned.
    """
    with open(filepath, 'r') as stream:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects; this is
        # only acceptable while the calibration files come from a trusted source.
        calibration = yaml.load(stream, Loader=yaml.Loader)

    entry = calibration[key]
    quaternion = np.array([entry['q'][axis] for axis in ('x', 'y', 'z', 'w')])
    translation = np.array([entry['t'][axis] for axis in ('x', 'y', 'z')])

    pose = np.eye(4)
    pose[:3, :3] = Rotation.from_quat(quaternion).as_matrix()
    pose[:3, 3] = translation
    return np.linalg.inv(pose)


# Five coefficients stored as a (5, 1) column; passed as the distortion
# argument of cv2.projectPoints in the projection helpers.
distCoeff = np.array([
    [-0.134313],
    [-0.025905],
    [0.002181],
    [0.00084],
    [0],
])


def load_os_points(tranform_file, ply_file, P):
    """Project the point cloud stored in ``ply_file`` into the image.

    Parameters
    ----------
    tranform_file : str
        YAML file read with ``get_mtx_from_yaml`` (default key).
    ply_file : str
        Binary point cloud read with ``load_from_bin``.
    P : ndarray
        Camera matrix handed to ``points_filter`` and ``cv2.projectPoints``.

    Returns
    -------
    (imgpoints, dist)
        ``imgpoints`` is the transposed, squeezed output of
        ``cv2.projectPoints`` (row 0 / row 1 are later used as the column /
        row pixel coordinates); ``dist`` is the distance array returned by
        ``points_filter`` for the same points.
    """
    cloud = load_from_bin(ply_file)

    extrinsic = get_mtx_from_yaml(tranform_file)
    rvec, _ = cv2.Rodrigues(extrinsic[:3, :3])
    tvec = extrinsic[:3, 3].reshape(3, 1)

    visible, dist = points_filter(cloud, IMG_WIDTH, IMG_HEIGHT, P, extrinsic)
    projected, _ = cv2.projectPoints(visible, rvec, tvec, P, distCoeff)
    return np.squeeze(projected, 1).T, dist


def load_vel_points(tranform_file, os_transform_file, ply_file, P):
    """Project the point cloud in ``ply_file`` into the image via an intermediate transform.

    Parameters
    ----------
    tranform_file : str
        YAML file read with key ``'vel2os1'``; the resulting matrix is applied
        to the raw points first.
    os_transform_file : str
        YAML file read with the default key; its matrix is used for the
        field-of-view filter and for the projection.
    ply_file : str
        Binary point cloud read with ``load_from_bin``.
    P : ndarray
        Camera matrix handed to ``points_filter`` and ``cv2.projectPoints``.

    Returns
    -------
    (imgpoints, dist)
        Same layout as ``load_os_points``: transposed image points and the
        matching distances from ``points_filter``.
    """
    extrinsic = get_mtx_from_yaml(os_transform_file)
    rvec, _ = cv2.Rodrigues(extrinsic[:3, :3])
    tvec = extrinsic[:3, 3].reshape(3, 1)

    vel2os = get_mtx_from_yaml(tranform_file, 'vel2os1')

    raw_cloud = load_from_bin(ply_file)

    # Homogeneous coordinates so the 4x4 matrix can be applied in one product.
    homogeneous = np.ones((raw_cloud.shape[0], 4))
    homogeneous[:, :3] = raw_cloud
    moved_cloud = (vel2os @ homogeneous.T).T[:, :3]

    visible, dist = points_filter(moved_cloud, IMG_WIDTH, IMG_HEIGHT, P, extrinsic)

    projected, _ = cv2.projectPoints(visible, rvec, tvec, P, distCoeff)
    return np.squeeze(projected, 1).T, dist


def add_depth_axis(img, points, depths):
    """Write normalised depths into the last channel of ``img``.

    If ``img`` has exactly three channels a zero-filled fourth channel is
    appended first (this creates a new array); otherwise ``img`` is modified
    in place.  ``points[0]`` / ``points[1]`` are used as column / row
    coordinates and truncated to integers.  Only points strictly inside
    ``(0, IMG_WIDTH - 1)`` x ``(0, IMG_HEIGHT - 1)`` are written, each with
    ``depths[i] / DEPTH_MAX`` rounded to float16.

    When several points land on the same pixel the one with the highest index
    wins, so the writes are kept sequential.
    """
    if img.shape[-1] == 3:
        img = np.insert(img, 3, 0, axis=-1)

    cols, rows = points[0], points[1]
    in_bounds = (
        (rows > 0) & (rows < IMG_HEIGHT - 1)
        & (cols > 0) & (cols < IMG_WIDTH - 1)
    )

    for idx in np.flatnonzero(in_bounds):
        img[np.int32(rows[idx]), np.int32(cols[idx]), -1] = np.float16(depths[idx]/DEPTH_MAX)

    return img

In [5]:
# Test images
import matplotlib.pyplot as plt

def draw_dot_img(img_with_depth):
    """Show the first three channels of an image with its depth samples overlaid.

    Parameters
    ----------
    img_with_depth : ndarray of shape (H, W, >=4)
        Channels 0..2 are displayed as the picture; channel 3 holds the depth
        value per pixel.  Every pixel whose depth is greater than zero is
        drawn as a size-1 scatter dot coloured by that depth ("plasma" map).

    The pixel search is vectorised with ``np.nonzero`` (row-major order, i.e.
    the same order as scanning rows then columns) instead of a Python loop
    over every pixel.
    """
    depth = img_with_depth[:, :, 3]
    rows, cols = np.nonzero(depth > 0)

    plt.imshow(img_with_depth[:, :, 0:3])
    plt.scatter(cols, rows, s=1, c=depth[rows, cols], cmap="plasma")
    plt.show()


def draw_image(img):
    """Display ``img`` with ``plt.imshow`` and render the figure immediately."""
    plt.imshow(img)
    plt.show()

In [6]:
# Load image files and labels
# Save data
from threading import Thread
from os import remove
import time

import numpy as np
from PIL import Image
from skimage.measure import block_reduce
from npy_append_array import NpyAppendArray


def convert_img_to_labels(labeled_img_file_name):
    """Load a label image and one-hot encode it over ``IMAGE_USED_LABELS``.

    Channel ``k`` of the result is 1.0 where the pixel value equals
    ``IMAGE_USED_LABELS[k]`` and 0.0 elsewhere; pixels whose value is not in
    the list are all-zero.  Assumes the label image is single-channel (H, W)
    -- TODO confirm for every label file.

    Returns a float64 array of shape (H, W, len(IMAGE_USED_LABELS)).
    """
    label_ids = np.asarray(Image.open(labeled_img_file_name))
    # Broadcast the comparison: (H, W, 1) against (K,) -> (H, W, K).
    matches = label_ids[..., np.newaxis] == np.asarray(IMAGE_USED_LABELS)
    return matches.astype(np.float64)


def load_img(img_file_name):
    """Open an image file and return its pixel array divided by 255."""
    pixels = np.asarray(Image.open(img_file_name))
    return pixels / 255


def add_depth_to_img(img, cam_intrinsic_file, os_calib, os_points, vel_calib, vel_points):
    """Add a depth channel to ``img`` from two point clouds.

    The camera matrix is loaded once from ``cam_intrinsic_file``.  The points
    from ``load_os_points(os_calib, os_points, P)`` are written first, then
    those from ``load_vel_points(vel_calib, os_calib, vel_points, P)``; where
    both hit the same pixel the second write wins.

    Returns the image produced by ``add_depth_axis``.
    """
    P = get_cam_mtx(cam_intrinsic_file)
    img = add_depth_axis(img, *load_os_points(os_calib, os_points, P))
    img = add_depth_axis(img, *load_vel_points(vel_calib, os_calib, vel_points, P))
    return img


def downsize(img):
    """Max-pool ``img`` over DOWNSIZE_SIZE x DOWNSIZE_SIZE blocks, leaving the channel axis untouched."""
    block_shape = (DOWNSIZE_SIZE, DOWNSIZE_SIZE, 1)
    return block_reduce(img, block_shape, np.max)


def append_array(np_array_file, np_array):
    """Append ``np_array`` as a single new leading-axis entry of ``np_array_file``.

    A length-1 axis is inserted in front of the array before it is handed to
    ``np_array_file.append``.  If that call raises ValueError,
    ``np_array_file.recover()`` is invoked and the append is attempted once
    more (what ``recover`` repairs is defined by the file object -- not
    visible here); a second failure propagates.
    """
    batched = np.expand_dims(np_array, axis=0)
    try:
        np_array_file.append(batched)
        return
    except ValueError:
        np_array_file.recover()
    np_array_file.append(batched)


def create_data(img_paths):
    """Build the three full-resolution arrays for one sample and append them to the output files.

    Parameters
    ----------
    img_paths : dict
        Must contain ``img``, ``img_segmented``, ``cam_intrinsic``,
        ``os_transform``, ``os_ply_file``, ``vel_transform`` and
        ``vel_ply_file``.

    Side effects
    ------------
    Appends to the module-level ``numpy_files`` and ``all_numpy_files`` lists:
    index 0 <- RGB image (float16), index 2 <- image with depth channel
    (float16), index 4 <- one-hot labels (uint8).  Afterwards
    ``create_downsized`` is called with all three arrays.

    If any input file is missing, "File not found" is printed and *nothing*
    is appended, so the six output columns always stay aligned sample by
    sample.
    """
    try:
        # Load and compute everything before touching any output file: a
        # missing LiDAR or label file must not leave a half-written sample.
        rgb = load_img(img_paths["img"])
        rgb_with_depth = add_depth_to_img(rgb, img_paths["cam_intrinsic"], img_paths["os_transform"],
            img_paths["os_ply_file"], img_paths["vel_transform"], img_paths["vel_ply_file"])
        one_hot = convert_img_to_labels(img_paths["img_segmented"])
    except FileNotFoundError:
        print("File not found")
        return

    new_imgs = {
        "img": rgb,
        "img_with_depth": rgb_with_depth,
        "img_segmented": one_hot,
    }

    # (output file index, source array, stored dtype)
    outputs = (
        (0, "img", np.float16),
        (2, "img_with_depth", np.float16),
        (4, "img_segmented", np.uint8),
    )
    for file_index, key, dtype in outputs:
        converted = new_imgs[key].astype(dtype)
        append_array(numpy_files[file_index], converted)
        append_array(all_numpy_files[file_index], converted)

    create_downsized(new_imgs)


def create_downsized(imgs):
    """Down-size the three arrays of one sample and append them to the output files.

    Parameters
    ----------
    imgs : dict
        Must contain ``img``, ``img_with_depth`` and ``img_segmented``.

    Side effects
    ------------
    Appends to the module-level ``numpy_files`` and ``all_numpy_files`` lists:
    index 1 <- down-sized RGB (float16), index 3 <- down-sized image with
    depth (float16), index 5 <- down-sized one-hot labels (uint8, matching
    the dtype declared for ``img_one_hot_labels_ds`` in ``columns_to_save``
    and the dtype used for the full-resolution labels).
    """
    # (output file index, source array, stored dtype)
    outputs = (
        (1, "img", np.float16),
        (3, "img_with_depth", np.float16),
        (5, "img_segmented", np.uint8),
    )
    for file_index, key, dtype in outputs:
        reduced = downsize(imgs[key]).astype(dtype)
        append_array(numpy_files[file_index], reduced)
        append_array(all_numpy_files[file_index], reduced)
    

# (intermediate column name, dtype) -- the list order defines the index used
# by create_data / create_downsized when they address numpy_files[...].
columns_to_save = [
    ("img", np.float16),
    ("img_ds", np.float16),
    ("img_with_depth", np.float16),
    ("img_with_depth_ds", np.float16),
    ("img_one_hot_labels", np.uint8),
    ("img_one_hot_labels_ds", np.uint8),
]

# Key inside the final .npz archive -> intermediate column it is read from.
_ARCHIVE_COLUMNS = (
    ("img", "img"),
    ("img_ds", "img_ds"),
    ("img_depth", "img_with_depth"),
    ("img_depth_ds", "img_with_depth_ds"),
    ("img_oh", "img_one_hot_labels"),
    ("img_oh_ds", "img_one_hot_labels_ds"),
)


def _column_path(subset_name, split_name, column):
    """Path of the intermediate .npy file for one column of one split."""
    return "Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, column)


def _open_column_files(subset_name, split_name):
    """Open one appendable .npy file per entry of ``columns_to_save``."""
    return [NpyAppendArray(_column_path(subset_name, split_name, col[0])) for col in columns_to_save]


def _archive_and_cleanup(subset_name, split_name, column_files):
    """Bundle the intermediate columns into one compressed .npz and delete the .npy files."""
    np.savez_compressed(
        "Processed Data/%s_%d" % (subset_name, split_name),
        **{
            archive_key: np.load(_column_path(subset_name, split_name, column))
            for archive_key, column in _ARCHIVE_COLUMNS
        }
    )
    for f in column_files:
        remove(f.filename)


for subset_name, subset in splits.items():
    # Files under key -1 accumulate every sample of the subset.
    all_numpy_files = _open_column_files(subset_name, -1)
    for split_name, split in subset.items():
        if split_name == -1:
            continue
        numpy_files = _open_column_files(subset_name, split_name)
        print("%s samples, split %d" % (subset_name, split_name))

        # Samples are processed one after another.  (The previous version built
        # Thread objects but invoked run(), which executes in the calling
        # thread, so this is the same execution order without the detour.)
        for sample in split:
            create_data(sample)

        _archive_and_cleanup(subset_name, split_name, numpy_files)

    _archive_and_cleanup(subset_name, -1, all_numpy_files)

train samples, split 0
train samples, split 2
train samples, split 3
train samples, split 4
test samples, split 0
test samples, split 1
test samples, split 2
File not found
File not found
File not found
File not found
val samples, split 0
val samples, split 1
File not found
File not found
File not found


In [7]:
# # Code to break up images that were accidentally appended together
# columns_to_save = [
#     ("img", "np.float16"),
#     ("img_ds", "np.float16"),
#     ("img_depth", "np.float16"),
#     ("img_depth_ds", "np.float16"),
#     ("img_oh", "np.uint8"),
#     ("img_oh_ds", "np.uint8"),
# ]

# for name in ["train", "val", "test"]:
#     for split in [0, 2, 3, 4, -1]:
#         vals_to_save = []
#         with np.load("Processed Data/%s_%d.npz" % (name, split)) as data:
#             for c, t in columns_to_save:
#                 exec("%s=data['%s']" % (c, c))
#                 if c.endswith("ds"):
#                     l = eval("%s" % c).shape[0]//400
#                 else:
#                     l = eval("%s" % c).shape[0]//1200
#                 exec("%s=np.split(%s, l)" % (c, c))
#                 vals_to_save.append("%s=np.array(%s).astype(%s)" % (c, c, t))
#         exec("np.savez_compressed('Processed Data/%s_%d', %s)" % (name, split, ",".join(vals_to_save)))

# Old code that doesn't work

In [8]:
# # Load image files and labels
# # Save data
# from multiprocessing import Pool
# from os import remove
# import time

# import numpy as np
# from PIL import Image
# from skimage.measure import block_reduce
# from npy_append_array import NpyAppendArray


# def convert_img_to_labels(labeled_img_file_name):
#     img_labels = np.asarray(Image.open(labeled_img_file_name))
#     img_labels_new = np.zeros(img_labels.shape + (len(IMAGE_USED_LABELS),))
#     for i, label in enumerate(IMAGE_USED_LABELS):
#         img_labels_new[:,:,i] = (img_labels[:,:] == label)
#     return img_labels_new


# def load_img(img_file_name):
#     img_base = np.asarray(Image.open(img_file_name))
#     img_base = img_base / 255
#     return img_base


# def add_depth_to_img(img, cam_intrinsic_file, os_calib, os_points, vel_calib, vel_points):
#     P = get_cam_mtx(cam_intrinsic_file)

#     points, depths = load_os_points(os_calib, os_points, P)
#     img = add_depth_axis(img, points, depths)

#     points, depths = load_vel_points(vel_calib, os_calib, vel_points, P)
    
#     img = add_depth_axis(img, points, depths)
#     return img


# def downsize(img):    
#     return block_reduce(img, (DOWNSIZE_SIZE, DOWNSIZE_SIZE, 1), np.max)


# def create_data(img_paths):
#     new_imgs = {}
#     new_imgs["img"] = load_img(img_paths["img"])
#     new_imgs["img_segmented"] = convert_img_to_labels(img_paths["img_segmented"])
#     new_imgs["img_with_depth"] = add_depth_to_img(new_imgs["img"], img_paths["cam_intrinsic"], img_paths["os_transform"],
#         img_paths["os_ply_file"], img_paths["vel_transform"], img_paths["vel_ply_file"])
#     return new_imgs


# def create_downsized(imgs):
#     new_imgs = {}
#     new_imgs["img"] = downsize(imgs["img"])
#     new_imgs["img_segmented"] = downsize(imgs["img_segmented"])
#     new_imgs["img_with_depth"] = downsize(imgs["img_with_depth"])
#     return new_imgs
    

# columns_to_save = [
#     ("img", np.float16),
#     ("img_ds", np.float16),
#     ("img_with_depth", np.float16),
#     ("img_with_depth_ds", np.float16),
#     ("img_one_hot_labels", np.uint8),
#     ("img_one_hot_labels_ds", np.uint8),
# ]

# for subset_name, subset in splits.items():
#     all_numpy_files = [NpyAppendArray("Processed Data/images_%s_%d_%s.npy" % (subset_name, -1, col[0])) for col in columns_to_save]
#     for split_name, split in subset.items():
#         numpy_files = [NpyAppendArray("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, col[0])) for col in columns_to_save]
#         print("%s samples, split %d" % (subset_name, split_name))
        
#         split = [split[i:i+BATCH_SIZE] for i in range(0, len(split), BATCH_SIZE)]
#         with Pool(BATCH_SIZE//8) as p:
#             for i, s in enumerate(split):
#                 s = p.map(create_data, s)

#                 img = np.concatenate([r["img"] for r in s]).astype(np.float16)
#                 numpy_files[0].append(img)
#                 all_numpy_files[0].append(img)

#                 img = np.concatenate([r["img_with_depth"] for r in s]).astype(np.float16)
#                 numpy_files[2].append(img)
#                 all_numpy_files[2].append(img)

#                 img = np.concatenate([r["img_segmented"] for r in s]).astype(np.uint8)
#                 numpy_files[4].append(img)
#                 all_numpy_files[4].append(img)

#                 if i == 0:
#                     draw_image(s[0]["img"])
#                     draw_dot_img(s[0]["img_with_depth"])

#                 s = p.map(create_downsized, s)

#                 img = np.concatenate([r["img"] for r in s]).astype(np.float16)
#                 numpy_files[1].append(img)
#                 all_numpy_files[1].append(img)

#                 img = np.concatenate([r["img_with_depth"] for r in s]).astype(np.float16)
#                 numpy_files[3].append(img)
#                 all_numpy_files[3].append(img)

#                 img = np.concatenate([r["img_segmented"] for r in s]).astype(np.uint8)
#                 numpy_files[5].append(img)
#                 all_numpy_files[5].append(img)

#                 if i == 0:
#                     draw_image(s[0]["img"])
#                     draw_dot_img(s[0]["img_with_depth"])

#                 print("%.2f%% Complete" % ((i+1) * 100/len(split)))
#         numpy.savez_compressed("Processed Data/rellis_%s_%d.npy" % (subset_name, split_name),
#             img=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img")),
#             img_ds=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_ds")),
#             img_depth=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_with_depth")),
#             img_depth_ds=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_with_depth_ds")),
#             img_oh=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_one_hot_labels")),
#             img_oh_ds=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_one_hot_labels_ds")),
#         )
#         for f in numpy_files:
#             remove(f)

#     split_name = -1
#     numpy.savez_compressed("Processed Data/rellis_%s_%d.npy" % (subset_name, split_name),
#         img=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img")),
#         img_ds=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_ds")),
#         img_depth=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_with_depth")),
#         img_depth_ds=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_with_depth_ds")),
#         img_oh=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_one_hot_labels")),
#         img_oh_ds=np.load("Processed Data/images_%s_%d_%s.npy" % (subset_name, split_name, "img_one_hot_labels_ds")),
#     )
#     for f in all_numpy_files:
#         remove(f)

In [9]:

# # Load image files and labels
# # Save data
# from pyspark.sql import SparkSession
# from pyspark.sql.types import ArrayType, FloatType, IntegerType
# from pyspark.ml.linalg import Vectors, VectorUDT
# from pyspark.sql import functions
# import os

# from PIL import Image
# from skimage.measure import block_reduce
# import pandas as pd
# import numpy as np
# from npy_append_array import NpyAppendArray


# def convert_img_to_labels(labeled_img_file_name):
#     img_labels = np.asarray(Image.open(labeled_img_file_name))
#     img_labels_new = np.zeros(img_labels.shape + (IMAGE_MAX_LABELS,))
#     for i in range(IMAGE_MAX_LABELS):
#         if i in IMAGE_USED_LABELS:
#             img_labels_new[:,:,i] = (img_labels[:,:] == i)
#     shape = img_base.shape
#     return Vectors.dense((img_labels_new.astype("int").reshape(shape[0] * shape[1] * shape[2])))
# convert_img_to_labels_udf = functions.udf(convert_img_to_labels, VectorUDT())


# def load_img(img_file_name):
#     print("Reading file")
#     img_base = np.asarray(Image.open(img_file_name))
#     img_base = img_base / 255
    
#     shape = img_base.shape
#     return Vectors.dense(img_base.astype("float").reshape(shape[0] * shape[1] * shape[2]))
# load_img_udf = functions.udf(load_img, VectorUDT())


# def add_depth_to_img(img, cam_intrinsic_file, os_calib, os_points, vel_calib, vel_points):
#     P = get_cam_mtx(cam_intrinsic_file)

#     points, depths = load_os_points(os_calib, os_points, P)
#     img = add_depth_axis(img, points, depths)

#     points, depths = load_vel_points(vel_calib, vel_points, P)
    
#     img = img.toArray()
#     channels = img.shape[0]//(IMG_WIDTH * IMG_HEIGHT)
#     img = img.reshape(IMG_WIDTH, IMG_HEIGHT, channels)
#     img = add_depth_axis(img, points, depths)
    
#     shape = img_base.shape
#     return Vectors.dense(img.astype("float").reshape(shape[0] * shape[1] * shape[2]))
# add_depth_to_img_udf = functions.udf(add_depth_to_img, VectorUDT())


# def downsize(img):
#     channels = img.shape[0]//(IMG_WIDTH * IMG_HEIGHT)
#     img = img.reshape(IMG_WIDTH, IMG_HEIGHT, channels)
    
#     img = block_reduce(img, (DOWNSIZE_SIZE, DOWNSIZE_SIZE, 1), np.max)
    
#     shape = img_base.shape
#     return Vectors.dense(img.reshape(shape[0] * shape[1] * shape[2]))
# downsize_udf = functions.udf(downsize, VectorUDT())


# # Process images in spark
# os.environ['PYSPARK_PYTHON'] = '/home/ian/miniconda3/envs/SparseIVA/bin/python3.9'
# os.environ['PYSPARK_DRIVER_PYTHON'] = '/home/ian/miniconda3/envs/SparseIVA/bin/python3.9'

# # spark = SparkSession.builder.master("spark://147.9.188.154:7077").appName("Rellis") 
# spark = SparkSession.builder.master("local[1]").appName("Rellis")
# spark.config('spark.driver.memory','16g')
# spark.config('spark.executor.memory','8g')
# spark.config("spark.sql.execution.arrow.pyspark.enabled", "true")
# spark.config("spark.sql.execution.arrow.pyspark.fallback.enabled", "true")
# spark = spark.getOrCreate()

# columns_to_save = [
#     ("img_loaded", np.float16),
#     ("img_loaded_ds", np.float16),
#     ("img_with_depth", np.float16),
#     ("img_with_depth_ds", np.float16),
#     ("img_one_hot_labels", np.uint8),
#     ("img_one_hot_labels_ds", np.uint8),
# ]

# for subset_name, subset in splits.items():
#     for split_name, split in subset.items():
#         numpy_files = [NpyAppendArray("Processed Data/images_%s_%d_%s.npz" % (subset_name, split_name, col[0])) for col in columns_to_save]
        
#         data = spark.createDataFrame(pd.DataFrame(split))
#         num_rows = data.count()

#         data = data.withColumn("img_loaded", load_img_udf(data["img"]))
#         data = data.withColumn("img_with_depth", add_depth_to_img_udf(data["img_loaded"],
#               data["os_transform"], data["os_ply_file"], data["vel_transform"], data["vel_ply_file"]
#         ))
        
#         data = data.withColumn("img_one_hot_labels", convert_img_to_labels_udf(data["img_segmented"]))
        
#         data = data.withColumn("img_loaded_ds", downsize_udf(data["img_loaded"]))
#         data = data.withColumn("img_with_depth_ds", downsize_udf(data["img_with_depth"]))
#         data = data.withColumn("img_one_hot_labels_ds", downsize_udf(data["img_one_hot_labels"]))
        
#         print(subset_name, split_name)
        
#         for i in range(0, num_rows, 1):
#             smaller_data = data.filter(data.i.between(i, i))
#             for col, np_file in zip(columns_to_save, numpy_files):
#                 np_file.append(smaller_data.select(col[0]).collect()[0][0].toArray().astype(col[1]))

# # def load_img(imgs):
# #     img_base = np.asarray(Image.open(imgs[0])).astype(np.uint8)

# #     img_labels = np.asarray(Image.open(imgs[1]))
# #     img_labels_new = np.zeros(img_labels.shape + (IMAGE_MAX_LABELS,))
# #     for i in range(IMAGE_MAX_LABELS):
# #         if i in IMAGE_USED_LABELS:
# #             img_labels_new[:,:,i] = (img_labels[:,:] == i)

# #     return (img_base, img_labels_new.astype(np.uint8))


# # for subset_name, subset in splits.items():
# #     for split_name, split in subset.items():
# #         with Pool(64) as p:
# #             new_split = p.map(load_img, split)
            
# # #         if SHUFFLE_BEFORE_SAVING:
# # #             shuffle(new_split)
                    
# #         dump(new_split, open("Processed Data/images_%s_%d.pickle" % (subset_name, split_name), "wb+"))
# #         print(subset_name, split_name)

In [10]:
# https://dl.acm.org/doi/pdf/10.5555/2830840.2830844

In [11]:
# Load ply data

# TODO