# Generate data for pipeline DONKEY
This notebook prepares data for PIPELINE 1

## Usage:
Run all cells

## Requirements:
./content/datasets/data_in/processed_data_testing/

## Outputs:
./content/datasets/h5_out/gt_real.h5 \
./content/datasets/h5_out/raw_image_real.h5 \
./content/datasets/pkl_out/gt_real.pkl \
./content/datasets/pkl_out/raw_image_real.pkl

In [None]:
import PIL.Image as Image
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import h5py
import numpy as np
import re
import csv
import json

In [None]:
# Sanity check: open one sample frame and list the distinct pixel values it contains.
img = np.array(Image.open("./content/datasets/data_in/objects/real/26_cam-image_array_.jpg"))
obj_ids = np.unique(img)
print(obj_ids)

def natural_sort_key(s):
    """Build a sort key that orders embedded digit runs numerically.

    The string is split on runs of digits; digit chunks become ints and every
    other chunk is lower-cased, so "img2" sorts before "img10".
    """
    key = []
    for chunk in re.split(r'(\d+)', s):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk.lower())
    return key

def map_values(value):
    """Map a raw mask pixel value to a semantic class id.

    128 maps to 1, 255 maps to 2 and every other value maps to 0.
    """
    if value == 128:
        return 1
    if value == 255:
        return 2
    return 0

def map_to_r(number):
    """Return the red component of the palette colour for a class index.

    Indices below 20 are looked up in a fixed 20-entry table; any other index
    yields 0.
    """
    red_palette = (255, 128, 0, 0, 128, 0, 0, 0, 128, 0, 0, 0, 128, 0, 128, 255, 128, 255, 0, 128)
    return int(red_palette[number]) if number < 20 else 0

def map_to_g(number):
    """Return the green component of the palette colour for a class index.

    Indices below 20 are looked up in a fixed 20-entry table; any other index
    yields 0.
    """
    green_palette = (0, 128, 255, 128, 0, 0, 128, 0, 0, 128, 0, 128, 0, 128, 255, 128, 0, 128, 255, 128)
    return int(green_palette[number]) if number < 20 else 0

def map_to_b(number):
    """Return the blue component of the palette colour for a class index.

    Indices below 20 are looked up in a fixed 20-entry table; any other index
    yields 255.
    """
    blue_palette = (0, 0, 0, 128, 128, 255, 255, 128, 0, 128, 128, 255, 128, 0, 0, 0, 128, 255, 255, 128)
    return int(blue_palette[number]) if number < 20 else 255

def load_data(images_folder):
    """Load the frames of one dataset together with their semantic masks.

    Frames are read from ``images_folder`` and masks from the sibling folder
    ``images_folder + "_mask_retouched"``. Both listings are naturally sorted
    and entries whose name starts with "." are skipped in both, so a stray
    hidden file cannot shift frames relative to masks.

    Args:
        images_folder: path of the frame folder, without a trailing slash.

    Returns:
        A tuple ``(raw_image, segmentation_mask, image_files)``:
        ``raw_image`` is the list of frames as returned by ``cv2.imread``,
        ``segmentation_mask`` is the list of 2-D class-id arrays obtained by
        applying ``map_values`` to the first channel of every mask, and
        ``image_files`` is the list of frame file names that were loaded.
    """
    vectorized_map = np.vectorize(map_values)

    image_files = [
        name
        for name in sorted(os.listdir(images_folder), key=natural_sort_key)
        if not name.startswith(".")
    ]
    raw_image = [cv2.imread(os.path.join(images_folder, name)) for name in image_files]

    mask_folder = images_folder + "_mask_retouched"
    segmentation_mask = []
    for mask_file in sorted(os.listdir(mask_folder), key=natural_sort_key):
        if mask_file.startswith("."):
            continue
        mask = cv2.imread(os.path.join(mask_folder, mask_file))
        # Only the first channel is mapped to class ids; map_to_r/g/b can
        # colourise the result when a visual check of the classes is needed.
        segmentation_mask.append(vectorized_map(mask[:, :, 0]))

    return raw_image, segmentation_mask, image_files

# def load_gt(gt_file):
#   gt=[]
#   for image_file in image_files:
#       if image_file.split(".")[-1]=="json" and image_file!='meta.json':
#           with open(images_folder+image_file, 'r') as json_file:
#             data = json.load(json_file)
#             user_angle = float(data.get('user/angle', None))
#             gt.append(user_angle)
#   return gt


def load_gt(images_folder):
    """Collect the 'user/angle' value of every JSON record in a folder.

    Files are visited in natural sort order. Only files whose extension is
    ``json`` are read, and ``meta.json`` is skipped.

    Args:
        images_folder: folder holding the JSON records; a trailing slash is
            optional.

    Returns:
        A list of floats, one per record, in file order.

    Raises:
        KeyError: if a record has no 'user/angle' entry.
    """
    gt = []
    for image_file in sorted(os.listdir(images_folder), key=natural_sort_key):
        if image_file.split(".")[-1] != "json" or image_file == 'meta.json':
            continue
        record_path = os.path.join(images_folder, image_file)
        with open(record_path, 'r') as json_file:
            data = json.load(json_file)
        if 'user/angle' not in data:
            # Name the offending file instead of failing inside float(None).
            raise KeyError(f"'user/angle' missing in {record_path}")
        gt.append(float(data['user/angle']))
    return gt


# Load the real frames and masks, the simulated frames and masks, and the
# ground truth that belongs to the real frames.
print("-STARTING real- ")
images_folder = "./content/datasets/processed_data_testing/real"
raw_image_real, segmentation_mask_real, image_files_real = load_data(images_folder)

print("-STARTING sim- ")
images_folder = "./content/datasets/processed_data_testing/sim_rectified"
raw_image_sim, segmentation_mask_sim, image_files_sim = load_data(images_folder)

print("-STARTING gt- ")
# Despite its name, gt_file is the folder that holds the per-frame JSON records.
gt_file = "./content/datasets/data_in/objects_testing/real/tub320x240_test/"
gt_real = load_gt(gt_file)

In [18]:
# Store every list under the single key "0001".
raw_images_real = {"0001": raw_image_real}
raw_images_sim = {"0001": raw_image_sim}

segmentation_masks_real = {"0001": segmentation_mask_real}
segmentation_masks_sim = {"0001": segmentation_mask_sim}

raw_gt_real = {"0001": gt_real}

In [None]:
# Preview every 10th index below 100: frames on the top row, masks below.
for i in range(0, 100, 10):
    print(i)
    fig, axs = plt.subplots(2, 2, figsize=(15, 6))
    plt.subplots_adjust(wspace=0.2, hspace=0.4)

    # Frames go through BGR -> RGB before display; masks are shown as they are.
    top_left, top_right = axs[0, 0], axs[0, 1]
    bottom_left, bottom_right = axs[1, 0], axs[1, 1]

    top_left.imshow(cv2.cvtColor(raw_image_real[i], cv2.COLOR_BGR2RGB))
    top_left.set_title('Real Image')
    top_right.imshow(cv2.cvtColor(raw_image_sim[i], cv2.COLOR_BGR2RGB))
    top_right.set_title('Sim Image GT: '+str(gt_real[i]))
    bottom_left.imshow(segmentation_mask_real[i])
    bottom_left.set_title('Real Mask')
    bottom_right.imshow(segmentation_mask_sim[i])
    bottom_right.set_title('Sim mask')

    # Class ids present in the sim mask of this index.
    obj_ids = np.unique(segmentation_mask_sim[i])
    print(obj_ids)

    # Hide the tick marks on all four panels.
    for ax in axs.flat:
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()

In [20]:
# Boundary indices into the real and sim frame lists. Entry k of a real list is
# paired with entry k of the matching sim list, and consecutive entries delimit
# the segments that the alignment code further down walks through.
real_points=[41,46,50,52,55,60,63,67,71,75,79,83,87,92,96]
real_points2=[96,101,105,110,114,118,122,126,130,135,138,142,145,149,153,157,161,165,170,176,181,188,197,203,208,211,217,223,230,235,236,238,240]
sim_points =[371,378,385,387,393,399,405,412,418,425,432,439,445,451,458]
sim_points2=[96,105,116,124,133,141,149,157,166,171,179,185,192,198,206,212,219,225,232,238,243,250,257,263,267,271,279,286,294,301,305,307,309]
print(len(real_points))
print(len(real_points2))
print(len(sim_points))
print(len(sim_points2))

# The lists are indexed in lockstep, so each real list must pair up with its sim list.
assert len(real_points) == len(sim_points), "real_points and sim_points must have the same length"
assert len(real_points2) == len(sim_points2), "real_points2 and sim_points2 must have the same length"

15
33
15
33


In [21]:
def _sample_spread(longer_list, count):
    """Pick ``count`` elements from ``longer_list``, spread across its length.

    The base stride is ``len(longer_list) // count``; the first
    ``len(longer_list) % count`` strides are one element longer so the
    leftover is absorbed. Returns an empty list when ``count`` is 0.
    """
    if count == 0:
        return []
    step_size, remainder = divmod(len(longer_list), count)
    sampled_list = []
    index = 0
    for _ in range(count):
        sampled_list.append(longer_list[index])
        index += step_size
        if remainder > 0:
            index += 1
            remainder -= 1
    return sampled_list


def _align_segments(real_boundaries, sim_boundaries, verbose=False):
    """Pair real and sim frame indexes segment by segment.

    Segment ``j`` covers ``range(boundaries[j-1], boundaries[j])`` on each
    side. The shorter side of a segment is kept whole and the longer side is
    subsampled to the same length, so both returned lists grow in step.

    Args:
        real_boundaries: boundary indexes into the real frames.
        sim_boundaries: boundary indexes into the sim frames.
        verbose: when True, print ``len(sim) len(real)`` for every segment.

    Returns:
        ``(sim_indexes, real_indexes)``, two lists of equal length.
    """
    sim_aligned = []
    real_aligned = []
    for j in range(1, len(real_boundaries)):
        real_indexes = list(range(real_boundaries[j - 1], real_boundaries[j]))
        sim_indexes = list(range(sim_boundaries[j - 1], sim_boundaries[j]))
        if verbose:
            print(len(sim_indexes), len(real_indexes))

        if len(sim_indexes) < len(real_indexes):
            sim_aligned.extend(sim_indexes)
            real_aligned.extend(_sample_spread(real_indexes, len(sim_indexes)))
        else:
            sim_aligned.extend(_sample_spread(sim_indexes, len(real_indexes)))
            real_aligned.extend(real_indexes)
    return sim_aligned, real_aligned


# First stretch (segment sizes are printed), then the second stretch appended to it.
sim_indexes_list, real_indexes_list = _align_segments(real_points, sim_points, verbose=True)
_sim_part2, _real_part2 = _align_segments(real_points2, sim_points2)
sim_indexes_list.extend(_sim_part2)
real_indexes_list.extend(_real_part2)


# for j in range(1,len(real_points2)):
#       from_real=real_points2[j-1]
#       from_sim=sim_points2[j-1]
#       to_real=real_points2[j]
#       to_sim=sim_points2[j]
#       real_indexes = [*range(from_real, to_real, 1)]
#       sim_indexes = [*range(from_sim, to_sim, 1)]
#       print(len(sim_indexes),len(real_indexes))
#       sim_indexes_list.extend(sim_indexes)
#       real_indexes_list.extend(real_indexes)


print(len(sim_indexes_list))
print(len(real_indexes_list))
print(len(segmentation_mask_sim))
print(len(raw_image_sim))

# The selections are consumed pairwise below, so both sides must be equally long.
assert len(sim_indexes_list) == len(real_indexes_list)

# Pick the aligned frames, masks and ground truth by index.
raw_images_sim_mapped = [raw_image_sim[i] for i in sim_indexes_list]
raw_images_real_mapped = [raw_image_real[i] for i in real_indexes_list]
segmentation_masks_sim_mapped = [segmentation_mask_sim[i] for i in sim_indexes_list]
segmentation_masks_real_mapped = [segmentation_mask_real[i] for i in real_indexes_list]
gt_real_mapped = [gt_real[i] for i in real_indexes_list]

# Rebuild the "0001" dictionaries so they hold only the aligned selection.
raw_images_real = {"0001": raw_images_real_mapped}
raw_gt_real = {"0001": gt_real_mapped}
raw_images_sim = {"0001": raw_images_sim_mapped}
segmentation_masks_real = {"0001": segmentation_masks_real_mapped}
segmentation_masks_sim = {"0001": segmentation_masks_sim_mapped}

7 5
7 4
2 2
6 3
6 5
6 3
7 4
6 4
7 4
7 4
7 4
6 4
6 5
7 4
196
196
464
464


In [22]:
import os
import pickle

# Create the output folder first so the dumps also work on a fresh checkout.
os.makedirs('./content/datasets/pkl_out', exist_ok=True)

# One pickle file per dictionary, written in this order.
for pkl_path, payload in (
    ('./content/datasets/pkl_out/raw_image_sim.pkl', raw_images_sim),
    ('./content/datasets/pkl_out/raw_image_real.pkl', raw_images_real),
    ('./content/datasets/pkl_out/mask_sim.pkl', segmentation_masks_sim),
    ('./content/datasets/pkl_out/mask_real.pkl', segmentation_masks_real),
    ('./content/datasets/pkl_out/gt_real.pkl', raw_gt_real),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(payload, file)

In [23]:
import pickle


def _read_pickle(pkl_path):
    """Return the object stored in the pickle file at ``pkl_path``."""
    with open(pkl_path, 'rb') as file:
        return pickle.load(file)


# NOTE: unpickling can execute arbitrary code; only read files written by this notebook.
loaded_images_sim = _read_pickle('./content/datasets/pkl_out/raw_image_sim.pkl')
loaded_images_real = _read_pickle('./content/datasets/pkl_out/raw_image_real.pkl')
loaded_masks_sim = _read_pickle('./content/datasets/pkl_out/mask_sim.pkl')
loaded_masks_real = _read_pickle('./content/datasets/pkl_out/mask_real.pkl')
loaded_gt_real = _read_pickle('./content/datasets/pkl_out/gt_real.pkl')

In [24]:
# Number of ground-truth values stored under key "0001" after the pickle round trip.
print(len(loaded_gt_real["0001"]))

196


In [None]:
# Inspect the reloaded pairs from index 170 onwards. The upper bound is clamped
# to the shortest list so the loop cannot index past the available items.
preview_stop = min(
    320,
    len(loaded_images_real["0001"]),
    len(loaded_images_sim["0001"]),
    len(loaded_masks_real["0001"]),
    len(loaded_masks_sim["0001"]),
    len(loaded_gt_real["0001"]),
)
for i in range(170, preview_stop, 1):
    print(i)
    # Create a 2x2 grid of subplots
    fig, axs = plt.subplots(2, 2, figsize=(15, 6))
    plt.subplots_adjust(wspace=0.2, hspace=0.4)

    # Top row: real and sim frames; both titles show the ground truth of the real frame.
    axs[0,0].imshow(cv2.cvtColor(loaded_images_real["0001"][i], cv2.COLOR_BGR2RGB))
    axs[0,0].set_title('Real Image  GT:'+str(loaded_gt_real["0001"][i]))
    axs[0,1].imshow(cv2.cvtColor(loaded_images_sim["0001"][i], cv2.COLOR_BGR2RGB))
    axs[0,1].set_title('Sim Image  GT:'+str(loaded_gt_real["0001"][i]))
    # Bottom row: the matching masks.
    axs[1,0].imshow(loaded_masks_real["0001"][i])
    axs[1,0].set_title('Real Mask')
    axs[1,1].imshow(loaded_masks_sim["0001"][i])
    axs[1,1].set_title('Sim Mask')
    for ax in axs.flat:
        ax.set_xticks([])
        ax.set_yticks([])

    # Show the plot for the current iteration, then release the figure.
    plt.show()
    plt.close(fig)

In [26]:
def save_dictionary_to_h5(file_path, data_dict):
    """Write a dict of array lists to an HDF5 file, one group per key.

    Every key of ``data_dict`` becomes a group and element ``i`` of its list
    is stored as the dataset ``array_<i>`` inside that group. The file is
    opened in mode 'w'. When ``file_path`` is a path, its parent directory is
    created if it does not exist yet.

    NOTE: the dataset names are not zero-padded ("array_10" sorts before
    "array_2" as text), so readers that list datasets by name should sort
    them numerically.

    Args:
        file_path: destination passed to ``h5py.File``.
        data_dict: mapping of group name to a list of array-like values.
    """
    import os

    if isinstance(file_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with h5py.File(file_path, 'w') as hf:
        for key, array_list in data_dict.items():
            # One group per dictionary key.
            group = hf.create_group(key)
            for i, arr in enumerate(array_list):
                # One dataset per list element.
                group.create_dataset(f'array_{i}', data=arr)



# Export every reloaded dictionary to its own HDF5 file, in this order.
for file_path, payload in (
    ('./content/datasets/h5_out/raw_image_sim.h5', loaded_images_sim),
    ('./content/datasets/h5_out/raw_image_real.h5', loaded_images_real),
    ('./content/datasets/h5_out/semantic_id_list_sim.h5', loaded_masks_sim),
    ('./content/datasets/h5_out/semantic_id_list_real.h5', loaded_masks_real),
    ('./content/datasets/h5_out/gt_real.h5', loaded_gt_real),
):
    save_dictionary_to_h5(file_path, payload)

In [27]:
import h5py
import numpy as np
import PIL.Image as Image
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt

def load_h5_to_dictionary(file_path):
    """Read an HDF5 file into a dict of array lists, one entry per group.

    Every top-level group becomes a key whose value is the list of its
    datasets converted with ``np.array``. Dataset names are sorted with their
    digit runs compared as numbers, so "array_2" comes before "array_10" and
    the list order matches the numeric suffix rather than the text order of
    the names.

    Args:
        file_path: source passed to ``h5py.File``.

    Returns:
        dict mapping each group name to a list of numpy arrays.
    """
    import re

    def _numeric_name_key(name):
        # re.split with a capturing group alternates text and digit chunks,
        # so the odd positions are always the digit runs.
        chunks = re.split(r'(\d+)', name)
        return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]

    data_dict = {}
    with h5py.File(file_path, 'r') as hf:
        for key in hf.keys():
            group = hf[key]
            ordered_names = sorted(group.keys(), key=_numeric_name_key)
            data_dict[key] = [np.array(group[dataset_name]) for dataset_name in ordered_names]
    return data_dict

In [28]:
# Read the exported frame and mask files back for verification; gt_real.h5 is not reloaded here.
file_path = './content/datasets/h5_out/raw_image_sim.h5'
loaded_dictionary_images_sim=load_h5_to_dictionary(file_path)

file_path = './content/datasets/h5_out/raw_image_real.h5'
loaded_dictionary_images_real=load_h5_to_dictionary(file_path)

file_path = './content/datasets/h5_out/semantic_id_list_sim.h5'
loaded_dictionary_masks_sim=load_h5_to_dictionary(file_path)

file_path = './content/datasets/h5_out/semantic_id_list_real.h5'
loaded_dictionary_masks_real=load_h5_to_dictionary(file_path)


In [29]:
def find_lowest_dimensions(images_list1, images_list2):
    """Return the smallest height and width found across both image lists.

    Every image of both lists is inspected, so the result is a size that all
    of them can be cropped to.

    Args:
        images_list1: list of arrays whose first two axes are height and width.
        images_list2: second list of such arrays.

    Returns:
        ``(lowest_height, lowest_width)``.

    Raises:
        ValueError: if both lists are empty.
    """
    shapes = [image.shape for image in images_list1]
    shapes.extend(image.shape for image in images_list2)
    lowest_height = min(shape[0] for shape in shapes)
    lowest_width = min(shape[1] for shape in shapes)
    return lowest_height, lowest_width

def crop_images_to_lowest_dimensions(images_list, lowest_height, lowest_width):
    """Crop every 3-axis image to its top-left ``lowest_height`` x ``lowest_width`` window.

    The third axis is kept whole. Returns a new list of the cropped slices.
    """
    cropped_images = []
    for image in images_list:
        cropped_images.append(image[:lowest_height, :lowest_width, :])
    return cropped_images
def crop_masks_to_lowest_dimensions(images_list, lowest_height, lowest_width):
    """Crop every mask to its top-left ``lowest_height`` x ``lowest_width`` window.

    Only the first two axes are sliced. Returns a new list of the cropped slices.
    """
    cropped_images = []
    for image in images_list:
        cropped_images.append(image[:lowest_height, :lowest_width])
    return cropped_images

# Start from a large sentinel and shrink it to the size shared by the real and sim frames.
lowest_height = 10000
lowest_width = 10000

height, width = find_lowest_dimensions(
    loaded_dictionary_images_real["0001"],
    loaded_dictionary_images_sim["0001"],
)
lowest_height = min(height, lowest_height)
lowest_width = min(width, lowest_width)
print(lowest_height,lowest_width)

# Crop the frames first (real, then sim), then the masks in the same order.
for image_dict in (loaded_dictionary_images_real, loaded_dictionary_images_sim):
    image_dict["0001"] = crop_images_to_lowest_dimensions(image_dict["0001"], lowest_height, lowest_width)

for mask_dict in (loaded_dictionary_masks_real, loaded_dictionary_masks_sim):
    mask_dict["0001"] = crop_masks_to_lowest_dimensions(mask_dict["0001"], lowest_height, lowest_width)


140 320


In [None]:
# Show the cropped frames and masks for indices 1 to 9 as a final visual check.

for i in range(1, 10):
    # Create a 2x2 grid of subplots
    fig, axs = plt.subplots(2, 2, figsize=(15, 6))
    plt.subplots_adjust(wspace=0.2, hspace=0.4)

    # Top row: real and sim frames side by side.
    axs[0,0].imshow(cv2.cvtColor(loaded_dictionary_images_real["0001"][i], cv2.COLOR_BGR2RGB))
    axs[0,0].set_title('Real Image')
    axs[0,1].imshow(cv2.cvtColor(loaded_dictionary_images_sim["0001"][i], cv2.COLOR_BGR2RGB))
    axs[0,1].set_title('Sim Image')

    # Bottom row: the matching masks, titled as masks rather than images.
    axs[1,0].imshow(loaded_dictionary_masks_real["0001"][i])
    axs[1,0].set_title('Real Mask')
    axs[1,1].imshow(loaded_dictionary_masks_sim["0001"][i])
    axs[1,1].set_title('Sim Mask')

    for ax in axs.flat:
        ax.set_xticks([])
        ax.set_yticks([])

    # Show the plot for the current iteration
    plt.show()