If Dockerfiles have not been modified, connect to the Jupyter server with ```http://localhost:8000/tree?token=util```

The following block takes the base Place Pulse Singapore data file (```place-pulse-singapore.csv``` by default) and generates a ```.csv``` of unique location IDs and their corresponding latitudes and longitudes.

In [None]:
target_dir = "data"
input = "place-pulse-singapore.csv"
output = "place-pulse-singapore-locations.csv"

import pandas as pd

import os

# Load the raw Place Pulse table and impose the fixed column names used below
# (read_csv's default treats the first row of the file as a header, which is
# then overwritten here).
df = pd.read_csv(os.path.join(target_dir, input))
df.columns = ["id", "location_id", "lat", "lon", "num_votes", "perception", "trueskill_score", "trueskill_stds"]

# Keep the first row seen for every location and expose its identifier as "id".
df = (
    df.drop_duplicates(subset=["location_id"])[["location_id", "lat", "lon"]]
    .rename(columns={"location_id": "id"})
)

# pandas asks for text handles to be opened with newline='' so that it controls
# the line terminator itself; without it, platforms that translate "\n" end up
# with doubled line endings.
with open(os.path.join(target_dir, output), 'w', newline='') as fp:
    df.to_csv(fp, index=False)

The following block takes the base Place Pulse Singapore data file (```place-pulse-singapore.csv``` by default), and for each unique location, attempts to find its ```trueskill_score``` for each ```perception```.  
One ```.csv``` is created for each perception.

In [None]:
target_dir = "data"
place_pulse_singapore_file = "place-pulse-singapore.csv"
output_dir = "place-pulse-singapore-labels"

import pandas as pd

import os
from pathlib import Path

with open(os.path.join(target_dir, place_pulse_singapore_file), 'r') as fp:
    place_pulse_singapore_df = pd.read_csv(fp)

labels_dir = os.path.join(target_dir, output_dir)
Path(labels_dir).mkdir(parents=True, exist_ok=True)

# Iterating the groupby yields each perception's rows directly.  This avoids
# feeding the index *labels* stored in `.groups` to the positional `.iloc`,
# which is only correct while the frame keeps its default RangeIndex.
for perception, perception_rows in place_pulse_singapore_df.groupby(by="perception"):
    # One score per location: keep the first row seen for each location_id.
    perception_df = perception_rows.drop_duplicates(subset="location_id")[["location_id", "trueskill_score"]]
    # Spaces in the perception name become hyphens in the file name.
    label_path = os.path.join(labels_dir, perception.replace(' ', '-') + ".csv")
    # newline='' lets pandas control the line terminator on every platform.
    with open(label_path, 'w', newline='') as fp:
        perception_df.to_csv(fp, index=False)

The following block takes an m×1 matrix and an n×1 matrix, each stored as a ```.json```, and combines them into an (m+n)×1 matrix.

In [None]:
target_dir = "data"
input1_dir = "place-pulse-singapore-panos-encoded"
input2_dir = "place-pulse-singapore-point-clouds-encoded"
output_dir = "place-pulse-singapore-combined-encoded"

import pandas as pd
from sklearn.preprocessing import StandardScaler

import json
import os
from pathlib import Path

# First input: every file sitting directly inside the directory is parsed as
# JSON; its row label is the file name with the last extension removed.
input1s_path = list(next(os.walk(os.path.join(target_dir, input1_dir)), ("", [], []))[2])
input1s_id = [name.rpartition('.')[0] for name in input1s_path]
input1s = []
for name in input1s_path:
    with open(os.path.join(target_dir, input1_dir, name), 'r') as fp:
        input1s.append(json.load(fp))
input1s_df = pd.DataFrame(input1s)
input1s_df.index = input1s_id

# Second input: loaded exactly the same way from the other directory.
input2s_path = list(next(os.walk(os.path.join(target_dir, input2_dir)), ("", [], []))[2])
input2s_id = [name.rpartition('.')[0] for name in input2s_path]
input2s = []
for name in input2s_path:
    with open(os.path.join(target_dir, input2_dir, name), 'r') as fp:
        input2s.append(json.load(fp))
input2s_df = pd.DataFrame(input2s)
input2s_df.index = input2s_id

# Inner join on the row labels: only ids present in both directories survive,
# with the first input's columns placed before the second's.
combined_df = input1s_df.join(input2s_df, how="inner", lsuffix="_left", rsuffix="_right")

# One flat JSON list per surviving id.
combined_dir = os.path.join(target_dir, output_dir)
Path(combined_dir).mkdir(parents=True, exist_ok=True)
for i in combined_df.index:
    with open(os.path.join(combined_dir, i + ".json"), 'w') as fp:
        json.dump(combined_df.loc[i].values.flatten().tolist(), fp)

KeyboardInterrupt: 

The following block takes an image and resizes it.

In [None]:
target_dir = "data"
input = ""
output = ""

from PIL import Image

import os
from pathlib import Path

# Collect the files sitting directly inside the input directory (no recursion)
# and key each one by its file name with the last extension removed.
images_path = list(next(os.walk(os.path.join(target_dir, input)), ("", [], []))[2])
images_id = {
    image_path.rpartition('.')[0]: os.path.join(target_dir, input, image_path)
    for image_path in images_path
}

Path(os.path.join(target_dir, output)).mkdir(parents=True, exist_ok=True)
for image_id, path in images_id.items():
    # Image.open either returns an image or raises, so no None check is needed;
    # the context manager closes the underlying file once the resized copy
    # has been produced.
    with Image.open(path) as image:
        resized = image.resize((512, 256))
    # The JPEG writer rejects modes such as RGBA or P; convert those to RGB
    # instead of failing on save.
    if resized.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        resized = resized.convert("RGB")
    resized.save(os.path.join(target_dir, output, image_id + ".jpg"))

The following block takes a ```.csv``` without headers describing the cartesian (xyz) coordinates of a point cloud and returns a ```.csv``` describing a point cloud with 1024 points via furthest-point sampling.  

In [None]:
target_dir = "data"
input_dir = "place-pulse-singapore-point-clouds-google"
output_dir = "place-pulse-singapore-segmented-point-clouds-split-sampled"

import fpsample
import numpy as np

import csv
import os
from pathlib import Path

input_root = os.path.join(target_dir, input_dir)
output_root = os.path.join(target_dir, output_dir)
num_samples = 1024  # points kept per cloud by furthest-point sampling

# Walk the whole input tree and remember every file relative to the input
# root, so the same sub-directory layout can be reproduced under the output
# root.  os.path.relpath is used instead of splitting on a hard-coded '/'.
point_clouds_path = []
for dirpath, dirnames, filenames in os.walk(input_root):
    for filename in filenames:
        full_path = os.path.join(dirpath, filename)
        if Path(full_path).is_file():
            point_clouds_path.append(os.path.relpath(full_path, input_root))

for path in point_clouds_path:
    # Strip only the final extension; cutting at the first '.' would also
    # truncate dotted directory or file names.
    filename_no_ext = os.path.splitext(path)[0]
    sampled_path = os.path.join(output_root, filename_no_ext + ".csv")
    if Path(sampled_path).is_file():
        # Already sampled on a previous run.
        continue

    with open(os.path.join(input_root, path), 'r', newline='') as fp:
        # Blank lines come back from csv.reader as empty rows; drop them.
        point_cloud = [row for row in csv.reader(fp, delimiter=',') if row]
    if len(point_cloud) < num_samples:
        # Too few points to draw the requested number of samples.
        continue

    # Only the first three columns (x, y, z) drive the sampling; the selected
    # rows are written back with all of their original columns.
    points = np.array([(float(row[0]), float(row[1]), float(row[2])) for row in point_cloud])
    fps_samples_idx = fpsample.fps_sampling(points, num_samples)
    point_cloud_sampled = [point_cloud[i] for i in fps_samples_idx]

    # Create the destination directory only when there is something to write.
    Path(os.path.dirname(sampled_path)).mkdir(parents=True, exist_ok=True)
    # The csv module expects its files to be opened with newline=''.
    with open(sampled_path, 'w', newline='') as fp:
        csv.writer(fp).writerows(point_cloud_sampled)

['-29.28314916054002', '43.05655577088113', '-3.5198744069624324']
['14.946376910563787', '18.98358480378811', '10.182090467566958']
['11.654060591633057', '52.515152291375415', '5.631597618523011']
['-23.826378592145538', '-28.561449579564027', '18.437113888260107']
['11.655859554971055', '-28.988077483795667', '42.671984455723745']


KeyboardInterrupt: 

The following block takes a ```.json``` of a 2D array describing a depthmap and returns a ```.csv``` describing a point cloud from the depthmap.  
It assumes the depthmap is oriented with the centre facing north.  

In [None]:
target_dir = "data"
input = "place-pulse-singapore-depths-512-1024"
output = "place-pulse-singapore-depth-point-clouds"

import csv
import json
import math
import os
from pathlib import Path

def depthmap_to_xyz(depthmap: list[list[float]],
                    xrange: tuple[float, float] = (-1.0, 1.0), yrange: tuple[float, float] = (-1.0, 1.0),
                    heading: float = 0,
                    rmin: float = 0.0, rmax: float = math.inf) -> list[list[float]]:
    """Project a 2D depthmap onto Cartesian points around the origin.

    Each pixel centre is mapped linearly into ``xrange`` (columns) and
    ``yrange`` (rows).  The normalised column gives the azimuth
    ``theta = -pi * xnorm`` and the normalised row gives the elevation
    ``phi = -pi / 2 * ynorm``.  With the default ranges and heading, the
    centre column lies on the +y axis and the top row points towards +z.

    Args:
        depthmap: Rows of radial distances; every row must have at least as
            many entries as the first row.
        xrange: Normalised coordinates assigned to the left and right image
            edges.
        yrange: Normalised coordinates assigned to the top and bottom image
            edges.
        heading: Angle in radians; its negation is added to every azimuth.
        rmin: Pixels whose depth is below this value are dropped.
        rmax: Pixels whose depth is above this value are dropped.

    Returns:
        One ``[x, y, z]`` list per retained pixel, in row-major order.  An
        empty depthmap yields an empty list.
    """
    height = len(depthmap)
    if height == 0:
        # Nothing to project; also avoids indexing depthmap[0] below.
        return []
    width = len(depthmap[0])
    x0 = xrange[0]
    dx = xrange[1] - x0
    y0 = yrange[0]
    dy = yrange[1] - y0
    h = -heading

    # The azimuth depends only on the column, so its sine and cosine are
    # computed once per column instead of once per pixel.
    column_trig = []
    for j in range(width):
        xnorm = ((j + 0.5) / width) * dx + x0
        theta = -math.pi * xnorm
        column_trig.append((math.sin(h + theta), math.cos(h + theta)))

    output = []
    for i in range(height):
        # The elevation depends only on the row.
        ynorm = ((i + 0.5) / height) * dy + y0
        phi = -math.pi / 2 * ynorm
        sin_phi = math.sin(phi)
        cos_phi = math.cos(phi)
        row = depthmap[i]
        for j in range(width):
            r = row[j]
            if r < rmin or r > rmax:
                continue
            sin_theta, cos_theta = column_trig[j]
            output.append([-r * sin_theta * cos_phi,
                           r * cos_theta * cos_phi,
                           r * sin_phi])
    return output

# Collect the depthmap files sitting directly inside the input directory (no
# recursion), keyed by file name with the last extension removed.
depths_path = list(next(os.walk(os.path.join(target_dir, input)), ("", [], []))[2])
depths_id = {
    depth_path.rpartition('.')[0]: os.path.join(target_dir, input, depth_path)
    for depth_path in depths_path
}

Path(os.path.join(target_dir, output)).mkdir(parents=True, exist_ok=True)
for depth_id, path in depths_id.items():
    with open(path, 'r') as fp:
        depth = json.load(fp)
    if depth is None:
        # A file containing JSON `null` carries no depthmap.
        continue
    point_cloud = depthmap_to_xyz(depth)
    # The csv module expects its files to be opened with newline=''; otherwise
    # platforms that translate "\n" produce doubled line endings.
    with open(os.path.join(target_dir, output, f"{depth_id}.csv"), 'w', newline='') as fp:
        csv.writer(fp).writerows(point_cloud)

The following block takes a ```.json``` of a 2D array describing a depthmap and a ```.json``` of a 2D array describing the semantic segmentation associated with that depthmap/panorama and returns a ```.csv``` describing a segmented point cloud derived from the two inputs and a ```.json``` describing the class labels.  
It assumes the inputs are oriented with the centre facing north.  

In [None]:
target_dir = "data"
depthmap_dir = "place-pulse-singapore-depths-512-1024"
segmentation_dir = "place-pulse-singapore-segmented-512-1024"
class_id = "classes.json"
output_dir = "place-pulse-singapore-segmented-point-clouds"

import numpy as np

import csv
import json
import math
import os
from pathlib import Path

def depthmap_to_segmented_xyz(depthmap: list[list[float]], segmentationmap: list[list[float]],
                    xrange: tuple[float, float] = (-1.0, 1.0), yrange: tuple[float, float] = (-1.0, 1.0),
                    heading: float = 0,
                    rmin: float = 0.0, rmax: float = math.inf) -> list[list[float]]:
    """Project a depthmap onto Cartesian points tagged with a segmentation label.

    Each pixel centre is mapped linearly into ``xrange`` (columns) and
    ``yrange`` (rows).  The normalised column gives the azimuth
    ``theta = -pi * xnorm`` and the normalised row gives the elevation
    ``phi = -pi / 2 * ynorm``.  With the default ranges and heading, the
    centre column lies on the +y axis and the top row points towards +z.

    Args:
        depthmap: Rows of radial distances; every row must have at least as
            many entries as the first row.
        segmentationmap: Per-pixel labels, indexed with the same ``[i][j]``
            as ``depthmap``.
        xrange: Normalised coordinates assigned to the left and right image
            edges.
        yrange: Normalised coordinates assigned to the top and bottom image
            edges.
        heading: Angle in radians; its negation is added to every azimuth.
        rmin: Pixels whose depth is below this value are dropped.
        rmax: Pixels whose depth is above this value are dropped.

    Returns:
        One ``[x, y, z, label]`` list per retained pixel, in row-major order.
        An empty depthmap yields an empty list.
    """
    height = len(depthmap)
    if height == 0:
        # Nothing to project; also avoids indexing depthmap[0] below.
        return []
    width = len(depthmap[0])
    x0 = xrange[0]
    dx = xrange[1] - x0
    y0 = yrange[0]
    dy = yrange[1] - y0
    h = -heading

    # The azimuth depends only on the column, so its sine and cosine are
    # computed once per column instead of once per pixel.
    column_trig = []
    for j in range(width):
        xnorm = ((j + 0.5) / width) * dx + x0
        theta = -math.pi * xnorm
        column_trig.append((math.sin(h + theta), math.cos(h + theta)))

    output = []
    for i in range(height):
        # The elevation depends only on the row.
        ynorm = ((i + 0.5) / height) * dy + y0
        phi = -math.pi / 2 * ynorm
        sin_phi = math.sin(phi)
        cos_phi = math.cos(phi)
        depth_row = depthmap[i]
        for j in range(width):
            r = depth_row[j]
            if r < rmin or r > rmax:
                continue
            sin_theta, cos_theta = column_trig[j]
            output.append([-r * sin_theta * cos_phi,
                           r * cos_theta * cos_phi,
                           r * sin_phi,
                           segmentationmap[i][j]])
    return output

# Collect the depthmap files sitting directly inside the depthmap directory
# (no recursion).  Each id maps to [depthmap path, segmentation path]; the
# segmentation file is expected under the same file name in its own directory.
segmented_depths_path = list(next(os.walk(os.path.join(target_dir, depthmap_dir)), ("", [], []))[2])
segmented_depths_id = {
    segmented_depth_path.rpartition('.')[0]: [
        os.path.join(target_dir, depthmap_dir, segmented_depth_path),
        os.path.join(target_dir, segmentation_dir, segmented_depth_path),
    ]
    for segmented_depth_path in segmented_depths_path
}

Path(os.path.join(target_dir, output_dir)).mkdir(parents=True, exist_ok=True)
for cloud_id, (depth_path, segmentation_path) in segmented_depths_id.items():
    cloud_path = os.path.join(target_dir, output_dir, f"{cloud_id}.csv")
    if Path(cloud_path).is_file():
        # Already converted on a previous run.
        continue
    with open(depth_path, 'r') as fp:
        depth = json.load(fp)
    if depth is None:
        continue
    with open(segmentation_path, 'r') as fp:
        segmentation = json.load(fp)
    # The two maps must line up pixel for pixel.
    if segmentation is None or np.array(depth).shape != np.array(segmentation).shape:
        continue
    segmented_point_cloud = depthmap_to_segmented_xyz(depth, segmentation)
    # The csv module expects its files to be opened with newline=''; otherwise
    # platforms that translate "\n" produce doubled line endings.
    with open(cloud_path, 'w', newline='') as fp:
        csv.writer(fp).writerows(segmented_point_cloud)

The following block takes a ```.jpg``` of a panorama, a ```.json``` of a 2D array describing the depthmap of the panorama, and a ```.json``` of a 2D array describing the semantic segmentation of the panorama and returns a ```.csv``` describing a segmented coloured point cloud derived from the three inputs and a ```.json``` describing the class labels.  
It assumes the inputs are oriented with the centre facing north.  

In [None]:
target_dir = "data"
pano_dir = "place-pulse-singapore-panos"
depthmap_dir = "place-pulse-singapore-depths"
segmentation_dir = "place-pulse-singapore-segmented"
class_id = "classes.json"
output_dir = "place-pulse-singapore-segmented-coloured-point-clouds"

import numpy as np
from PIL import Image

import csv
import json
import math
import os
from pathlib import Path

def depthmap_to_segmented_coloured_xyz(depthmap: list[list[float]], segmentationmap: list[list[float]], pano: list[list[list[int]]],
                    xrange: tuple[float, float] = (-1.0, 1.0), yrange: tuple[float, float] = (-1.0, 1.0),
                    heading: float = 0,
                    rmin: float = 0.0, rmax: float = math.inf) -> list[list[float]]:
    """Project a depthmap onto Cartesian points carrying colour and a label.

    Each pixel centre is mapped linearly into ``xrange`` (columns) and
    ``yrange`` (rows).  The normalised column gives the azimuth
    ``theta = -pi * xnorm`` and the normalised row gives the elevation
    ``phi = -pi / 2 * ynorm``.  With the default ranges and heading, the
    centre column lies on the +y axis and the top row points towards +z.

    Args:
        depthmap: Rows of radial distances; every row must have at least as
            many entries as the first row.
        segmentationmap: Per-pixel labels, indexed with the same ``[i][j]``
            as ``depthmap``.
        pano: Per-pixel colour, indexed with the same ``[i][j]`` as
            ``depthmap``; the first three channels of each pixel are used.
        xrange: Normalised coordinates assigned to the left and right image
            edges.
        yrange: Normalised coordinates assigned to the top and bottom image
            edges.
        heading: Angle in radians; its negation is added to every azimuth.
        rmin: Pixels whose depth is below this value are dropped.
        rmax: Pixels whose depth is above this value are dropped.

    Returns:
        One ``[x, y, z, c0, c1, c2, label]`` list per retained pixel, in
        row-major order, where ``c0..c2`` are the first three channels of the
        matching ``pano`` pixel.  An empty depthmap yields an empty list.
    """
    height = len(depthmap)
    if height == 0:
        # Nothing to project; also avoids indexing depthmap[0] below.
        return []
    width = len(depthmap[0])
    x0 = xrange[0]
    dx = xrange[1] - x0
    y0 = yrange[0]
    dy = yrange[1] - y0
    h = -heading

    # The azimuth depends only on the column, so its sine and cosine are
    # computed once per column instead of once per pixel.
    column_trig = []
    for j in range(width):
        xnorm = ((j + 0.5) / width) * dx + x0
        theta = -math.pi * xnorm
        column_trig.append((math.sin(h + theta), math.cos(h + theta)))

    output = []
    for i in range(height):
        # The elevation depends only on the row.
        ynorm = ((i + 0.5) / height) * dy + y0
        phi = -math.pi / 2 * ynorm
        sin_phi = math.sin(phi)
        cos_phi = math.cos(phi)
        depth_row = depthmap[i]
        for j in range(width):
            r = depth_row[j]
            if r < rmin or r > rmax:
                continue
            sin_theta, cos_theta = column_trig[j]
            colour = pano[i][j]
            output.append([-r * sin_theta * cos_phi,
                           r * cos_theta * cos_phi,
                           r * sin_phi,
                           colour[0], colour[1], colour[2],
                           segmentationmap[i][j]])
    return output

# Collect the depthmap files sitting directly inside the depthmap directory
# (no recursion).  Each id maps to [depthmap path, segmentation path,
# panorama path]; the segmentation shares the depthmap's file name and the
# panorama is "<id>.jpg".
segmented_depths_path = list(next(os.walk(os.path.join(target_dir, depthmap_dir)), ("", [], []))[2])
segmented_depths_id = {
    segmented_depth_path.rpartition('.')[0]: [
        os.path.join(target_dir, depthmap_dir, segmented_depth_path),
        os.path.join(target_dir, segmentation_dir, segmented_depth_path),
        os.path.join(target_dir, pano_dir, segmented_depth_path.rpartition('.')[0] + ".jpg"),
    ]
    for segmented_depth_path in segmented_depths_path
}

Path(os.path.join(target_dir, output_dir)).mkdir(parents=True, exist_ok=True)
for cloud_id, (depth_path, segmentation_path, pano_path) in segmented_depths_id.items():
    cloud_path = os.path.join(target_dir, output_dir, f"{cloud_id}.csv")
    if Path(cloud_path).is_file():
        # Already converted on a previous run.
        continue
    with open(depth_path, 'r') as fp:
        depth = json.load(fp)
    if depth is None:
        continue
    depth_shape = np.array(depth).shape
    with open(segmentation_path, 'r') as fp:
        segmentation = json.load(fp)
    # The two maps must line up pixel for pixel.
    if segmentation is None or depth_shape != np.array(segmentation).shape:
        continue
    # Image.open either returns an image or raises, so no None check is needed.
    # Converting to RGB guarantees exactly three channels per pixel (greyscale
    # or RGBA panoramas would otherwise not fit an (h, w, 3) layout), and the
    # context manager closes the file once the resized copy exists.
    with Image.open(pano_path) as image:
        resized = image.convert("RGB").resize((depth_shape[1], depth_shape[0]))
    # PIL sizes are (width, height); the array comes back as (height, width, 3).
    pano = np.asarray(resized).tolist()
    segmented_coloured_point_cloud = depthmap_to_segmented_coloured_xyz(depth, segmentation, pano)
    # The csv module expects its files to be opened with newline=''; otherwise
    # platforms that translate "\n" produce doubled line endings.
    with open(cloud_path, 'w', newline='') as fp:
        csv.writer(fp).writerows(segmented_coloured_point_cloud)

The following block takes a ```.csv``` describing a segmented coloured point cloud and a ```.json``` describing its class labels and converts it into a format similar to the S3DIS dataset.  

In [None]:
target_dir = "data"
input_dir = "place-pulse-singapore-segmented-coloured-point-clouds"
classes_id = "classes.json"
output_dir = "place-pulse-singapore-point-clouds-s3dis"
cityscapes_name_to_s3dis_name = {
    "road": "floor",
    "sidewalk": "sofa",
    "building": "wall",
    "wall":  "wall",
    "fence": "wall",
    "pole": "clutter",
    "traffic light": "clutter",
    "traffic sign": "clutter",
    "vegetation": "column",
    "terrain": "table",
    "sky": "ceiling",
    "person": "clutter",
    "rider": "clutter",
    "car": "clutter",
    "truck": "clutter",
    "bus": "clutter",
    "train": "clutter",
    "motorcycle": "clutter",
    "bicycle": "clutter"
}

import csv
import json
import os
from pathlib import Path

def cityscapes_label_to_s3dis_name(label, cityscapes_name_to_s3dis_name: dict,
                                   cityscapes_label_to_name: dict = {
                                       0: "road",
                                       1: "sidewalk",
                                       2: "building",
                                       3: "wall",
                                       4: "fence",
                                       5: "pole",
                                       6: "traffic light",
                                       7: "traffic sign",
                                       8: "vegetation",
                                       9: "terrain",
                                       10: "sky",
                                       11: "person",
                                       12: "rider",
                                       13: "car",
                                       14: "truck",
                                       15: "bus",
                                       16: "train",
                                       17: "motorcycle",
                                       18: "bicycle"
                                   }) -> str:
    """Translate a Cityscapes label id into the S3DIS class name it maps to.

    The label is looked up in ``cityscapes_label_to_name`` whether that table
    is keyed by ``int`` (the default) or by ``str`` (for example when it was
    loaded from JSON), and whether ``label`` itself arrives as a number or as
    text such as ``"8"`` or ``"8.0"``.

    Args:
        label: Cityscapes label id, as a number or its text form.
        cityscapes_name_to_s3dis_name: Maps a Cityscapes class name to an
            S3DIS class name.
        cityscapes_label_to_name: Maps a Cityscapes label id to its class
            name.  The default table is only read, never modified.

    Returns:
        The S3DIS class name for ``label``.

    Raises:
        KeyError: If ``label`` matches no key of ``cityscapes_label_to_name``,
            or the resulting class name is missing from
            ``cityscapes_name_to_s3dis_name``.
    """
    # Try the label as given first, then its text form, then its integer
    # value in both forms, so either key type can be matched.
    candidates = [label, str(label)]
    try:
        as_int = int(float(label))
    except (TypeError, ValueError, OverflowError):
        # Not numeric: only the literal forms above can match.
        pass
    else:
        candidates.extend((as_int, str(as_int)))

    for key in candidates:
        if key in cityscapes_label_to_name:
            return cityscapes_name_to_s3dis_name[cityscapes_label_to_name[key]]
    raise KeyError(label)

# Class-id -> Cityscapes-name table shipped with the data (JSON object keys
# are always strings).
with open(os.path.join(target_dir, classes_id), 'r') as fp:
    cityscapes_label_to_name = json.load(fp)

s3dis_label_to_name = {
    0: "ceiling",
    1: "floor",
    2: "wall",
    3: "beam",
    4: "column",
    5: "window",
    6: "door",
    7: "chair",
    8: "table",
    9: "bookcase",
    10: "sofa",
    11: "board",
    12: "clutter"
}

# Collect the point-cloud files sitting directly inside the input directory
# (no recursion), keyed by file name with the last extension removed.
segmented_coloured_point_clouds_path = list(next(os.walk(os.path.join(target_dir, input_dir)), ("", [], []))[2])
segmented_coloured_point_clouds_id = {
    segmented_coloured_point_cloud_path.rpartition('.')[0]: os.path.join(target_dir, input_dir, segmented_coloured_point_cloud_path)
    for segmented_coloured_point_cloud_path in segmented_coloured_point_clouds_path
}

Path(os.path.join(target_dir, output_dir)).mkdir(parents=True, exist_ok=True)
for cloud_id, path in segmented_coloured_point_clouds_id.items():
    # Output layout: <output>/<id>/<id>/<id>.txt plus one file per class
    # under <output>/<id>/<id>/Annotations/.
    cloud_dir = os.path.join(target_dir, output_dir, cloud_id, cloud_id)
    annotations_dir = os.path.join(cloud_dir, "Annotations")
    if Path(annotations_dir).is_dir():
        # Treated as already converted on a previous run.
        continue
    Path(annotations_dir).mkdir(parents=True, exist_ok=True)

    # The csv module expects its files to be opened with newline=''.
    with open(path, 'r', newline='') as fp:
        rows = list(csv.reader(fp))

    # Whole cloud, space-delimited, without the trailing label column.
    with open(os.path.join(cloud_dir, f"{cloud_id}.txt"), 'w', newline='') as fp:
        csv.writer(fp, delimiter=' ').writerows([row[:-1] for row in rows])

    # Group the points by mapped class.  The label is the last column, the
    # same one that is stripped from the written rows.
    class_rows = {}
    for row in rows:
        s3dis_name = cityscapes_label_to_s3dis_name(row[-1], cityscapes_name_to_s3dis_name, cityscapes_label_to_name=cityscapes_label_to_name) + "_1"
        class_rows.setdefault(s3dis_name, []).append(row[:-1])

    for name, annotation_rows in class_rows.items():
        with open(os.path.join(annotations_dir, f"{name}.txt"), 'w', newline='') as fp:
            csv.writer(fp, delimiter=' ').writerows(annotation_rows)

The following block takes a ```.csv``` describing a segmented (optionally coloured) point cloud and a ```.json``` describing its class labels and splits it into separate point clouds for each label.  

In [None]:
target_dir = "data"
input_dir = "place-pulse-singapore-segmented-coloured-point-clouds"
classes_id = "classes.json"
output_dir = "place-pulse-singapore-segmented-point-clouds-split"
cityscapes_label_to_name = {
    0: "road",
    1: "sidewalk",
    2: "building",
    3: "wall",
    4: "fence",
    5: "pole",
    6: "traffic light",
    7: "traffic sign",
    8: "vegetation",
    9: "terrain",
    10: "sky",
    11: "person",
    12: "rider",
    13: "car",
    14: "truck",
    15: "bus",
    16: "train",
    17: "motorcycle",
    18: "bicycle"
}
cityscapes_name_to_split = {
    "road": "road",
    "sidewalk": "sidewalk",
    "building": "building",
    "wall": "building",
    "fence": "building",
    "vegetation": "vegetation",
    "terrain": "terrain"
}

import csv
import os
from pathlib import Path

# Collect the point-cloud files sitting directly inside the input directory
# (no recursion), keyed by file name with the last extension removed.
segmented_coloured_point_clouds_path = list(next(os.walk(os.path.join(target_dir, input_dir)), ("", [], []))[2])
segmented_coloured_point_clouds_id = {
    segmented_coloured_point_cloud_path.rpartition('.')[0]: os.path.join(target_dir, input_dir, segmented_coloured_point_cloud_path)
    for segmented_coloured_point_cloud_path in segmented_coloured_point_clouds_path
}

Path(os.path.join(target_dir, output_dir)).mkdir(parents=True, exist_ok=True)
for cloud_id, path in segmented_coloured_point_clouds_id.items():
    split_dir = os.path.join(target_dir, output_dir, cloud_id)
    if Path(split_dir).is_dir():
        # Treated as already split on a previous run.
        continue

    # The csv module expects its files to be opened with newline=''.
    with open(path, 'r', newline='') as fp:
        rows = list(csv.reader(fp))

    # Group the points by split name; the label is the last column and is
    # dropped from the rows that get written.
    name_rows = {}
    for row in rows:
        cityscapes_name = cityscapes_label_to_name[int(row[-1])]
        if cityscapes_name not in cityscapes_name_to_split:
            # Classes without a split target are discarded.
            continue
        split_name = cityscapes_name_to_split[cityscapes_name]
        name_rows.setdefault(split_name, []).append(row[:-1])

    Path(split_dir).mkdir(parents=True, exist_ok=True)
    for name, split_rows in name_rows.items():
        with open(os.path.join(split_dir, f"{name}.csv"), 'w', newline='') as fp:
            csv.writer(fp, delimiter=' ').writerows(split_rows)