If the Dockerfiles have not been modified, connect to the Jupyter server at ```http://localhost:8000/tree?token=util```.

The following block takes the base Place Pulse Singapore data file (```place-pulse-singapore.csv``` by default) and generates a ```.csv``` of unique location IDs and their corresponding latitudes and longitudes.

In [1]:
# Configuration: all paths are resolved relative to `target_dir`.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# the cells in this notebook share the same three configuration variables.
target_dir = "data"
input = "place-pulse-singapore.csv"
output = "place-pulse-singapore-locations.csv"

import pandas as pd

import os

df = pd.read_csv(os.path.join(target_dir, input))
# Replace the file's header with fixed names; this requires the file to have
# exactly eight columns, otherwise pandas raises a length-mismatch error.
df.columns = [
    "id",
    "location_id",
    "lat",
    "lon",
    "num_votes",
    "perception",
    "trueskill_score",
    "trueskill_stds",
]
# Keep the first row seen for each location, and only its coordinates.
df = df.drop_duplicates(subset=["location_id"])[["location_id", "lat", "lon"]]
df.columns = ["id", "lat", "lon"]
# Hand pandas the path instead of a text-mode handle: a handle opened without
# newline='' lets the platform translate line endings a second time, and it
# uses the locale encoding rather than pandas' UTF-8 default.
df.to_csv(os.path.join(target_dir, output), index=False)

The following block takes every image in the input directory, resizes it to 512×256 pixels, and saves it as a ```.jpg``` in the output directory.

In [7]:
# Configuration: `input` and `output` are sub-directories of `target_dir`.
# Left empty here; fill them in before running the cell.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# the cells in this notebook share the same three configuration variables.
target_dir = "data"
input = ""
output = ""

from PIL import Image

import os
from pathlib import Path

source_dir = os.path.join(target_dir, input)
dest_dir = os.path.join(target_dir, output)

# Only the files directly inside the source directory are considered; a
# missing directory simply yields no files.
images_path = next(os.walk(source_dir), (source_dir, [], []))[2]

# Map each file's name without its last extension to its full path.
# os.path.splitext keeps extension-less names intact instead of collapsing
# them all to the empty string.
images_id = {
    os.path.splitext(image_path)[0]: os.path.join(source_dir, image_path)
    for image_path in images_path
}

Path(dest_dir).mkdir(parents=True, exist_ok=True)
for image_id, path in images_id.items():
    try:
        # Image.open raises on failure rather than returning None, so files
        # that cannot be read as images are caught here. The context manager
        # closes the underlying file as soon as the resized copy exists.
        with Image.open(path) as image:
            resized = image.resize((512, 256))
    except OSError as err:
        print(f"Skipping {path}: {err}")
        continue
    resized.save(os.path.join(dest_dir, image_id + ".jpg"))

{'50f56060fdc9f065f000515e': 'data/place-pulse-singapore-panos/50f56060fdc9f065f000515e.jpg', '50f56060fdc9f065f000515f': 'data/place-pulse-singapore-panos/50f56060fdc9f065f000515f.jpg', '50f56060fdc9f065f0005160': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005160.jpg', '50f56060fdc9f065f0005161': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005161.jpg', '50f56060fdc9f065f0005162': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005162.jpg', '50f56060fdc9f065f0005163': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005163.jpg', '50f56060fdc9f065f0005166': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005166.jpg', '50f56060fdc9f065f0005167': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005167.jpg', '50f56060fdc9f065f0005168': 'data/place-pulse-singapore-panos/50f56060fdc9f065f0005168.jpg', '50f56061fdc9f065f000516a': 'data/place-pulse-singapore-panos/50f56061fdc9f065f000516a.jpg', '50f56061fdc9f065f000516b': 'data/place-pulse-singapore-panos/50f5606

The following block takes each header-less ```.csv``` in the input directory, describing the Cartesian (xyz) coordinates of a point cloud, and writes a ```.csv``` describing that point cloud downsampled to 1024 points via farthest-point sampling.

In [8]:
# Configuration: `input` and `output` are sub-directories of `target_dir`.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# the cells in this notebook share the same three configuration variables.
target_dir = "data"
input = "place-pulse-singapore-point-clouds"
output = "place-pulse-singapore-point-clouds-sampled"
# Number of points kept per cloud by farthest-point sampling.
num_samples = 1024

import fpsample
import numpy as np

import csv
import os
from pathlib import Path

source_dir = os.path.join(target_dir, input)
dest_dir = Path(target_dir, output)

# Only the files directly inside the source directory are considered; a
# missing directory simply yields no files.
point_clouds_path = next(os.walk(source_dir), (source_dir, [], []))[2]

# Map each file's name without its last extension to its full path.
# os.path.splitext keeps extension-less names intact instead of collapsing
# them all to the empty string.
point_clouds_id = {
    os.path.splitext(name)[0]: os.path.join(source_dir, name)
    for name in point_clouds_path
}

dest_dir.mkdir(parents=True, exist_ok=True)
for cloud_id, path in point_clouds_id.items():
    dest_path = dest_dir / (cloud_id + ".csv")
    # An existing output file is left alone, so the cell can be re-run
    # without redoing finished clouds.
    if dest_path.is_file():
        continue

    # The csv module expects files opened with newline=''. Blank lines are
    # dropped so that every parsed row has the same number of values.
    with open(path, 'r', newline='') as fp:
        point_cloud = [
            [float(value) for value in row]
            for row in csv.reader(fp)
            if row
        ]

    # A cloud with fewer points than requested (including an empty one)
    # cannot yield `num_samples` distinct samples, so it is reported and
    # skipped instead of aborting the whole run.
    if len(point_cloud) < num_samples:
        print(f"Skipping {path}: {len(point_cloud)} points, need {num_samples}")
        continue

    fps_samples_idx = fpsample.fps_sampling(np.array(point_cloud), num_samples)
    # Index the parsed Python lists so the written values are the parsed floats.
    point_cloud_sampled = [point_cloud[i] for i in fps_samples_idx]
    with open(dest_path, 'w', newline='') as fp:
        csv.writer(fp).writerows(point_cloud_sampled)