releasing initial train/val/test split
Mike Roberts committed Jan 26, 2021
1 parent 4a4e85f commit 8d92d18
Showing 8 changed files with 83,000 additions and 16 deletions.
code/python/analysis/dataset_generate_random_image_paths.py (4 changes: 2 additions & 2 deletions)
@@ -27,8 +27,8 @@
metadata_camera_trajectories_csv_file = os.path.join(args.analysis_dir, "metadata_camera_trajectories.csv")

df = pd.read_csv(metadata_camera_trajectories_csv_file)
df = df.loc[df["Scene type"] != "OUTSIDE VIEWING AREA (BAD TRAJECTORY)"]
df = df.loc[df["Scene type"] != "OUTSIDE VIEWING AREA (BAD INITIALIZATION)"]
df = df[df["Scene type"] != "OUTSIDE VIEWING AREA (BAD TRAJECTORY)"]
df = df[df["Scene type"] != "OUTSIDE VIEWING AREA (BAD INITIALIZATION)"]
df = df.reset_index(drop=True)
camera_trajectories = df.to_records()

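A quick note on the hunk above (not part of the commit): for a boolean mask, df[mask] and df.loc[mask] select the same rows, so the change is purely stylistic. A minimal sketch of the equivalence, using a made-up two-row frame in place of metadata_camera_trajectories.csv:

import pandas as pd

# Hypothetical stand-in for the camera-trajectory metadata.
df = pd.DataFrame({"Scene type": ["LIVING ROOM", "OUTSIDE VIEWING AREA (BAD TRAJECTORY)"]})
mask = df["Scene type"] != "OUTSIDE VIEWING AREA (BAD TRAJECTORY)"
assert df[mask].equals(df.loc[mask])  # both keep only the first row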
@@ -21,6 +21,7 @@
args = parser.parse_args()

assert os.path.exists(args.dataset_dir)
+assert os.path.exists(args.analysis_dir)

path_utils.add_path_to_sys_path(args.dataset_dir, mode="relative_to_cwd", frame=inspect.currentframe())
import _dataset_config
code/python/analysis/dataset_generate_split.py (82 changes: 82 additions & 0 deletions)
@@ -0,0 +1,82 @@
#
# For licensing see accompanying LICENSE.txt file.
# Copyright (C) 2020 Apple Inc. All Rights Reserved.
#

from pylab import *

import argparse
import os
import pandas as pd

import path_utils

parser = argparse.ArgumentParser()
parser.add_argument("--analysis_dir", required=True)
parser.add_argument("--split_mode", required=True)
parser.add_argument("--train_fraction", type=float)
parser.add_argument("--val_fraction", type=float)
args = parser.parse_args()

assert os.path.exists(args.analysis_dir)
assert args.split_mode == "scene_v1"



print("[HYPERSIM: DATASET_GENERATE_SPLIT] Begin...")



metadata_images_csv_file = os.path.join(args.analysis_dir, "metadata_images.csv")
df_images = pd.read_csv(metadata_images_csv_file)



if args.split_mode == "scene_v1":

    assert args.train_fraction is not None
    assert args.val_fraction is not None
    assert args.train_fraction + args.val_fraction < 1.0

    metadata_images_split_file = os.path.join(args.analysis_dir, "metadata_images_split_scene_v1.csv")

    df_images_public = df_images[df_images["included_in_public_release"] == True]
    scene_names_shuffled = df_images_public.scene_name.unique()
    np.random.seed(0)
    np.random.shuffle(scene_names_shuffled)
    num_scenes = len(scene_names_shuffled)

    scene_ind_train_begin = 0
    scene_ind_val_begin = int(args.train_fraction*num_scenes)
    scene_ind_test_begin = int((args.train_fraction + args.val_fraction)*num_scenes)

    scene_ind_train_end = scene_ind_val_begin - 1
    scene_ind_val_end = scene_ind_test_begin - 1
    scene_ind_test_end = num_scenes - 1

    num_scenes_train = scene_ind_train_end - scene_ind_train_begin + 1
    num_scenes_val = scene_ind_val_end - scene_ind_val_begin + 1
    num_scenes_test = scene_ind_test_end - scene_ind_test_begin + 1

    scene_names_train = scene_names_shuffled[scene_ind_train_begin:scene_ind_train_end+1]
    scene_names_val = scene_names_shuffled[scene_ind_val_begin:scene_ind_val_end+1]
    scene_names_test = scene_names_shuffled[scene_ind_test_begin:scene_ind_test_end+1]

    assert num_scenes == num_scenes_train + num_scenes_val + num_scenes_test

    print("[HYPERSIM: DATASET_GENERATE_SPLIT] train:")
    print(sort(scene_names_train))
    print("[HYPERSIM: DATASET_GENERATE_SPLIT] val:")
    print(sort(scene_names_val))
    print("[HYPERSIM: DATASET_GENERATE_SPLIT] test:")
    print(sort(scene_names_test))

    df_images.loc[ df_images["included_in_public_release"] & df_images["scene_name"].isin(scene_names_train), "split_partition_name" ] = "train"
    df_images.loc[ df_images["included_in_public_release"] & df_images["scene_name"].isin(scene_names_val), "split_partition_name" ] = "val"
    df_images.loc[ df_images["included_in_public_release"] & df_images["scene_name"].isin(scene_names_test), "split_partition_name" ] = "test"

    df_images.to_csv(metadata_images_split_file, index=False)



print("[HYPERSIM: DATASET_GENERATE_SPLIT] Finished.")
@@ -62,7 +62,7 @@

H, H_edges = histogram(scene_type_ids, bins=scene_type_hist_n_bins, range=(scene_type_hist_min_bin_center - 0.5, scene_type_hist_max_bin_center + 0.5))

-tick_label = [ scene_type_id_to_name_map[i] for i in sort(scene_type_id_to_name_map.keys()) ]
+tick_label = [ scene_type_id_to_name_map[i] for i in sort(list(scene_type_id_to_name_map.keys())) ]
barh(arange(scene_type_hist_min_bin_center, scene_type_hist_max_bin_center+1), H, tick_label=tick_label)
gca().invert_yaxis()
title("Distribution of scene types")
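A note on the sort(list(...)) changes in this hunk and in plot_stats_scenes_objects_images.py below (hedged, not part of the commit): sort here appears to resolve to numpy.sort via the scripts' pylab-style star imports, and under Python 3 dict.keys() returns a view that NumPy treats as a single 0-d object rather than a sequence, so sorting it fails; wrapping it in list() restores the old Python 2 behavior. A minimal sketch:

import numpy as np

d = {2: "ceiling", 0: "wall", 1: "floor"}   # hypothetical id-to-name map
print(np.sort(list(d.keys())))              # [0 1 2]
# np.sort(d.keys())                         # raises an axis error on a 0-d object array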
code/python/analysis/visualize_scene_labeling_statistics.py (4 changes: 2 additions & 2 deletions)
@@ -36,9 +36,9 @@
df = pd.read_csv(metadata_labeling_time_csv_file)

if labeling_time_seconds is None:
-    labeling_time_seconds = df.loc[df["scene_included_in_dataset"] == True]["labeling_time_seconds"].to_numpy()
+    labeling_time_seconds = df[df["scene_included_in_dataset"]]["labeling_time_seconds"].to_numpy()
else:
-    labeling_time_seconds = r_[ labeling_time_seconds, df.loc[df["scene_included_in_dataset"] == True]["labeling_time_seconds"].to_numpy() ]
+    labeling_time_seconds = r_[ labeling_time_seconds, df[df["scene_included_in_dataset"]]["labeling_time_seconds"].to_numpy() ]



code/python/plots/plot_stats_color.py (8 changes: 4 additions & 4 deletions)
@@ -368,7 +368,7 @@
# RGB COLOR

# redefine number of bins and bin edges for visualization
-brightness_hist_log_n_bins_ = brightness_hist_log_n_bins/25
+brightness_hist_log_n_bins_ = int(brightness_hist_log_n_bins/25)
brightness_hist_log_bin_edges_ = logspace(brightness_hist_log_min, brightness_hist_log_max, brightness_hist_log_n_bins_+1, base=brightness_hist_log_base)

H = rgb_color_brightness_hist_log
@@ -386,7 +386,7 @@
# DIFFUSE REFLECTANCE

# redefine number of bins and bin edges for visualization
-brightness_hist_log_n_bins_ = brightness_hist_log_n_bins/25
+brightness_hist_log_n_bins_ = int(brightness_hist_log_n_bins/25)
brightness_hist_log_bin_edges_ = logspace(brightness_hist_log_min, brightness_hist_log_max, brightness_hist_log_n_bins_+1, base=brightness_hist_log_base)

H = diffuse_reflectance_brightness_hist_log
@@ -404,7 +404,7 @@
# DIFFUSE ILLUMINATION

# redefine number of bins and bin edges for visualization
-brightness_hist_log_n_bins_ = brightness_hist_log_n_bins/25
+brightness_hist_log_n_bins_ = int(brightness_hist_log_n_bins/25)
brightness_hist_log_bin_edges_ = logspace(brightness_hist_log_min, brightness_hist_log_max, brightness_hist_log_n_bins_+1, base=brightness_hist_log_base)

H = diffuse_illumination_brightness_hist_log
@@ -422,7 +422,7 @@
# RESIDUAL

# redefine number of bins and bin edges for visualization
-brightness_hist_log_n_bins_ = brightness_hist_log_n_bins/25
+brightness_hist_log_n_bins_ = int(brightness_hist_log_n_bins/25)
brightness_hist_log_bin_edges_ = logspace(brightness_hist_log_min, brightness_hist_log_max, brightness_hist_log_n_bins_+1, base=brightness_hist_log_base)

H = residual_brightness_hist_log
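A note on the int(...) changes in plot_stats_color.py above (not part of the commit): under Python 3, / is true division and returns a float even for integer operands, but the bin count passed to logspace() and histogram() must be an integer, hence the cast. A minimal sketch, with an illustrative bin count that is not taken from the dataset config:

brightness_hist_log_n_bins = 1000            # illustrative value only
print(brightness_hist_log_n_bins / 25)       # 40.0 (float under Python 3)
print(int(brightness_hist_log_n_bins / 25))  # 40
print(brightness_hist_log_n_bins // 25)      # 40, floor division is an equivalent fix here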
code/python/plots/plot_stats_scenes_objects_images.py (14 changes: 7 additions & 7 deletions)
@@ -275,7 +275,7 @@

# SCENES PER SCENE TYPE
H, H_edges = histogram(scene_type_ids, bins=scene_type_hist_n_bins, range=(scene_type_hist_min_bin_center - 0.5, scene_type_hist_max_bin_center + 0.5))
-tick_label = [ scene_type_id_to_name_map[i] for i in sort(scene_type_id_to_name_map.keys()) ]
+tick_label = [ scene_type_id_to_name_map[i] for i in sort(list(scene_type_id_to_name_map.keys())) ]

tick_label_black_list = [ "OUTSIDE VIEWING AREA (BAD INITIALIZATION)", "OUTSIDE VIEWING AREA (BAD TRAJECTORY)" ]
num_tick_labels_include = 10
@@ -297,7 +297,7 @@

# UNIQUE OBJECTS PER CLASS
H = unique_objects_per_class_hist
tick_label = ["NO LABEL"] + [""] + [ semantic_id_to_name_map[i] for i in sort(semantic_id_to_name_map.keys()) ]
tick_label = ["NO LABEL"] + [""] + [ semantic_id_to_name_map[i] for i in sort(list(semantic_id_to_name_map.keys())) ]

tick_label_black_list = ["NO LABEL", "", "wall", "floor", "ceiling"]
num_tick_labels_include = 10
@@ -320,7 +320,7 @@
# OBJECT VOLUME (LOG)

# redefine number of bins and bin edges for visualization
-object_volume_hist_log_n_bins_ = object_volume_hist_log_n_bins/25
+object_volume_hist_log_n_bins_ = int(object_volume_hist_log_n_bins/25)
object_volume_hist_log_bin_edges_ = logspace(object_volume_hist_log_min, object_volume_hist_log_max, object_volume_hist_log_n_bins_+1, base=object_volume_hist_log_base)

H = object_volume_hist_log
@@ -338,7 +338,7 @@

# PIXELS PER CLASS
H = pixels_per_class_hist
-tick_label = [r"$\star$"] + [""] + [ semantic_id_to_name_map[i] for i in sort(semantic_id_to_name_map.keys()) ]
+tick_label = [r"$\star$"] + [""] + [ semantic_id_to_name_map[i] for i in sort(list(semantic_id_to_name_map.keys())) ]

tick_label_black_list = [""]
num_tick_labels_include = 10
@@ -361,7 +361,7 @@
# UNIQUE OBJECTS PER IMAGE

# redefine number of bins and bin edges for visualization
-unique_objects_per_image_hist_n_bins_ = unique_objects_per_image_hist_n_bins/5
+unique_objects_per_image_hist_n_bins_ = int(unique_objects_per_image_hist_n_bins/5)
unique_objects_per_image_hist_bin_edges_ = linspace(0, unique_objects_per_image_hist_n_bins, unique_objects_per_image_hist_n_bins_+1)

H = unique_objects_per_image_hist
@@ -390,7 +390,7 @@
# DEPTH (LOG)

# redefine number of bins and bin edges for visualization
-depth_hist_log_n_bins_ = depth_hist_log_n_bins/20
+depth_hist_log_n_bins_ = int(depth_hist_log_n_bins/20)
depth_hist_log_bin_edges_ = logspace(depth_hist_log_min, depth_hist_log_max, depth_hist_log_n_bins_+1, base=depth_hist_log_base)

H = depth_hist_log
@@ -415,7 +415,7 @@
log_H[normal_hist_bin_corners_abs_min_invalid_mask] = nan

subplot(248)
-imshow(log_H, origin="bottom", extent=[-1,1,-1,1], interpolation="nearest", vmin=vmin, vmax=vmax)
+imshow(log_H, origin="lower", extent=[-1,1,-1,1], interpolation="nearest", vmin=vmin, vmax=vmax)
cbar = colorbar(ticks=ticks)
title("Distribution of\nsurface normals")
xlabel(r"$x$" + "\n\n(h)")
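One last note on the imshow change above (hedged, not part of the commit): recent Matplotlib versions only accept "upper" or "lower" for the origin argument, so the old "bottom" value raises an error; "lower" keeps the intended orientation, with the first row of the array drawn at the bottom of the axes. A minimal sketch, using an arbitrary array in place of log_H:

import numpy as np
import matplotlib.pyplot as plt

log_H = np.arange(12).reshape(3, 4)  # stand-in data, not the actual surface-normal histogram
plt.imshow(log_H, origin="lower", extent=[-1, 1, -1, 1], interpolation="nearest")
plt.colorbar()
plt.show()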
