In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import shutil

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import habitat
from habitat.core.utils import try_cv2_import

from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower
from habitat.utils.visualizations import maps
from habitat.utils.visualizations.utils import images_to_video

from tqdm.notebook import tqdm, trange
import seaborn as sns
sns.set(style='dark')

from PIL import Image
from habitat_sim.utils.common import d3_40_colors_rgb

# Bind OpenCV to the name `cv2` through habitat's try_cv2_import helper
# (imported above from habitat.core.utils).
cv2 = try_cv2_import()

# NOTE(review): actions_number is not referenced anywhere else in the visible
# notebook; its intended meaning is not shown here - confirm or remove.
actions_number = 5

In [2]:
def draw_top_down_map(info, heading, output_shape):
    """Render the colorized top-down map with the agent marker drawn on it.

    Args:
        info: step info mapping; ``info["top_down_map"]`` must provide the
            keys ``"map"``, ``"fog_of_war_mask"`` and ``"agent_map_coord"``.
        heading: agent heading; ``pi / 2`` is subtracted before it is handed
            to ``maps.draw_agent``.
        output_shape: shape of the target image; index 0 is used as the
            output height and index 1 as the output width.

    Returns:
        The map image resized to ``output_shape[0]`` rows by
        ``output_shape[1]`` columns.
    """
    td_info = info["top_down_map"]
    canvas = maps.colorize_topdown_map(td_info["map"], td_info["fog_of_war_mask"])

    # Intermediate size: first dimension equals output_shape[1], second
    # dimension is scaled by the map's width / height ratio.
    src_hw = canvas.shape[:2]
    aspect = np.array((1, src_hw[1] * 1.0 / src_hw[0]))
    dst_hw = np.round(output_shape[1] * aspect).astype(np.int32)

    # cv2.resize takes (width, height) whereas dst_hw is (height, width).
    canvas = cv2.resize(canvas, (dst_hw[1], dst_hw[0]))

    # Rescale the agent's map coordinates into the resized map.
    agent_pos = td_info["agent_map_coord"]
    agent_pos = np.round(agent_pos * dst_hw / src_hw).astype(np.int32)

    marker_radius = canvas.shape[0] / 40
    canvas = maps.draw_agent(
        canvas,
        agent_pos,
        heading - np.pi / 2,
        agent_radius_px=marker_radius,
    )

    final_size = (output_shape[1], output_shape[0])
    return cv2.resize(canvas, final_size)

In [3]:
# Every simulator sensor is rendered at the same square resolution.
width = height = 256

# ObjectNav challenge config; the PointNav variant would be
# 'my_challenge_pointnav2020.local.rgbd.yaml'.
env_config = habitat.get_config(config_paths='my_challenge_objectnav2020.local.rgbd.yaml')

# The config must be defrosted before it can be edited and is frozen again
# once all sensor sizes are set.
env_config.defrost()

env_config.SIMULATOR.RGB_SENSOR.WIDTH = width
env_config.SIMULATOR.DEPTH_SENSOR.WIDTH = width
env_config.SIMULATOR.SEMANTIC_SENSOR.WIDTH = width

env_config.SIMULATOR.RGB_SENSOR.HEIGHT = height
env_config.SIMULATOR.DEPTH_SENSOR.HEIGHT = height
env_config.SIMULATOR.SEMANTIC_SENSOR.HEIGHT = height

env_config.freeze()

In [4]:
# Project-level arguments object, later passed to ObjectNav_Env as `args`.
# get_default=True is forwarded to get_args (presumably to use default values
# instead of parsing the notebook's command line - TODO confirm in arguments.py).
from arguments import get_args
config = get_args(get_default=True)

Auto GPU config:
Number of processes: 36
Number of processes on GPU 0: 0
Number of processes per GPU: 12


In [5]:
%load_ext autoreload
%autoreload 2
%aimport env.habitat.objectnav_env
from env.habitat.objectnav_env import ObjectNav_Env
from habitat.datasets.object_nav.object_nav_dataset import ObjectNavDatasetV1
# Build the ObjectNav dataset from the DATASET section of env_config.
dataset = ObjectNavDatasetV1(env_config.DATASET)


# Single environment instance (rank 0) created from the project args (`config`),
# the habitat config (`env_config`) and the dataset built above.
env = ObjectNav_Env(args=config, 
                    rank=0,
                    config_env=env_config,
                    dataset=dataset)
# Forces env to switch scenes
# env._env.episode_iterator = dataset.get_episode_iterator(max_scene_repeat_episodes=1)
# Shortest-path follower bound to the environment's simulator. The forward
# step size is passed as the second positional argument and False as the third
# (presumably goal radius and return_one_hot - TODO confirm against the
# installed habitat version).
# NOTE(review): the simulator is reached via env.habitat_env.sim here but via
# env._env.sim in later cells - confirm both refer to the same object.
follower = ShortestPathFollower(env.habitat_env.sim, env_config.SIMULATOR.FORWARD_STEP_SIZE, False)
follower.mode = "approximate_gradient"

2020-08-05 19:19:33,385 initializing sim Sim-v0
initializing sim Sim-v0
2020-08-05 19:19:55,200 Initializing task ObjectNav-v1
Initializing task ObjectNav-v1


In [6]:
def concat_frames(frames, base_height=200, max_horizontal=4):
    """Tile several frame streams into one grid image per time step.

    Args:
        frames: sequence of streams; ``frames[j][i]`` is the image of stream
            ``j`` at time step ``i``. Images may be 2-D, ``(H, W, 1)`` or
            ``(H, W, 3)`` arrays. The number of time steps is taken from
            ``frames[0]``, so every stream must be at least that long.
        base_height: side length in pixels of each square grid cell.
        max_horizontal: maximum number of cells per grid row.

    Returns:
        A list with one ``uint8`` array of shape ``(rows * base_height,
        cols * base_height, 3)`` per time step; an empty list when ``frames``
        is empty.
    """
    n_streams = len(frames)
    if n_streams == 0:
        # Nothing to tile; previously this crashed on frames[0].
        return []

    n_cols = min(n_streams, max_horizontal)
    # Ceiling division: the former `n // max_horizontal + 1` produced an
    # extra, completely black row whenever n was a multiple of max_horizontal.
    n_rows = -(-n_streams // max_horizontal)
    width = base_height * n_cols
    height = base_height * n_rows

    images = []
    for i in trange(len(frames[0])):
        base = np.zeros((height, width, 3), dtype=np.uint8)
        for j, stream in enumerate(frames):
            img = stream[i]
            # Promote 2-D images to (H, W, 1) so the channel test below works.
            if len(img.shape) != 3:
                img = img[:, :, np.newaxis]
            # Non-RGB images are replicated to three channels and scaled by
            # 255 (assumes such inputs are in [0, 1] - TODO confirm per stream).
            if img.shape[2] != 3:
                img = np.repeat(img, 3, 2) * 255
            img = cv2.resize(img, (base_height, base_height))

            row, col = divmod(j, max_horizontal)
            y1 = row * base_height
            x1 = col * base_height
            base[y1:y1 + base_height, x1:x1 + base_height] = img.astype(np.uint8)
        images.append(base)
    return images

In [7]:
def semantic_to_rgb(semantic_obs):
    """Colorize a 2-D array of semantic ids with the 40-color d3 palette.

    Ids are reduced modulo 40 before being used as palette indices, so ids
    that differ by a multiple of 40 receive the same color.

    Args:
        semantic_obs: 2-D integer array of ids (rows x columns).

    Returns:
        RGB image as a NumPy array produced from the palettized PIL image.
    """
    n_rows, n_cols = semantic_obs.shape[0], semantic_obs.shape[1]

    # PIL sizes are (width, height).
    paletted = Image.new("P", (n_cols, n_rows))
    paletted.putpalette(d3_40_colors_rgb.flatten())

    palette_indices = (semantic_obs.flatten() % 40).astype(np.uint8)
    paletted.putdata(palette_indices)

    rgb_image = paletted.convert("RGB")
    return np.array(rgb_image)


In [8]:
def prepare_semantic_observation(semantic):
    """Translate an array of instance ids into category indices.

    The lookup table is rebuilt on every call from the semantic annotations
    of the simulator behind the notebook-global ``env``.

    Args:
        semantic: integer array of instance ids.

    Returns:
        Array with the same shape as ``semantic`` holding, for each instance
        id, the ``category.index()`` of the corresponding scene object.
    """
    annotations = env._env.sim.semantic_annotations()

    # The instance id is the integer after the last "_" in the object's id.
    label_by_instance = {}
    for obj in annotations.objects:
        instance_id = int(obj.id.split("_")[-1])
        label_by_instance[instance_id] = obj.category.index()

    # Dense table indexed by instance id; requires ids 0 .. N-1 to all exist.
    lookup = np.array([label_by_instance[k] for k in range(len(label_by_instance))])

    return np.take(lookup, semantic)

In [9]:
# Roll out up to 500 steps with the shortest-path follower and record, per
# step, every image/map needed for the debug videos built in later cells.
from collections import defaultdict
# Stream name -> list of per-step frames.
# NOTE(review): defaultdict(list) is the idiomatic equivalent of the lambda.
log_images = defaultdict(lambda: [])

observations, info = env.reset()

# Lookup tables derived from the current scene's semantic annotations.
# NOTE(review): scene / instance_id_to_label_id / mapping duplicate what
# prepare_semantic_observation recomputes on every call.
scene = env._env.sim.semantic_annotations()
# Instance id (integer after the last "_" of obj.id) -> category index.
instance_id_to_label_id = {int(obj.id.split("_")[-1]): obj.category.index() for obj in scene.objects}
# Dense array form of the dict above; requires ids 0 .. N-1 to all exist.
mapping = np.array([ instance_id_to_label_id[i] for i in range(len(instance_id_to_label_id)) ])
# Category index <-> category name, used by the inspection cells below.
index_to_title_map = {obj.category.index(): obj.category.name() for obj in scene.objects }
title_to_index_map = {obj.category.name(): obj.category.index() for obj in scene.objects }

im = info["rgb"]
done = False

for i in trange(500):
    if not done:
        # Every 25 steps pick a new random navigable point as the target.
        if i % 25 == 0:
            goal = env._env.sim.sample_navigable_point()
        # Alternative target: the first goal position of the current episode.
#         goal = env.habitat_env.current_episode.goals[0].position
        best_action = follower.get_next_action(goal)
        if best_action is None:
            best_action = 0

        # Action remapping
        # Follower ids are translated to the ids passed to env.step, following
        # the inline labels. NOTE(review): a follower result of 0 or None is
        # forwarded as 0, the same id "Left" is remapped to - confirm intended.
        if best_action == 1: # Forward
            best_action = 2
        elif best_action == 3: # Right
            best_action = 1
        elif best_action == 2: # Left
            best_action = 0
        observations, reward, done, info = env.step({'action': best_action})
         
        im = info["rgb"]
        top_down_map = draw_top_down_map(info, 
                                         info["heading"][0], 
                                         im.shape)
        # RGB frame and top-down map side by side.
        output_im = np.concatenate((im, top_down_map), axis=1)
        # NOTE(review): np.rollaxis(x, 0, 1) leaves the axis order unchanged,
        # and the (2 * im.shape[0], im.shape[0]) target size assumes square frames.
        output_resized = cv2.resize(np.rollaxis(output_im, 0, 1), (im.shape[0]*2, im.shape[0]))
        
        # Instance ids -> category indices for this step's semantic frame.
        semantic_obs = prepare_semantic_observation(info['semantic'])
        
        # Append this step's frame to each logged stream.
        log_images['debug'].append(output_resized)
        log_images['rgb'].append(info['rgb'])
        log_images['depth'].append(info['depth'])
        log_images['semantic'].append(semantic_obs)
        log_images['semantic_rgb'].append(semantic_to_rgb(semantic_obs))
        log_images['top_down_map'].append(top_down_map)
        # NOTE(review): env.* / env.mapper.* attributes are appended by
        # reference; if the environment mutates them in place, every logged
        # entry would alias the latest state - confirm they are fresh arrays.
        log_images['explored_map'].append(env.explored_map)
        log_images['semantic_map'].append(env.semantic_map)
        log_images['map'].append(env.map)
        log_images['fp_proj'].append(info['fp_proj'])
        log_images['fp_semantic'].append(info['fp_semantic'])
        log_images['depth_semantic'].append(env.mapper.depth_semantic)
        log_images['binary_semantic'].append(env.mapper.binary_semantic)
        
    else:
        # The environment reported done on the previous step; stop the rollout.
        print('Episode is done!')
        break

2020-08-05 19:19:58,345 Computing map for /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb
Computing map for /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb
2020-08-05 19:20:19,115 Invalid map: /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb/1
Invalid map: /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb/1
2020-08-05 19:20:20,063 Computing map for /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb
Computing map for /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb
2020-08-05 19:20:40,743 Invalid map: /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb/1
Invalid map: /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb/1
2020-08-05 19:20:41,904 Computing map for /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb
Computing map for /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb
2020-08-05 19:21:00,651 Invalid map: /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb/1
Invalid map: /data/scene_datasets/mp3d/mJXqzFtmKg4/mJXqzFtmKg4.glb/1
2020-08-05

HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

KeyboardInterrupt: 

In [10]:
# NOTE(review): despite its name, wall_id holds the category index of 'chair';
# the variable is reused by the next cells, so rename consistently if changed.
wall_id = title_to_index_map['chair']

In [11]:
# Stack the per-step logs and keep only entry `wall_id` along axis 1
# (presumably the category channel - TODO confirm the array layout).
wall_map = np.array(log_images['semantic_map'])[:, wall_id]
fp_semantic = np.array(log_images['fp_semantic'])[:, wall_id]


In [12]:
# Stack the per-step mapper outputs and keep only entry `wall_id` along the
# LAST axis (the two streams above are indexed along axis 1 instead).
fp_depth = np.array(log_images['depth_semantic'])[..., wall_id]
binary_semantic = np.array(log_images['binary_semantic'])[..., wall_id]

In [13]:
# Tile nine logged streams into one grid frame per step and show the result
# inline as a 20 fps video.
from moviepy.editor import ImageSequenceClip
# fp_depth is divided by 255 before tiling; concat_frames multiplies
# non-3-channel images by 255 again. The [:, ::-1] slices reverse axis 1 of the
# stacked arrays (presumably a vertical flip of each frame - TODO confirm).
frames = concat_frames([log_images['rgb'],
                        log_images['explored_map'], 
                        log_images['semantic_rgb'],
                        fp_depth/255,#[::-1],
                        wall_map,
                        np.array(log_images['depth']), 
                        fp_semantic[:, ::-1],
                        np.array(log_images['fp_proj'])[:, ::-1],
                        binary_semantic,])
ImageSequenceClip(frames, fps=20).ipython_display(maxduration=1000)

HBox(children=(IntProgress(value=0, max=494), HTML(value='')))


Moviepy - Building video __temp__.mp4.
Moviepy - Writing video __temp__.mp4



                                                               

Moviepy - Done !
Moviepy - video ready __temp__.mp4


In [None]:
%matplotlib inline

In [None]:
# Scratch inspection: value at index [0][0] of the first logged fp_semantic frame.
fp_semantic[0][0][0]

In [None]:
# Show the first logged fp_semantic frame for the selected category.
plt.imshow(fp_semantic[0])

In [None]:
# Shape of the stacked semantic_map log (first axis is the logged step).
np.array(log_images['semantic_map']).shape

In [None]:
# Show the first logged semantic-map frame for the selected category.
plt.imshow(wall_map[0])

In [None]:
# Depth-only video: logged depth frames scaled by 255 and shown at 20 fps.
# NOTE(review): this rebinds `frames`, which previously held the tiled grid
# frames returned by concat_frames; ImageSequenceClip comes from the moviepy
# import in the grid-video cell, so that cell must have been run first.
frames = np.array(log_images['depth'])*255
ImageSequenceClip(list(frames), fps=20).ipython_display(maxduration=1000)

In [None]:
# Stack the logged category-index frames and display the resulting shape.
semantic = np.array(log_images['semantic'])
semantic.shape

In [None]:
# Scratch inspection of the last step's raw semantic observation
# (displays the whole array; consider showing only its shape or a slice).
info['semantic']

In [None]:
# Category name of instance id 280.
# NOTE(review): 280 is a hard-coded id that is only meaningful for the scene
# loaded in this run; it raises KeyError if that instance does not exist.
index_to_title_map[instance_id_to_label_id[280]]

In [None]:
# Count how many pixels of the first logged semantic frame carry each category index.
from collections import Counter
Counter(semantic[0].flatten())

In [None]:
# Display the full category index -> category name table of the scene.
index_to_title_map

In [None]:
# Stack the logged mapper depth_semantic outputs and display the resulting shape.
depth_semantic = np.array(log_images['depth_semantic'])
depth_semantic.shape

In [None]:
# Inspect the mapper's vision_range; the trailing comment is a leftover
# fragment of a division by the mapper's resolution.
env.mapper.vision_range# // self.resolution

In [None]:
# Scratch arithmetic; what 2400 and 5 stand for is not shown in this notebook
# (TODO: replace with named quantities or delete the cell).
2400/5