diff --git a/dev_requirements.txt b/dev_requirements.txt new file mode 100644 index 0000000..d8a701d --- /dev/null +++ b/dev_requirements.txt @@ -0,0 +1,186 @@ +absl-py==0.12.0 +addict==2.4.0 +# ai2thor==0+dc0f9ecd8672dc2d62651f567ff95c63f3542332 +aiohttp==3.8.1 +aiosignal==1.2.0 +allenact==0.4.0 +allenact-plugins==0.4.0 +antlr4-python3-runtime==4.8 +appdirs==1.4.4 +appnope==0.1.2 +argon2-cffi==20.1.0 +astunparse==1.6.3 +async-generator==1.10 +async-timeout==4.0.2 +asynctest==0.13.0 +attrs==20.3.0 +aws-requests-auth==0.4.3 +backcall==0.2.0 +black==21.4b2 +bleach==3.3.0 +botocore==1.20.67 +cachetools==4.2.2 +certifi==2020.12.5 +cffi==1.14.5 +chardet==4.0.0 +charset-normalizer==2.0.12 +click==7.1.2 +clip @ git+https://github.com/openai/CLIP.git@04f4dc2ca1ed0acc9893bd1a3b526a7e02c4bb10 +cloudpickle==1.6.0 +cmake==3.18.4.post1 +colour==0.1.5 +cycler==0.10.0 +Cython==0.29.23 +dataclasses>0.5 +datasets==1.18.4 +decorator==4.4.2 +defusedxml==0.7.1 +# detectron2 @ git+https://github.com/facebookresearch/detectron2.git@d285dea0a58675217b061f48182dc031d2e3b677 +dill==0.3.4 +entrypoints==0.3 +filelock==3.0.12 +Flask==1.1.2 +flatbuffers==1.12 +frozenlist==1.2.0 +fsspec==2022.1.0 +ftfy==6.0.3 +future==0.18.2 +fvcore==0.1.5.post20210609 +gast==0.3.3 +gin==0.1.6 +gin-config==0.4.0 +google-auth==1.30.0 +google-auth-oauthlib==0.4.4 +google-pasta==0.2.0 +grpcio==1.32.0 +gym==0.17.3 +h5py==2.10.0 +huggingface-hub==0.4.0 +hydra-core==1.1.0 +idna==2.10 +idna-ssl==1.1.0 +imageio==2.9.0 +imageio-ffmpeg==0.4.3 +importlib-metadata==4.0.1 +importlib-resources==5.1.4 +iopath==0.1.8 +ipykernel==5.5.4 +ipython==7.16.1 +ipython-genutils==0.2.0 +ipywidgets==7.6.3 +itsdangerous==1.1.0 +jedi==0.17.0 +Jinja2==2.11.3 +jmespath==0.10.0 +joblib==1.1.0 +jsonschema==3.2.0 +jupyter==1.0.0 +jupyter-client==6.1.12 +jupyter-console==6.4.0 +jupyter-core==4.7.1 +jupyterlab-pygments==0.1.2 +jupyterlab-widgets==1.0.0 +Keras-Preprocessing==1.1.2 +kiwisolver==1.3.1 +llvmlite==0.36.0 +Markdown==3.3.4 +MarkupSafe==1.1.1 +matplotlib==3.3.4 +mistune==0.8.4 +moviepy==1.0.3 +msgpack==1.0.2 +multidict==5.2.0 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclient==0.5.3 +nbconvert==6.0.7 +nbformat==5.1.3 +nest-asyncio==1.5.1 +networkx==2.5.1 +notebook==6.3.0 +numba==0.53.1 +numpy==1.19.5 +numpy-quaternion==2021.4.5.14.42.35 +oauthlib==3.1.0 +omegaconf==2.1.0 +open3d==0.13.0 +opencv-contrib-python==4.5.5.64 +opt-einsum==3.3.0 +packaging==20.9 +pandas==1.1.5 +pandocfilters==1.4.3 +parso==0.8.2 +pathspec==0.8.1 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==8.2.0 +portalocker==2.3.0 +proglog==0.1.9 +progressbar2==3.53.1 +prometheus-client==0.10.1 +prompt-toolkit==3.0.18 +protobuf==3.15.8 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycocotools==2.0.2 +pycparser==2.20 +pydot==1.4.2 +pyglet==1.5.0 +Pygments==2.9.0 +pyparsing==2.4.7 +pyquaternion==0.9.9 +pyrsistent==0.17.3 +python-dateutil==2.8.1 +python-utils==2.5.6 +python-xlib==0.29 +pytz==2021.3 +PyWavelets==1.1.1 +PyYAML==5.4.1 +pyzmq==22.0.3 +qtconsole==5.1.0 +QtPy==1.9.0 +regex==2021.4.4 +requests==2.25.1 +requests-oauthlib==1.3.0 +responses==0.17.0 +rsa==4.7.2 +scikit-image==0.17.2 +scikit-learn==0.24.2 +scipy==1.5.4 +Send2Trash==1.5.0 +setproctitle==1.2.2 +Shapely==1.8.1.post1 +six==1.15.0 +tabulate==0.8.9 +tensorboard==2.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboardX==2.2 +tensorflow==2.4.1 +tensorflow-estimator==2.4.0 +termcolor==1.1.0 +terminado==0.9.4 +testpath==0.4.4 +threadpoolctl==3.0.0 +tifffile==2020.9.3 +toml==0.10.2 +torch==1.8.1 +torchaudio==0.8.1 +torchvision==0.9.1 +tornado==6.1 +tqdm==4.63.0 +traitlets==4.3.3 +typed-ast==1.4.3 +typing-extensions==3.7.4.3 +urllib3==1.26.4 +wcwidth==0.2.5 +webencodings==0.5.1 +Werkzeug==1.0.1 +widgetsnbextension==3.5.1 +wrapt==1.12.1 +xxhash==3.0.0 +yacs==0.1.8 +yarl==1.7.2 +zipp==3.4.1 diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py index c15cfea..004c8c9 100644 --- a/ithor_arm/ithor_arm_environment.py +++ b/ithor_arm/ithor_arm_environment.py @@ -1,5 +1,6 @@ """A wrapper for engaging with the ManipulaTHOR environment.""" +from ast import For import copy import math import typing @@ -7,11 +8,15 @@ from typing import Tuple, Dict, List, Set, Union, Any, Optional import ai2thor.server +from cv2 import USAGE_DEFAULT import numpy as np from ai2thor.controller import Controller from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment +from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal +from ithor_arm.arm_calculation_utils import convert_world_to_agent_coordinate + from ithor_arm.ithor_arm_constants import ( ADITIONAL_ARM_ARGS, ARM_MIN_HEIGHT, @@ -56,6 +61,7 @@ def __init__( verbose: bool = False, env_args=None, ) -> None: + """Initializer. # Parameters @@ -84,6 +90,9 @@ def __init__( simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplies object interactions when opening drawers (when simplified, objects within a drawer do not slide around on their own when the drawer is opened or closed, instead they are effectively glued down). + *_noise_meta_dist_params : [mean, variance] defines the normal distribution over which the actual noise parameters + for a motion noise distribution will be drawn. Distributions for noise in motion will be re-rolled every scene + reset with new bias and variance values drawn from these meta-distributions. """ self._start_player_screen_width = player_screen_width @@ -112,6 +121,21 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics + if 'motion_noise_type' in env_args.keys(): + if env_args['motion_noise_type'] == 'habitat': + self.noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args']) + elif env_args['motion_noise_type'] == 'simple1d': + self.noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args']) + else: + print('Unrecognized motion noise model type') + ForkedPdb().set_trace() + else: + self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model + + self.ahead_nominal = 0.2 + self.rotate_nominal = 45 + + self.start(None) # self.check_controller_version() @@ -124,6 +148,16 @@ def __init__( self.env_args["quality"] = self._quality # self.memory_frames = [] + def check_controller_version(self): + if MANIPULATHOR_COMMIT_ID is not None: + assert ( + MANIPULATHOR_COMMIT_ID in self.controller._build.url + ), "Build number is not right, {} vs {}, use pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format( + self.controller._build.url, + MANIPULATHOR_COMMIT_ID, + MANIPULATHOR_COMMIT_ID, + ) + # def check_controller_version(self): # if MANIPULATHOR_COMMIT_ID is not None: # assert ( @@ -222,6 +256,10 @@ def reset( self.list_of_actions_so_far = [] + self.noise_model.reset_noise_model() + self.nominal_agent_location = self.get_agent_location() + + def randomize_agent_location( self, seed: int = None, partial_position: Optional[Dict[str, float]] = None ) -> Dict: @@ -299,6 +337,36 @@ def get_absolute_hand_state(self): xyz_dict = self.correct_nan_inf(xyz_dict, "absolute hand") return dict(position=xyz_dict, rotation={"x": 0, "y": 0, "z": 0}) + + def update_nominal_location(self, action_dict): + # location = { + # "x": metadata["agent"]["position"]["x"], + # "y": metadata["agent"]["position"]["y"], + # "z": metadata["agent"]["position"]["z"], + # "rotation": metadata["agent"]["rotation"]["y"], + # "horizon": metadata["agent"]["cameraHorizon"], + # "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")), + # } + + curr_loc = self.nominal_agent_location + new_loc = copy.deepcopy(curr_loc) + + if action_dict['action'] is 'RotateLeft': + new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360 + elif action_dict['action'] is 'RotateRight': + new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360 + elif action_dict['action'] is 'MoveAhead': + new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + elif action_dict['action'] is 'TeleportFull': + new_loc["x"] = action_dict['x'] + new_loc["y"] = action_dict['y'] + new_loc["z"] = action_dict['z'] + new_loc["rotation"] = action_dict['rotation']['y'] + new_loc["horizon"] = action_dict['horizon'] + + self.nominal_agent_location = new_loc + def get_pickupable_objects(self): event = self.controller.last_event @@ -361,6 +429,7 @@ def step( ) -> ai2thor.server.Event: """Take a step in the ai2thor environment.""" action = typing.cast(str, action_dict["action"]) + original_action_dict = copy.deepcopy(action_dict) skip_render = "renderImage" in action_dict and not action_dict["renderImage"] last_frame: Optional[np.ndarray] = None @@ -396,18 +465,41 @@ def step( copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) + # RH: order matters, nominal action happens last action_dict = {**action_dict, **copy_aditions} if action in [MOVE_AHEAD]: + noise = self.noise_model.get_ahead_drift(self.ahead_nominal) + + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = 0.2 + action_dict["ahead"] = self.ahead_nominal + noise[0] + action_dict["right"] = noise[1] elif action in [ROTATE_RIGHT]: + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = 45 + action_dict["degrees"] = noise[2] + self.rotate_nominal elif action in [ROTATE_LEFT]: + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -45 + action_dict["degrees"] = noise[2] - self.rotate_nominal elif "MoveArm" in action: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) @@ -441,10 +533,13 @@ def step( k: v for (k, v) in base_position.items() if k in ["x", "y", "z"] } - - sr = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) + + # RH: Nominal location only updates for successful actions. Note that that drift + # action might succeed even if the "main" action fails + if sr.metadata["lastActionSuccess"]: + self.update_nominal_location(original_action_dict) # if action in SET_OF_ALL_AGENT_ACTIONS: # self.update_memory() diff --git a/ithor_arm/ithor_arm_noise_models.py b/ithor_arm/ithor_arm_noise_models.py new file mode 100644 index 0000000..a07f044 --- /dev/null +++ b/ithor_arm/ithor_arm_noise_models.py @@ -0,0 +1,120 @@ +"""Utility classes and functions for noise models used by the platforms.""" +from typing import Tuple, Dict, List, Set, Union, Any, Optional + +from utils.noise_from_habitat import MotionNoiseModel, _TruncatedMultivariateGaussian +import numpy as np + +class NoiseInMotionHabitatFlavor: + # Sample usage in __init__ for a config file: + # self.ENV_ARGS['motion_noise_type'] = 'habitat' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0] + # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1 + + def __init__(self, + multiplier_means: List[float] = [1,1,1,1,1,1], + multiplier_sigmas: List[float] = [0,0,0,0,0,0,0], + effect_scale: float = 1, + ) -> None: + + self.multiplier_means = multiplier_means + self.multiplier_sigmas = multiplier_sigmas + self.effect_scale = effect_scale + + self.ahead = None + self.rotate = None + + self.reset_noise_model() + + def generate_linear_submodel(self,m1,m2,m3,s1,s2,s3): + return MotionNoiseModel( + _TruncatedMultivariateGaussian([m1 * 0.074, m2 * 0.036], [s1 * 0.019, s2 * 0.033]), + _TruncatedMultivariateGaussian([m3 * 0.189], [s3 * 0.038])) + + def generate_rotational_submodel(self,m1,m2,m3,s1,s2,s3): + return MotionNoiseModel( + _TruncatedMultivariateGaussian([m1 * 0.002, m2 * 0.003], [s1 * 0.0, s2 * 0.002]), + _TruncatedMultivariateGaussian([m3 * 0.219], [s3 * 0.019])) + + def generate_model_multipliers(self): + return [np.random.normal(self.multiplier_means[i],self.multiplier_sigmas[i]) + for i in range(len(self.multiplier_means))] + + def reset_noise_model(self): + model_multipliers = self.generate_model_multipliers() + + self.ahead = self.generate_linear_submodel(*model_multipliers) + self.rotate = self.generate_rotational_submodel(*model_multipliers) + + def get_ahead_drift(self,*_): + # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + + if self.effect_scale == 0: #speedup for trivial case + return [0,0,0] + + rotation_drift = self.effect_scale * self.ahead.rotation.sample() + linear_drift = self.effect_scale * self.ahead.linear.sample() + return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi] + + def get_rotate_drift(self): + # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + + if self.effect_scale == 0: #speedup for trivial case + return [0,0,0] + + rotation_drift = self.effect_scale * self.rotate.rotation.sample() + linear_drift = self.effect_scale * self.rotate.linear.sample() + + return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi] + + +class NoiseInMotionSimple1DNormal: + # Sample usage in __init__ for a config file: + # self.ENV_ARGS['motion_noise_type'] = 'simple1d' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.10]} + # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.04]} + # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,10], 'variance_dist': [0,10]} + + def __init__(self, + ahead_noise_meta_dist_params: Dict = {'bias_dist': [0,0], 'variance_dist': [0,0]}, + lateral_noise_meta_dist_params: Dict= {'bias_dist': [0,0], 'variance_dist': [0,0]}, + turning_noise_meta_dist_params: Dict = {'bias_dist': [0,0], 'variance_dist': [0,0]}, + effect_scale: float = 1, + ) -> None: + + self.ahead_noise_meta_dist_params = ahead_noise_meta_dist_params + self.lateral_noise_meta_dist_params = lateral_noise_meta_dist_params + self.turning_noise_meta_dist_params = turning_noise_meta_dist_params + self.effect_scale = effect_scale + + self.ahead_noise_params = [0,0] + self.lateral_noise_params = [0,0] + self.turning_noise_params = [0,0] + + self.reset_noise_model() + + def generate_motion_noise_params(self,meta_dist): + bias = np.random.normal(*meta_dist['bias_dist']) + variance = np.abs(np.random.normal(*meta_dist['variance_dist'])) + return [bias,variance] + + def reset_noise_model(self): + self.ahead_noise_params = self.generate_motion_noise_params(self.ahead_noise_meta_dist_params) + self.lateral_noise_params = self.generate_motion_noise_params(self.lateral_noise_meta_dist_params) + self.turning_noise_params = self.generate_motion_noise_params(self.turning_noise_meta_dist_params) + + def get_ahead_drift(self, nominal_ahead): + # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + linear_drift = self.effect_scale * np.random.normal(*self.ahead_noise_params) + side_drift = self.effect_scale * np.random.normal(*self.lateral_noise_params) + rotation_drift = np.arctan2(side_drift,linear_drift + nominal_ahead) * 180 / np.pi + + return [linear_drift, side_drift, rotation_drift ] + + def get_rotate_drift(self): + # for this model, rotating incurs no positional drift. Returns [0,0,deg] + rotation_drift = self.effect_scale * np.random.normal(*self.turning_noise_params) + + return [0, 0, rotation_drift] diff --git a/ithor_arm/near_deadline_sensors.py b/ithor_arm/near_deadline_sensors.py index 76b60fc..f4e1086 100644 --- a/ithor_arm/near_deadline_sensors.py +++ b/ithor_arm/near_deadline_sensors.py @@ -1,4 +1,5 @@ """Utility classes and functions for sensory inputs used by the models.""" +from ast import For import copy import datetime import math @@ -39,6 +40,8 @@ from utils.noise_from_habitat import ControllerNoiseModel, MotionNoiseModel, _TruncatedMultivariateGaussian from utils.noise_in_motion_util import NoiseInMotion, squeeze_bool_mask, tensor_from_dict +KINECT_FOV_W, KINECT_FOV_H = 59, 90 + class FancyNoisyObjectMaskWLabels(Sensor): def __init__(self, type: str,noise, height, width, uuid: str = "object_mask", distance_thr: float = -1, **kwargs: Any): @@ -366,8 +369,141 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state): # self.noise = something # return super(PointNavEmulatorSensorwScheduler, self).get_observation(env, task, *args, **kwargs) +class PointNavEmulSensorDeadReckoning(Sensor): + + def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): + observation_space = gym.spaces.Box( + low=0, high=1, shape=(1,), dtype=np.float32 + ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) + self.type = type + self.mask_sensor = mask_sensor + self.depth_sensor = depth_sensor + uuid = '{}_{}'.format(uuid, type) + + self.min_xyz = np.zeros((3)) + + self.dummy_answer = torch.zeros(3) + self.dummy_answer[:] = 4 # is this good enough? + self.device = torch.device("cpu") + super().__init__(**prepare_locals_for_super(locals())) + + def get_agent_belief_state(self,env): + #TODO all these values need to be checked + fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think + belief_agent_state = env.nominal_agent_location + real_agent_state = env.get_agent_location() + + belief_camera_horizon = 45 + belief_camera_xyz = np.array([belief_agent_state[k] for k in ['x','y','z']]) + belief_camera_rotation = (belief_agent_state['rotation'] + 90) % 360 + + real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) + + self.belief_prev_location.append(belief_camera_xyz) + self.real_prev_location.append(real_camera_xyz) + + return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation + + def get_observation( + self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any + ) -> Any: + #TODO remove + if self.type == 'destination': + return self.dummy_answer + mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times? + depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) + + if task.num_steps_taken() == 0: + self.pointnav_history_aggr = [] + self.real_prev_location = [] + self.belief_prev_location = [] + fov, camera_horizon, camera_xyz, camera_rotation = self.get_agent_belief_state(env) + arm_agent_coord = env.get_current_arm_state() + + if mask.sum() != 0: + + midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + + # if len(self.belief_prev_location) > 190: + # import matplotlib + # matplotlib.use('TkAgg') + # import matplotlib.pyplot as plt + # fig = plt.figure() + # ax = fig.add_subplot(projection='3d') + # def draw_points(locations, color): + # xs = [x[0] for x in locations] + # ys = [x[1] for x in locations] + # zs = [x[2] for x in locations] + # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) + + # def draw(locations, color): + # xs = [x['camera_xyz'][0] for x in locations] + # ys = [x['camera_xyz'][1] for x in locations] + # zs = [x['camera_xyz'][2] for x in locations] + # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) + + # draw_points(self.real_prev_location, 'g'); draw_points(self.belief_prev_location, 'b'); plt.show() + # ForkedPdb().set_trace() + + + + result = self.history_aggregation(camera_xyz, camera_rotation, arm_agent_coord, task.num_steps_taken()) + return result + + def history_aggregation(self, camera_xyz, camera_rotation, arm_agent_coord, current_step_number): + if len(self.pointnav_history_aggr) == 0: + return self.dummy_answer + else: + weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_weights = sum(weights) + total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_sum = sum(total_sum) + midpoint = total_sum / total_weights + agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) + midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) + midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + + # arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0)) + # arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) + arm_state_agent_coord = torch.Tensor([arm_agent_coord[k] for k in ['x','y','z']]) + # ForkedPdb().set_trace() + + distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord + + return distance_in_agent_coord.cpu() + + +def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame): + with torch.no_grad(): + camera_xyz = ( + torch.from_numpy(camera_xyz - min_xyz).float().to(device) + ) + + depth_frame = torch.from_numpy(depth_frame).to(device) + depth_frame[depth_frame == -1] = np.NaN + world_space_point_cloud = depth_frame_to_world_space_xyz( + depth_frame=depth_frame, + camera_world_xyz=camera_xyz, + rotation=camera_rotation, + horizon=camera_horizon, + fov=fov, + ) + return world_space_point_cloud + +def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device): + mask = squeeze_bool_mask(mask) + depth_frame_masked = depth_frame_original.copy() + depth_frame_masked[~mask] = -1 + world_space_point_cloud = calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame_masked) + valid_points = (world_space_point_cloud == world_space_point_cloud).sum(dim=-1) == 3 + point_in_world = world_space_point_cloud[valid_points] + midpoint_agent_coord = point_in_world.mean(dim=0) + return midpoint_agent_coord + class PredictionObjectMask(Sensor): def __init__(self, type: str,object_query_sensor, rgb_for_detection_sensor, uuid: str = "predict_object_mask", **kwargs: Any): diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py new file mode 100644 index 0000000..13e0407 --- /dev/null +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py @@ -0,0 +1,133 @@ +import platform +from typing import Dict + +import gym +import torch +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from torch import nn + +from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor +from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler +from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask +from ithor_arm.ithor_arm_constants import ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS +from ithor_arm.ithor_arm_sensors import ( + InitialAgentArmToObjectSensor, + InitialObjectToGoalSensor, + PickedUpObjSensor, + DepthSensorThor, RelativeAgentArmToObjectSensor, RelativeObjectToGoalSensor, +) +from ithor_arm.ithor_arm_viz import MaskImageVisualizer +from ithor_arm.near_deadline_sensors import PointNavEmulatorSensor +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_ddppo import BringObjectMixInPPOConfig +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_simplegru import BringObjectMixInSimpleGRUConfig +from manipulathor_baselines.bring_object_baselines.experiments.ithor.bring_object_ithor_base import BringObjectiThorBaseConfig +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator +from manipulathor_baselines.bring_object_baselines.models.query_obj_w_gt_mask_rgb_model import SmallBringObjectWQueryObjGtMaskRGBDModel +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator + + +class PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise( + BringObjectiThorBaseConfig, + BringObjectMixInPPOConfig, + BringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + NOISE_LEVEL = 0 + AGENT_LOCATION_NOISE = 0.4 #? + distance_thr = 1.5 # is this a good number? + source_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='source', distance_thr=distance_thr) + destination_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='destination', distance_thr=distance_thr) + SENSORS = [ + RGBSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + DepthSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_normalization=True, + uuid="depth_lowres", + ), + PickedUpObjSensor(), + CategorySampleSensor(type='source'), + CategorySampleSensor(type='destination'), + CategoryFeatureSampleSensor(type='source'), + CategoryFeatureSampleSensor(type='destination'), + source_mask_sensor, + destination_mask_sensor, + PointNavEmulatorSensor(type='source', mask_sensor=source_mask_sensor, noise=AGENT_LOCATION_NOISE), + PointNavEmulatorSensor(type='destination', mask_sensor=destination_mask_sensor, noise=AGENT_LOCATION_NOISE), + # TempRealArmpointNav(uuid='point_nav_emul',type='source'), + # TempRealArmpointNav(uuid='point_nav_emul', type='destination'), + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = DiverseBringObjectTaskSampler + # TASK_TYPE = TestPointNavExploreWiseRewardTask + TASK_TYPE = ExploreWiseRewardTask + + NUM_PROCESSES = 20 + + OBJECT_TYPES = TRAIN_OBJECTS + TEST_OBJECTS + + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulatorSensor): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulatorSensor): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big? + self.REWARD_CONFIG['object_found'] = 1 # is this too big? + self.ENV_ARGS['visibilityDistance'] = self.distance_thr + + # self.ENV_ARGS['motion_noise_type'] = 'habitat' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0] + # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1 + + # self.ENV_ARGS['motion_noise_type'] = 'simple1d' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.10]} + # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.04]} + # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,10], 'variance_dist': [0,10]} + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return RGBDModelWPointNavEmulator( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py new file mode 100644 index 0000000..dfb36f2 --- /dev/null +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py @@ -0,0 +1,135 @@ +import platform +from typing import Dict + +import gym +import torch +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from torch import nn + +from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor +from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler +from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask +from ithor_arm.ithor_arm_constants import MANIPULATHOR_ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS +from ithor_arm.ithor_arm_sensors import ( + InitialAgentArmToObjectSensor, + InitialObjectToGoalSensor, + PickedUpObjSensor, + DepthSensorThor, RelativeAgentArmToObjectSensor, RelativeObjectToGoalSensor, +) +from ithor_arm.ithor_arm_viz import MaskImageVisualizer +from ithor_arm.near_deadline_sensors import PointNavEmulatorSensor, PointNavEmulSensorDeadReckoning +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_ddppo import BringObjectMixInPPOConfig +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_simplegru import BringObjectMixInSimpleGRUConfig +from manipulathor_baselines.bring_object_baselines.experiments.ithor.bring_object_ithor_base import BringObjectiThorBaseConfig +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator +from manipulathor_baselines.bring_object_baselines.models.query_obj_w_gt_mask_rgb_model import SmallBringObjectWQueryObjGtMaskRGBDModel +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator + + +class PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged( + BringObjectiThorBaseConfig, + BringObjectMixInPPOConfig, + BringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + NOISE_LEVEL = 0 + AGENT_LOCATION_NOISE = 0.4 #? + distance_thr = 1.5 # is this a good number? + source_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='source', distance_thr=distance_thr) + destination_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='destination', distance_thr=distance_thr) + depth_sensor = DepthSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_normalization=True, + uuid="depth_lowres", + ) + + SENSORS = [ + RGBSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + depth_sensor, + PickedUpObjSensor(), + CategorySampleSensor(type='source'), + CategorySampleSensor(type='destination'), + CategoryFeatureSampleSensor(type='source'), + CategoryFeatureSampleSensor(type='destination'), + source_mask_sensor, + destination_mask_sensor, + PointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor, noise=AGENT_LOCATION_NOISE, depth_sensor=depth_sensor), + PointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor, noise=AGENT_LOCATION_NOISE, depth_sensor=depth_sensor), + # TempRealArmpointNav(uuid='point_nav_emul',type='source'), + # TempRealArmpointNav(uuid='point_nav_emul', type='destination'), + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = DiverseBringObjectTaskSampler + # TASK_TYPE = TestPointNavExploreWiseRewardTask + TASK_TYPE = ExploreWiseRewardTask + + NUM_PROCESSES = 20 + + OBJECT_TYPES = TRAIN_OBJECTS + TEST_OBJECTS + + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulSensorDeadReckoning): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulSensorDeadReckoning): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big? + self.REWARD_CONFIG['object_found'] = 1 # is this too big? + self.ENV_ARGS['visibilityDistance'] = self.distance_thr + + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .5 + + # self.ENV_ARGS['motion_noise_type'] = 'simple1d' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} + # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} + # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,2], 'variance_dist': [0,2]} + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return RGBDModelWPointNavEmulator( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py deleted file mode 100644 index 1222f03..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ /dev/null @@ -1,181 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import yaml -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel, RGBSensorStretchKinectBigFov -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger - -from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL -from ithor_arm.ithor_arm_constants import TRAIN_OBJECTS, TEST_OBJECTS -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - - - -class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - TRAIN_SCENES = [ - "FloorPlan{}_physics".format(str(i)) - for i in range(1, 20 + 1) - ] - VALID_SCENES = [ - "FloorPlan{}_physics".format(str(i)) - for i in range(21, 26) - ] - TEST_SCENES = [ - "FloorPlan{}_physics".format(str(i)) - for i in range(26, 31) - ] - - - ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES - - # OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS)) - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - # UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS)) - - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - RGBSensorStretchKinectBigFov( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres_arm", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 100 - - TASK_SAMPLER = AllRoomsObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - - NUM_PROCESSES = 40 - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - # self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py deleted file mode 100644 index f097c55..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py +++ /dev/null @@ -1,168 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import yaml -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - - - -class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorStretchIntel( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - RGBSensorStretchKinect( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres_arm", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 100 - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - - NUM_PROCESSES = 56 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train - if platform.system() == "Darwin": - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(100)] - random.shuffle(TRAIN_SCENES) - - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py deleted file mode 100644 index 2c042d8..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ /dev/null @@ -1,194 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import yaml -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor - -from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES -from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - - - -class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - #TODO use resnet clip normalizations? - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - - RGBSensorStretchKinectBigFov( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres_arm", - ), - - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 10 - SENSORS += [ - RGBSensorStretchKinectBigFov( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres_arm_only_viz", - ), - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres_only_viz", - ), - ] - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - NUM_PROCESSES = 50 - - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999) if i not in PROCTHOR_INVALID_SCENES] # 9999 is all of train - if platform.system() == "Darwin": - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - random.shuffle(TRAIN_SCENES) - - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - #TODO depth is not False. Kiana added this: - self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = False #TODO important change everywhere - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py deleted file mode 100644 index 4ee0dd3..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py +++ /dev/null @@ -1,172 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import yaml -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - -from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL - - - -class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - TRAIN_SCENES = ROBOTHOR_TRAIN - TEST_SCENES = ROBOTHOR_VAL - # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor'])) - # OBJECT_TYPES.sort() - - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - - random.shuffle(TRAIN_SCENES) - random.shuffle(TEST_SCENES) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorStretchIntel( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - RGBSensorStretchKinect( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres_arm", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 500 - - TASK_SAMPLER = AllRoomsObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - VISUALIZE = False - - - NUM_PROCESSES = 56 - - - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - # self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py deleted file mode 100644 index 74a0fef..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ /dev/null @@ -1,139 +0,0 @@ -from typing import Any, Dict, List, Optional, Sequence -from typing_extensions import Literal, final - -import datasets -import numpy as np -import torch -import torch.optim as optim - -from ai2thor.platform import CloudRendering -from allenact.embodiedai.sensors.vision_sensors import DepthSensor -from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig -from allenact.utils.experiment_utils import ( - Builder, - PipelineStage, - TrainingPipeline, - TrainingSettings, -) -from allenact.algorithms.onpolicy_sync.losses import PPO -from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig - -from allenact.base_abstractions.sensor import Sensor - -# from allenact.utils.system import get_logger -# from training import cfg -# from training.tasks.object_nav import ObjectNavTaskSampler -# from training.utils.types import RewardConfig, TaskSamplerArgs -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID - - -class ProcTHORObjectNavBaseConfig(BringObjectThorBaseConfig): - """The base config for ProcTHOR ObjectNav experiments.""" - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = ObjectNavTask - - TRAIN_DEVICES = ( - tuple(range(torch.cuda.device_count())) - if torch.cuda.is_available() - else (torch.device("cpu"),) - ) - VAL_DEVICES = ( - (torch.cuda.device_count() - 1,) - if torch.cuda.is_available() - else (torch.device("cpu"),) - ) - TEST_DEVICES = ( - tuple(range(torch.cuda.device_count())) - if torch.cuda.is_available() - else (torch.device("cpu"),) - ) - - # TEST_ON_VALIDATION: bool = cfg.evaluation.test_on_validation - - # AGENT_MODE = cfg.agent.agent_mode - # CAMERA_WIDTH = cfg.agent.camera_width - # CAMERA_HEIGHT = cfg.agent.camera_height - # STEP_SIZE = cfg.agent.step_size - # VISIBILITY_DISTANCE = cfg.agent.visibility_distance - # FIELD_OF_VIEW = cfg.agent.field_of_view - # ROTATE_STEP_DEGREES = cfg.agent.rotate_step_degrees - - # HOUSE_DATASET = datasets.load_dataset( - # "allenai/houses", use_auth_token=True, ignore_verifications=True - # ) - - SENSORS: Sequence[Sensor] = [] - - DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" - - ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[ - int - ] = 20 # default config/main.yaml - RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = ( - -1 - ) # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None` - RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # TODO apparently this won't work with 100 (why?) - - @final - def training_pipeline(self, **kwargs): - log_interval_small = ( - self.NUM_PROCESSES * 32 * 10 - if torch.cuda.is_available - else 1 - ) - log_interval_medium = ( - self.NUM_PROCESSES * 64 * 5 - if torch.cuda.is_available - else 1 - ) - log_interval_large = ( - self.NUM_PROCESSES * 128 * 5 - if torch.cuda.is_available - else 1 - ) - - batch_steps_0 = int(10e6) - batch_steps_1 = int(10e6) - batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0 - - return TrainingPipeline( - save_interval=5_000_000, - metric_accumulate_interval=10_000, - optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)), - num_mini_batch=1, - update_repeats=4, - max_grad_norm=0.5, - num_steps=128, - named_losses={"ppo_loss": PPO(**PPOConfig)}, - gamma=0.99, - use_gae=True, - gae_lambda=0.95, - advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, - pipeline_stages=[ - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=batch_steps_0, - training_settings=TrainingSettings( - num_steps=32, metric_accumulate_interval=log_interval_small - ), - ), - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=batch_steps_1, - training_settings=TrainingSettings( - num_steps=64, metric_accumulate_interval=log_interval_medium - ), - ), - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=batch_steps_2, - training_settings=TrainingSettings( - num_steps=128, metric_accumulate_interval=log_interval_large - ), - ), - ], - ) - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py deleted file mode 100644 index 5d3ea82..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ /dev/null @@ -1,159 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import yaml -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask, ObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger - - - -class ProcTHORObjectNavClipResnet50RGBOnly( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 50 - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = ObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - VISUALIZE = False - - - NUM_PROCESSES = 30 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(10)] # 9999 is all of train - # if platform.system() == "Darwin": - # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - random.shuffle(TRAIN_SCENES) - - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - # self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - if rgb_sensor is not None: - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid=rgb_sensor.uuid, - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - - return preprocessors - - # @classmethod - # @final - # def create_model(cls, **kwargs) -> nn.Module: - # goal_sensor_uuid = next( - # (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - # None, - # ) - # resnet_preprocessor_uuids = ["rgb_clip_resnet"] - - # return ResnetTensorNavNCameraActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # resnet_preprocessor_uuids=resnet_preprocessor_uuids, - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - @classmethod - def create_model(cls, **kwargs) -> nn.Module: - has_rgb = any(isinstance(s, RGBSensorThor) for s in cls.SENSORS) - has_depth = False #any(isinstance(s, DepthSensor) for s in cls.SENSORS) - - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - - return ResnetTensorNavActorCritic( - action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None, - depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, - hidden_size=512, - goal_dims=32, - add_prev_actions=True #cfg.model.add_prev_actions_embedding, - ) - - @classmethod - def tag(cls): - return cls.__name__ diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py deleted file mode 100644 index 9a0c637..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py +++ /dev/null @@ -1,113 +0,0 @@ -import platform -import random -from typing import Sequence, Union - -import gym -import numpy as np -from torch import nn -import yaml - -# from ithor_arm.bring_object_sensors import RGBSensorThorNoNan -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.models.objnav_model_rgb_only import ObjNavOnlyRGBModel -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import ProcTHORObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID -# from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS -from manipulathor_utils.debugger_util import ForkedPdb - -# from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -# from allenact.base_abstractions.preprocessor import Preprocessor -# from allenact.utils.experiment_utils import Builder -# from allenact_plugins.navigation_plugin.objectnav.models import ( -# ResnetTensorNavActorCritic, -# ) - - - -class ProcTHORObjectNavRGBOnly( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 10 - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = ProcTHORObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - - NUM_PROCESSES = 56 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train - if platform.system() == "Darwin": - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - - - # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list])) - - random.shuffle(TRAIN_SCENES) - - if platform.system() == "Darwin": - MAX_STEPS = 10 - - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - - @classmethod - def create_model(cls, **kwargs) -> nn.Module: - return ObjNavOnlyRGBModel( - action_space=gym.spaces.Discrete( - len(cls.TASK_TYPE.class_action_names()) - ), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - hidden_size=512, - visualize=cls.VISUALIZE - ) - - @classmethod - def tag(cls): - return cls.__name__ diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py deleted file mode 100644 index 46f4626..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py +++ /dev/null @@ -1,153 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - -from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import BringObjImageVisualizer, TestMetricLogger - - -class RoboTHORObjectNavClipResnet50RGBOnly( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - - TRAIN_SCENES = ROBOTHOR_TRAIN - TEST_SCENES = ROBOTHOR_VAL - OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor'])) - OBJECT_TYPES.sort() - OBJECT_TYPES = ['AlarmClock', 'Apple', 'BaseballBat', 'BasketBall', 'Bed', 'Bowl', 'Chair', - 'GarbageCan', 'HousePlant', 'Laptop', 'Mug', 'Sofa', 'SprayBottle', 'Television', - 'Toilet', 'Vase'] # habitat challenge object - just get it to eval bc trained with goal sensor. - # ForkedPdb().set_trace() - - random.shuffle(TRAIN_SCENES) - random.shuffle(TEST_SCENES) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 50 - - TASK_SAMPLER = AllRoomsObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - NUM_PROCESSES = 56 - - # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train - # if platform.system() == "Darwin": - # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - # TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - # random.shuffle(TRAIN_SCENES) - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - # self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - if rgb_sensor is not None: - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid=rgb_sensor.uuid, - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - - return preprocessors - - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - has_rgb = any(isinstance(s, RGBSensorThor) for s in cls.SENSORS) - # has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS) - has_depth=False - - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - - return ResnetTensorNavActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None, - depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py b/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py deleted file mode 100644 index d7bcfc2..0000000 --- a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py +++ /dev/null @@ -1,183 +0,0 @@ -from typing import Any, Dict, List, Optional, cast -from typing_extensions import Literal - -import clip -import gym -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.misc_utils import prepare_locals_for_super -from manipulathor_utils.debugger_util import ForkedPdb -from utils.hacky_viz_utils import hacky_visualization -from datetime import datetime - - -class ClipResNetEmbedder(nn.Module): - def __init__(self, resnet, pool=True): - super().__init__() - self.model = resnet - self.pool = pool - self.eval() - - def forward(self, x): - m = self.model.visual - with torch.no_grad(): - - def stem(x): - for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: - x = m.relu(bn(conv(x))) - x = m.avgpool(x) - return x - - x = x.type(m.conv1.weight.dtype) - x = stem(x) - x = m.layer1(x) - x = m.layer2(x) - x = m.layer3(x) - x = m.layer4(x) - if self.pool: - x = F.adaptive_avg_pool2d(x, (1, 1)) - x = torch.flatten(x, 1) - return x - -class ClipResNetPreprocessor(Preprocessor): - """Preprocess RGB or depth image using a ResNet model with CLIP model - weights.""" - - def __init__( - self, - rgb_input_uuid: str, - clip_model_type: Literal["RN50", "RN50x16"], - pool: bool, - device: Optional[torch.device] = None, - device_ids: Optional[List[torch.device]] = None, - visualize=False, # TODO this is such a hack job - **kwargs: Any, - ): - assert clip_model_type in clip.available_models() - - if clip_model_type == "RN50": - output_shape = (2048, 7, 7) - elif clip_model_type == "RN50x16": - output_shape = (3072, 7, 7) - else: - raise NotImplementedError( - f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" - ) - - if pool: - output_shape = output_shape[:1] - - self.clip_model_type = clip_model_type - - self.pool = pool - - self.device = torch.device("cpu") if device is None else device - self.device_ids = device_ids or cast( - List[torch.device], list(range(torch.cuda.device_count())) - ) - self._resnet: Optional[ClipResNetEmbedder] = None - - low = -np.inf - high = np.inf - shape = output_shape - - input_uuids = [rgb_input_uuid] - assert ( - len(input_uuids) == 1 - ), "resnet preprocessor can only consume one observation type" - - observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - self.starting_time = datetime.now().strftime("{}_%m_%d_%Y_%H_%M_%S_%f".format(self.__class__.__name__)) - self.visualize = visualize - - super().__init__(**prepare_locals_for_super(locals())) - - @property - def resnet(self) -> ClipResNetEmbedder: - import clip - - if self._resnet is None: - self._resnet = ClipResNetEmbedder( - clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool - ).to(self.device) - return self._resnet - - def to(self, device: torch.device) -> "ClipResNetPreprocessor": - self._resnet = self.resnet.to(device) - self.device = device - return self - - def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: - x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw - # if self.visualize: - # obs['rgb_lowres'] = x.permute(0,2,3,1).unsqueeze(0) - # hacky_visualization(obs, base_directory_to_right_images=self.starting_time) - # If the input is depth, repeat it across all 3 channels - if x.shape[1] == 1: - x = x.repeat(1, 3, 1, 1) - x = self.resnet(x).float() - return x - - -class ClipTextPreprocessor(Preprocessor): - def __init__( - self, - goal_sensor_uuid: str, - object_types: List[str], - device: Optional[torch.device] = None, - device_ids: Optional[List[torch.device]] = None, - **kwargs: Any, - ): - try: - import clip - - self.clip = clip - except ImportError as _: - raise ImportError( - "Cannot `import clip` when instatiating `CLIPResNetPreprocessor`." - " Please install clip from the openai/CLIP git repository:" - "\n`pip install git+https://github.com/openai/CLIP.git@3b473b0e682c091a9e53623eebc1ca1657385717`" - ) - - output_shape = (1024,) - - self.object_types = object_types - - self.device = torch.device("cpu") if device is None else device - self.device_ids = device_ids or cast( - List[torch.device], list(range(torch.cuda.device_count())) - ) - - low = -np.inf - high = np.inf - shape = output_shape - - observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - - input_uuids = [goal_sensor_uuid] - - super().__init__(**prepare_locals_for_super(locals())) - - @property - def text_encoder(self): - if self._clip_model is None: - self._clip_model = self.clip.load("RN50", device=self.device)[0] - self._clip_model.eval() - return self._clip_model.encode_text - - def to(self, device: torch.device): - self.device = device - self._clip_model = None - return self - - def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: - object_inds = obs[self.input_uuids[0]] - object_types = [self.object_types[ind] for ind in object_inds] - x = self.clip.tokenize([f"navigate to the {obj}" for obj in object_types]).to( - self.device - ) - with torch.no_grad(): - return self.text_encoder(x).float() diff --git a/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py b/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py deleted file mode 100644 index 91ef13c..0000000 --- a/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Baseline models for use in the object navigation task. - -Object navigation is currently available as a Task in AI2-THOR and -Facebook's Habitat. -""" -import platform -from datetime import datetime -from typing import Tuple, Optional - -import gym -import torch -from allenact.algorithms.onpolicy_sync.policy import ( - ActorCriticModel, - LinearCriticHead, - DistributionType, - Memory, - ObservationType, -) -from allenact.base_abstractions.distributions import CategoricalDistr -from allenact.base_abstractions.misc import ActorCriticOutput -from allenact.embodiedai.models.basic_models import RNNStateEncoder -from allenact.utils.model_utils import make_cnn, compute_cnn_output -from gym.spaces.dict import Dict as SpaceDict -from torch import nn - -from manipulathor_utils.debugger_util import ForkedPdb -from utils.model_utils import LinearActorHeadNoCategory -from utils.hacky_viz_utils import hacky_visualization - - -class ObjNavOnlyRGBModel(ActorCriticModel[CategoricalDistr]): - """Baseline recurrent actor critic model for preddistancenav task. - - # Attributes - action_space : The space of actions available to the agent. Currently only discrete - actions are allowed (so this space will always be of type `gym.spaces.Discrete`). - observation_space : The observation space expected by the agent. This observation space - should include (optionally) 'rgb' images and 'depth' images. - hidden_size : The hidden size of the GRU RNN. - object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal - object type. - """ - - def __init__( - self, - action_space: gym.spaces.Discrete, - observation_space: SpaceDict, - hidden_size=512, - obj_state_embedding_size=512, - trainable_masked_hidden_state: bool = False, - num_rnn_layers=1, - rnn_type="GRU", - teacher_forcing=1, - visualize=False, - ): - """Initializer. - - See class documentation for parameter definitions. - """ - super().__init__(action_space=action_space, observation_space=observation_space) - self.visualize = visualize - - self._hidden_size = hidden_size - self.object_type_embedding_size = obj_state_embedding_size - - # sensor_names = self.observation_space.spaces.keys() - network_args = {'input_channels': 3, 'layer_channels': [32, 64, 32], 'kernel_sizes': [(8, 8), (4, 4), (3, 3)], 'strides': [(4, 4), (2, 2), (1, 1)], 'paddings': [(0, 0), (0, 0), (0, 0)], 'dilations': [(1, 1), (1, 1), (1, 1)], 'output_height': 24, 'output_width': 24, 'output_channels': 512, 'flatten': True, 'output_relu': True} - self.full_visual_encoder = make_cnn(**network_args) - - self.state_encoder = RNNStateEncoder( - 512, - self._hidden_size, - trainable_masked_hidden_state=trainable_masked_hidden_state, - num_layers=num_rnn_layers, - rnn_type=rnn_type, - ) - - self.actor_pickup = LinearActorHeadNoCategory(self._hidden_size, action_space.n) - self.critic_pickup = LinearCriticHead(self._hidden_size) - - self.train() - # self.detection_model.eval() - - self.starting_time = datetime.now().strftime("{}_%m_%d_%Y_%H_%M_%S_%f".format(self.__class__.__name__)) - - - - @property - def recurrent_hidden_state_size(self) -> int: - """The recurrent hidden state size of the model.""" - return self._hidden_size - - @property - def num_recurrent_layers(self) -> int: - """Number of recurrent hidden layers.""" - return self.state_encoder.num_recurrent_layers - - def _recurrent_memory_specification(self): - return dict( - rnn=( - ( - ("layer", self.num_recurrent_layers), - ("sampler", None), - ("hidden", self.recurrent_hidden_state_size), - ), - torch.float32, - ) - ) - - def forward( # type:ignore - self, - observations: ObservationType, - memory: Memory, - prev_actions: torch.Tensor, - masks: torch.FloatTensor, - ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]: - """Processes input batched observations to produce new actor and critic - values. Processes input batched observations (along with prior hidden - states, previous actions, and masks denoting which recurrent hidden - states should be masked) and returns an `ActorCriticOutput` object - containing the model's policy (distribution over actions) and - evaluation of the current state (value). - - # Parameters - observations : Batched input observations. - memory : `Memory` containing the hidden states from initial timepoints. - prev_actions : Tensor of previous actions taken. - masks : Masks applied to hidden states. See `RNNStateEncoder`. - # Returns - Tuple of the `ActorCriticOutput` and recurrent hidden state. - """ - - #we really need to switch to resnet now that visual features are actually important - - # pickup_bool = observations["pickedup_object"] - # after_pickup = pickup_bool == 1 - - visual_observation = observations['rgb_lowres'] - - visual_observation_encoding = compute_cnn_output(self.full_visual_encoder, visual_observation) - - - # arm_distance_to_obj_source = observations['arm_point_nav_source'] - # arm_distance_to_obj_destination = observations['arm_point_nav_destination'] - - # arm_distance_to_obj_source_embedding = self.pointnav_embedding(arm_distance_to_obj_source) - # arm_distance_to_obj_destination_embedding = self.pointnav_embedding(arm_distance_to_obj_destination) - # pointnav_embedding = arm_distance_to_obj_source_embedding - # pointnav_embedding[after_pickup] = arm_distance_to_obj_destination_embedding[after_pickup] - - #TODO remove as soon as the bug is resolved - # assert not torch.any(torch.isinf(pointnav_embedding) + torch.isnan(pointnav_embedding)), 'pointnav_embedding is nan' - assert not torch.any(torch.isinf(visual_observation) + torch.isnan(visual_observation)), 'visual_observation is nan' - - # arm_distance_to_obj = arm_distance_to_obj_source - # arm_distance_to_obj[after_pickup] = arm_distance_to_obj_destination[after_pickup] - # pointnav_embedding = self.pointnav_embedding(arm_distance_to_obj) - - visual_observation_encoding = torch.cat([visual_observation_encoding], dim=-1) - - - x_out, rnn_hidden_states = self.state_encoder( - visual_observation_encoding, memory.tensor("rnn"), masks - ) - - - # I think we need two model one for pick up and one for drop off - - actor_out_pickup = self.actor_pickup(x_out) - critic_out_pickup = self.critic_pickup(x_out) - - - actor_out_final = actor_out_pickup - critic_out_final = critic_out_pickup - - actor_out = CategoricalDistr(logits=actor_out_final) - - actor_critic_output = ActorCriticOutput( - distributions=actor_out, values=critic_out_final, extras={} - ) - - memory = memory.set_tensor("rnn", rnn_hidden_states) - - #TODO remove as soon as bug is resolved - actor_is_nan = torch.isinf(actor_out_final) + torch.isnan(actor_out_final) - if torch.any(actor_is_nan): - print('actor is nan', actor_is_nan.sum()) - print('scene number', observations['scene_number']) - - # TODO really bad design - if self.visualize: - hacky_visualization(observations, base_directory_to_right_images=self.starting_time) - - - return ( - actor_critic_output, - memory, - ) - diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py new file mode 100644 index 0000000..6170c31 --- /dev/null +++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py @@ -0,0 +1,158 @@ +import platform +import random + +import gym +import torch +from torch import nn + + +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_ithor_base import \ + StretchBringObjectiThorBaseConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_ddppo import \ + StretchBringObjectMixInPPOConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_simplegru import \ + StretchBringObjectMixInSimpleGRUConfig +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_pointnav_emul_model import StretchPointNavEmulModel +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_real_pointnav_model import StretchRealPointNavModel +from manipulathor_utils.debugger_util import ForkedPdb +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, KITCHEN_TRAIN, LIVING_ROOM_TRAIN, \ + BEDROOM_TRAIN, ROBOTHOR_TRAIN, ROBOTHOR_VAL, BATHROOM_TEST, BATHROOM_TRAIN, BEDROOM_TEST, LIVING_ROOM_TEST, \ + KITCHEN_TEST +from utils.stretch_utils.stretch_bring_object_task_samplers import StretchDiverseBringObjectTaskSampler +from utils.stretch_utils.stretch_bring_object_tasks import StretchExploreWiseRewardTask, \ + StretchExploreWiseRewardTaskOnlyPickUp, StretchObjectNavTask +from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS, STRETCH_MANIPULATHOR_COMMIT_ID, INTEL_CAMERA_WIDTH +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchIntel, DepthSensorStretchIntel, \ + RGBSensorStretchKinect, DepthSensorStretchKinect, AgentBodyPointNavSensor, AgentBodyPointNavEmulSensor, \ + RGBSensorStretchKinectZero, \ + DepthSensorStretchKinectZero, IntelRawDepthSensor, ArmPointNavEmulSensor, KinectRawDepthSensor, \ + KinectNoisyObjectMask, IntelNoisyObjectMask, StretchPickedUpObjSensor +from utils.stretch_utils.stretch_visualizer import StretchBringObjImageVisualizer + + +class PointNavEmulStretchAllRooms( + StretchBringObjectiThorBaseConfig, + StretchBringObjectMixInPPOConfig, + StretchBringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + desired_screen_size = INTEL_CAMERA_WIDTH + NOISE_LEVEL = 0 + distance_thr = 1.5 # is this a good number? + + source_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_intel = IntelRawDepthSensor() + + source_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_kinect = KinectRawDepthSensor() + + + SENSORS = [ + RGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + DepthSensorStretchIntel(height=desired_screen_size,width=desired_screen_size,use_normalization=True,uuid="depth_lowres",), + RGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres_arm", + ), + DepthSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_normalization=True, + uuid="depth_lowres_arm", + ), + StretchPickedUpObjSensor(), + AgentBodyPointNavEmulSensor(type='source', mask_sensor=source_mask_sensor_intel, depth_sensor=depth_sensor_intel), + AgentBodyPointNavEmulSensor(type='destination', mask_sensor=destination_mask_sensor_intel, depth_sensor=depth_sensor_intel), + ArmPointNavEmulSensor(type='source', mask_sensor=source_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + ArmPointNavEmulSensor(type='destination', mask_sensor=destination_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + source_mask_sensor_intel, + destination_mask_sensor_intel, + source_mask_sensor_kinect, + destination_mask_sensor_kinect, + + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = StretchDiverseBringObjectTaskSampler + TASK_TYPE = StretchExploreWiseRewardTask + + NUM_PROCESSES = 20 + + + TRAIN_SCENES = KITCHEN_TRAIN + LIVING_ROOM_TRAIN + BEDROOM_TRAIN + BATHROOM_TRAIN + TEST_SCENES = KITCHEN_TEST + LIVING_ROOM_TEST + BEDROOM_TEST + BATHROOM_TEST + OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ != 'robothor'])) + + + random.shuffle(TRAIN_SCENES) + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['object_found'] = 1 + # self.ENV_ARGS = STRETCH_ENV_ARGS + self.ENV_ARGS['visibilityDistance'] = self.distance_thr + self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID + self.ENV_ARGS['renderInstanceSegmentation'] = True + + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 + + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + return sampler_args + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return StretchPointNavEmulModel( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py new file mode 100644 index 0000000..8f9ecec --- /dev/null +++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py @@ -0,0 +1,159 @@ +import platform +import random + +import gym +import torch +from torch import nn + + +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_ithor_base import \ + StretchBringObjectiThorBaseConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_ddppo import \ + StretchBringObjectMixInPPOConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_simplegru import \ + StretchBringObjectMixInSimpleGRUConfig +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_pointnav_emul_model import StretchPointNavEmulModel +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_real_pointnav_model import StretchRealPointNavModel +from manipulathor_utils.debugger_util import ForkedPdb +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, KITCHEN_TRAIN, LIVING_ROOM_TRAIN, \ + BEDROOM_TRAIN, ROBOTHOR_TRAIN, ROBOTHOR_VAL, BATHROOM_TEST, BATHROOM_TRAIN, BEDROOM_TEST, LIVING_ROOM_TEST, \ + KITCHEN_TEST +from utils.stretch_utils.stretch_bring_object_task_samplers import StretchDiverseBringObjectTaskSampler +from utils.stretch_utils.stretch_bring_object_tasks import StretchExploreWiseRewardTask, \ + StretchExploreWiseRewardTaskOnlyPickUp, StretchObjectNavTask +from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS, STRETCH_MANIPULATHOR_COMMIT_ID, INTEL_CAMERA_WIDTH +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchIntel, DepthSensorStretchIntel, \ + RGBSensorStretchKinect, DepthSensorStretchKinect, AgentBodyPointNavSensor, AgentBodyPointNavEmulSensor, \ + RGBSensorStretchKinectZero, AgentBodyPointNavEmulSensorDeadReckoning, ArmPointNavEmulSensorDeadReckoning, \ + DepthSensorStretchKinectZero, IntelRawDepthSensor, ArmPointNavEmulSensor, KinectRawDepthSensor, \ + KinectNoisyObjectMask, IntelNoisyObjectMask, StretchPickedUpObjSensor +from utils.stretch_utils.stretch_visualizer import StretchBringObjImageVisualizer + + +class PointNavEmulStretchAllRooms( + StretchBringObjectiThorBaseConfig, + StretchBringObjectMixInPPOConfig, + StretchBringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + desired_screen_size = INTEL_CAMERA_WIDTH + NOISE_LEVEL = 0 + distance_thr = 1.5 # is this a good number? + + source_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_intel = IntelRawDepthSensor() + + source_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_kinect = KinectRawDepthSensor() + + + SENSORS = [ + RGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + DepthSensorStretchIntel(height=desired_screen_size,width=desired_screen_size,use_normalization=True,uuid="depth_lowres",), + RGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres_arm", + ), + DepthSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_normalization=True, + uuid="depth_lowres_arm", + ), + StretchPickedUpObjSensor(), + AgentBodyPointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor_intel, depth_sensor=depth_sensor_intel), + AgentBodyPointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor_intel, depth_sensor=depth_sensor_intel), + ArmPointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + ArmPointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + source_mask_sensor_intel, + destination_mask_sensor_intel, + source_mask_sensor_kinect, + destination_mask_sensor_kinect, + + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = StretchDiverseBringObjectTaskSampler + TASK_TYPE = StretchExploreWiseRewardTask + + NUM_PROCESSES = 20 + + + TRAIN_SCENES = KITCHEN_TRAIN + LIVING_ROOM_TRAIN + BEDROOM_TRAIN + BATHROOM_TRAIN + TEST_SCENES = KITCHEN_TEST + LIVING_ROOM_TEST + BEDROOM_TEST + BATHROOM_TEST + OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ != 'robothor'])) + + + random.shuffle(TRAIN_SCENES) + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['object_found'] = 1 + self.REWARD_CONFIG['failed_action_penalty'] = -0.06 # double the current - stretch really likes to back up into things + # self.ENV_ARGS = STRETCH_ENV_ARGS + self.ENV_ARGS['visibilityDistance'] = self.distance_thr + self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID + self.ENV_ARGS['renderInstanceSegmentation'] = True + + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 + + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + return sampler_args + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return StretchPointNavEmulModel( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ diff --git a/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py b/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py index 47290bf..0e63d26 100644 --- a/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py +++ b/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py @@ -182,6 +182,7 @@ def forward( # type:ignore visual_observation_arm = torch.cat([observations['depth_lowres_arm'], observations['rgb_lowres_arm'], arm_mask], dim=-1).float() visual_observation_arm = check_for_nan_visual_observations(visual_observation_arm) visual_observation_encoding_arm = compute_cnn_output(self.full_visual_encoder_arm, visual_observation_arm) + # ForkedPdb().set_trace() # arm_distance_to_obj_source = observations['point_nav_real_source'].copy() diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py new file mode 100644 index 0000000..396f0a0 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -0,0 +1,142 @@ +import platform +from typing import Sequence, Union + +from torch import nn +import torch +import yaml + +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel, RGBSensorStretchKinectBigFov +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + + +from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig +from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask +from manipulathor_utils.debugger_util import ForkedPdb + +from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ + ClipResNetPreprocessNCameraGRUActorCriticMixin +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.utils.experiment_utils import Builder +from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer +from ithor_arm.ithor_arm_viz import TestMetricLogger + + + +class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( + ObjectNavBaseConfig +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + + TRAIN_SCENES = [ + "FloorPlan{}_physics".format(str(i)) + for i in range(1, 20 + 1) + ] + VALID_SCENES = [ + "FloorPlan{}_physics".format(str(i)) + for i in range(21, 26) + ] + TEST_SCENES = [ + "FloorPlan{}_physics".format(str(i)) + for i in range(26, 31) + ] + + ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES + + WHICH_AGENT = 'stretch' + + # OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS)) + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + SENSORS = [ + RGBSensorThor( + height=ObjectNavBaseConfig.SCREEN_SIZE, + width=ObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + RGBSensorStretchKinectBigFov( + height=ObjectNavBaseConfig.SCREEN_SIZE, + width=ObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm", + ), + GoalObjectTypeThorSensor( + object_types=OBJECT_TYPES, + ), + ] + + if platform.system() == "Darwin": + MAX_STEPS = 100 + # SENSORS += [ #TODO FIX ORDER HERE + # RGBSensorStretchKinectBigFov( + # height=ObjectNavBaseConfig.SCREEN_SIZE, + # width=ObjectNavBaseConfig.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm_only_viz", + # ), + # RGBSensorThor( + # height=ObjectNavBaseConfig.SCREEN_SIZE, + # width=ObjectNavBaseConfig.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_only_viz", + # ), + # ] + + TASK_SAMPLER = AllRoomsObjectNavTaskSampler + TASK_TYPE = StretchObjectNavTask + ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment + POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] + + TEST_GPU_IDS = list(range(torch.cuda.device_count())) + NUMBER_OF_TEST_PROCESS = 8 + + NUM_PROCESSES = 40 + CLIP_MODEL_TYPE = "RN50" + + + def __init__(self): + super().__init__() + self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice + + self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( + sensors=self.SENSORS, + clip_model_type=self.CLIP_MODEL_TYPE, + screen_size=self.SCREEN_SIZE, + ) + self.REWARD_CONFIG['shaping_weight'] = 0.0 + self.REWARD_CONFIG['exploration_reward'] = 0.05 + self.REWARD_CONFIG['got_stuck_penalty'] = 0.0 + self.REWARD_CONFIG['failed_action_penalty'] = -0.5 + self.ENV_ARGS['renderInstanceSegmentation'] = True + + def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return self.preprocessing_and_model.preprocessors() + + + def create_model(self, **kwargs) -> nn.Module: + return self.preprocessing_and_model.create_model( + num_actions=len(self.TASK_TYPE.class_action_names()), **kwargs, + visualize=self.VISUALIZE + ) + + + @classmethod + def tag(cls): + return cls.TASK_TYPE.__name__ + '-RGB-2Camera-iTHOR' + '-' + cls.WHICH_AGENT + + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py new file mode 100644 index 0000000..9c6ccfe --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -0,0 +1,397 @@ +from abc import ABC +import platform +from math import ceil +from typing import Optional, Sequence, Dict, Any, List, Union + +import gym +import numpy as np +import torch +import torch.optim as optim + +from allenact.base_abstractions.experiment_config import ExperimentConfig +from allenact.base_abstractions.task import TaskSampler +import ai2thor.fifo_server + +from allenact.utils.experiment_utils import ( + Builder, + PipelineStage, + TrainingPipeline, + TrainingSettings, + evenly_distribute_count_into_bins +) +from allenact.base_abstractions.experiment_config import MachineParams +from allenact.algorithms.onpolicy_sync.losses import PPO +from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig + +from allenact.base_abstractions.sensor import Sensor +from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph +from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor + +from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer +from ithor_arm.ithor_arm_viz import TestMetricLogger + +from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask +from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS, UPDATED_PROCTHOR_COMMIT_ID + +LOCOBOT_ENV_ARGS = dict( + gridSize=0.25, + width=224, + height=224, + visibilityDistance=1.0, + agentMode="locobot", + fieldOfView=63.453048374758716, + rotateStepDegrees=30.0, + snapToGrid=False, + renderInstanceSegmentation=False, + renderDepthImage=False, + commit_id=UPDATED_PROCTHOR_COMMIT_ID +) + +STRETCH_ENV_ARGS = dict( + gridSize=0.25, + width=224, + height=224, + visibilityDistance=3.0, + agentMode='stretch', + fieldOfView=69, + agentControllerType="mid-level", + server_class=ai2thor.fifo_server.FifoServer, + snapToGrid=False, + useMassThreshold=True, + massThreshold=10, + autoSimulation=False, + autoSyncTransforms=True, + renderInstanceSegmentation=False, + renderDepthImage=False, + commit_id=UPDATED_PROCTHOR_COMMIT_ID, #### TODO not sure if this works for stretch + horizon_init=15 +) + + +class ObjectNavBaseConfig(ExperimentConfig, ABC): + """The base config for ProcTHOR ObjectNav experiments.""" + + ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None + SENSORS: Optional[Sequence[Sensor]] = None + + # STEP_SIZE = 0.25 + # ROTATION_DEGREES = 30.0 + # VISIBILITY_DISTANCE = 1.0 + # STOCHASTIC = False + + VISUALIZE = False + if platform.system() == "Darwin": + VISUALIZE = True + + TRAIN_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + VAL_DEVICES = ( + (torch.cuda.device_count() - 1,) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + TEST_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + distributed_nodes = 1 + + NUM_PROCESSES: Optional[int] = None + NUMBER_OF_TEST_PROCESS: Optional[int] = None + NUMBER_OF_VALID_PROCESS: Optional[int] = 1 + TRAIN_GPU_IDS = list(range(torch.cuda.device_count())) + SAMPLER_GPU_IDS = TRAIN_GPU_IDS + VALID_GPU_IDS = [] + TEST_GPU_IDS = [] + + TRAIN_SCENES: str = None + VALID_SCENES: str = None + TEST_SCENES: str = None + VALID_SAMPLES_IN_SCENE = 1 + TEST_SAMPLES_IN_SCENE = 1 + OBJECT_TYPES: Optional[Sequence[str]] = None + + WHICH_AGENT = None + + CAMERA_WIDTH = 224 + CAMERA_HEIGHT = 224 + SCREEN_SIZE = 224 + + POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] + + TASK_SAMPLER = AllRoomsObjectNavTaskSampler + TASK_TYPE = ObjectNavTask + DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" + MAX_STEPS = 500 + + def __init__(self): + # super().__init__() + self.REWARD_CONFIG = { + "step_penalty": -0.01, + "goal_success_reward": 10.0, + "reached_horizon_reward": 0.0, + "failed_stop_reward": 0.0, + "shaping_weight": 1.0, + "positive_only_reward": False, + } # shaping weight removed for eval in task sampler + if self.WHICH_AGENT == 'locobot': + self.ENV_ARGS = LOCOBOT_ENV_ARGS + elif self.WHICH_AGENT == 'stretch': + self.ENV_ARGS = STRETCH_ENV_ARGS + else: + raise NotImplementedError + + + @classmethod + def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return tuple() + + @staticmethod + def _partition_inds(n: int, num_parts: int): + return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( + np.int32 + ) + + @classmethod + def make_sampler_fn(cls, **kwargs) -> TaskSampler: + from datetime import datetime + + now = datetime.now() + + exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f") + if cls.VISUALIZE: + visualizers = [ + viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS + ] + kwargs["visualizers"] = visualizers + kwargs["objects"] = cls.OBJECT_TYPES + kwargs["task_type"] = cls.TASK_TYPE + kwargs["exp_name"] = exp_name_w_time + return cls.TASK_SAMPLER(**kwargs) + + def _get_sampler_args_for_scene_split( + self, + scenes: List[str], + process_ind: int, + total_processes: int, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + devices: Optional[List[int]] = None + ) -> Dict[str, Any]: + if total_processes > len(scenes): # oversample some scenes -> bias + if total_processes % len(scenes) != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisible by the number of scenes" + ) + scenes = scenes * int(ceil(total_processes / len(scenes))) + scenes = scenes[: total_processes * (len(scenes) // total_processes)] + else: + if len(scenes) % total_processes != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisor of the number of scenes" + ) + inds = self._partition_inds(len(scenes), total_processes) + + out = { + "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], + "env_args": self.ENV_ARGS, + "max_steps": self.MAX_STEPS, + "sensors": self.SENSORS, + "action_space": gym.spaces.Discrete( + len(self.TASK_TYPE.class_action_names()) + ), + "seed": seeds[process_ind] if seeds is not None else None, + "deterministic_cudnn": deterministic_cudnn, + "rewards_config": self.REWARD_CONFIG, + } + + x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None) + out['env_args']["x_display"] = x_display + return out + + def train_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TRAIN_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + devices=devices + ) + res["scene_period"] = "manual" + res["sampler_mode"] = "train" + + return res + + def valid_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]], + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.VALID_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + devices=devices + ) + res["scene_period"] = self.VALID_SAMPLES_IN_SCENE + res["sampler_mode"] = "val" + res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) + return res + + def test_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]], + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TEST_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + devices=devices + ) + res["scene_period"] = self.TEST_SAMPLES_IN_SCENE + res["sampler_mode"] = "test" + return res + + + def training_pipeline(self, **kwargs): + log_interval_small = ( + self.distributed_nodes*self.NUM_PROCESSES * 32 * 10 + if torch.cuda.is_available + else 1 + ) + log_interval_medium = ( + self.distributed_nodes*self.NUM_PROCESSES * 64 * 5 + if torch.cuda.is_available + else 1 + ) + log_interval_large = ( + self.distributed_nodes*self.NUM_PROCESSES * 128 * 5 + if torch.cuda.is_available + else 1 + ) + + batch_steps_0 = int(10e6) + batch_steps_1 = int(10e6) + batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0 + + return TrainingPipeline( + save_interval=2_000_000, + metric_accumulate_interval=10_000, + optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)), + num_mini_batch=1, + update_repeats=4, + max_grad_norm=0.5, + num_steps=128, + named_losses={"ppo_loss": PPO(**PPOConfig)}, + gamma=0.99, + use_gae=True, + gae_lambda=0.95, + advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, + pipeline_stages=[ + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=batch_steps_0, + training_settings=TrainingSettings( + num_steps=32, metric_accumulate_interval=log_interval_small + ), + ), + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=batch_steps_1, + training_settings=TrainingSettings( + num_steps=64, metric_accumulate_interval=log_interval_medium + ), + ), + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=batch_steps_2, + training_settings=TrainingSettings( + num_steps=128, metric_accumulate_interval=log_interval_large + ), + ), + ], + ) + + def machine_params(self, mode="train", **kwargs): + sampler_devices: Sequence[int] = [] + if mode == "train": + workers_per_device = 1 + gpu_ids = ( + [] + if not torch.cuda.is_available() + else self.TRAIN_GPU_IDS * workers_per_device + ) + nprocesses = ( + 1 + if not torch.cuda.is_available() + else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids)) + ) + sampler_devices = self.SAMPLER_GPU_IDS + elif mode == "valid": + nprocesses = self.NUMBER_OF_VALID_PROCESS + gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS + elif mode == "test": + # nprocesses = self.NUMBER_OF_TEST_PROCESS if torch.cuda.is_available() else 1 + gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS + nprocesses = ( + 1 + if not torch.cuda.is_available() + else evenly_distribute_count_into_bins(self.NUMBER_OF_TEST_PROCESS, len(gpu_ids)) + ) + + # print('Test Mode', gpu_ids, 'because cuda is',torch.cuda.is_available(), 'number of workers', nprocesses) + else: + raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") + + sensors = [*self.SENSORS] + if mode != "train": + sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)] + + sensor_preprocessor_graph = ( + SensorPreprocessorGraph( + source_observation_spaces=SensorSuite(sensors).observation_spaces, + preprocessors=self.preprocessors(), + ) + if mode == "train" + or ( + (isinstance(nprocesses, int) and nprocesses > 0) + or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) + ) + else None + ) + + # remove + # print('MACHINE PARAM', 'nprocesses',nprocesses,'devices',gpu_ids,'sampler_devices',sampler_devices,'mode = train', mode == "train",'gpu ids', gpu_ids,) # ignored with > 1 gpu_ids) + return MachineParams(nprocesses=nprocesses, + devices=gpu_ids, + sampler_devices=sampler_devices if mode == "train" else gpu_ids, # ignored with > 1 gpu_ids + sensor_preprocessor_graph=sensor_preprocessor_graph) + + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py new file mode 100644 index 0000000..ed96bf4 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py @@ -0,0 +1,62 @@ +import platform +import yaml + +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + + +class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + + SENSORS = [ + RGBSensorStretchIntel( + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + RGBSensorStretchKinect( + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm", + ), + GoalObjectTypeThorSensor( + object_types=OBJECT_TYPES, + ), + ] + + if platform.system() == "Darwin": + SENSORS += [ + RGBSensorStretchKinect( + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm_only_viz", + ), + RGBSensorStretchIntel( + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + ] diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py new file mode 100644 index 0000000..f25bf23 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -0,0 +1,96 @@ +import platform +import yaml + +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ + import ProcTHORObjectNavClipResnet50RGBOnly +from utils.stretch_utils.stretch_object_nav_tasks import \ + StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail, ExploreWiseObjectNavTask +from manipulathor_utils.debugger_util import ForkedPdb + + +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + + + +class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( + ProcTHORObjectNavClipResnet50RGBOnly +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + + + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + # TASK_TYPE = StretchObjectNavTaskSegmentationSuccess + # TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail + TASK_TYPE = ExploreWiseObjectNavTask + + SENSORS = [ + RGBSensorThor( + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + + RGBSensorStretchKinectBigFov( + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm", + ), + + GoalObjectTypeThorSensor( + object_types=OBJECT_TYPES, + ), + ] + + # NUM_PROCESSES = 40 + # NUM_TRAIN_HOUSES = None + + if platform.system() == "Darwin": + SENSORS += [ + RGBSensorStretchKinectBigFov( + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm_only_viz", + ), + RGBSensorThor( + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + ] + + def __init__(self): + super().__init__() + self.ENV_ARGS['renderInstanceSegmentation'] = True + assert ( + self.WHICH_AGENT == 'stretch' # this only works for stretch + and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera + ) + self.REWARD_CONFIG['shaping_weight'] = 0.0 + self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['got_stuck_penalty'] = 0.0 + self.REWARD_CONFIG['failed_action_penalty'] = -0.5 + + @classmethod + def tag(cls): + return cls.TASK_TYPE.__name__ + '-RGB-2Camera-ProcTHOR' + '-' + cls.WHICH_AGENT + + diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py similarity index 93% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py index 2dfc866..9305f61 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py @@ -1,6 +1,6 @@ import torch -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_2camera_procthor_wide import \ +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py new file mode 100644 index 0000000..c9b2c40 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py @@ -0,0 +1,25 @@ +# import platform +# import yaml + +# from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel +# from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV + # from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + + +class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy( + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): + + def __init__(self): + super().__init__() + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 + + self.ENV_ARGS['returnToStart'] = False # \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py new file mode 100644 index 0000000..4c18670 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -0,0 +1,181 @@ +from ast import For +from typing import Any, Dict, List, Optional, Sequence +from typing_extensions import Literal, final +import platform + +import datasets +import numpy as np + +from allenact.base_abstractions.preprocessor import ( + Preprocessor, + SensorPreprocessorGraph, +) +from allenact.base_abstractions.sensor import ( + Sensor, + Union, +) +from allenact.base_abstractions.task import TaskSampler +from allenact.utils.experiment_utils import ( + Builder, +) + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig +from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask +from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES +from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, UPDATED_PROCTHOR_COMMIT_ID + +from manipulathor_utils.debugger_util import ForkedPdb + + + +class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): + """The base config for ProcTHOR ObjectNav experiments.""" + + TASK_SAMPLER = ProcTHORObjectNavTaskSampler + TASK_TYPE = ObjectNavTask + + OBJECT_TYPES: Optional[Sequence[str]] + + SENSORS: Sequence[Sensor] = [] + + DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" + + ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[ + int + ] = 20 # default config/main.yaml + RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = ( + -1 + ) # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None` + + if platform.system() == "Darwin": + RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = 1 + + RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # change from 100 + + TEST_ON_VALIDATION = False + + HOUSE_DATASET = datasets.load_dataset( + "allenai/houses", use_auth_token=True, ignore_verifications=True + ) # this loads even for inherited classes where is unused - not necessarily a problem + + NUM_TRAIN_HOUSES = None # none means all + + @classmethod + def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return tuple() + + @classmethod + def make_sampler_fn( + cls, **kwargs + ) -> TaskSampler: + from datetime import datetime + + now = datetime.now() + + exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f") + if cls.VISUALIZE: + visualizers = [ + viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS + ] + kwargs["visualizers"] = visualizers + kwargs["exp_name"] = exp_name_w_time + + return cls.TASK_SAMPLER(**kwargs) + + @staticmethod + def _partition_inds(n: int, num_parts: int): + return np.round( + np.linspace(start=0, stop=n, num=num_parts + 1, endpoint=True) + ).astype(np.int32) + + def _get_sampler_args_for_scene_split( + self, + houses: datasets.Dataset, + mode: Literal["train", "eval"], + resample_same_scene_freq: int, + process_ind: int, + max_tasks: Optional[int], + allow_flipping: Optional[bool] = None, + devices: Optional[List[int]] = None, + **kwargs, + ) -> Dict[str, Any]: + + house_inds = list(range(len(houses))) + if mode is "train": + scenes = [str(h) for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + else: + scenes = [str(h) for h in house_inds] + + general_args = super()._get_sampler_args_for_scene_split(scenes=scenes, # scenes=scenes, + process_ind=process_ind, + devices=devices, + **kwargs) + + x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None) + + procthor_specific = { + "sampler_mode": mode, + "houses": houses, + "house_inds": list(map(int,general_args['scenes'])), + "process_ind": process_ind, + "target_object_types": self.OBJECT_TYPES, + "max_tasks": max_tasks if max_tasks is not None else len(general_args['scenes']), + "distance_type": self.DISTANCE_TYPE, + "resample_same_scene_freq": resample_same_scene_freq, + } + del general_args['scenes'] + out = {**general_args,**procthor_specific} + + out["task_type"] = self.TASK_TYPE + + out["env_args"]["x_display"] = x_display + out["env_args"]['commit_id'] = UPDATED_PROCTHOR_COMMIT_ID#PROCTHOR_COMMIT_ID + out["env_args"]['scene'] = 'Procedural' + if allow_flipping is not None: + out["env_args"]['allow_flipping'] = allow_flipping + + return out + + + def train_task_sampler_args( + self, + **kwargs + ) -> Dict[str, Any]: + train_houses = self.HOUSE_DATASET["train"] + + if self.NUM_TRAIN_HOUSES: + train_houses = train_houses.select(range(self.NUM_TRAIN_HOUSES)) + + return self._get_sampler_args_for_scene_split( + houses=train_houses, + mode="train", + max_tasks=float("inf"), + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN, + **kwargs, + ) + + def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: + val_houses = self.HOUSE_DATASET["validation"] + return self._get_sampler_args_for_scene_split( + houses=val_houses.select(range(100)), + mode="eval", + max_tasks=10, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, + allow_flipping=False, + **kwargs, + ) + + def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: + if self.TEST_ON_VALIDATION: + return self.valid_task_sampler_args(**kwargs) + test_houses = self.HOUSE_DATASET["test"] + return self._get_sampler_args_for_scene_split( + houses=test_houses.select(range(100)), + mode="eval", + max_tasks=10, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, + allow_flipping=False, + **kwargs, + ) + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py new file mode 100644 index 0000000..9ecd9d2 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -0,0 +1,111 @@ +import platform +import random +from typing import Sequence, Union + +import gym +import numpy as np +from torch import nn +import yaml +import copy + +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + + +from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig +from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder +from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS +from manipulathor_utils.debugger_util import ForkedPdb + +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ + ClipResNetPreprocessNCameraGRUActorCriticMixin +from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.utils.experiment_utils import Builder + + +class ProcTHORObjectNavClipResnet50RGBOnly( + ProcTHORObjectNavBaseConfig +): + """Single-camera Object Navigation experiment configuration in ProcTHOR, using CLIP preprocessing.""" + + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + NOISE_LEVEL = 0 + WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch' + + SENSORS = [ + RGBSensorThor( + height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + width=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + GoalObjectTypeThorSensor( + object_types=OBJECT_TYPES, + ), + ] + + MAX_STEPS = 500 + if platform.system() == "Darwin": + MAX_STEPS = 200 + NUM_TRAIN_HOUSES = 100 + SENSORS += [ + RGBSensorThor( + height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + width=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + ] + + NUM_PROCESSES = 56 + + TASK_SAMPLER = ProcTHORObjectNavTaskSampler + TASK_TYPE = StretchObjectNavTask + ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment + + # NUM_TRAIN_HOUSES = 500 + + CLIP_MODEL_TYPE = "RN50" + + + def __init__(self): + super().__init__() + + self.ENV_ARGS['p_randomize_material'] = 0.8 + self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice + self.ENV_ARGS['renderInstanceSegmentation'] = False + self.ENV_ARGS['renderDepthImage'] = False + self.ENV_ARGS['allow_flipping'] = False + + self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( + sensors=self.SENSORS, + clip_model_type=self.CLIP_MODEL_TYPE, + screen_size=self.SCREEN_SIZE, + ) + + def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return self.preprocessing_and_model.preprocessors() + + + def create_model(self, **kwargs) -> nn.Module: + return self.preprocessing_and_model.create_model( + num_actions=len(self.TASK_TYPE.class_action_names()), **kwargs, + visualize=self.VISUALIZE + ) + + def get_agent(self): + return self.ENV_ARGS['agentMode'] + + @classmethod + def tag(cls): + return cls.TASK_TYPE.__name__ + '-RGB-SingleCam-ProcTHOR' + '-' + cls.WHICH_AGENT diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py new file mode 100644 index 0000000..763bddd --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py @@ -0,0 +1,82 @@ +import platform +import random +import yaml + +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + +from utils.stretch_utils.stretch_object_nav_tasks import \ + StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail, ExploreWiseObjectNavTask + +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL + + + +class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + + TRAIN_SCENES = ROBOTHOR_TRAIN + TEST_SCENES = ROBOTHOR_VAL + # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor'])) + # OBJECT_TYPES.sort() + + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + random.shuffle(TRAIN_SCENES) + random.shuffle(TEST_SCENES) + + # SENSORS = [ + # RGBSensorStretchIntel( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres", + # ), + # RGBSensorStretchKinect( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm", + # ), + # GoalObjectTypeThorSensor( + # object_types=OBJECT_TYPES, + # ), + # ] + + TASK_TYPE = ExploreWiseObjectNavTask + MAX_STEPS = 200 + + # if platform.system() == "Darwin": + # SENSORS += [ + # RGBSensorStretchKinect( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm_only_viz", + # ), + # RGBSensorStretchIntel( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_only_viz", + # ), + # ] + + + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py new file mode 100644 index 0000000..642adae --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py @@ -0,0 +1,78 @@ +from utils.stretch_utils.stretch_constants import INTEL_CAMERA_WIDTH +from manipulathor_utils.debugger_util import ForkedPdb + +from utils.stretch_utils.real_stretch_sensors import RealRGBSensorStretchIntel, RealRGBSensorStretchKinect +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + +from utils.stretch_utils.real_stretch_environment import StretchRealEnvironment +from utils.stretch_utils.all_rooms_object_nav_task_sampler import RealStretchAllRoomsObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import RealStretchObjectNavTask + + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV + + +class RealStretchObjectNav( + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +): + desired_screen_size = INTEL_CAMERA_WIDTH + SENSORS = [ + RealRGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + RealRGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm", + ), + GoalObjectTypeThorSensor( + object_types=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.OBJECT_TYPES, # doesn't matter for now, it's apples all the way dow + ), + ] + + # this should only ever be run on darwin anyway - sensors for visualization + # SENSORS += [ + # RealRGBSensorStretchIntel( + # height=desired_screen_size, + # width=desired_screen_size, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_only_viz", + # ), + # RealRGBSensorStretchKinect( + # height=desired_screen_size, + # width=desired_screen_size, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm_only_viz", + # ), + # ] + + MAX_STEPS = 60 + + TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler + TASK_TYPE = RealStretchObjectNavTask # + ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init + VISUALIZE = True + + NUM_PROCESSES = 1 + NUMBER_OF_TEST_PROCESS = 0 + NUMBER_OF_VALID_PROCESS = 0 + VAL_DEVICES = [] + TEST_DEVICES = [] + + TRAIN_SCENES = ['RealRobothor'] + TEST_SCENES = ['RealRobothor'] diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py new file mode 100644 index 0000000..3066a0c --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py @@ -0,0 +1,74 @@ +import platform +import random +import yaml + +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from utils.stretch_utils.stretch_object_nav_tasks import StretchNeckedObjectNavTask + + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL + + + +class RoboTHORObjectNavTestiTHORstyleSingleCam( + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + + TRAIN_SCENES = ROBOTHOR_TRAIN + TEST_SCENES = ROBOTHOR_VAL + # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor'])) + # OBJECT_TYPES.sort() + + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + random.shuffle(TRAIN_SCENES) + random.shuffle(TEST_SCENES) + + SENSORS = [ + RGBSensorThor( + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + GoalObjectTypeThorSensor( + object_types=OBJECT_TYPES, + ), + ] + + if platform.system() == "Darwin": + SENSORS += [ + RGBSensorThor( + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + ] + + TASK_TYPE = StretchNeckedObjectNavTask + MAX_STEPS = 200 + + def __init__(self): + super().__init__() + self.ENV_ARGS['agentMode']='default' + # maybe - include updated procthor commit ID? check if it makes a difference + + + + + + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py new file mode 100644 index 0000000..964e652 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -0,0 +1,84 @@ +import datasets +import torch + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only import \ + ProcTHORObjectNavClipResnet50RGBOnly +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide_stochastic import \ + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy + +from utils.procthor_utils.procthor_object_nav_task_samplers import RoboThorObjectNavTestTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder + +from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment + + +from manipulathor_utils.debugger_util import ForkedPdb + + +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy): + + EVAL_TASKS = datasets.load_dataset( + f"allenai/robothor-objectnav-eval", use_auth_token=True + ) + + TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler + TEST_ON_VALIDATION = True + # TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing + + @classmethod + def tag(cls): + return super().tag() + "-RoboTHOR-Test" + + @classmethod + def make_sampler_fn(cls, **kwargs): + from datetime import datetime + + now = datetime.now() + + exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f") + if cls.VISUALIZE: + visualizers = [ + viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS + ] + kwargs["visualizers"] = visualizers + kwargs["exp_name"] = exp_name_w_time + + if kwargs["sampler_mode"] == "train": + return cls.TASK_SAMPLER(**kwargs) + else: + return cls.TEST_TASK_SAMPLER(**kwargs) + + def valid_task_sampler_args(self, **kwargs): + out = self._get_sampler_args_for_scene_split( + houses=self.EVAL_TASKS["validation"].shuffle(), + mode="eval", + max_tasks=10, + allow_flipping=False, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored + **kwargs, + ) + return out + + def test_task_sampler_args(self, **kwargs): + if self.TEST_ON_VALIDATION: + houses = self.EVAL_TASKS["validation"] + else: + houses = self.EVAL_TASKS["test"].shuffle() + # return self.valid_task_sampler_args(**kwargs) + + out = self._get_sampler_args_for_scene_split( + houses=houses, + mode="eval", + max_tasks=None, + allow_flipping=False, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored + **kwargs, + ) + return out \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py new file mode 100644 index 0000000..06910a9 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -0,0 +1,143 @@ +from typing import Any, Dict, List, Optional, Sequence +from typing_extensions import Literal +import torch + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.robothor.test_robothor_procthorstyle import \ + ObjectNavRoboTHORTestProcTHORstyle +from allenact.base_abstractions.experiment_config import MachineParams +from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph +from allenact.utils.experiment_utils import evenly_distribute_count_into_bins +from allenact.base_abstractions.sensor import ( + ExpertActionSensor, + SensorSuite, +) + + +class ObjectNavRoboTHORTestProcTHORstyleDistrib( + ObjectNavRoboTHORTestProcTHORstyle +): + NUM_PROCESSES = 56 # one them crashed for space? + NUM_TRAIN_PROCESSES = 64 + NUM_VAL_PROCESSES = 2 + NUM_TEST_PROCESSES = 60 + + TRAIN_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + VAL_DEVICES = ( + (torch.cuda.device_count() - 1,) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + TEST_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + def __init__( + self, + distributed_nodes: int = 1, + ): + super().__init__() + self.distributed_nodes = distributed_nodes + # self.ENV_ARGS['continuousMode']=True + # self.ENV_ARGS["returnToStart"]= False # do either of these actually do something + + # self.train_gpu_ids = tuple(range(torch.cuda.device_count())) # should I do this for everyone?, should i add val + + + # def machine_params(self, mode="train", **kwargs): + # params = super().machine_params(mode, **kwargs) + + # if mode == "train": + # params.devices = params.devices * self.distributed_nodes + # params.nprocesses = params.nprocesses * self.distributed_nodes + # params.sampler_devices = params.sampler_devices * self.distributed_nodes + + # if "machine_id" in kwargs: + # machine_id = kwargs["machine_id"] + # assert ( + # 0 <= machine_id < self.distributed_nodes + # ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]" + + # local_worker_ids = list( + # range( + # len(self.train_gpu_ids) * machine_id, + # len(self.train_gpu_ids) * (machine_id + 1), + # ) + # ) + + # params.set_local_worker_ids(local_worker_ids) + + # # Confirm we're setting up train params nicely: + # print( + # f"devices {params.devices}" + # f"\nnprocesses {params.nprocesses}" + # f"\nsampler_devices {params.sampler_devices}" + # f"\nlocal_worker_ids {params.local_worker_ids}" + # ) + # elif mode == "valid": + # # Use all GPUs at their maximum capacity for training + # # (you may run validation in a separate machine) + # params.nprocesses = (0,) + + # return params + + def machine_params(self, mode: Literal["train", "valid", "test"], **kwargs): + devices: Sequence[torch.device] + nprocesses: int + if mode == "train": + devices = self.TRAIN_DEVICES * self.distributed_nodes + nprocesses = self.NUM_TRAIN_PROCESSES * self.distributed_nodes + elif mode == "valid": + devices = self.VAL_DEVICES + nprocesses = self.NUM_VAL_PROCESSES + elif mode == "test": + devices = self.TEST_DEVICES + nprocesses = self.NUM_TEST_PROCESSES + + nprocesses = evenly_distribute_count_into_bins( + count=nprocesses, nbins=len(devices) + ) + + sensors = [*self.SENSORS] + if mode != "train": + sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)] + + sensor_preprocessor_graph = ( + SensorPreprocessorGraph( + source_observation_spaces=SensorSuite(sensors).observation_spaces, + preprocessors=self.preprocessors(), + ) + if mode == "train" + or ( + (isinstance(nprocesses, int) and nprocesses > 0) + or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) + ) + else None + ) + + params = MachineParams( + nprocesses=nprocesses, + devices=devices, + sampler_devices=devices, + sensor_preprocessor_graph=sensor_preprocessor_graph, + ) + + # NOTE: for distributed setup + if mode == "train" and "machine_id" in kwargs: + machine_id = kwargs["machine_id"] + assert ( + 0 <= machine_id < self.distributed_nodes + ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes} - 1]" + local_worker_ids = list( + range( + len(self.TRAIN_DEVICES) * machine_id, + len(self.TRAIN_DEVICES) * (machine_id + 1), + ) + ) + params.set_local_worker_ids(local_worker_ids) + + return params \ No newline at end of file diff --git a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py similarity index 92% rename from manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py rename to manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py index 4295c91..36637f8 100644 --- a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py @@ -6,6 +6,8 @@ import platform from datetime import datetime from typing import Tuple, Optional, List, Dict, cast +import platform +from datetime import datetime import gym import torch @@ -21,7 +23,6 @@ from manipulathor_utils.debugger_util import ForkedPdb from utils.hacky_viz_utils import hacky_visualization - class ResnetTensorNavNCameraActorCritic(VisualNavActorCritic): def __init__( # base params @@ -43,6 +44,7 @@ def __init__( goal_dims: int = 32, resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32), combiner_hidden_out_dims: Tuple[int, int] = (128, 32), + visualize=False, ): super().__init__( action_space=action_space, @@ -52,6 +54,7 @@ def __init__( beliefs_fusion=beliefs_fusion, auxiliary_uuids=auxiliary_uuids, ) + self.visualize = visualize self.goal_visual_encoder = ResnetNCameraTensorGoalEncoder( # type:ignore self.observation_space, @@ -90,10 +93,15 @@ def is_blind(self) -> bool: def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor: return self.goal_visual_encoder(observations) - def forward(self, **kwargs): #TODO NOW remove - if platform.system() == "Darwin": - hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) - return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) + # def forward(self, **kwargs): #TODO NOW remove + # if platform.system() == "Darwin": + # hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) + # return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) + + # def forward(self, **kwargs): #TODO NOW remove + # if self.visualize: + # hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) + # return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) class ResnetNCameraTensorGoalEncoder(nn.Module): diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py new file mode 100644 index 0000000..3aa772e --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -0,0 +1,85 @@ +from typing import Sequence, Union, Type + +import attr +import gym +import numpy as np +import torch.nn as nn + +from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.base_abstractions.sensor import Sensor +from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor +from allenact.utils.experiment_utils import Builder +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from allenact_plugins.navigation_plugin.objectnav.models import ( + ResnetTensorNavActorCritic, +) + +from manipulathor_baselines.stretch_object_nav_baselines.models.clip_objnav_ncamera_model import \ + ResnetTensorNavNCameraActorCritic +from manipulathor_utils.debugger_util import ForkedPdb + + +@attr.s(kw_only=True) +class ClipResNetPreprocessNCameraGRUActorCriticMixin: + sensors: Sequence[Sensor] = attr.ib() + clip_model_type: str = attr.ib() + screen_size: int = attr.ib() + pool: bool = attr.ib(default=False) + + def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + preprocessors = [] + self.resnet_preprocessor_uuids = [] + for camera in [s for s in self.sensors if (isinstance(s, RGBSensor) or isinstance(s,DepthSensor))]: + if "_only_viz" not in camera.uuid: + if isinstance(camera, RGBSensor): + assert ( + np.linalg.norm( + np.array(camera._norm_means) + - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS) + ) + < 1e-5 + ) + assert ( + np.linalg.norm( + np.array(camera._norm_sds) + - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS) + ) + < 1e-5 + ) + preprocessors.append( + ClipResNetPreprocessor( + rgb_input_uuid=camera.uuid, + clip_model_type=self.clip_model_type, + pool=self.pool, + output_uuid=camera.uuid+"_clip_resnet", + ) + ) + self.resnet_preprocessor_uuids.append(camera.uuid+"_clip_resnet") + + else: + self.resnet_preprocessor_uuids.append(camera.uuid) + + return preprocessors + + def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module: + goal_sensor_uuid = next( + (s.uuid for s in self.sensors if isinstance(s, GoalObjectTypeThorSensor)), + None, + ) + self.resnet_preprocessor_uuids = [s for s in self.resnet_preprocessor_uuids if "_only_viz" not in s] + + # display or assert sensor order here? possible source for sneaky failure if they're not the same + # as in pretraining when loading weights. + + return ResnetTensorNavNCameraActorCritic( + action_space=gym.spaces.Discrete(num_actions), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + goal_sensor_uuid=goal_sensor_uuid, + resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + hidden_size=512, + goal_dims=32, + add_prev_actions=True, + visualize=visualize + ) \ No newline at end of file diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 00377e9..9d1e810 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -267,11 +267,41 @@ # --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ # --seed 10 --machine_id 0 --extra_tag armpointnav_with_ProcTHOR_RGBonly_after_fix_clip ' -command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib \ +# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 --extra_tag prcthor_obj_nav ' + +# command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOVDistrib/finetune_robothor/2022-05-27_21-00-03/exp_RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOVDistrib_finetune_robothor__stage_02__steps_000160201650.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch/experiments/ithor/kiana_obj_nav_2camera_procthor_wide_distrib \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 --extra_tag kiana_objnav_outside_room_wide -c ~/exp_ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVDistrib_prcthor_obj_nav__stage_02__steps_000100123710.pt ' + +# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0' + +command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-08_21-59-41/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 --extra_tag prcthor_obj_nav ' + --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' +command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-18_15-49-34/exp_ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000273645744.pt ~/; \ + ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +--distributed_ip_and_port IP_ADR:6060 \ + --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ + --seed 10 --machine_id 0 -c ~/exp_ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000273645744.pt ' + +# command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 ' + +# command_aws1 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0' # command = command_aws5 # command = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/no_pointnav_stretch_all_rooms_distrib \ @@ -284,7 +314,7 @@ server_sets = { 'aws1':{ 'servers':[f'aws{i}' for i in range(1,5)], - 'ip_adr': '18.237.24.199', + 'ip_adr': '52.13.80.54', 'command': command_aws1, }, 'aws5': { @@ -309,6 +339,7 @@ def parse_args(): args.command = server['command'] args.command = args.command.replace('IP_ADR', ip_adr) args.command = args.command.replace('NUM_MACHINES', str(len(args.servers))) + args.command = args.command.replace('MAIN_SERVER', ip_adr) return args def main(args): diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py index cf8d390..2d234ea 100644 --- a/scripts/update_tensorboards.py +++ b/scripts/update_tensorboards.py @@ -3,7 +3,7 @@ import time import psutil -servers = [('aws1', 6006), ('aws5', 6007)] +servers = [('aws1', 6006), ('aws5', 6007),('aws6', 6008)]#, ('aws7', 6009)] while(True): diff --git a/utils/calculation_utils.py b/utils/calculation_utils.py index 7b18731..f01693d 100644 --- a/utils/calculation_utils.py +++ b/utils/calculation_utils.py @@ -1,7 +1,9 @@ import torch -from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import depth_frame_to_world_space_xyz +from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import depth_frame_to_world_space_xyz, camera_space_xyz_to_world_xyz +from utils.noise_in_motion_util import squeeze_bool_mask import numpy as np + def position_distance(s1, s2): position1 = s1["position"] position2 = s2["position"] @@ -27,4 +29,34 @@ def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, horizon=camera_horizon, fov=fov, ) - return world_space_point_cloud \ No newline at end of file + return world_space_point_cloud + +def calc_world_xyz_from_agent_relative(object_xyz_list, agent_xyz, agent_rotation, device): + # This might just be a duplication/convenience thing of the above + with torch.no_grad(): + agent_xyz = (torch.from_numpy(agent_xyz).float().to(device)) + + object_xyz_matrix = np.ndarray([3,len(object_xyz_list)]) + for i in range(len(object_xyz_list)): + x,y,z = object_xyz_list[i]['position']['x'],object_xyz_list[i]['position']['y'],object_xyz_list[i]['position']['z'] + object_xyz_matrix[:,i] = [x,y,z] + object_xyz_matrix = torch.from_numpy(object_xyz_matrix).to(device) + + world_xyz_matrix = camera_space_xyz_to_world_xyz(object_xyz_matrix, agent_xyz, agent_rotation, horizon=0 ).cpu().numpy() + world_positions = [] + for i in range(len(object_xyz_list)): + x,y,z = world_xyz_matrix[:,i] + world_positions.append(dict(position=dict(x=x, y=y,z=z), rotation=dict(x=0,y=0, z=0))) + + return world_positions + +def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device): + mask = squeeze_bool_mask(mask) + depth_frame_masked = depth_frame_original.copy() + depth_frame_masked[~mask] = -1 + depth_frame_masked[depth_frame_masked == 0] = -1 # TODO: what is this for? missing values? + world_space_point_cloud = calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame_masked) + valid_points = (world_space_point_cloud == world_space_point_cloud).sum(dim=-1) == 3 + point_in_world = world_space_point_cloud[valid_points] + midpoint_agent_coord = point_in_world.mean(dim=0) + return midpoint_agent_coord diff --git a/utils/procthor_utils/procthor_helper.py b/utils/procthor_utils/procthor_helper.py index 505c95a..af8d402 100644 --- a/utils/procthor_utils/procthor_helper.py +++ b/utils/procthor_utils/procthor_helper.py @@ -63,7 +63,7 @@ def distance_to_object_id( def path_from_point_to_object_id( point: Dict[str, float], object_id: str, allowed_error: float ) -> Optional[List[Dict[str, float]]]: - event = env.step( + event = env.controller.step( action="GetShortestPath", objectId=object_id, position=point, @@ -109,7 +109,7 @@ def retry_dist(position: Dict[str, float], object_id: str) -> float: else: break if d < 0: - get_logger().warning( + get_logger().debug( f"In house {house_name}, could not find a path from {position} to {object_id}" f" with {allowed_error} error tolerance. Returning a distance of -1." ) diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 07e6319..907aae6 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -1,4 +1,4 @@ -import glob +from typing import Any, Dict, List, Optional import platform import pickle import random @@ -10,7 +10,6 @@ import datasets import numpy as np from ai2thor.controller import Controller -# from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor from allenact.base_abstractions.task import Task, TaskSampler from allenact.utils.cache_utils import DynamicDistanceCache @@ -18,13 +17,12 @@ from allenact.utils.system import get_logger from ithor_arm.ithor_arm_viz import LoggerVisualizer -# from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment from utils.procthor_utils.procthor_types import AgentPose, Vector3 -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask from utils.stretch_utils.stretch_constants import ADITIONAL_ARM_ARGS from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from scripts.stretch_jupyter_helper import make_all_objects_unbreakable +from scripts.stretch_jupyter_helper import get_relative_stretch_current_arm_state from manipulathor_utils.debugger_util import ForkedPdb @@ -34,19 +32,19 @@ class ProcTHORObjectNavTaskSampler(TaskSampler): def __init__( self, - scenes: List[str], sensors: List[Sensor], max_steps: int, env_args: Dict[str, Any], - action_space: gym.Space, rewards_config: Dict, - objects: List[str], task_type: type, - scene_period: Optional[Union[int, str]] = None, + houses: datasets.Dataset, + house_inds: List[int], + target_object_types: List[str], + resample_same_scene_freq: int, + distance_type: str, max_tasks: Optional[int] = None, seed: Optional[int] = None, deterministic_cudnn: bool = False, - fixed_tasks: Optional[List[Dict[str, Any]]] = None, visualizers: List[LoggerVisualizer] = [], *args, **kwargs @@ -56,26 +54,19 @@ def __init__( self.environment_type = env_args['environment_type'] del env_args['environment_type'] self.env_args = env_args - self.scenes = scenes + self.grid_size = 0.25 self.env: Optional[StretchManipulaTHOREnvironment] = None self.sensors = sensors self.max_steps = max_steps - self._action_space = action_space - self.objects = objects - - self.scene_counter: Optional[int] = None - self.scene_order: Optional[List[str]] = None - self.scene_id: Optional[int] = None - self.scene_period: Optional[ - Union[str, int] - ] = scene_period # default makes a random choice - + self._action_space = gym.spaces.Discrete(len(self.TASK_TYPE.class_action_names())) + self.resample_same_scene_freq = resample_same_scene_freq self._last_sampled_task: Optional[Task] = None - + self.seed: Optional[int] = None - self.set_seed(seed) + if seed is not None: + self.set_seed(seed) if deterministic_cudnn: set_deterministic_cudnn() @@ -83,58 +74,40 @@ def __init__( self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] - self.cap_training = kwargs["cap_training"] - - # if self.env_args['scene'] == 'Procedural': - self.house_dataset = datasets.load_dataset("allenai/houses", use_auth_token=True) - - RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = ( - -1 - ) # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None` - # RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100 - # if platform.system() == "Darwin": - # RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 - - RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 50 - self.resample_same_scene_freq = -1 #RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN - if platform.system() == "Darwin": - self.resample_same_scene_freq = 1#TODO NOW WHAT SHOULD WE DO? kiana for now - # assert self.resample_same_scene_freq == 1 # IMPORTANT IT WON"T WORK FOR 100 + if self.sampler_mode != "train": + self.rewards_config['shaping_weight'] = 0.0 + self.rewards_config['exploration_reward'] = 0.0 + self.episode_index = 0 + self.houses = houses + self.house_inds = house_inds self.house_inds_index = 0 - self.reachable_positions_map = {} - self.house_dataset = self.house_dataset['train'] #TODO separately for test and val - # self.house_dataset = self.house_dataset.select(range(10)) - # ForkedPdb().set_trace() - - # ROOMS_TO_USE = [int(scene.replace('ProcTHOR', '')) for scene in self.scenes] - # ROOMS_TO_USE = [x for x in range(self.house_dataset.num_rows)] - ROOMS_TO_USE = [int(scene.replace('ProcTHOR', '')) for scene in self.scenes] - - # self.args_house_inds = [x for x in range(self.house_dataset.num_rows)] - self.args_house_inds = ROOMS_TO_USE self.valid_rotations = [0,90,180,270] - self.distance_type = "l2" + self.distance_type = distance_type self.distance_cache = DynamicDistanceCache(rounding=1) - self.target_object_types_set = set(self.objects) + self.target_object_types_set = set(target_object_types) self.obj_type_counter = Counter( - {obj_type: 0 for obj_type in self.objects} + {obj_type: 0 for obj_type in target_object_types} ) self.reachable_positions_map: Dict[int, Vector3] = dict() self.objects_in_scene_map: Dict[str, List[str]] = dict() self.visible_objects_cache = dict() - self.max_tasks = max_tasks if max_tasks is not None else np.Inf # stop when I tell you to stop + self.max_tasks = max_tasks self.reset_tasks = self.max_tasks - self.max_vis_points=6 - self.max_agent_positions=6 + + self.max_vis_points = 6 + self.max_agent_positions = 6 + self.p_greedy_target_object = 0.8 + self.min_raycast_distance = 1.5 + + self.success_distance = 1.0 + self.reset() def set_seed(self, seed: int): - self.seed = seed - if seed is not None: - set_seed(seed) + set_seed(seed) def _create_environment(self, **kwargs) -> StretchManipulaTHOREnvironment: env = self.environment_type( @@ -165,8 +138,8 @@ def last_sampled_task(self) -> Optional[StretchObjectNavTask]: return self._last_sampled_task def close(self) -> None: - if self.env.controller is not None: - self.env.controller.stop() + if self.env is not None: + self.env.stop() @property def all_observation_spaces_equal(self) -> bool: @@ -190,11 +163,16 @@ def get_nearest_positions(self, world_position: Vector3) -> List[Vector3]: ] def get_nearest_agent_height(self, y_coordinate: float) -> float: - return 1.5759992 # from default agent - is guess. TODO check stretch + if self.env_args['agentMode'] == 'locobot': + return 0.8697997 + elif self.env_args['agentMode'] == 'stretch': + return 1.27 # to intel camera, measured physical + else: + return 1.5759992 # from default agent - is guess. TODO check stretch @property def house_index(self) -> int: - return self.args_house_inds[self.house_inds_index] + return self.house_inds[self.house_inds_index] def is_object_visible(self, object_id: str) -> bool: """Return True if object_id is visible without any interaction in the scene. @@ -237,7 +215,7 @@ def is_object_visible(self, object_id: str) -> bool: if ( event.metadata["lastActionSuccess"] and hit["objectId"] == object_id - and hit["hitDistance"] < self.env_args['visibilityDistance'] + and hit["hitDistance"] < np.min([self.env_args['visibilityDistance'],self.min_raycast_distance]) ): self.visible_objects_cache[self.house_index][object_id] = True return True @@ -267,10 +245,11 @@ def sample_target_object_ids(self) -> Tuple[str, List[str]]: Objects returned will all be of the same objectType. Only considers visible objects in the house. """ - if random.random() < 0.8: # p_greedy_target_object + if random.random() < self.p_greedy_target_object: for obj_type, count in reversed(self.obj_type_counter.most_common()): instances_of_type = self.target_objects_in_scene.get(obj_type, []) + # NOTE: object type doesn't appear in the scene. if not instances_of_type: continue @@ -319,9 +298,11 @@ def increment_scene(self) -> bool: self.increment_scene_index() # self.env.controller.step(action="DestroyHouse", raise_for_failure=True) - self.env.controller.reset() + # self.env.controller.reset() + self.env.reset(scene_name='Procedural') + self.env.list_of_actions_so_far = [] - self.house_entry = self.house_dataset[self.house_index] + self.house_entry = self.houses[self.house_index] self.house = pickle.loads(self.house_entry["house"]) if platform.system() == "Darwin": #TODO remove @@ -330,20 +311,15 @@ def increment_scene(self) -> bool: self.env.controller.step( action="CreateHouse", house=self.house, raise_for_failure=True ) - self.env.controller.step("ResetObjectFilter") - - #TODO dude this is ugly! - pose = self.house["metadata"]["agent"].copy() - event = self.env.controller.step(action="TeleportFull", **pose) - if not event: - # get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise - return False #TODO this can mess FPS - self.env.controller.step(action="MakeAllObjectsMoveable") - self.env.controller.step(action="MakeObjectsStaticKinematicMassThreshold") - make_all_objects_unbreakable(self.env.controller) - - # NOTE: Set reachable positions + if self.house_index not in self.reachable_positions_map: + pose = self.house["metadata"]["agent"].copy() + if self.env_args['agentMode'] == 'locobot': + del pose["standing"] + event = self.env.controller.step(action="TeleportFull", **pose) + if not event: + get_logger().warning(f"Initial teleport failing in {self.house_index}.") + return False rp_event = self.env.controller.step(action="GetReachablePositions") if not rp_event: # NOTE: Skip scenes where GetReachablePositions fails @@ -353,10 +329,17 @@ def increment_scene(self) -> bool: return False reachable_positions = rp_event.metadata["actionReturn"] self.reachable_positions_map[self.house_index] = reachable_positions + + # verify the stretch arm is stowed + if self.env_args['agentMode'] == 'stretch': + arm_pos = get_relative_stretch_current_arm_state(self.env.controller) + assert abs(sum(arm_pos.values())) < 0.001 + return True + def increment_scene_index(self): - self.house_inds_index = (self.house_inds_index + 1) % len(self.args_house_inds) + self.house_inds_index = (self.house_inds_index + 1) % len(self.house_inds) def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]: if self.env is None: @@ -385,8 +368,9 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject while not self.increment_scene(): pass - if random.random() < 0.8: #TODO + if random.random() < self.env_args['p_randomize_material']: self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True) + self.env.controller.step(action="RandomizeLighting", synchronized=True, raise_for_failure=True) else: self.env.controller.step(action="ResetMaterials", raise_for_failure=True) @@ -397,21 +381,26 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ) # NOTE: Set agent pose - starting_pose = AgentPose( - position=random.choice(self.reachable_positions), - rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), - horizon=0, - standing=True, - ) - event = self.env.controller.step(action="TeleportFull", **starting_pose) - # if not event: - # get_logger().warning( - # f"Teleport failing in {self.house_index} at {starting_pose}" - # ) + event = None + attempts = 0 + while not event: + attempts+=1 + starting_pose = AgentPose( + position=random.choice(self.reachable_positions), + rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), + horizon=0, + ) + if self.env_args['agentMode'] != 'locobot': + starting_pose['standing']=True + starting_pose['horizon'] = self.env_args['horizon_init'] + event = self.env.controller.step(action="TeleportFull", **starting_pose) + if attempts > 10: + get_logger().error(f"Teleport failed {attempts-1} times in house {self.house_index} - something may be wrong") + self.episode_index += 1 self.max_tasks -= 1 - + self._last_sampled_task = self.TASK_TYPE( env=self.env, sensors=self.sensors, @@ -431,6 +420,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject "object_type": target_object_type, "starting_pose": starting_pose, "mirrored": self.env_args['allow_flipping'] and random.random() > 0.5, + 'success_distance': self.success_distance }, ) return self._last_sampled_task @@ -439,3 +429,88 @@ def reset(self): self.episode_index = 0 self.max_tasks = self.reset_tasks self.house_inds_index = 0 + + + +class RoboThorObjectNavTestTaskSampler(ProcTHORObjectNavTaskSampler): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.last_scene = None + + def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]: + # ForkedPdb().set_trace() + while True: + # NOTE: Stopping condition + if self.env is None: + self.env = self._create_environment() + + # NOTE: Stopping condition + if self.max_tasks <= 0: + return None + + + epidx = self.house_inds[self.max_tasks - 1] + ep = self.houses[epidx] + # ForkedPdb().set_trace() + + if self.last_scene is None or self.last_scene != ep["scene"]: + self.last_scene = ep["scene"] + # self.env.controller.reset(ep["scene"]) + self.env.reset(scene_name=ep["scene"]) + + # NOTE: not using ep["targetObjectIds"] due to floating points with + # target objects moving. + event = self.env.controller.step(action="ResetObjectFilter") + target_object_ids = [ + obj["objectId"] + for obj in event.metadata["objects"] + if obj["objectType"] == ep["targetObjectType"] + ] + self.env.controller.step( + action="SetObjectFilter", + objectIds=target_object_ids, + raise_for_failure=True, + ) + if self.env_args['agentMode'] != 'locobot': + ep["agentPose"]["standing"] = True + ep["agentPose"]["horizon"] = self.env_args['horizon_init'] # reset for stretch agent + event = self.env.controller.step(action="TeleportFull", **ep["agentPose"]) + if not event: + # NOTE: Skip scenes where TeleportFull fails. + # This is added from a bug in the RoboTHOR eval dataset. + get_logger().error( + f"Teleport failing {event.metadata['actionReturn']} in {epidx}." + ) + self.max_tasks -= 1 + self.episode_index += 1 + continue + + difficulty = {"difficulty": ep["difficulty"]} if "difficulty" in ep else {} + self._last_sampled_task = self.TASK_TYPE( + # visualize=self.episode_index in self.epids_to_visualize, + env=self.env, + sensors=self.sensors, + max_steps=self.max_steps, + reward_config=self.rewards_config, + distance_type=self.distance_type, + distance_cache=self.distance_cache, + visualizers=self.visualizers, + task_info={ + "mode": self.env_args['agentMode'], + "scene_name": ep["scene"], + "target_object_ids": target_object_ids, + "object_type": ep["targetObjectType"], + "starting_pose": ep["agentPose"], + "mirrored": False, + "id": f"{ep['scene']}__global{epidx}__{ep['targetObjectType']}", + 'success_distance': self.success_distance, + **difficulty, + }, + ) + + self.max_tasks -= 1 + self.episode_index += 1 + + return self._last_sampled_task + + diff --git a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py similarity index 73% rename from utils/procthor_utils/all_rooms_object_nav_task_sampler.py rename to utils/stretch_utils/all_rooms_object_nav_task_sampler.py index 76dab88..cc467cf 100644 --- a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -18,8 +18,14 @@ from scripts.dataset_generation.find_categories_to_use import ROBOTHOR_TRAIN, KITCHEN_TRAIN, KITCHEN_TEST, KITCHEN_VAL from utils.manipulathor_data_loader_utils import get_random_query_image, get_random_query_feature_from_img_adr from scripts.stretch_jupyter_helper import get_reachable_positions, transport_wrapper +from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer +from utils.stretch_utils.real_stretch_environment import StretchRealEnvironment +from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment + + + class AllRoomsObjectNavTaskSampler(TaskSampler): @@ -136,7 +142,8 @@ def __init__( self.reset() self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] - self.cap_training = kwargs["cap_training"] + + self.success_distance = 1.0 possible_initial_locations = ( "datasets/apnd-dataset/valid_agent_initial_locations.json" @@ -272,6 +279,13 @@ def put_object_in_location(location_point): # if not event_transport_init_obj.metadata['lastActionSuccess']: # print('scene', scene_name, 'init', init_object['object_id']) # print('ERROR: one of transfers fail', 'init', event_transport_init_obj.metadata['errorMessage']) + + if random.random() < 0.8: + # self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True) + self.env.controller.step(action="RandomizeLighting", synchronized=True, raise_for_failure=True) + # else: + # self.env.controller.step(action="ResetMaterials", raise_for_failure=True) + else: #Object is already at the location it should be target_obj_type = data_point['target_obj_type'] @@ -289,7 +303,7 @@ def get_full_object_info(env, object_type): init_object = get_full_object_info(self.env, target_obj_type) - agent_state["cameraHorizon"] = 0 # 0 for stretch, 20 for other manipulathor agent + agent_state["cameraHorizon"] = self.env_args['horizon_init'] # 0 for stretch, 20 for other manipulathor agent event = this_controller.step( dict( action="TeleportFull", @@ -323,7 +337,8 @@ def get_full_object_info(env, object_type): 'mode': self.env_args['agentMode'], 'house_name': scene_name, 'scene_name': scene_name, - 'mirrored': False #self.env_args['allow_flipping'] and random.random() > 0.5, # not including for non-procthor + 'mirrored': False, #self.env_args['allow_flipping'] and random.random() > 0.5, # not including for non-procthor + 'success_distance': self.success_distance } @@ -340,6 +355,7 @@ def get_full_object_info(env, object_type): reward_config=self.rewards_config, distance_type=self.distance_type, distance_cache=self.distance_cache, + additional_visualize=False # not implemented for non-procthor ) return self._last_sampled_task @@ -391,7 +407,7 @@ def get_source_target_indices(self): "name": "agent", "position": dict(x=agent_pose['x'], y=agent_pose['y'], z=agent_pose['z']), "rotation": dict(x=0, y=agent_pose['rotation'], z=0), - "cameraHorizon": agent_pose['horizon'], + "cameraHorizon": self.env_args['horizon_init'], #agent_pose['horizon'], "isStanding": True, } @@ -408,7 +424,7 @@ def get_source_target_indices(self): "name": "agent", "position": dict(x=agent_pose['x'], y=agent_pose['y'], z=agent_pose['z']), "rotation": dict(x=0, y=agent_pose['rotation'], z=0), - "cameraHorizon": agent_pose['horizon'], + "cameraHorizon": self.env_args['horizon_init'], #agent_pose['horizon'], "isStanding": True, } @@ -418,3 +434,106 @@ def get_source_target_indices(self): return data_point + +class RealStretchAllRoomsObjectNavTaskSampler(AllRoomsObjectNavTaskSampler): + + def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment: + env = StretchRealEnvironment( + make_agents_visible=False, + object_open_speed=0.05, + env_args=self.env_args, + ) + return env + + def __init__(self, **kwargs) -> None: + + super().__init__(**kwargs) + + self.all_possible_points = {} + + # This can be done more elegantly from datasets/objects/robothor_habitat2022.yaml and exclude toilet + self.possible_real_objects = [ + {'object_id': 'AlarmClock|1|1|1', 'object_type':"AlarmClock"}, + {'object_id': 'Apple|1|1|1', 'object_type':"Apple"}, + {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"}, + {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"}, + {'object_id': 'Bed|1|1|1', 'object_type':"Bed"}, + {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"}, + {'object_id': 'Chair|1|1|1', 'object_type':"Chair"}, + {'object_id': 'GarbageCan|1|1|1', 'object_type':"GarbageCan"}, + {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"}, + {'object_id': 'Laptop|1|1|1', 'object_type':"Laptop"}, + {'object_id': 'Mug|1|1|1', 'object_type':"Mug"}, + {'object_id': 'Sofa|1|1|1', 'object_type':"Sofa"}, + {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"}, + {'object_id': 'Television|1|1|1', 'object_type':"Television"}, + {'object_id': 'Vase|1|1|1', 'object_type':"Vase"} + ] + + self.preset_easyish_tasks = [ + {'object_id': 'Apple|1|1|1', 'object_type':"Apple"}, + {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"}, + {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"}, + {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"}, + {'object_id': 'Chair|1|1|1', 'object_type':"Chair"}, + {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"}, + {'object_id': 'Mug|1|1|1', 'object_type':"Mug"}, + {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"}, + {'object_id': 'Television|1|1|1', 'object_type':"Television"}, + {'object_id': 'Vase|1|1|1', 'object_type':"Vase"} + ] + + self.real_object_index = 0 + + if self.sampler_mode == "test": + self.max_tasks = self.reset_tasks = 200 + + def next_task( + self, force_advance_scene: bool = False + ) -> Optional[ObjectNavTask]: + + if self.max_tasks is not None and self.max_tasks <= 0: + return None + + if self.sampler_mode != "train" and self.length <= 0: + return None + + if self.env is None: + self.env = self._create_environment() + self.env.reset(scene_name='RealRobothor') + + # skip_object = True + # while skip_object: + # target_object = random.choice(self.possible_real_objects) + # print('I am now seeking a', target_object['object_type'], '. Accept by setting skip_object=False') + # ForkedPdb().set_trace() + + target_object = self.preset_easyish_tasks[self.real_object_index] + print('I am now seeking a', target_object['object_type'], '. Continue when ready.') + ForkedPdb().set_trace() + + task_info = { + 'target_object_ids': [target_object['object_id']], + 'object_type': target_object['object_type'], + 'starting_pose': {}, + 'mode': self.env_args['agentMode'], + 'house_name': 'RealRobothor', + 'scene_name': 'RealRobothor', + 'mirrored': False # yeah no + } + + self._last_sampled_task = self.TASK_TYPE( + env=self.env, + sensors=self.sensors, + task_info=task_info, + max_steps=self.max_steps, + action_space=self._action_space, + visualizers=self.visualizers, + reward_config=self.rewards_config, + distance_type="real_world", + distance_cache=self.distance_cache, + additional_visualize=False # not implemented for non-procthor + ) + self.real_object_index = self.real_object_index + 1 + + return self._last_sampled_task \ No newline at end of file diff --git a/utils/stretch_utils/real_stretch_environment.py b/utils/stretch_utils/real_stretch_environment.py index c7dd705..1d06982 100644 --- a/utils/stretch_utils/real_stretch_environment.py +++ b/utils/stretch_utils/real_stretch_environment.py @@ -39,7 +39,7 @@ def check_controller_version(self): def create_controller(self): - controller = ai2thor.robot_controller.Controller(host="stretch2.corp.ai2", port=9000, width=1280, height=720) + controller = ai2thor.robot_controller.Controller(host="stretch2", port=9000, width=1280, height=720) return controller def start( diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py index 09ad06d..c9a1410 100644 --- a/utils/stretch_utils/real_stretch_sensors.py +++ b/utils/stretch_utils/real_stretch_sensors.py @@ -13,16 +13,17 @@ import torch from PIL import Image import matplotlib.pyplot as plt -from allenact.base_abstractions.sensor import DepthSensor, Sensor, RGBSensor +from allenact.base_abstractions.sensor import Sensor #, DepthSensor, RGBSensor + from allenact.base_abstractions.task import Task from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultPredictor -from detectron2.model_zoo import model_zoo -from detectron2.utils.visualizer import Visualizer +# from detectron2.config import get_cfg +# from detectron2.data import MetadataCatalog +# from detectron2.engine import DefaultPredictor +# from detectron2.model_zoo import model_zoo +# from detectron2.utils.visualizer import Visualizer import matplotlib.pyplot as plt from torch.distributions.utils import lazy_property @@ -38,7 +39,10 @@ from utils.calculation_utils import calc_world_coordinates from utils.detection_translator_util import THOR2COCO from utils.noise_in_motion_util import squeeze_bool_mask -from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask + +from scipy.interpolate import UnivariateSpline + +# from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask from utils.stretch_utils.stretch_constants import INTEL_RESIZED_H, INTEL_RESIZED_W, KINECT_REAL_W, KINECT_REAL_H, \ MAX_INTEL_DEPTH, MIN_INTEL_DEPTH, MAX_KINECT_DEPTH, MIN_KINECT_DEPTH, INTEL_FOV_W, INTEL_FOV_H, KINECT_FOV_W, \ KINECT_FOV_H @@ -85,7 +89,7 @@ def frame_from_env(self, env: IThorEnvironment, task: Optional[Task]) -> np.ndar return (depth) def normalize_real_intel_image(image,final_size=224): - image = cv2.rotate(image, cv2.cv2.ROTATE_90_CLOCKWISE) + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) # if len(image.shape) == 3: # image = image[:,:,::-1] # image = cv2.flip(image,1) @@ -115,6 +119,22 @@ def normalize_real_kinect_image(frame,size=224): w,h = (int(current_size[0] * ratio), int(current_size[1] * ratio)) frame = cv2.resize(frame,(h,w)) + + spl = UnivariateSpline([0, 64, 128, 192, 255], [0, 30, 80, 160, 255]) + color_channel_decr = spl(range(256)) + + c_r, c_g, c_b = cv2.split(frame) + c_r = cv2.LUT(c_r, color_channel_decr).astype('uint8') + frame_mod = cv2.merge((c_r,c_g,c_b)) + hsv=cv2.cvtColor(frame_mod,cv2.COLOR_RGB2HSV).astype('int64') + hsv[:,:,2] = hsv[:,:,2] + 25 + hsv = np.clip(hsv,0,255).astype('uint8') + frame_bright = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) + + sidebyside=np.hstack((frame,frame_mod,frame_bright)) + frame=frame_bright + + # ForkedPdb().set_trace() if len(frame.shape) == 3: result = np.zeros((size, size, frame.shape[2])) elif len(frame.shape) == 2: @@ -129,376 +149,384 @@ def normalize_real_kinect_image(frame,size=224): result[result < MIN_KINECT_DEPTH] = 0 return result.astype(frame.dtype) -class StretchDetectronObjectMask(Sensor): - def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - uuid = '{}_{}'.format(uuid, type) - self.noise = noise - self.cache = {'object_name': '', 'image':None, 'mask':None} - super().__init__(**prepare_locals_for_super(locals())) - self.source_camera = source_camera - - @lazy_property - def predictor(self): - self.cfg = get_cfg() - if platform.system() == "Darwin": - self.cfg.MODEL.DEVICE = "cpu" - # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library - self.cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # is this the model we want? Yes it is a pretty good model - self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 # set threshold for this model #TODO good number? - # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well - self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") - # self.predictor = DefaultPredictor(self.cfg) - self.class_labels = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).thing_classes - return DefaultPredictor(self.cfg) +""" +cv2.imshow('window2',cv2.cvtColor(sidebyside,cv2.COLOR_RGB2BGR)) +cv2.waitKey(0) +cv2.imshow('window',cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)) +cv2.waitKey(0) + +""" +# class StretchDetectronObjectMask(Sensor): +# def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# uuid = '{}_{}'.format(uuid, type) +# self.noise = noise +# self.cache = {'object_name': '', 'image':None, 'mask':None} +# super().__init__(**prepare_locals_for_super(locals())) +# self.source_camera = source_camera + +# @lazy_property +# def predictor(self): +# self.cfg = get_cfg() +# if platform.system() == "Darwin": +# self.cfg.MODEL.DEVICE = "cpu" +# # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library +# self.cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # is this the model we want? Yes it is a pretty good model +# self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 # set threshold for this model #TODO good number? +# # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well +# self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") +# # self.predictor = DefaultPredictor(self.cfg) +# self.class_labels = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).thing_classes +# return DefaultPredictor(self.cfg) + + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: + +# if self.type == 'source': +# info_to_search = 'source_object_id' +# elif self.type == 'destination': +# info_to_search = 'goal_object_id' +# #TODO remove +# mask = torch.zeros((224,224,1)) +# return mask + +# original_im = self.source_camera.get_observation(env, task, *args, **kwargs) + +# if info_to_search == self.cache['object_name']: +# if np.all(self.cache['image'] == original_im): +# return self.cache['mask'] +# else: +# self.cache = {} + + +# # TODO VERYYYYYY IMPORTANT +# im = original_im[:,:,::-1] +# # the detection requires BGR??? + +# outputs = self.predictor(im * 255.) + +# # remove +# # outputs = self.predictor(im * 255.) +# # cv2.imwrite('my_image.png',im * 255.) + +# # +# def visualize_detections(im, outputs ): +# v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2) +# out = v.draw_instance_predictions(outputs["instances"].to("cpu")) +# dir = 'experiment_output/visualization_predictions' +# timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f.png") +# os.makedirs(dir, exist_ok=True) +# print('saving all detections in ', os.path.join(dir, timestamp)) +# plt.imsave(os.path.join(dir, timestamp), out.get_image()[:, :, ::-1]) + + + +# category = task.task_info[info_to_search].split('|')[0] +# assert category in THOR2COCO +# class_ind_to_look_for = self.class_labels.index(THOR2COCO[category]) +# all_predicted_labels = outputs['instances'].pred_classes +# all_predicted_bbox = outputs['instances'].pred_boxes #TODO switch to segmentation + + + +# mask = torch.zeros((im.shape[0], im.shape[1])) +# valid_boxes = [all_predicted_bbox[i] for i in range(len(all_predicted_labels)) if all_predicted_labels[i] == class_ind_to_look_for] + +# for box in valid_boxes: +# x1, y1, x2, y2 = [int(x) for x in box.tensor.squeeze()] +# mask[y1:y2, x1:x2] = 1 +# mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0), size=(224, 224)).squeeze(0).squeeze(0) + +# #TODO save all the masks so we don't pass the image through detectyion multiple timesact + +# # print('camera', self.source_camera, 'type', self.type, 'category', category) +# # print('observed objects', [self.class_labels[x] for x in all_predicted_labels]) + +# #TODO this can be optimized because we are passing this image twice and no point + +# # use these later for visualization purposes? + +# # plt.imsave('something.png', im) +# mask = mask.unsqueeze(-1) +# self.cache = {'object_name': info_to_search, 'image':original_im, 'mask':mask} + +# return mask + + +# class StretchObjectMask(Sensor): +# def __init__(self, type: str,noise, uuid: str = "object_mask", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# uuid = '{}_{}'.format(uuid, type) +# self.noise = noise +# self.cache = None +# super().__init__(**prepare_locals_for_super(locals())) + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# if env.last_image_changed is False and self.cache is not None: +# return self.cache + +# # TODO remove +# # mask = np.zeros((224, 224, 1)) +# # return mask + + +# img = env.controller.last_event.frame +# resized_image = cv2.resize(img, dsize=(224,224)) +# global input_received, center_x, center_y +# center_x, center_y = -1, -1 +# input_received = False + +# def normalize_number(x, w): +# if x < 0: +# x = 0 +# if x > w: +# x = w +# return x + +# def clear_plot(): +# fig.clear(); fig.clf(); plt.clf(); ax.cla(); fig.clear(True) +# def close_plot(): +# clear_plot() +# plt.close(fig) +# plt.close('all') +# def onclick(event): +# global center_x, center_y, input_received +# if event.button == 3: #Right click +# center_x = -1 +# center_y = -1 +# input_received = True +# close_plot() +# if event.button == 1: #Left click +# center_x = event.xdata +# center_y = event.ydata +# input_received = True +# close_plot() + +# import matplotlib +# matplotlib.use('MacOSX') +# fig, ax = plt.subplots() +# # clear_plot() +# ax.imshow(resized_image) +# cid = fig.canvas.mpl_connect('button_press_event', onclick) +# plt.show() +# fig.canvas.mpl_disconnect(cid) +# close_plot() + + +# self.window_size = 20 #TODO do I want to change the size of this one maybe? +# mask = np.zeros((224, 224, 1)) +# if center_y == -1 and center_x == -1: +# mask[:,:] = 0. +# else: +# offset = self.window_size / 2 +# object_boundaries = center_x - offset, center_y - offset, center_x + offset, center_y + offset +# x1, y1, x2, y2 = [int(normalize_number(i, 224)) for i in object_boundaries] +# mask[y1:y2, x1:x2] = 1. +# self.cache = mask +# return mask + + + +# class RealStretchPickedUpObjSensor(Sensor): +# def __init__(self, uuid: str = "pickedup_object", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# super().__init__(**prepare_locals_for_super(locals())) + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# return False + +# class RealKinectArmPointNavEmulSensor(Sensor): + +# def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, arm_mask_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# self.mask_sensor = mask_sensor +# self.depth_sensor = depth_sensor +# self.arm_mask_sensor = arm_mask_sensor +# uuid = '{}_{}'.format(uuid, type) + +# self.min_xyz = np.zeros((3)) + +# self.dummy_answer = torch.zeros(3) +# self.dummy_answer[:] = 4 # is this good enough? +# self.device = torch.device("cpu") +# super().__init__(**prepare_locals_for_super(locals())) +# def get_camera_int_ext(self,env): +# #TODO all these values need to be checked +# fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think +# agent_state = env.controller.last_event.metadata['agent'] +# camera_horizon = 45 +# camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) +# camera_rotation = (agent_state['rotation']['y'] + 90) % 360 +# return fov, camera_horizon, camera_xyz, camera_rotation +# def get_observation( +# self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# #TODO remove +# if self.type == 'destination': +# return self.dummy_answer +# mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times? +# depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) + +# if task.num_steps_taken() == 0: +# self.pointnav_history_aggr = [] +# self.real_prev_location = None +# self.belief_prev_location = None + + +# fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) +# arm_world_coord = None + +# if mask.sum() != 0: + +# midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) +# if not torch.any(torch.isnan(midpoint_agent_coord) + torch.isinf(midpoint_agent_coord)): +# self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + +# arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice +# if arm_mask.sum() == 0: #Do we want to do some approximations or no? +# arm_world_coord = None #TODO approax for this +# else: +# arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) +# # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera +# # result = distance_in_agent_coord.cpu() +# # if arm_mask.sum() != 0 or mask.sum() != 0: +# # +# # import cv2 +# # cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1]) +# # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) +# # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) +# result = self.history_aggregation(camera_xyz, camera_rotation, arm_world_coord, task.num_steps_taken()) +# return result +# def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number): +# if len(self.pointnav_history_aggr) == 0 or arm_world_coord is None: +# return self.dummy_answer +# else: +# weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_weights = sum(weights) +# total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_sum = sum(total_sum) +# midpoint = total_sum / total_weights +# agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) +# midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) +# midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) +# midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + +# arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0)) +# arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) +# arm_state_agent_coord = torch.Tensor([arm_state_agent_coord['position'][k] for k in ['x','y','z']]) + +# distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord + +# return distance_in_agent_coord.cpu() + + +# class KinectArmMaskSensor(Sensor): +# def __init__(self, uuid: str = "arm_mask_kinect", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# super().__init__(**prepare_locals_for_super(locals())) + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# kinect_raw_value = env.kinect_raw_frame +# mask = get_binary_mask_of_arm(kinect_raw_value) +# mask = normalize_real_kinect_image(mask) +# return np.expand_dims(mask, axis=-1) + +# class RealIntelAgentBodyPointNavEmulSensor(Sensor): + +# def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# self.mask_sensor = mask_sensor +# self.depth_sensor = depth_sensor +# uuid = '{}_{}'.format(uuid, type) + +# self.min_xyz = np.zeros((3)) +# self.dummy_answer = torch.zeros(3) +# self.dummy_answer[:] = 4 # is this good enough? +# self.device = torch.device("cpu") + + +# super().__init__(**prepare_locals_for_super(locals())) +# def get_camera_int_ext(self,env): +# #TODO all these values need to be checked +# fov=max(INTEL_FOV_W, INTEL_FOV_H) #TODO are you sure? it should be smaller one I think +# agent_state = env.controller.last_event.metadata['agent'] +# camera_horizon = 0 +# camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) +# camera_rotation = agent_state['rotation']['y'] +# return fov, camera_horizon, camera_xyz, camera_rotation + + + + +# def get_observation( +# self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: + +# mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) + +# if task.num_steps_taken() == 0: +# self.pointnav_history_aggr = [] +# self.real_prev_location = None +# self.belief_prev_location = None + + +# fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) + + +# if mask.sum() != 0: +# depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) +# middle_of_object = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) +# self.pointnav_history_aggr.append((middle_of_object.cpu(), 1, task.num_steps_taken())) + +# # result = middle_of_object.cpu() +# # else: +# # result = self.dummy_answer +# result = self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) +# return result +# def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): +# if len(self.pointnav_history_aggr) == 0: +# return self.dummy_answer +# else: +# weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_weights = sum(weights) +# total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_sum = sum(total_sum) +# midpoint = total_sum / total_weights +# agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) +# midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) +# midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + +# distance_in_agent_coord = dict(x=midpoint_agent_coord['position']['x'], y=midpoint_agent_coord['position']['y'], z=midpoint_agent_coord['position']['z']) + +# agent_centric_middle_of_object = torch.Tensor([distance_in_agent_coord['x'], distance_in_agent_coord['y'], distance_in_agent_coord['z']]) - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - - if self.type == 'source': - info_to_search = 'source_object_id' - elif self.type == 'destination': - info_to_search = 'goal_object_id' - #TODO remove - mask = torch.zeros((224,224,1)) - return mask - - original_im = self.source_camera.get_observation(env, task, *args, **kwargs) - - if info_to_search == self.cache['object_name']: - if np.all(self.cache['image'] == original_im): - return self.cache['mask'] - else: - self.cache = {} - - - # TODO VERYYYYYY IMPORTANT - im = original_im[:,:,::-1] - # the detection requires BGR??? - - outputs = self.predictor(im * 255.) - - # remove - # outputs = self.predictor(im * 255.) - # cv2.imwrite('my_image.png',im * 255.) - - # - def visualize_detections(im, outputs ): - v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2) - out = v.draw_instance_predictions(outputs["instances"].to("cpu")) - dir = 'experiment_output/visualization_predictions' - timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f.png") - os.makedirs(dir, exist_ok=True) - print('saving all detections in ', os.path.join(dir, timestamp)) - plt.imsave(os.path.join(dir, timestamp), out.get_image()[:, :, ::-1]) - - - - category = task.task_info[info_to_search].split('|')[0] - assert category in THOR2COCO - class_ind_to_look_for = self.class_labels.index(THOR2COCO[category]) - all_predicted_labels = outputs['instances'].pred_classes - all_predicted_bbox = outputs['instances'].pred_boxes #TODO switch to segmentation - - - - mask = torch.zeros((im.shape[0], im.shape[1])) - valid_boxes = [all_predicted_bbox[i] for i in range(len(all_predicted_labels)) if all_predicted_labels[i] == class_ind_to_look_for] - - for box in valid_boxes: - x1, y1, x2, y2 = [int(x) for x in box.tensor.squeeze()] - mask[y1:y2, x1:x2] = 1 - mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0), size=(224, 224)).squeeze(0).squeeze(0) - - #TODO save all the masks so we don't pass the image through detectyion multiple timesact - - # print('camera', self.source_camera, 'type', self.type, 'category', category) - # print('observed objects', [self.class_labels[x] for x in all_predicted_labels]) - - #TODO this can be optimized because we are passing this image twice and no point - - # use these later for visualization purposes? - - # plt.imsave('something.png', im) - mask = mask.unsqueeze(-1) - self.cache = {'object_name': info_to_search, 'image':original_im, 'mask':mask} - - return mask - - -class StretchObjectMask(Sensor): - def __init__(self, type: str,noise, uuid: str = "object_mask", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - uuid = '{}_{}'.format(uuid, type) - self.noise = noise - self.cache = None - super().__init__(**prepare_locals_for_super(locals())) - - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - if env.last_image_changed is False and self.cache is not None: - return self.cache - - # TODO remove - # mask = np.zeros((224, 224, 1)) - return mask - - - img = env.controller.last_event.frame - resized_image = cv2.resize(img, dsize=(224,224)) - global input_received, center_x, center_y - center_x, center_y = -1, -1 - input_received = False - - def normalize_number(x, w): - if x < 0: - x = 0 - if x > w: - x = w - return x - - def clear_plot(): - fig.clear(); fig.clf(); plt.clf(); ax.cla(); fig.clear(True) - def close_plot(): - clear_plot() - plt.close(fig) - plt.close('all') - def onclick(event): - global center_x, center_y, input_received - if event.button == 3: #Right click - center_x = -1 - center_y = -1 - input_received = True - close_plot() - if event.button == 1: #Left click - center_x = event.xdata - center_y = event.ydata - input_received = True - close_plot() - - import matplotlib - matplotlib.use('MacOSX') - fig, ax = plt.subplots() - # clear_plot() - ax.imshow(resized_image) - cid = fig.canvas.mpl_connect('button_press_event', onclick) - plt.show() - fig.canvas.mpl_disconnect(cid) - close_plot() - - - self.window_size = 20 #TODO do I want to change the size of this one maybe? - mask = np.zeros((224, 224, 1)) - if center_y == -1 and center_x == -1: - mask[:,:] = 0. - else: - offset = self.window_size / 2 - object_boundaries = center_x - offset, center_y - offset, center_x + offset, center_y + offset - x1, y1, x2, y2 = [int(normalize_number(i, 224)) for i in object_boundaries] - mask[y1:y2, x1:x2] = 1. - self.cache = mask - return mask - - - -class RealStretchPickedUpObjSensor(Sensor): - def __init__(self, uuid: str = "pickedup_object", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - super().__init__(**prepare_locals_for_super(locals())) - - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - return False - -class RealKinectArmPointNavEmulSensor(Sensor): - - def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, arm_mask_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - self.mask_sensor = mask_sensor - self.depth_sensor = depth_sensor - self.arm_mask_sensor = arm_mask_sensor - uuid = '{}_{}'.format(uuid, type) - - self.min_xyz = np.zeros((3)) - - self.dummy_answer = torch.zeros(3) - self.dummy_answer[:] = 4 # is this good enough? - self.device = torch.device("cpu") - super().__init__(**prepare_locals_for_super(locals())) - def get_camera_int_ext(self,env): - #TODO all these values need to be checked - fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think - agent_state = env.controller.last_event.metadata['agent'] - camera_horizon = 45 - camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) - camera_rotation = (agent_state['rotation']['y'] + 90) % 360 - return fov, camera_horizon, camera_xyz, camera_rotation - def get_observation( - self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - #TODO remove - if self.type == 'destination': - return self.dummy_answer - mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times? - depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) - - if task.num_steps_taken() == 0: - self.pointnav_history_aggr = [] - self.real_prev_location = None - self.belief_prev_location = None - - - fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) - arm_world_coord = None - - if mask.sum() != 0: - - midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - if not torch.any(torch.isnan(midpoint_agent_coord) + torch.isinf(midpoint_agent_coord)): - self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) - - arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice - if arm_mask.sum() == 0: #Do we want to do some approximations or no? - arm_world_coord = None #TODO approax for this - else: - arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera - # result = distance_in_agent_coord.cpu() - # if arm_mask.sum() != 0 or mask.sum() != 0: - # - # import cv2 - # cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1]) - # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) - # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) - result = self.history_aggregation(camera_xyz, camera_rotation, arm_world_coord, task.num_steps_taken()) - return result - def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number): - if len(self.pointnav_history_aggr) == 0 or arm_world_coord is None: - return self.dummy_answer - else: - weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_weights = sum(weights) - total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_sum = sum(total_sum) - midpoint = total_sum / total_weights - agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) - midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) - midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) - midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) - - arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0)) - arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) - arm_state_agent_coord = torch.Tensor([arm_state_agent_coord['position'][k] for k in ['x','y','z']]) - - distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord - - return distance_in_agent_coord.cpu() - - -class KinectArmMaskSensor(Sensor): - def __init__(self, uuid: str = "arm_mask_kinect", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - super().__init__(**prepare_locals_for_super(locals())) - - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - kinect_raw_value = env.kinect_raw_frame - mask = get_binary_mask_of_arm(kinect_raw_value) - mask = normalize_real_kinect_image(mask) - return np.expand_dims(mask, axis=-1) - -class RealIntelAgentBodyPointNavEmulSensor(Sensor): - - def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - self.mask_sensor = mask_sensor - self.depth_sensor = depth_sensor - uuid = '{}_{}'.format(uuid, type) - - self.min_xyz = np.zeros((3)) - self.dummy_answer = torch.zeros(3) - self.dummy_answer[:] = 4 # is this good enough? - self.device = torch.device("cpu") - - - super().__init__(**prepare_locals_for_super(locals())) - def get_camera_int_ext(self,env): - #TODO all these values need to be checked - fov=max(INTEL_FOV_W, INTEL_FOV_H) #TODO are you sure? it should be smaller one I think - agent_state = env.controller.last_event.metadata['agent'] - camera_horizon = 0 - camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) - camera_rotation = agent_state['rotation']['y'] - return fov, camera_horizon, camera_xyz, camera_rotation - - - - - def get_observation( - self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - - mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) - - if task.num_steps_taken() == 0: - self.pointnav_history_aggr = [] - self.real_prev_location = None - self.belief_prev_location = None - - - fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) - - - if mask.sum() != 0: - depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) - middle_of_object = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - self.pointnav_history_aggr.append((middle_of_object.cpu(), 1, task.num_steps_taken())) - - # result = middle_of_object.cpu() - # else: - # result = self.dummy_answer - result = self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) - return result - def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): - if len(self.pointnav_history_aggr) == 0: - return self.dummy_answer - else: - weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_weights = sum(weights) - total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_sum = sum(total_sum) - midpoint = total_sum / total_weights - agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) - midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) - midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) - - distance_in_agent_coord = dict(x=midpoint_agent_coord['position']['x'], y=midpoint_agent_coord['position']['y'], z=midpoint_agent_coord['position']['z']) - - agent_centric_middle_of_object = torch.Tensor([distance_in_agent_coord['x'], distance_in_agent_coord['y'], distance_in_agent_coord['z']]) - - agent_centric_middle_of_object = agent_centric_middle_of_object - return agent_centric_middle_of_object +# agent_centric_middle_of_object = agent_centric_middle_of_object +# return agent_centric_middle_of_object diff --git a/utils/stretch_utils/stretch_constants.py b/utils/stretch_utils/stretch_constants.py index 5cdbc06..88ad531 100644 --- a/utils/stretch_utils/stretch_constants.py +++ b/utils/stretch_utils/stretch_constants.py @@ -16,6 +16,7 @@ STRETCH_MANIPULATHOR_COMMIT_ID = '09b6ccf74558395a231927dee8be3b8c83b52ef7' #bigger fov # PROCTHOR_COMMIT_ID = '0eddca46783a788bfce69b146c496d931c981ae4' PROCTHOR_COMMIT_ID = '996a369b5484c7037d3737906be81b84a52473a0' #after the arm destroy bug +UPDATED_PROCTHOR_COMMIT_ID = 'fd95db6135273a8ab2e35d4d350dd556c8ced655' STRETCH_ENV_ARGS = dict( gridSize=0.25, diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index 51e3394..237b6e9 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -14,6 +14,8 @@ from torch.distributions.utils import lazy_property from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment +from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal + from utils.stretch_utils.stretch_constants import ( ADITIONAL_ARM_ARGS, PICKUP, DONE, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, MOVE_BACK, MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M, MOVE_ARM_Z_P, MOVE_ARM_Z_M, MOVE_WRIST_P, MOVE_WRIST_M, MOVE_WRIST_P_SMALL, MOVE_WRIST_M_SMALL, ROTATE_LEFT_SMALL, ROTATE_RIGHT_SMALL, @@ -111,6 +113,20 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics + if 'motion_noise_type' in env_args.keys(): + if env_args['motion_noise_type'] == 'habitat': + self.noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args']) + elif env_args['motion_noise_type'] == 'simple1d': + self.noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args']) + else: + print('Unrecognized motion noise model type') + ForkedPdb().set_trace() + else: + self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model + + self.ahead_nominal = AGENT_MOVEMENT_CONSTANT + self.rotate_nominal = AGENT_ROTATION_DEG + # self.start(None) @@ -121,6 +137,8 @@ def __init__( self.MEMORY_SIZE = 5 # self.memory_frames = [] + self.list_of_actions_so_far = [] + if "quality" not in self.env_args: self.env_args["quality"] = self._quality @@ -185,46 +203,55 @@ def reset( scene_name = self.controller.last_event.metadata["sceneName"] # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem - # to solve the crash issue - # why do we still have this crashing problem? - try: - reset_environment_and_additional_commands(self.controller, scene_name) - except Exception as e: - print("RESETTING THE SCENE,", scene_name, 'because of', str(e)) - self.controller = self.create_controller() - reset_environment_and_additional_commands(self.controller, scene_name) - - if self.object_open_speed != 1.0: - self.controller.step( - {"action": "ChangeOpenSpeed", "x": self.object_open_speed} - ) - - self._initially_reachable_points = None - self._initially_reachable_points_set = None - self.controller.step({"action": "GetReachablePositions"}) - if not self.controller.last_event.metadata["lastActionSuccess"]: - warnings.warn( - "Error when getting reachable points: {}".format( - self.controller.last_event.metadata["errorMessage"] + if self.env_args['agentMode'] is not 'stretch': + self.controller.reset(scene_name) + elif scene_name=='Procedural': + self.controller.reset() + else: + + # to solve the crash issue + # why do we still have this crashing problem? + try: + reset_environment_and_additional_commands(self.controller, scene_name) + except Exception as e: + print("RESETTING THE SCENE,", scene_name, 'because of', str(e)) + self.controller = self.create_controller() + reset_environment_and_additional_commands(self.controller, scene_name) + + if self.object_open_speed != 1.0: + self.controller.step( + {"action": "ChangeOpenSpeed", "x": self.object_open_speed} ) - ) - self._initially_reachable_points = self.last_action_return + + self._initially_reachable_points = None + self._initially_reachable_points_set = None + self.controller.step({"action": "GetReachablePositions"}) + if not self.controller.last_event.metadata["lastActionSuccess"]: + warnings.warn( + "Error when getting reachable points: {}".format( + self.controller.last_event.metadata["errorMessage"] + ) + ) + self._initially_reachable_points = self.last_action_return self.list_of_actions_so_far = [] + self.noise_model.reset_noise_model() + self.nominal_agent_location = self.get_agent_location() + def check_controller_version(self, controller=None): controller_to_check = controller if controller_to_check is None: controller_to_check = self.controller - if STRETCH_MANIPULATHOR_COMMIT_ID is not None: - assert ( - self.env_args['commit_id'] in controller_to_check._build.url - ), "Build number is not right, {} vs {}, use pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format( - controller_to_check._build.url, - STRETCH_MANIPULATHOR_COMMIT_ID, - STRETCH_MANIPULATHOR_COMMIT_ID, - ) + # if STRETCH_MANIPULATHOR_COMMIT_ID is not None: + # assert ( + # self.env_args['commit_id'] in controller_to_check._build.url + # ), "Build number is not right, {} vs {}, use pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format( + # controller_to_check._build.url, + # STRETCH_MANIPULATHOR_COMMIT_ID, + # STRETCH_MANIPULATHOR_COMMIT_ID, + # ) def create_controller(self): assert 'commit_id' in self.env_args @@ -292,6 +319,42 @@ def get_pickupable_objects(self): object_list = event.metadata["arm"]["pickupableObjects"] return object_list + + def update_nominal_location(self, action_dict): + # location = { + # "x": metadata["agent"]["position"]["x"], + # "y": metadata["agent"]["position"]["y"], + # "z": metadata["agent"]["position"]["z"], + # "rotation": metadata["agent"]["rotation"]["y"], + # "horizon": metadata["agent"]["cameraHorizon"], + # "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")), + # } + + curr_loc = self.nominal_agent_location + new_loc = copy.deepcopy(curr_loc) + + if action_dict['action'] is 'RotateLeft': + new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360 + if action_dict['action'] is 'RotateLeftSmall': + new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal/5) % 360 + if action_dict['action'] is 'RotateRight': + new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360 + elif action_dict['action'] is 'RotateRightSmall': + new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal/5) % 360 + elif action_dict['action'] is 'MoveAhead': + new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + elif action_dict['action'] is 'MoveBack': + new_loc["x"] -= self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + new_loc["z"] -= self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + elif action_dict['action'] is 'TeleportFull': + new_loc["x"] = action_dict['x'] + new_loc["y"] = action_dict['y'] + new_loc["z"] = action_dict['z'] + new_loc["rotation"] = action_dict['rotation']['y'] + new_loc["horizon"] = action_dict['horizon'] + + self.nominal_agent_location = new_loc def step( self, action_dict: Dict[str, Union[str, int, float]] @@ -299,6 +362,20 @@ def step( """Take a step in the ai2thor environment.""" action = typing.cast(str, action_dict["action"]) + original_action_dict = copy.deepcopy(action_dict) + + if self.env_args['agentMode']=='locobot' or self.env_args['agentMode']=='default': + sr = self.controller.step(action_dict) + self.list_of_actions_so_far.append(action_dict) + + if self._verbose: + print(self.controller.last_event) + + if self.restrict_to_initially_reachable_points: + self._snap_agent_to_initially_reachable() + + return sr + skip_render = "renderImage" in action_dict and not action_dict["renderImage"] last_frame: Optional[np.ndarray] = None @@ -329,23 +406,87 @@ def step( action_dict = { 'action': 'Pass' } # we have to change the last action success if the pik up fails, we do that in the task now - - elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_LEFT, ROTATE_RIGHT]: + + elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, ROTATE_RIGHT_SMALL, ROTATE_LEFT_SMALL]: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) + + # RH: order matters, nominal action happens last action_dict = {**action_dict, **copy_aditions} - if action == MOVE_AHEAD: + # ForkedPdb().set_trace() + if action in [MOVE_AHEAD]: + noise = self.noise_model.get_ahead_drift(self.ahead_nominal) + + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + action_dict = {**copy_aditions} + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + self.ahead_nominal + action_dict["right"] = noise[1] + + elif action in [MOVE_BACK]: + noise = self.noise_model.get_ahead_drift(self.ahead_nominal) + # TODO revisit - make sense to sample the same for ahead and back? + # inclination is effect matters less than currently unmodeled hysteresis effects + + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + action_dict = {**copy_aditions} action_dict["action"] = "MoveAgent" - action_dict["ahead"] = AGENT_MOVEMENT_CONSTANT - elif action == MOVE_BACK: + action_dict["ahead"] = noise[0] - self.ahead_nominal + action_dict["right"] = noise[1] + + + elif action in [ROTATE_RIGHT]: + noise = self.noise_model.get_rotate_drift() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = -AGENT_MOVEMENT_CONSTANT - elif action == ROTATE_RIGHT: + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = {**copy_aditions} action_dict["action"] = "RotateAgent" - action_dict["degrees"] = AGENT_ROTATION_DEG + action_dict["degrees"] = noise[2] + self.rotate_nominal - elif action == ROTATE_LEFT: + elif action in [ROTATE_LEFT]: + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = {**copy_aditions} action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -AGENT_ROTATION_DEG + action_dict["degrees"] = noise[2] - self.rotate_nominal + + elif action in [ROTATE_RIGHT_SMALL]: + # RH: lesser scaling noise is deliberate. Small actions are harder to be accurate + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0]/2 + action_dict["right"] = noise[1]/2 + sr = self.controller.step(action_dict) + + action_dict = {**copy_aditions} + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2]/2 + self.rotate_nominal / 5 + + elif action in [ROTATE_LEFT_SMALL]: + # RH: lesser scaling noise is deliberate. Small actions are harder to be accurate + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0]/2 + action_dict["right"] = noise[1]/2 + sr = self.controller.step(action_dict) + + action_dict = {**copy_aditions} + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2]/2 - self.rotate_nominal / 5 + + elif action in [MOVE_ARM_HEIGHT_P,MOVE_ARM_HEIGHT_M,MOVE_ARM_Z_P,MOVE_ARM_Z_M,]: base_position = get_relative_stretch_current_arm_state(self.controller) change_value = ARM_MOVE_CONSTANT @@ -367,17 +508,16 @@ def step( action_dict = dict(action='RotateWristRelative', yaw=-WRIST_ROTATION / 5) elif action == MOVE_WRIST_M_SMALL: action_dict = dict(action='RotateWristRelative', yaw=WRIST_ROTATION / 5) - elif action == ROTATE_LEFT_SMALL: - action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -AGENT_ROTATION_DEG / 5 - elif action == ROTATE_RIGHT_SMALL: - action_dict["action"] = "RotateAgent" - action_dict["degrees"] = AGENT_ROTATION_DEG / 5 - sr = self.controller.step(action_dict) + sr_nominal = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) + # RH: Nominal location only updates for successful actions. Note that that drift + # action might succeed even if the "main" action fails + if sr_nominal.metadata["lastActionSuccess"]: + self.update_nominal_location(original_action_dict) + if self._verbose: print(self.controller.last_event) @@ -388,4 +528,18 @@ def step( assert last_frame is not None self.last_event.frame = last_frame - return sr + if not sr_nominal.metadata["lastActionSuccess"] and action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, ROTATE_RIGHT_SMALL, ROTATE_LEFT_SMALL]: + # if the action fails, sample the noise model for a turn + # does this mess up metadata? and is this reasonable? what action failure modes happen in sim vs real? + noise = self.noise_model.get_rotate_drift() + action_dict = {**copy.deepcopy(ADITIONAL_ARM_ARGS)} + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + action_dict = {**copy.deepcopy(ADITIONAL_ARM_ARGS)} + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + return sr_nominal diff --git a/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py b/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py new file mode 100644 index 0000000..687685f --- /dev/null +++ b/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py @@ -0,0 +1,178 @@ +import typing +from typing import Dict, Union, Optional +import numpy as np + +import ai2thor.server +from ai2thor.controller import Controller +from allenact.utils.system import get_logger + +from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment +from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment + +from utils.stretch_utils.stretch_constants import ( + DONE, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, MOVE_BACK, +) +from scripts.stretch_jupyter_helper import AGENT_MOVEMENT_CONSTANT, AGENT_ROTATION_DEG, remove_nan_inf_for_frames +from utils.stretch_utils.stretch_sim2real_utils import kinect_reshape, intel_reshape + + +class MinimalStretchManipulaTHOREnvironment(ManipulaTHOREnvironment): + + @property + def kinect_frame(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + frame = self.controller.last_event.third_party_camera_frames[0].copy() + frame = remove_nan_inf_for_frames(frame, 'kinect_frame') + return kinect_reshape(frame) + @property + def kinect_depth(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + if self.controller.last_event.third_party_depth_frames[0]: + return None + depth_frame = self.controller.last_event.third_party_depth_frames[0].copy() + depth_frame = remove_nan_inf_for_frames(depth_frame, 'depth_kinect') + + # #TODO remove + if np.sum(depth_frame != self.controller.last_event.third_party_depth_frames[0].copy()) > 10: + raise Exception('Depth is nan again even after removing nan?') + + return kinect_reshape(depth_frame) + + @property + def intel_frame(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + frame = self.controller.last_event.frame.copy() + frame = remove_nan_inf_for_frames(frame, 'intel_frame') + return intel_reshape(frame) + @property + def intel_depth(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + if self.controller.last_event.depth_frame is None: + return None + depth_frame = self.controller.last_event.depth_frame.copy() + depth_frame = remove_nan_inf_for_frames(depth_frame, 'depth_intel') + return intel_reshape(depth_frame) + + def step( + self, action_dict: Dict[str, Union[str, int, float]] + ) -> ai2thor.server.Event: + """Take a step in the ai2thor environment.""" + + action = typing.cast(str, action_dict["action"]) + + skip_render = "renderImage" in action_dict and not action_dict["renderImage"] + last_frame: Optional[np.ndarray] = None + if skip_render: + last_frame = self.current_frame + + if self.simplify_physics: + action_dict["simplifyOPhysics"] = True + if action in [DONE]: + action_dict = { + 'action': 'Pass' + } # we have to change the last action success if the pik up fails, we do that in the task now + + elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_LEFT, ROTATE_RIGHT]: + if action == MOVE_AHEAD: + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = AGENT_MOVEMENT_CONSTANT + elif action == MOVE_BACK: + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = -AGENT_MOVEMENT_CONSTANT + elif action == ROTATE_RIGHT: + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = AGENT_ROTATION_DEG + + elif action == ROTATE_LEFT: + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = -AGENT_ROTATION_DEG + + + sr = self.controller.step(action_dict) + self.list_of_actions_so_far.append(action_dict) + + if self._verbose: + print(self.controller.last_event) + + if self.restrict_to_initially_reachable_points: + self._snap_agent_to_initially_reachable() + + if skip_render: + assert last_frame is not None + self.last_event.frame = last_frame + + return sr + + def create_controller(self): + assert 'commit_id' in self.env_args, 'No commit id is specified' + controller = Controller(**self.env_args) + return controller + + def start( + self, + scene_name: Optional[str], + move_mag: float = 0.25, + **kwargs, + ) -> None: + """Starts the ai2thor controller if it was previously stopped. + + After starting, `reset` will be called with the scene name and move magnitude. + + # Parameters + + scene_name : The scene to load. + move_mag : The amount of distance the agent moves in a single `MoveAhead` step. + kwargs : additional kwargs, passed to reset. + """ + if self._started: + raise RuntimeError( + "Trying to start the environment but it is already started." + ) + + self.controller = self.create_controller() + + if ( + self._start_player_screen_height, + self._start_player_screen_width, + ) != self.current_frame.shape[:2]: + self.controller.step( + { + "action": "ChangeResolution", + "x": self._start_player_screen_width, + "y": self._start_player_screen_height, + } + ) + + self._started = True + self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs) + + def reset( + self, + scene_name: Optional[str], + move_mag: float = 0.25, + **kwargs, + ): + self._move_mag = move_mag + self._grid_size = self._move_mag + # self.memory_frames = [] + + if scene_name is None: + scene_name = self.controller.last_event.metadata["sceneName"] + + if self.object_open_speed != 1.0: + self.controller.step( + {"action": "ChangeOpenSpeed", "x": self.object_open_speed} + ) + + self._initially_reachable_points = None + self._initially_reachable_points_set = None + self.controller.step({"action": "GetReachablePositions"}) + if not self.controller.last_event.metadata["lastActionSuccess"]: + get_logger().warning( + "Error when getting reachable points: {}".format( + self.controller.last_event.metadata["errorMessage"] + ) + ) + self._initially_reachable_points = self.last_action_return + + self.list_of_actions_so_far = [] \ No newline at end of file diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py similarity index 64% rename from utils/procthor_utils/procthor_object_nav_tasks.py rename to utils/stretch_utils/stretch_object_nav_tasks.py index e2e334b..13d9ef0 100644 --- a/utils/procthor_utils/procthor_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -1,10 +1,17 @@ +from ast import For +from dis import dis +from email.encoders import encode_noop import random -from collections import Counter from typing import Any, Dict, List, Optional, Tuple, Union from typing_extensions import Literal +import signal +import time + import gym +from importlib_metadata import Lookup import numpy as np +from sklearn.metrics import recall_score import torch from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor @@ -25,9 +32,12 @@ ROTATE_LEFT_SMALL ) from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment -# from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment from manipulathor_utils.debugger_util import ForkedPdb +from datetime import datetime + +def handler(signum, frame): + raise Exception("Controller call took too long") class ObjectNavTask(Task[ManipulaTHOREnvironment]): _actions = ( @@ -86,6 +96,8 @@ def __init__( self.dist_to_target_func = self.min_geo_distance_to_target elif distance_type == "l2": self.dist_to_target_func = self.min_l2_distance_to_target + elif distance_type == "real_world": + self.dist_to_target_func = self.dummy_distance_to_target # maybe placeholder here for estimation later else: raise NotImplementedError @@ -123,7 +135,7 @@ def min_l2_distance_to_target(self) -> float: if min_dist == float("inf"): get_logger().error( f"No target object {self.task_info['object_type']} found" - f" in house {self.task_info['house_name']}." + f" in house {self.task_info['scene_name']}." ) return -1.0 return min_dist @@ -139,7 +151,7 @@ def min_geo_distance_to_target(self) -> float: env=self.env, distance_cache=self.distance_cache, object_id=object_id, - house_name=self.task_info["house_name"], + house_name=self.task_info["scene_name"], ) if (min_dist is None and geo_dist >= 0) or ( geo_dist >= 0 and geo_dist < min_dist @@ -149,6 +161,8 @@ def min_geo_distance_to_target(self) -> float: return -1.0 return min_dist + def dummy_distance_to_target(self) -> float: + return float("inf") def start_visualize(self): for visualizer in self.visualizers: @@ -213,7 +227,40 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] + and self.dist_to_target_func() < self.task_info['success_distance'] ) + + def calc_action_stat_metrics(self) -> Dict[str, Any]: + action_stat = { + "metric/action_stat/" + action_str: 0.0 for action_str in self._actions + } + action_success_stat = { + "metric/action_success/" + action_str: 0.0 for action_str in self._actions + } + action_success_stat["metric/action_success/total"] = 0.0 + + seq_len = len(self.task_info["taken_actions"]) + + for (action_name, action_success) in list(zip( + self.task_info["taken_actions"], + self.task_info["action_successes"])): + action_stat["metric/action_stat/" + action_name] += 1.0 + + action_success_stat[ + "metric/action_success/{}".format(action_name) + ] += action_success + action_success_stat["metric/action_success/total"] += action_success + + action_success_stat["metric/action_success/total"] /= seq_len + + for action_name in self._actions: + action_success_stat[ + "metric/" + "action_success/{}".format(action_name) + ] /= (action_stat["metric/action_stat/" + action_name] + 0.000001) + action_stat["metric/action_stat/" + action_name] /= seq_len + + result = {**action_stat, **action_success_stat} + return result def shaping(self) -> float: if self.reward_config['shaping_weight'] == 0.0: @@ -235,7 +282,7 @@ def shaping(self) -> float: max_reward_mag = position_dist(p0, p1, ignore_y=True) if ( - self.reward_config.positive_only_reward + self.reward_config['positive_only_reward'] and cur_distance is not None and cur_distance > 0.5 ): @@ -251,7 +298,7 @@ def shaping(self) -> float: min(reward, max_reward_mag), -max_reward_mag, ) - * self.reward_config.shaping_weight + * self.reward_config['shaping_weight'] ) def get_observations(self, **kwargs) -> Any: @@ -292,6 +339,7 @@ def metrics(self) -> Dict[str, Any]: if self.is_done(): result={} # placeholder for future metrics = super().metrics() + metrics = {**metrics, **self.calc_action_stat_metrics()} metrics["dist_to_target"] = self.dist_to_target_func() metrics["total_reward"] = np.sum(self._rewards) metrics["spl"] = spl_metric( @@ -327,8 +375,11 @@ def _step(self, action: int) -> RLStepResult: self.task_info["action_successes"].append(True) else: action_dict = {"action": action_str} - self.env.step(action_dict) - self.last_action_success = bool(self.env.last_event) + sr = self.env.step(action_dict) + # RH NOTE: this is an important change from procthor because of how the noise model + # is implemented. the env step return corresponds to the nominal/called + # action, which may or may not be the last thing the controller did. + self.last_action_success = bool(sr.metadata["lastActionSuccess"]) position = self.env.last_event.metadata["agent"]["position"] self.path.append(position) @@ -390,7 +441,85 @@ class StretchObjectNavTask(ObjectNavTask): DONE, ) -class ExploreWiseObjectNavTask(ObjectNavTask): +class StretchNeckedObjectNavTask(ObjectNavTask): + _actions = ( + MOVE_AHEAD, + # MOVE_BACK, + ROTATE_RIGHT, + ROTATE_LEFT, + "LookUp", + "LookDown", + DONE, + ) + +class StretchNeckedObjectNavTaskUpdateOrder(ObjectNavTask): + # for evaluating weights from Matt + _actions = ( + MOVE_AHEAD, + ROTATE_LEFT, + ROTATE_RIGHT, + DONE, + "LookUp", + "LookDown" + ) + +class StretchObjectNavTaskSegmentationSuccess(StretchObjectNavTask): + def _is_goal_in_range(self) -> bool: + all_kinect_masks = self.env.controller.last_event.third_party_instance_masks[0] + for object_id in self.task_info["target_object_ids"]: + if object_id in all_kinect_masks and self.dist_to_target_func() < self.task_info['success_distance']: + return True + + return False + +class StretchObjectNavTaskSegmentationSuccessActionFail(StretchObjectNavTaskSegmentationSuccess): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.recent_three_strikes = 0 + + def _step(self, action: int) -> RLStepResult: + sr = super()._step(action) + + if self.last_action_success or self._took_end_action: + self.recent_three_strikes = 0 + return sr + elif self.recent_three_strikes < 2: + self.recent_three_strikes += 1 + return sr + else: + # print('Task ended for repeated action failure') + # ForkedPdb().set_trace() + self._took_end_action = True + step_result = RLStepResult( + observation=sr.observation, + reward=sr.reward - self.reward_config['got_stuck_penalty'], + done=self.is_done(), + info={"last_action_success": self.last_action_success, "action": action}, + ) + return step_result + + def judge(self) -> float: + """Judge the last event.""" + reward = self.reward_config['step_penalty'] + if not self.last_action_success: + reward += self.reward_config['failed_action_penalty'] + + reward += self.shaping() + + if self._took_end_action: + if self._success: + reward += self.reward_config['goal_success_reward'] + else: + reward += self.reward_config['failed_stop_reward'] + elif self.num_steps_taken() + 1 >= self.max_steps: + reward += self.reward_config['reached_horizon_reward'] + + self._rewards.append(float(reward)) + return float(reward) + + + +class ExploreWiseObjectNavTask(StretchObjectNavTaskSegmentationSuccessActionFail): def __init__(self, **kwargs): super().__init__(**kwargs) all_locations = [[k['x'], k['y'], k['z']] for k in (self.env.get_reachable_positions())] @@ -400,7 +529,13 @@ def __init__(self, **kwargs): def judge(self) -> float: """Compute the reward after having taken a step.""" - reward = self.reward_configs["step_penalty"] + reward = self.reward_config["step_penalty"] + + # additional scaling step penalty, thresholds at half max steps at the failed action penalty + reward += -0.2 * (1.1)**(np.min([-(self.max_steps/2)+self.num_steps_taken(),0])) + + if not self.last_action_success: + reward += self.reward_config['failed_action_penalty'] current_agent_location = self.env.get_agent_location() current_agent_location = torch.Tensor([current_agent_location['x'], current_agent_location['y'], current_agent_location['z']]) @@ -414,8 +549,8 @@ def judge(self) -> float: self.has_visited[location_index] = 1 if visited_new_place: - reward += self.reward_configs["exploration_reward"] - + reward += self.reward_config["exploration_reward"] + reward += self.shaping() if self._took_end_action: @@ -427,9 +562,77 @@ def judge(self) -> float: reward += self.reward_config['reached_horizon_reward'] self._rewards.append(float(reward)) return float(reward) + + def metrics(self) -> Dict[str, Any]: + result = super(ExploreWiseObjectNavTask, self).metrics() + + if self.is_done(): + result['percent_room_visited'] = self.has_visited.mean().item() + result['new_locations_visited'] = self.has_visited.sum().item() + return result +class RealStretchObjectNavTask(StretchObjectNavTask): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.start_time = datetime.now() + self.last_time = None + signal.signal(signal.SIGALRM, handler) + + def _step(self, action: int) -> RLStepResult: + action_str = self.class_action_names()[action] + print('Model Said', action_str, ' as action ', str(self.num_steps_taken())) + + self.manual_action = False + + # add/remove/adjust to allow graceful exit from auto-battlebots + end_ep_early = False + if self.num_steps_taken() % 10 == 0: + print('verify continue? set end_ep_early=True to gracefully fail out') + ForkedPdb().set_trace() + + self._last_action_str = action_str + action_dict = {"action": action_str} + signal.alarm(8) # second timeout - catch missed server connection. THIS IS NOT THREAD SAFE + try: + self.env.step(action_dict) + except: + print('Controller call took too long, continue to try again or set end_ep_early to fail out instead') + ForkedPdb().set_trace() + self.env.step(action_dict) + + signal.alarm(0) + + if action_str == "Done" or end_ep_early: + self._took_end_action = True + dt_total = (datetime.now() - self.start_time).total_seconds()/60 + print('I think I found a ', self.task_info['object_type'], ' after ', str(dt_total), ' minutes.' ) + print('Was I correct? Set self._success in trace. Default false.') + ForkedPdb().set_trace() + + if self.last_time is not None: + dt = (datetime.now() - self.last_time).total_seconds() + print('FPS: ', str(1/dt)) + self.last_time = datetime.now() + + self.last_action_success = self.env.last_action_success + + last_action_name = self._last_action_str + self.visualize(last_action_name) + + step_result = RLStepResult( + observation=self.get_observations(), + reward=self.judge(), + done=self.is_done(), + info={"last_action_success": self.last_action_success}, + ) + return step_result + + def judge(self) -> float: + """Compute the reward after having taken a step.""" + reward = 0 + return reward diff --git a/utils/stretch_utils/stretch_thor_sensors.py b/utils/stretch_utils/stretch_thor_sensors.py index 4781bbe..f3191b1 100644 --- a/utils/stretch_utils/stretch_thor_sensors.py +++ b/utils/stretch_utils/stretch_thor_sensors.py @@ -25,12 +25,13 @@ # from ithor_arm.ithor_arm_environment import StretchManipulaTHOREnvironment # from ithor_arm.ithor_arm_sensors import DepthSensorThor # from ithor_arm.near_deadline_sensors import calc_world_coordinates -from utils.calculation_utils import calc_world_coordinates +from utils.calculation_utils import calc_world_coordinates, calc_world_xyz_from_agent_relative, get_mid_point_of_object_from_depth_and_mask from manipulathor_utils.debugger_util import ForkedPdb from utils.noise_in_motion_util import squeeze_bool_mask from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment from utils.stretch_utils.stretch_sim2real_utils import kinect_reshape, intel_reshape +from scripts.stretch_jupyter_helper import get_relative_stretch_current_arm_state @@ -428,6 +429,227 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state, current_step_nu return agent_centric_middle_of_object + + +class AgentBodyPointNavEmulSensorDeadReckoning(Sensor): + + def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): + observation_space = gym.spaces.Box( + low=0, high=1, shape=(1,), dtype=np.float32 + ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) + self.type = type + self.mask_sensor = mask_sensor + self.depth_sensor = depth_sensor + uuid = '{}_{}'.format(uuid, type) + + self.min_xyz = np.zeros((3)) + self.dummy_answer = torch.zeros(3) + self.dummy_answer[:] = 4 # is this good enough? + self.device = torch.device("cpu") + + super().__init__(**prepare_locals_for_super(locals())) + + + def get_relative_nominal_locations(self,env): + if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1: + print('Warning multiple cameras') + + true_base = env.get_agent_location() + true_base = copy.deepcopy(dict(position=dict(x=true_base['x'], y=true_base['y'],z=true_base['z']), + rotation=dict(x=0,y=true_base['rotation'], z=0))) + + nominal_base = env.nominal_agent_location + nominal_base_xyz = np.array([nominal_base['x'],nominal_base['y'],nominal_base['z']]) + nominal_base = copy.deepcopy(dict(position=dict(x=nominal_base['x'], y=nominal_base['y'],z=nominal_base['z']), + rotation=dict(x=0,y=nominal_base['rotation'], z=0))) + + m = copy.deepcopy(env.controller.last_event.metadata) + + wrist = env.get_absolute_hand_state() + relative_wrist = convert_world_to_agent_coordinate(wrist,true_base) + + camera1 = m["cameraPosition"] + camera1 = dict(position=dict(x=camera1['x'], y=camera1['y'],z=camera1['z']), rotation=dict(x=0,y=0, z=0)) + relative_camera_1 = convert_world_to_agent_coordinate(camera1,true_base) + + camera2 = m['thirdPartyCameras'][0] + relative_camera_2 = convert_world_to_agent_coordinate(camera2,true_base) + + relative_list = [relative_wrist,relative_camera_1,relative_camera_2] + nominal_list = calc_world_xyz_from_agent_relative(relative_list,nominal_base_xyz,nominal_base['rotation']['y'],self.device) + + return dict(agent=nominal_base, wrist=nominal_list[0],camera1=nominal_list[1],camera2=nominal_list[2]) + + def get_agent_belief_state(self,env): + fov = env.controller.last_event.metadata['fov'] # assumption: real one is fine here + real_agent_state = env.get_agent_location() + nominal_positions = self.get_relative_nominal_locations(env) + + belief_camera_horizon = env.controller.last_event.metadata["agent"]["cameraHorizon"] # assumption: real one is fine here + belief_camera_xyz = np.array([nominal_positions['camera1']['position'][k] for k in ['x','y','z']]) + belief_camera_rotation = (nominal_positions['agent']['rotation']['y']) % 360 + + real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) + + self.belief_prev_location.append(belief_camera_xyz) + self.real_prev_location.append(real_camera_xyz) + arm_state = nominal_positions['wrist']# get_relative_stretch_current_arm_state(env.controller) + + return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state + + def get_observation( + self, env: StretchManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any + ) -> Any: + + mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) + depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs) + if task.num_steps_taken() == 0: + self.pointnav_history_aggr = [] + self.real_prev_location = [] + self.belief_prev_location = [] + + fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env) + + if mask.sum() != 0: + midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + + return self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) + + def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): + if len(self.pointnav_history_aggr) == 0: + return self.dummy_answer + else: + weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_weights = sum(weights) + total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_sum = sum(total_sum) + midpoint = total_sum / total_weights + agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) + midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) + midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + + return midpoint_agent_coord.cpu() + + +class ArmPointNavEmulSensorDeadReckoning(Sensor): + + def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any): + observation_space = gym.spaces.Box( + low=0, high=1, shape=(1,), dtype=np.float32 + ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) + self.type = type + self.mask_sensor = mask_sensor + self.depth_sensor = depth_sensor + uuid = '{}_{}'.format(uuid, type) + + self.min_xyz = np.zeros((3)) + + self.dummy_answer = torch.zeros(3) + self.dummy_answer[:] = 4 # is this good enough? + self.device = torch.device("cpu") + super().__init__(**prepare_locals_for_super(locals())) + + def get_relative_nominal_locations(self,env): + if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1: + print('Warning multiple cameras') + + true_base = env.get_agent_location() + true_base = copy.deepcopy(dict(position=dict(x=true_base['x'], y=true_base['y'],z=true_base['z']), + rotation=dict(x=0,y=true_base['rotation'], z=0))) + + nominal_base = env.nominal_agent_location + nominal_base_xyz = np.array([nominal_base['x'],nominal_base['y'],nominal_base['z']]) + nominal_base = copy.deepcopy(dict(position=dict(x=nominal_base['x'], y=nominal_base['y'],z=nominal_base['z']), + rotation=dict(x=0,y=nominal_base['rotation'], z=0))) + + m = copy.deepcopy(env.controller.last_event.metadata) + + wrist = env.get_absolute_hand_state() + relative_wrist = convert_world_to_agent_coordinate(wrist,true_base) + + camera1 = m["cameraPosition"] + camera1 = dict(position=dict(x=camera1['x'], y=camera1['y'],z=camera1['z']), rotation=dict(x=0,y=0, z=0)) + relative_camera_1 = convert_world_to_agent_coordinate(camera1,true_base) + + camera2 = m['thirdPartyCameras'][0] + relative_camera_2 = convert_world_to_agent_coordinate(camera2,true_base) + + relative_list = [relative_wrist,relative_camera_1,relative_camera_2] + nominal_list = calc_world_xyz_from_agent_relative(relative_list,nominal_base_xyz,nominal_base['rotation']['y'],self.device) + + return dict(agent=nominal_base, wrist=nominal_list[0],camera1=nominal_list[1],camera2=nominal_list[2]) + + def get_agent_belief_state(self,env): + real_agent_state = env.get_agent_location() + nominal_positions = self.get_relative_nominal_locations(env) + + + belief_camera_xyz = np.array([nominal_positions['camera2']['position'][k] for k in ['x','y','z']]) + belief_camera_rotation = (nominal_positions['agent']['rotation']['y'] + 90) % 360 + + real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) + + metadata = copy.deepcopy(env.controller.last_event.metadata['thirdPartyCameras'][0]) + belief_camera_horizon = metadata['rotation']['x'] # assumption: real one is fine here + fov = metadata['fieldOfView'] # assumption: real one is fine here + assert abs(metadata['rotation']['z'] - 0) < 0.1 #TODO: why + + self.belief_prev_location.append(belief_camera_xyz) + self.real_prev_location.append(real_camera_xyz) + arm_state = nominal_positions['wrist']# get_relative_stretch_current_arm_state(env.controller) + + return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state + + def get_observation( + self, env: StretchManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any + ) -> Any: + + mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) + depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs) + + # catch rare NaN error where tiny mask is lost in 0 missing values + squeeze_mask = squeeze_bool_mask(mask) + depth_frame_masked = depth_frame.copy() + depth_frame_masked[~squeeze_mask] = -1 + depth_frame_masked[depth_frame_masked == 0] = -1 + + if task.num_steps_taken() == 0: + self.pointnav_history_aggr = [] + self.real_prev_location = [] + self.belief_prev_location = [] + + fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env) + + if depth_frame_masked[depth_frame_masked>0].sum() != 0: + midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + + return self.history_aggregation(camera_xyz, camera_rotation, arm_state, task.num_steps_taken()) + + def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number): + if len(self.pointnav_history_aggr) == 0: + return self.dummy_answer + else: + weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_weights = sum(weights) + total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_sum = sum(total_sum) + midpoint = total_sum / total_weights + agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) + midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) + midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + + arm_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) + arm_state_agent_coord = torch.Tensor([arm_agent_coord['position'][k] for k in ['x','y','z']]) + + distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord + + return distance_in_agent_coord.cpu() + + # TODO we have to rewrite the noisy movement experiment ones? # class AgentGTLocationSensor(Sensor): # diff --git a/utils/stretch_utils/stretch_visualizer.py b/utils/stretch_utils/stretch_visualizer.py index d87969d..2ab6ea6 100644 --- a/utils/stretch_utils/stretch_visualizer.py +++ b/utils/stretch_utils/stretch_visualizer.py @@ -183,25 +183,18 @@ def finish_episode(self, environment, episode_info, task_info): source_object_id = task_info["target_object_ids"][0] episode_success = episode_info._success - # Put back if you want the images - # for i, img in enumerate(self.log_queue): - # image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i))) - # cv2.imwrite(image_dir, img[:,:,[2,1,0]]) - episode_success_offset = "succ" if episode_success else "fail" source_obj_type = task_info['object_type'] if source_obj_type == 'small': source_obj_type = task_info['object_type'] - # goal_obj_type = task_info['goal_object_type'] room_name = task_info['scene_name'] gif_name = ( f"{time_to_write}_room_{room_name}_to_{source_obj_type}_episode_{episode_success_offset}.gif" ) - # ForkedPdb().set_trace() - self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:]) + # self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:]) addition_texts = ['xxx'] + [str(x) for x in episode_info.agent_body_dist_to_obj] if not addition_texts == ['xxx']: self.log_queue = put_additional_text_on_image(self.log_queue, addition_texts) @@ -209,18 +202,12 @@ def finish_episode(self, environment, episode_info, task_info): concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1) save_image_list_to_gif(concat_all_images, gif_name, self.log_dir) this_controller = environment.controller - scene = this_controller.last_event.metadata[ - "sceneName" - ] - reset_environment_and_additional_commands(this_controller, scene) - - additional_observation_start = [] - additional_observation_goal = [] - if 'target_object_mask' in episode_info.get_observations(): - additional_observation_start.append('target_object_mask') - if 'target_location_mask' in episode_info.get_observations(): - additional_observation_goal.append('target_location_mask') + if room_name == 'RealRobothor': + scene = None # reset doesn't do anything + else: + scene = this_controller.last_event.metadata["sceneName"] + # reset_environment_and_additional_commands(this_controller, scene) # RH: why is there a reset in the visualizer? self.log_queue = [] self.action_queue = [] @@ -229,22 +216,21 @@ def finish_episode(self, environment, episode_info, task_info): def log(self, environment, action_str, obs): image_intel = environment.intel_frame - depth_intel = environment.intel_depth try: - image_kinect = environment.kinect_frame - depth_kinect = environment.kinect_depth - combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1) - except: - if depth_intel is None or depth_kinect is None: - combined_frame = np.concatenate([image_intel, image_kinect],axis=1) - else: + depth_intel = environment.intel_depth + try: + image_kinect = environment.kinect_frame + depth_kinect = environment.kinect_depth + combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1) + except: combined_frame = np.concatenate([image_intel, depth_to_rgb(depth_intel)],axis=1) + except: + combined_frame = np.concatenate([image_intel],axis=1) # image_intel = intel_reshape(image_intel) # kinect_frame = kinect_reshape(kinect_frame) self.action_queue.append(action_str) - # combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1) for o in obs: if ("rgb" in o) and isinstance(obs[o], np.ndarray): viz_obs = unnormalize_clip_image(obs[o])*255