diff --git a/dev_requirements.txt b/dev_requirements.txt
new file mode 100644
index 0000000..d8a701d
--- /dev/null
+++ b/dev_requirements.txt
@@ -0,0 +1,186 @@
+absl-py==0.12.0
+addict==2.4.0
+# ai2thor==0+dc0f9ecd8672dc2d62651f567ff95c63f3542332
+aiohttp==3.8.1
+aiosignal==1.2.0
+allenact==0.4.0
+allenact-plugins==0.4.0
+antlr4-python3-runtime==4.8
+appdirs==1.4.4
+appnope==0.1.2
+argon2-cffi==20.1.0
+astunparse==1.6.3
+async-generator==1.10
+async-timeout==4.0.2
+asynctest==0.13.0
+attrs==20.3.0
+aws-requests-auth==0.4.3
+backcall==0.2.0
+black==21.4b2
+bleach==3.3.0
+botocore==1.20.67
+cachetools==4.2.2
+certifi==2020.12.5
+cffi==1.14.5
+chardet==4.0.0
+charset-normalizer==2.0.12
+click==7.1.2
+clip @ git+https://github.com/openai/CLIP.git@04f4dc2ca1ed0acc9893bd1a3b526a7e02c4bb10
+cloudpickle==1.6.0
+cmake==3.18.4.post1
+colour==0.1.5
+cycler==0.10.0
+Cython==0.29.23
+dataclasses>0.5
+datasets==1.18.4
+decorator==4.4.2
+defusedxml==0.7.1
+# detectron2 @ git+https://github.com/facebookresearch/detectron2.git@d285dea0a58675217b061f48182dc031d2e3b677
+dill==0.3.4
+entrypoints==0.3
+filelock==3.0.12
+Flask==1.1.2
+flatbuffers==1.12
+frozenlist==1.2.0
+fsspec==2022.1.0
+ftfy==6.0.3
+future==0.18.2
+fvcore==0.1.5.post20210609
+gast==0.3.3
+gin==0.1.6
+gin-config==0.4.0
+google-auth==1.30.0
+google-auth-oauthlib==0.4.4
+google-pasta==0.2.0
+grpcio==1.32.0
+gym==0.17.3
+h5py==2.10.0
+huggingface-hub==0.4.0
+hydra-core==1.1.0
+idna==2.10
+idna-ssl==1.1.0
+imageio==2.9.0
+imageio-ffmpeg==0.4.3
+importlib-metadata==4.0.1
+importlib-resources==5.1.4
+iopath==0.1.8
+ipykernel==5.5.4
+ipython==7.16.1
+ipython-genutils==0.2.0
+ipywidgets==7.6.3
+itsdangerous==1.1.0
+jedi==0.17.0
+Jinja2==2.11.3
+jmespath==0.10.0
+joblib==1.1.0
+jsonschema==3.2.0
+jupyter==1.0.0
+jupyter-client==6.1.12
+jupyter-console==6.4.0
+jupyter-core==4.7.1
+jupyterlab-pygments==0.1.2
+jupyterlab-widgets==1.0.0
+Keras-Preprocessing==1.1.2
+kiwisolver==1.3.1
+llvmlite==0.36.0
+Markdown==3.3.4
+MarkupSafe==1.1.1
+matplotlib==3.3.4
+mistune==0.8.4
+moviepy==1.0.3
+msgpack==1.0.2
+multidict==5.2.0
+multiprocess==0.70.12.2
+mypy-extensions==0.4.3
+nbclient==0.5.3
+nbconvert==6.0.7
+nbformat==5.1.3
+nest-asyncio==1.5.1
+networkx==2.5.1
+notebook==6.3.0
+numba==0.53.1
+numpy==1.19.5
+numpy-quaternion==2021.4.5.14.42.35
+oauthlib==3.1.0
+omegaconf==2.1.0
+open3d==0.13.0
+opencv-contrib-python==4.5.5.64
+opt-einsum==3.3.0
+packaging==20.9
+pandas==1.1.5
+pandocfilters==1.4.3
+parso==0.8.2
+pathspec==0.8.1
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==8.2.0
+portalocker==2.3.0
+proglog==0.1.9
+progressbar2==3.53.1
+prometheus-client==0.10.1
+prompt-toolkit==3.0.18
+protobuf==3.15.8
+ptyprocess==0.7.0
+pyarrow==6.0.1
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycocotools==2.0.2
+pycparser==2.20
+pydot==1.4.2
+pyglet==1.5.0
+Pygments==2.9.0
+pyparsing==2.4.7
+pyquaternion==0.9.9
+pyrsistent==0.17.3
+python-dateutil==2.8.1
+python-utils==2.5.6
+python-xlib==0.29
+pytz==2021.3
+PyWavelets==1.1.1
+PyYAML==5.4.1
+pyzmq==22.0.3
+qtconsole==5.1.0
+QtPy==1.9.0
+regex==2021.4.4
+requests==2.25.1
+requests-oauthlib==1.3.0
+responses==0.17.0
+rsa==4.7.2
+scikit-image==0.17.2
+scikit-learn==0.24.2
+scipy==1.5.4
+Send2Trash==1.5.0
+setproctitle==1.2.2
+Shapely==1.8.1.post1
+six==1.15.0
+tabulate==0.8.9
+tensorboard==2.5.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.0
+tensorboardX==2.2
+tensorflow==2.4.1
+tensorflow-estimator==2.4.0
+termcolor==1.1.0
+terminado==0.9.4
+testpath==0.4.4
+threadpoolctl==3.0.0
+tifffile==2020.9.3
+toml==0.10.2
+torch==1.8.1
+torchaudio==0.8.1
+torchvision==0.9.1
+tornado==6.1
+tqdm==4.63.0
+traitlets==4.3.3
+typed-ast==1.4.3
+typing-extensions==3.7.4.3
+urllib3==1.26.4
+wcwidth==0.2.5
+webencodings==0.5.1
+Werkzeug==1.0.1
+widgetsnbextension==3.5.1
+wrapt==1.12.1
+xxhash==3.0.0
+yacs==0.1.8
+yarl==1.7.2
+zipp==3.4.1
diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py
index c15cfea..004c8c9 100644
--- a/ithor_arm/ithor_arm_environment.py
+++ b/ithor_arm/ithor_arm_environment.py
@@ -1,5 +1,6 @@
 """A wrapper for engaging with the ManipulaTHOR environment."""
 
+from ast import For
 import copy
 import math
 import typing
@@ -7,11 +8,15 @@
 from typing import Tuple, Dict, List, Set, Union, Any, Optional
 
 import ai2thor.server
+from cv2 import USAGE_DEFAULT
 import numpy as np
 from ai2thor.controller import Controller
 from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV
 from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
 
+from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal
+from ithor_arm.arm_calculation_utils import convert_world_to_agent_coordinate
+
 from ithor_arm.ithor_arm_constants import (
     ADITIONAL_ARM_ARGS,
     ARM_MIN_HEIGHT,
@@ -56,6 +61,7 @@ def __init__(
             verbose: bool = False,
             env_args=None,
     ) -> None:
+
         """Initializer.
 
         # Parameters
@@ -84,6 +90,9 @@ def __init__(
         simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplies object
             interactions when opening drawers (when simplified, objects within a drawer do not slide around on
             their own when the drawer is opened or closed, instead they are effectively glued down).
+        *_noise_meta_dist_params : [mean, variance] defines the normal distribution over which the actual noise parameters
+            for a motion noise distribution will be drawn. Distributions for noise in motion will be re-rolled every scene 
+            reset with new bias and variance values drawn from these meta-distributions.
         """
 
         self._start_player_screen_width = player_screen_width
@@ -112,6 +121,21 @@ def __init__(
         self._always_return_visible_range = False
         self.simplify_physics = simplify_physics
 
+        if 'motion_noise_type' in env_args.keys():
+            if env_args['motion_noise_type'] == 'habitat':
+                self.noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args'])
+            elif env_args['motion_noise_type'] == 'simple1d':
+                self.noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args'])
+            else:
+                print('Unrecognized motion noise model type')
+                ForkedPdb().set_trace()
+        else:
+            self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model
+
+        self.ahead_nominal = 0.2
+        self.rotate_nominal = 45
+
+
         self.start(None)
         # self.check_controller_version()
 
@@ -124,6 +148,16 @@ def __init__(
             self.env_args["quality"] = self._quality
         # self.memory_frames = []
 
+    def check_controller_version(self):
+        if MANIPULATHOR_COMMIT_ID is not None:
+            assert (
+                    MANIPULATHOR_COMMIT_ID in self.controller._build.url
+            ), "Build number is not right, {} vs {}, use  pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format(
+                self.controller._build.url,
+                MANIPULATHOR_COMMIT_ID,
+                MANIPULATHOR_COMMIT_ID,
+            )
+
     # def check_controller_version(self):
     #     if MANIPULATHOR_COMMIT_ID is not None:
     #         assert (
@@ -222,6 +256,10 @@ def reset(
 
         self.list_of_actions_so_far = []
 
+        self.noise_model.reset_noise_model()
+        self.nominal_agent_location = self.get_agent_location()
+
+
     def randomize_agent_location(
             self, seed: int = None, partial_position: Optional[Dict[str, float]] = None
     ) -> Dict:
@@ -299,6 +337,36 @@ def get_absolute_hand_state(self):
         xyz_dict = self.correct_nan_inf(xyz_dict, "absolute hand")
         return dict(position=xyz_dict, rotation={"x": 0, "y": 0, "z": 0})
 
+    
+    def update_nominal_location(self, action_dict):
+        # location = {
+        #     "x": metadata["agent"]["position"]["x"],
+        #     "y": metadata["agent"]["position"]["y"],
+        #     "z": metadata["agent"]["position"]["z"],
+        #     "rotation": metadata["agent"]["rotation"]["y"],
+        #     "horizon": metadata["agent"]["cameraHorizon"],
+        #     "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")),
+        # }
+
+        curr_loc = self.nominal_agent_location
+        new_loc = copy.deepcopy(curr_loc)
+
+        if action_dict['action'] is 'RotateLeft':
+            new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360
+        elif action_dict['action'] is 'RotateRight':
+            new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360
+        elif action_dict['action'] is 'MoveAhead':
+            new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180)
+            new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180)
+        elif action_dict['action'] is 'TeleportFull':
+            new_loc["x"] = action_dict['x']
+            new_loc["y"] = action_dict['y']
+            new_loc["z"] = action_dict['z']
+            new_loc["rotation"] = action_dict['rotation']['y']
+            new_loc["horizon"] = action_dict['horizon']
+        
+        self.nominal_agent_location = new_loc
+
     def get_pickupable_objects(self):
 
         event = self.controller.last_event
@@ -361,6 +429,7 @@ def step(
     ) -> ai2thor.server.Event:
         """Take a step in the ai2thor environment."""
         action = typing.cast(str, action_dict["action"])
+        original_action_dict = copy.deepcopy(action_dict)
 
         skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
         last_frame: Optional[np.ndarray] = None
@@ -396,18 +465,41 @@ def step(
 
             copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS)
 
+            # RH: order matters, nominal action happens last
             action_dict = {**action_dict, **copy_aditions}
             if action in [MOVE_AHEAD]:
+                noise = self.noise_model.get_ahead_drift(self.ahead_nominal)
+
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = noise[2]
+                sr = self.controller.step(action_dict)
+
+                action_dict = dict()
                 action_dict["action"] = "MoveAgent"
-                action_dict["ahead"] = 0.2
+                action_dict["ahead"] = self.ahead_nominal + noise[0]
+                action_dict["right"] = noise[1]
 
             elif action in [ROTATE_RIGHT]:
+                noise = self.noise_model.get_rotate_drift()
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = noise[0]
+                action_dict["right"] = noise[1]
+                sr = self.controller.step(action_dict)
+
+                action_dict = dict()
                 action_dict["action"] = "RotateAgent"
-                action_dict["degrees"] = 45
+                action_dict["degrees"] = noise[2] + self.rotate_nominal
 
             elif action in [ROTATE_LEFT]:
+                noise = self.noise_model.get_rotate_drift()
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = noise[0]
+                action_dict["right"] = noise[1]
+                sr = self.controller.step(action_dict)
+
+                action_dict = dict()
                 action_dict["action"] = "RotateAgent"
-                action_dict["degrees"] = -45
+                action_dict["degrees"] = noise[2] - self.rotate_nominal
 
         elif "MoveArm" in action:
             copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS)
@@ -441,10 +533,13 @@ def step(
                     k: v for (k, v) in base_position.items() if k in ["x", "y", "z"]
                 }
 
-
-
         sr = self.controller.step(action_dict)
         self.list_of_actions_so_far.append(action_dict)
+        
+        # RH: Nominal location only updates for successful actions. Note that that drift 
+        # action might succeed even if the "main" action fails
+        if sr.metadata["lastActionSuccess"]:
+            self.update_nominal_location(original_action_dict)
 
         # if action in SET_OF_ALL_AGENT_ACTIONS:
         #     self.update_memory()
diff --git a/ithor_arm/ithor_arm_noise_models.py b/ithor_arm/ithor_arm_noise_models.py
new file mode 100644
index 0000000..a07f044
--- /dev/null
+++ b/ithor_arm/ithor_arm_noise_models.py
@@ -0,0 +1,120 @@
+"""Utility classes and functions for noise models used by the platforms."""
+from typing import Tuple, Dict, List, Set, Union, Any, Optional
+
+from utils.noise_from_habitat import MotionNoiseModel, _TruncatedMultivariateGaussian
+import numpy as np
+
+class NoiseInMotionHabitatFlavor:
+    # Sample usage in __init__ for a config file:
+        # self.ENV_ARGS['motion_noise_type'] = 'habitat'
+        # self.ENV_ARGS['motion_noise_args'] = dict()
+        # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1]
+        # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0]
+        # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1
+        
+    def __init__(self,
+            multiplier_means: List[float] = [1,1,1,1,1,1],
+            multiplier_sigmas: List[float] = [0,0,0,0,0,0,0],
+            effect_scale: float = 1,
+        ) -> None:
+
+        self.multiplier_means = multiplier_means
+        self.multiplier_sigmas = multiplier_sigmas
+        self.effect_scale = effect_scale
+        
+        self.ahead = None
+        self.rotate = None
+
+        self.reset_noise_model()
+    
+    def generate_linear_submodel(self,m1,m2,m3,s1,s2,s3):
+        return MotionNoiseModel(
+            _TruncatedMultivariateGaussian([m1 * 0.074, m2 * 0.036], [s1 * 0.019, s2 * 0.033]),
+            _TruncatedMultivariateGaussian([m3 * 0.189], [s3 * 0.038]))
+        
+    def generate_rotational_submodel(self,m1,m2,m3,s1,s2,s3):
+        return MotionNoiseModel(
+            _TruncatedMultivariateGaussian([m1 * 0.002, m2 * 0.003], [s1 * 0.0, s2 * 0.002]),
+            _TruncatedMultivariateGaussian([m3 * 0.219], [s3 * 0.019]))
+    
+    def generate_model_multipliers(self):
+        return [np.random.normal(self.multiplier_means[i],self.multiplier_sigmas[i]) 
+                for i in range(len(self.multiplier_means))]
+    
+    def reset_noise_model(self):
+        model_multipliers = self.generate_model_multipliers()
+        
+        self.ahead = self.generate_linear_submodel(*model_multipliers)
+        self.rotate = self.generate_rotational_submodel(*model_multipliers)
+    
+    def get_ahead_drift(self,*_):
+        # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg]
+        
+        if self.effect_scale == 0: #speedup for trivial case
+            return [0,0,0]
+        
+        rotation_drift = self.effect_scale * self.ahead.rotation.sample()
+        linear_drift = self.effect_scale * self.ahead.linear.sample()
+        return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi]
+
+    def get_rotate_drift(self):
+        # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg]
+
+        if self.effect_scale == 0: #speedup for trivial case
+            return [0,0,0]
+        
+        rotation_drift = self.effect_scale * self.rotate.rotation.sample()
+        linear_drift = self.effect_scale * self.rotate.linear.sample()
+
+        return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi]
+
+
+class NoiseInMotionSimple1DNormal:
+    # Sample usage in __init__ for a config file:
+        # self.ENV_ARGS['motion_noise_type'] = 'simple1d'
+        # self.ENV_ARGS['motion_noise_args'] = dict()
+        # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.10]}
+        # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.04]}
+        # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,10], 'variance_dist': [0,10]}
+
+    def __init__(self,
+            ahead_noise_meta_dist_params: Dict = {'bias_dist': [0,0], 'variance_dist': [0,0]},
+            lateral_noise_meta_dist_params: Dict= {'bias_dist': [0,0], 'variance_dist': [0,0]},
+            turning_noise_meta_dist_params: Dict = {'bias_dist': [0,0], 'variance_dist': [0,0]},
+            effect_scale: float = 1,
+        ) -> None:
+
+        self.ahead_noise_meta_dist_params = ahead_noise_meta_dist_params
+        self.lateral_noise_meta_dist_params = lateral_noise_meta_dist_params
+        self.turning_noise_meta_dist_params = turning_noise_meta_dist_params
+        self.effect_scale = effect_scale
+        
+        self.ahead_noise_params = [0,0]
+        self.lateral_noise_params = [0,0]
+        self.turning_noise_params = [0,0]
+
+        self.reset_noise_model()
+    
+    def generate_motion_noise_params(self,meta_dist):
+        bias = np.random.normal(*meta_dist['bias_dist'])
+        variance = np.abs(np.random.normal(*meta_dist['variance_dist']))
+        return [bias,variance]
+
+    def reset_noise_model(self):
+        self.ahead_noise_params = self.generate_motion_noise_params(self.ahead_noise_meta_dist_params)
+        self.lateral_noise_params = self.generate_motion_noise_params(self.lateral_noise_meta_dist_params)
+        self.turning_noise_params = self.generate_motion_noise_params(self.turning_noise_meta_dist_params)
+    
+    def get_ahead_drift(self, nominal_ahead):
+        # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg]
+        linear_drift = self.effect_scale * np.random.normal(*self.ahead_noise_params)
+        side_drift = self.effect_scale * np.random.normal(*self.lateral_noise_params)
+        rotation_drift = np.arctan2(side_drift,linear_drift + nominal_ahead) * 180 / np.pi
+
+        return [linear_drift, side_drift, rotation_drift ]
+
+    def get_rotate_drift(self):
+        # for this model, rotating incurs no positional drift. Returns [0,0,deg]
+        rotation_drift = self.effect_scale * np.random.normal(*self.turning_noise_params)
+
+        return [0, 0, rotation_drift]
diff --git a/ithor_arm/near_deadline_sensors.py b/ithor_arm/near_deadline_sensors.py
index 76b60fc..f4e1086 100644
--- a/ithor_arm/near_deadline_sensors.py
+++ b/ithor_arm/near_deadline_sensors.py
@@ -1,4 +1,5 @@
 """Utility classes and functions for sensory inputs used by the models."""
+from ast import For
 import copy
 import datetime
 import math
@@ -39,6 +40,8 @@
 from utils.noise_from_habitat import ControllerNoiseModel, MotionNoiseModel, _TruncatedMultivariateGaussian
 from utils.noise_in_motion_util import NoiseInMotion, squeeze_bool_mask, tensor_from_dict
 
+KINECT_FOV_W, KINECT_FOV_H = 59, 90
+
 
 class FancyNoisyObjectMaskWLabels(Sensor):
     def __init__(self, type: str,noise, height, width,  uuid: str = "object_mask", distance_thr: float = -1, **kwargs: Any):
@@ -366,8 +369,141 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state):
 #         self.noise = something
 #         return super(PointNavEmulatorSensorwScheduler, self).get_observation(env, task, *args, **kwargs)
 
+class PointNavEmulSensorDeadReckoning(Sensor):
+
+    def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any):
+        observation_space = gym.spaces.Box(
+            low=0, high=1, shape=(1,), dtype=np.float32
+        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+        self.type = type
+        self.mask_sensor = mask_sensor
+        self.depth_sensor = depth_sensor
+        uuid = '{}_{}'.format(uuid, type)
+
+        self.min_xyz = np.zeros((3))
+
+        self.dummy_answer = torch.zeros(3)
+        self.dummy_answer[:] = 4 # is this good enough?
+        self.device = torch.device("cpu")
+        super().__init__(**prepare_locals_for_super(locals()))
+
+    def get_agent_belief_state(self,env):
+        #TODO all these values need to be checked
+        fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think
+        belief_agent_state = env.nominal_agent_location
+        real_agent_state = env.get_agent_location()
+
+        belief_camera_horizon = 45
+        belief_camera_xyz = np.array([belief_agent_state[k] for k in ['x','y','z']])
+        belief_camera_rotation = (belief_agent_state['rotation'] + 90) % 360
+
+        real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']])
+
+        self.belief_prev_location.append(belief_camera_xyz)
+        self.real_prev_location.append(real_camera_xyz)
+        
+        return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation
+
+    def get_observation(
+            self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any
+    ) -> Any:
+        #TODO remove
+        if self.type == 'destination':
+            return self.dummy_answer
+        mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times?
+        depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1)
+
+        if task.num_steps_taken() == 0:
+            self.pointnav_history_aggr = []
+            self.real_prev_location = []
+            self.belief_prev_location = []
 
 
+        fov, camera_horizon, camera_xyz, camera_rotation = self.get_agent_belief_state(env)
+        arm_agent_coord = env.get_current_arm_state()
+
+        if mask.sum() != 0:
+
+            midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
+            self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken()))
+
+        # if len(self.belief_prev_location) > 190:
+        #     import matplotlib
+        #     matplotlib.use('TkAgg')
+        #     import matplotlib.pyplot as plt
+        #     fig = plt.figure()
+        #     ax = fig.add_subplot(projection='3d')
+        #     def draw_points(locations, color):
+        #         xs = [x[0] for x in locations]
+        #         ys = [x[1] for x in locations]
+        #         zs = [x[2] for x in locations]
+        #         ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color)
+
+        #     def draw(locations, color):
+        #         xs = [x['camera_xyz'][0] for x in locations]
+        #         ys = [x['camera_xyz'][1] for x in locations]
+        #         zs = [x['camera_xyz'][2] for x in locations]
+        #         ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color)
+
+        #     draw_points(self.real_prev_location, 'g'); draw_points(self.belief_prev_location, 'b'); plt.show()
+        #     ForkedPdb().set_trace()
+
+
+
+        result = self.history_aggregation(camera_xyz, camera_rotation, arm_agent_coord, task.num_steps_taken())
+        return result
+        
+    def history_aggregation(self, camera_xyz, camera_rotation, arm_agent_coord, current_step_number):
+        if len(self.pointnav_history_aggr) == 0:
+            return self.dummy_answer
+        else:
+            weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+            total_weights = sum(weights)
+            total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+            total_sum = sum(total_sum)
+            midpoint = total_sum / total_weights
+            agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
+            midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
+            midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
+            midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']])
+
+            # arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0))
+            # arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state)
+            arm_state_agent_coord = torch.Tensor([arm_agent_coord[k] for k in ['x','y','z']])
+            # ForkedPdb().set_trace()
+
+            distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord
+
+            return distance_in_agent_coord.cpu()
+
+
+def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame):
+    with torch.no_grad():
+        camera_xyz = (
+            torch.from_numpy(camera_xyz - min_xyz).float().to(device)
+        )
+
+        depth_frame = torch.from_numpy(depth_frame).to(device)
+        depth_frame[depth_frame == -1] = np.NaN
+        world_space_point_cloud = depth_frame_to_world_space_xyz(
+            depth_frame=depth_frame,
+            camera_world_xyz=camera_xyz,
+            rotation=camera_rotation,
+            horizon=camera_horizon,
+            fov=fov,
+        )
+        return world_space_point_cloud
+
+def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device):
+    mask = squeeze_bool_mask(mask)
+    depth_frame_masked = depth_frame_original.copy()
+    depth_frame_masked[~mask] = -1
+    world_space_point_cloud = calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame_masked)
+    valid_points = (world_space_point_cloud == world_space_point_cloud).sum(dim=-1) == 3
+    point_in_world = world_space_point_cloud[valid_points]
+    midpoint_agent_coord = point_in_world.mean(dim=0)
+    return midpoint_agent_coord
+
 
 class PredictionObjectMask(Sensor):
     def __init__(self, type: str,object_query_sensor, rgb_for_detection_sensor,  uuid: str = "predict_object_mask", **kwargs: Any):
diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py
new file mode 100644
index 0000000..13e0407
--- /dev/null
+++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py
@@ -0,0 +1,133 @@
+import platform
+from typing import Dict
+
+import gym
+import torch
+from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
+from torch import nn
+
+from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor
+from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler
+from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask
+from ithor_arm.ithor_arm_constants import ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS
+from ithor_arm.ithor_arm_sensors import (
+    InitialAgentArmToObjectSensor,
+    InitialObjectToGoalSensor,
+    PickedUpObjSensor,
+    DepthSensorThor, RelativeAgentArmToObjectSensor, RelativeObjectToGoalSensor,
+)
+from ithor_arm.ithor_arm_viz import MaskImageVisualizer
+from ithor_arm.near_deadline_sensors import PointNavEmulatorSensor
+from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_ddppo import BringObjectMixInPPOConfig
+from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_simplegru import BringObjectMixInSimpleGRUConfig
+from manipulathor_baselines.bring_object_baselines.experiments.ithor.bring_object_ithor_base import BringObjectiThorBaseConfig
+from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator
+from manipulathor_baselines.bring_object_baselines.models.query_obj_w_gt_mask_rgb_model import SmallBringObjectWQueryObjGtMaskRGBDModel
+from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator
+
+
+class PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise(
+    BringObjectiThorBaseConfig,
+    BringObjectMixInPPOConfig,
+    BringObjectMixInSimpleGRUConfig,
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+    NOISE_LEVEL = 0
+    AGENT_LOCATION_NOISE = 0.4 #?
+    distance_thr = 1.5 # is this a good number?
+    source_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='source', distance_thr=distance_thr)
+    destination_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='destination', distance_thr=distance_thr)
+    SENSORS = [
+        RGBSensorThor(
+            height=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            width=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            uuid="rgb_lowres",
+        ),
+        DepthSensorThor(
+            height=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            width=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            use_normalization=True,
+            uuid="depth_lowres",
+        ),
+        PickedUpObjSensor(),
+        CategorySampleSensor(type='source'),
+        CategorySampleSensor(type='destination'),
+        CategoryFeatureSampleSensor(type='source'),
+        CategoryFeatureSampleSensor(type='destination'),
+        source_mask_sensor,
+        destination_mask_sensor,
+        PointNavEmulatorSensor(type='source', mask_sensor=source_mask_sensor, noise=AGENT_LOCATION_NOISE),
+        PointNavEmulatorSensor(type='destination', mask_sensor=destination_mask_sensor, noise=AGENT_LOCATION_NOISE),
+        # TempRealArmpointNav(uuid='point_nav_emul',type='source'),
+        # TempRealArmpointNav(uuid='point_nav_emul', type='destination'),
+    ]
+
+    MAX_STEPS = 200
+
+    TASK_SAMPLER = DiverseBringObjectTaskSampler
+    # TASK_TYPE = TestPointNavExploreWiseRewardTask
+    TASK_TYPE = ExploreWiseRewardTask
+
+    NUM_PROCESSES = 20
+
+    OBJECT_TYPES = TRAIN_OBJECTS + TEST_OBJECTS
+
+
+    def train_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise, self).train_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+
+            for pointnav_emul_sensor in sampler_args['sensors']:
+                if isinstance(pointnav_emul_sensor, PointNavEmulatorSensor):
+                    pointnav_emul_sensor.device = torch.device(kwargs["devices"][0])
+
+        return sampler_args
+    def test_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise, self).test_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+
+            for pointnav_emul_sensor in sampler_args['sensors']:
+                if isinstance(pointnav_emul_sensor, PointNavEmulatorSensor):
+                    pointnav_emul_sensor.device = torch.device(kwargs["devices"][0])
+
+        return sampler_args
+
+    def __init__(self):
+        super().__init__()
+        self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big?
+        self.REWARD_CONFIG['object_found'] = 1 # is this too big?
+        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
+
+        # self.ENV_ARGS['motion_noise_type'] = 'habitat'
+        # self.ENV_ARGS['motion_noise_args'] = dict()
+        # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1]
+        # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0]
+        # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1
+
+        # self.ENV_ARGS['motion_noise_type'] = 'simple1d'
+        # self.ENV_ARGS['motion_noise_args'] = dict()
+        # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.10]}
+        # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.04]}
+        # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,10], 'variance_dist': [0,10]}
+        
+
+    @classmethod
+    def create_model(cls, **kwargs) -> nn.Module:
+        return RGBDModelWPointNavEmulator(
+            action_space=gym.spaces.Discrete(
+                len(cls.TASK_TYPE.class_action_names())
+            ),
+            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
+            hidden_size=512,
+            visualize=cls.VISUALIZE
+        )
+
+    @classmethod
+    def tag(cls):
+        return cls.__name__
diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py
new file mode 100644
index 0000000..dfb36f2
--- /dev/null
+++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py
@@ -0,0 +1,135 @@
+import platform
+from typing import Dict
+
+import gym
+import torch
+from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
+from torch import nn
+
+from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor
+from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler
+from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask
+from ithor_arm.ithor_arm_constants import MANIPULATHOR_ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS
+from ithor_arm.ithor_arm_sensors import (
+    InitialAgentArmToObjectSensor,
+    InitialObjectToGoalSensor,
+    PickedUpObjSensor,
+    DepthSensorThor, RelativeAgentArmToObjectSensor, RelativeObjectToGoalSensor,
+)
+from ithor_arm.ithor_arm_viz import MaskImageVisualizer
+from ithor_arm.near_deadline_sensors import PointNavEmulatorSensor, PointNavEmulSensorDeadReckoning
+from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_ddppo import BringObjectMixInPPOConfig
+from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_simplegru import BringObjectMixInSimpleGRUConfig
+from manipulathor_baselines.bring_object_baselines.experiments.ithor.bring_object_ithor_base import BringObjectiThorBaseConfig
+from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator
+from manipulathor_baselines.bring_object_baselines.models.query_obj_w_gt_mask_rgb_model import SmallBringObjectWQueryObjGtMaskRGBDModel
+from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator
+
+
+class PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged(
+    BringObjectiThorBaseConfig,
+    BringObjectMixInPPOConfig,
+    BringObjectMixInSimpleGRUConfig,
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+    NOISE_LEVEL = 0
+    AGENT_LOCATION_NOISE = 0.4 #?
+    distance_thr = 1.5 # is this a good number?
+    source_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='source', distance_thr=distance_thr)
+    destination_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='destination', distance_thr=distance_thr)
+    depth_sensor = DepthSensorThor(
+            height=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            width=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            use_normalization=True,
+            uuid="depth_lowres",
+        )
+    
+    SENSORS = [
+        RGBSensorThor(
+            height=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            width=BringObjectiThorBaseConfig.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            uuid="rgb_lowres",
+        ), 
+        depth_sensor,
+        PickedUpObjSensor(),
+        CategorySampleSensor(type='source'),
+        CategorySampleSensor(type='destination'),
+        CategoryFeatureSampleSensor(type='source'),
+        CategoryFeatureSampleSensor(type='destination'),
+        source_mask_sensor,
+        destination_mask_sensor,
+        PointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor, noise=AGENT_LOCATION_NOISE, depth_sensor=depth_sensor),
+        PointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor, noise=AGENT_LOCATION_NOISE, depth_sensor=depth_sensor),
+        # TempRealArmpointNav(uuid='point_nav_emul',type='source'),
+        # TempRealArmpointNav(uuid='point_nav_emul', type='destination'),
+    ]
+
+    MAX_STEPS = 200
+
+    TASK_SAMPLER = DiverseBringObjectTaskSampler
+    # TASK_TYPE = TestPointNavExploreWiseRewardTask
+    TASK_TYPE = ExploreWiseRewardTask
+
+    NUM_PROCESSES = 20
+
+    OBJECT_TYPES = TRAIN_OBJECTS + TEST_OBJECTS
+
+
+    def train_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged, self).train_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+
+            for pointnav_emul_sensor in sampler_args['sensors']:
+                if isinstance(pointnav_emul_sensor, PointNavEmulSensorDeadReckoning):
+                    pointnav_emul_sensor.device = torch.device(kwargs["devices"][0])
+
+        return sampler_args
+    def test_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged, self).test_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+
+            for pointnav_emul_sensor in sampler_args['sensors']:
+                if isinstance(pointnav_emul_sensor, PointNavEmulSensorDeadReckoning):
+                    pointnav_emul_sensor.device = torch.device(kwargs["devices"][0])
+
+        return sampler_args
+
+    def __init__(self):
+        super().__init__()
+        self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big?
+        self.REWARD_CONFIG['object_found'] = 1 # is this too big?
+        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
+
+        self.ENV_ARGS['motion_noise_type'] = 'habitat'
+        self.ENV_ARGS['motion_noise_args'] = dict()
+        self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1]
+        self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01]
+        self.ENV_ARGS['motion_noise_args']['effect_scale'] = .5
+
+        # self.ENV_ARGS['motion_noise_type'] = 'simple1d'
+        # self.ENV_ARGS['motion_noise_args'] = dict()
+        # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]}
+        # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]}
+        # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,2], 'variance_dist': [0,2]}
+        
+
+    @classmethod
+    def create_model(cls, **kwargs) -> nn.Module:
+        return RGBDModelWPointNavEmulator(
+            action_space=gym.spaces.Discrete(
+                len(cls.TASK_TYPE.class_action_names())
+            ),
+            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
+            hidden_size=512,
+            visualize=cls.VISUALIZE
+        )
+
+    @classmethod
+    def tag(cls):
+        return cls.__name__
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py
deleted file mode 100644
index 1222f03..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py
+++ /dev/null
@@ -1,181 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-from typing_extensions import final
-
-import gym
-import numpy as np
-from torch import nn
-import yaml
-import copy
-
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel, RGBSensorStretchKinectBigFov
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.experiment_utils import Builder
-from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
-from ithor_arm.ithor_arm_viz import TestMetricLogger
-
-from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL
-from ithor_arm.ithor_arm_constants import TRAIN_OBJECTS, TEST_OBJECTS
-from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic
-
-
-
-class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-    TRAIN_SCENES = [
-        "FloorPlan{}_physics".format(str(i))
-        for i in range(1, 20 + 1)
-    ]
-    VALID_SCENES = [
-        "FloorPlan{}_physics".format(str(i))
-        for i in range(21, 26)
-    ]
-    TEST_SCENES = [
-        "FloorPlan{}_physics".format(str(i))
-        for i in range(26, 31)
-    ]
-
-
-    ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES
-
-    # OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS))
-    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
-        OBJECT_TYPES=yaml.safe_load(f)
-
-    # UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS))
-
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    SENSORS = [
-        RGBSensorThor(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-        RGBSensorStretchKinectBigFov(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres_arm",
-        ),
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 100
-
-    TASK_SAMPLER = AllRoomsObjectNavTaskSampler
-    TASK_TYPE = StretchObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
-
-
-    NUM_PROCESSES = 40
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS)
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        # self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-        self.ENV_ARGS['allow_flipping'] = True
-
-
-    @classmethod
-    @final
-    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
-        preprocessors = []
-        # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None)
-
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet",
-                visualize=cls.VISUALIZE
-            )
-        )
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres_arm",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet_arm",
-                visualize=cls.VISUALIZE
-            )
-        )
-        return preprocessors
-
-    @classmethod
-    @final
-    def create_model(cls, **kwargs) -> nn.Module:
-        goal_sensor_uuid = next(
-            (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-            None,
-        )
-        resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"]
-
-        return ResnetTensorNavNCameraActorCritic(
-            action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            goal_sensor_uuid=goal_sensor_uuid,
-            resnet_preprocessor_uuids=resnet_preprocessor_uuids,
-            hidden_size=512,
-            goal_dims=32,
-            add_prev_actions=True,
-        )
-        # return ResnetTensorNavActorCritic(
-        #     action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-        #     observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-        #     goal_sensor_uuid=goal_sensor_uuid,
-        #     rgb_resnet_preprocessor_uuid="rgb_clip_resnet",
-        #     depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too
-        #     hidden_size=512,
-        #     goal_dims=32,
-        #     add_prev_actions=True,
-        # )
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__
-
-    
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py
deleted file mode 100644
index f097c55..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py
+++ /dev/null
@@ -1,168 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-from typing_extensions import final
-
-import gym
-import numpy as np
-from torch import nn
-import yaml
-import copy
-
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.experiment_utils import Builder
-from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
-from ithor_arm.ithor_arm_viz import TestMetricLogger
-from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic
-
-
-
-class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
-        OBJECT_TYPES=yaml.safe_load(f)
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    SENSORS = [
-        RGBSensorStretchIntel(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-        RGBSensorStretchKinect(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres_arm",
-        ),
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 100
-
-    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
-    TASK_TYPE = StretchObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
-
-
-    NUM_PROCESSES = 56
-
-    TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train
-    if platform.system() == "Darwin":
-        TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)]
-
-    TEST_SCENES = [f'ProcTHOR{i}' for i in range(100)]
-    random.shuffle(TRAIN_SCENES)
-
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS)
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-        self.ENV_ARGS['allow_flipping'] = True
-
-
-    @classmethod
-    @final
-    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
-        preprocessors = []
-        # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None)
-
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet",
-                visualize=cls.VISUALIZE
-            )
-        )
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres_arm",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet_arm",
-                visualize=cls.VISUALIZE
-            )
-        )
-        return preprocessors
-
-
-    @classmethod
-    @final
-    def create_model(cls, **kwargs) -> nn.Module:
-        goal_sensor_uuid = next(
-            (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-            None,
-        )
-        resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"]
-
-        return ResnetTensorNavNCameraActorCritic(
-            action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            goal_sensor_uuid=goal_sensor_uuid,
-            resnet_preprocessor_uuids=resnet_preprocessor_uuids,
-            hidden_size=512,
-            goal_dims=32,
-            add_prev_actions=True,
-        )
-        # return ResnetTensorNavActorCritic(
-        #     action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-        #     observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-        #     goal_sensor_uuid=goal_sensor_uuid,
-        #     rgb_resnet_preprocessor_uuid="rgb_clip_resnet",
-        #     depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too
-        #     hidden_size=512,
-        #     goal_dims=32,
-        #     add_prev_actions=True,
-        # )
-        
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__
-
-    
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py
deleted file mode 100644
index 2c042d8..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-from typing_extensions import final
-
-import gym
-import numpy as np
-from torch import nn
-import yaml
-import copy
-
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-
-from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES
-from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.experiment_utils import Builder
-from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
-from ithor_arm.ithor_arm_viz import TestMetricLogger
-from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic
-
-
-
-class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
-        OBJECT_TYPES=yaml.safe_load(f)
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    #TODO use resnet clip normalizations?
-    SENSORS = [
-        RGBSensorThor(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-
-        RGBSensorStretchKinectBigFov(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres_arm",
-        ),
-
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 10
-        SENSORS += [
-            RGBSensorStretchKinectBigFov(
-                height=224,
-                width=224,
-                use_resnet_normalization=True,
-                mean=mean,
-                stdev=stdev,
-                uuid="rgb_lowres_arm_only_viz",
-            ),
-            RGBSensorThor(
-                height=224,
-                width=224,
-                use_resnet_normalization=True,
-                mean=mean,
-                stdev=stdev,
-                uuid="rgb_lowres_only_viz",
-            ),
-        ]
-
-    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
-    TASK_TYPE = StretchObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
-
-    NUM_PROCESSES = 50
-
-
-    TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999) if i not in PROCTHOR_INVALID_SCENES] # 9999 is all of train
-    if platform.system() == "Darwin":
-        TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)]
-
-    TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)]
-    random.shuffle(TRAIN_SCENES)
-
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS)
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        #TODO depth is not False. Kiana added this:
-        self.ENV_ARGS['renderInstanceSegmentation'] = False
-        self.ENV_ARGS['renderDepthImage'] = False
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-        self.ENV_ARGS['allow_flipping'] = False #TODO important change everywhere
-
-
-    @classmethod
-    @final
-    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
-        preprocessors = []
-        # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None)
-
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet",
-                visualize=cls.VISUALIZE
-            )
-        )
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres_arm",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet_arm",
-                visualize=cls.VISUALIZE
-            )
-        )
-        return preprocessors
-
-
-    @classmethod
-    @final
-    def create_model(cls, **kwargs) -> nn.Module:
-        goal_sensor_uuid = next(
-            (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-            None,
-        )
-        resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"]
-
-        # return ResnetTensorNavActorCritic(
-        #     action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-        #     observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-        #     goal_sensor_uuid=goal_sensor_uuid,
-        #     rgb_resnet_preprocessor_uuid="rgb_clip_resnet",
-        #     depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too
-        #     hidden_size=512,
-        #     goal_dims=32,
-        #     add_prev_actions=True,
-        # )
-
-        return ResnetTensorNavNCameraActorCritic(
-            action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            goal_sensor_uuid=goal_sensor_uuid,
-            resnet_preprocessor_uuids=resnet_preprocessor_uuids,
-            hidden_size=512,
-            goal_dims=32,
-            add_prev_actions=True,
-        )
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__
-
-    
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py
deleted file mode 100644
index 4ee0dd3..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-from typing_extensions import final
-
-import gym
-import numpy as np
-from torch import nn
-import yaml
-import copy
-
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.experiment_utils import Builder
-from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
-from ithor_arm.ithor_arm_viz import TestMetricLogger
-from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic
-
-from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL
-
-
-
-class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-    TRAIN_SCENES = ROBOTHOR_TRAIN
-    TEST_SCENES = ROBOTHOR_VAL
-    # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor']))
-    # OBJECT_TYPES.sort()
-
-    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
-        OBJECT_TYPES=yaml.safe_load(f)
-
-
-    random.shuffle(TRAIN_SCENES)
-    random.shuffle(TEST_SCENES)
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    SENSORS = [
-        RGBSensorStretchIntel(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-        RGBSensorStretchKinect(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres_arm",
-        ),
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 500
-
-    TASK_SAMPLER = AllRoomsObjectNavTaskSampler
-    TASK_TYPE = StretchObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
-    VISUALIZE = False
-
-
-    NUM_PROCESSES = 56
-
-
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS)
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        # self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-        self.ENV_ARGS['allow_flipping'] = True
-
-
-    @classmethod
-    @final
-    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
-        preprocessors = []
-        # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None)
-
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet",
-                visualize=cls.VISUALIZE
-            )
-        )
-        preprocessors.append(
-            ClipResNetPreprocessor(
-                rgb_input_uuid="rgb_lowres_arm",
-                clip_model_type="RN50",
-                pool=False,
-                output_uuid="rgb_clip_resnet_arm",
-                visualize=cls.VISUALIZE
-            )
-        )
-        return preprocessors
-
-    @classmethod
-    @final
-    def create_model(cls, **kwargs) -> nn.Module:
-        goal_sensor_uuid = next(
-            (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-            None,
-        )
-        resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"]
-
-        return ResnetTensorNavNCameraActorCritic(
-            action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            goal_sensor_uuid=goal_sensor_uuid,
-            resnet_preprocessor_uuids=resnet_preprocessor_uuids,
-            hidden_size=512,
-            goal_dims=32,
-            add_prev_actions=True,
-        )        
-        # return ResnetTensorNavActorCritic(
-        #     action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-        #     observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-        #     goal_sensor_uuid=goal_sensor_uuid,
-        #     rgb_resnet_preprocessor_uuid="rgb_clip_resnet",
-        #     depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too
-        #     hidden_size=512,
-        #     goal_dims=32,
-        #     add_prev_actions=True,
-        # )
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__
-
-    
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py
deleted file mode 100644
index 74a0fef..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py
+++ /dev/null
@@ -1,139 +0,0 @@
-from typing import Any, Dict, List, Optional, Sequence
-from typing_extensions import Literal, final
-
-import datasets
-import numpy as np
-import torch
-import torch.optim as optim
-
-from ai2thor.platform import CloudRendering
-from allenact.embodiedai.sensors.vision_sensors import DepthSensor
-from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig
-from allenact.utils.experiment_utils import (
-    Builder,
-    PipelineStage,
-    TrainingPipeline,
-    TrainingSettings,
-)
-from allenact.algorithms.onpolicy_sync.losses import PPO
-from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
-
-from allenact.base_abstractions.sensor import Sensor
-
-# from allenact.utils.system import get_logger
-# from training import cfg
-# from training.tasks.object_nav import ObjectNavTaskSampler
-# from training.utils.types import RewardConfig, TaskSamplerArgs
-from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID
-
-
-class ProcTHORObjectNavBaseConfig(BringObjectThorBaseConfig):
-    """The base config for ProcTHOR ObjectNav experiments."""
-
-    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
-    TASK_TYPE = ObjectNavTask
-
-    TRAIN_DEVICES = (
-        tuple(range(torch.cuda.device_count()))
-        if torch.cuda.is_available()
-        else (torch.device("cpu"),)
-    )
-    VAL_DEVICES = (
-        (torch.cuda.device_count() - 1,)
-        if torch.cuda.is_available()
-        else (torch.device("cpu"),)
-    )
-    TEST_DEVICES = (
-        tuple(range(torch.cuda.device_count()))
-        if torch.cuda.is_available()
-        else (torch.device("cpu"),)
-    )
-
-    # TEST_ON_VALIDATION: bool = cfg.evaluation.test_on_validation
-
-    # AGENT_MODE = cfg.agent.agent_mode
-    # CAMERA_WIDTH = cfg.agent.camera_width
-    # CAMERA_HEIGHT = cfg.agent.camera_height
-    # STEP_SIZE = cfg.agent.step_size
-    # VISIBILITY_DISTANCE = cfg.agent.visibility_distance
-    # FIELD_OF_VIEW = cfg.agent.field_of_view
-    # ROTATE_STEP_DEGREES = cfg.agent.rotate_step_degrees
-
-    # HOUSE_DATASET = datasets.load_dataset(
-    #     "allenai/houses", use_auth_token=True, ignore_verifications=True
-    # )
-
-    SENSORS: Sequence[Sensor] = []
-
-    DISTANCE_TYPE = "l2"  # "geo"  # Can be "geo" or "l2"
-
-    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[
-        int
-    ] = 20 # default config/main.yaml
-    RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = (
-        -1
-    )  # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None`
-    RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # TODO apparently this won't work with 100 (why?)
-    
-    @final
-    def training_pipeline(self, **kwargs):
-        log_interval_small = (
-            self.NUM_PROCESSES * 32 * 10
-            if torch.cuda.is_available
-            else 1
-        )
-        log_interval_medium = (
-            self.NUM_PROCESSES * 64 * 5
-            if torch.cuda.is_available
-            else 1
-        )
-        log_interval_large = (
-            self.NUM_PROCESSES * 128 * 5
-            if torch.cuda.is_available
-            else 1
-        )
-
-        batch_steps_0 = int(10e6)
-        batch_steps_1 = int(10e6)
-        batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0
-
-        return TrainingPipeline(
-            save_interval=5_000_000,
-            metric_accumulate_interval=10_000,
-            optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)),
-            num_mini_batch=1,
-            update_repeats=4,
-            max_grad_norm=0.5,
-            num_steps=128,
-            named_losses={"ppo_loss": PPO(**PPOConfig)},
-            gamma=0.99,
-            use_gae=True,
-            gae_lambda=0.95,
-            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
-            pipeline_stages=[
-                PipelineStage(
-                    loss_names=["ppo_loss"],
-                    max_stage_steps=batch_steps_0,
-                    training_settings=TrainingSettings(
-                        num_steps=32, metric_accumulate_interval=log_interval_small
-                    ),
-                ),
-                PipelineStage(
-                    loss_names=["ppo_loss"],
-                    max_stage_steps=batch_steps_1,
-                    training_settings=TrainingSettings(
-                        num_steps=64, metric_accumulate_interval=log_interval_medium
-                    ),
-                ),
-                PipelineStage(
-                    loss_names=["ppo_loss"],
-                    max_stage_steps=batch_steps_2,
-                    training_settings=TrainingSettings(
-                        num_steps=128, metric_accumulate_interval=log_interval_large
-                    ),
-                ),
-            ],
-        )
-
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
deleted file mode 100644
index 5d3ea82..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
+++ /dev/null
@@ -1,159 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-from typing_extensions import final
-
-import gym
-import numpy as np
-from torch import nn
-import yaml
-import copy
-
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask, ObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.experiment_utils import Builder
-from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic
-from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic
-from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
-from ithor_arm.ithor_arm_viz import TestMetricLogger
-
-
-
-class ProcTHORObjectNavClipResnet50RGBOnly(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
-        OBJECT_TYPES=yaml.safe_load(f)
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    SENSORS = [
-        RGBSensorThor(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 50
-
-    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
-    TASK_TYPE = ObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
-    VISUALIZE = False
-
-
-    NUM_PROCESSES = 30
-
-    TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(10)] # 9999 is all of train
-    # if platform.system() == "Darwin":
-    #     TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)]
-
-    TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)]
-    random.shuffle(TRAIN_SCENES)
-
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        # self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS)
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-        self.ENV_ARGS['allow_flipping'] = True
-
-
-    @classmethod
-    @final
-    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
-        preprocessors = []
-        rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None)
-
-        if rgb_sensor is not None:
-            preprocessors.append(
-                ClipResNetPreprocessor(
-                    rgb_input_uuid=rgb_sensor.uuid,
-                    clip_model_type="RN50",
-                    pool=False,
-                    output_uuid="rgb_clip_resnet",
-                    visualize=cls.VISUALIZE
-                )
-            )
-
-        return preprocessors
-
-    # @classmethod
-    # @final
-    # def create_model(cls, **kwargs) -> nn.Module:
-    #     goal_sensor_uuid = next(
-    #         (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-    #         None,
-    #     )
-    #     resnet_preprocessor_uuids = ["rgb_clip_resnet"]
-
-    #     return ResnetTensorNavNCameraActorCritic(
-    #         action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-    #         observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-    #         goal_sensor_uuid=goal_sensor_uuid,
-    #         resnet_preprocessor_uuids=resnet_preprocessor_uuids,
-    #         hidden_size=512,
-    #         goal_dims=32,
-    #         add_prev_actions=True,
-    #     )
-    @classmethod
-    def create_model(cls, **kwargs) -> nn.Module:
-        has_rgb = any(isinstance(s, RGBSensorThor) for s in cls.SENSORS)
-        has_depth = False #any(isinstance(s, DepthSensor) for s in cls.SENSORS)
-
-        goal_sensor_uuid = next(
-            (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-            None,
-        )
-
-        return ResnetTensorNavActorCritic(
-            action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            goal_sensor_uuid=goal_sensor_uuid,
-            rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None,
-            depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None,
-            hidden_size=512,
-            goal_dims=32,
-            add_prev_actions=True #cfg.model.add_prev_actions_embedding,
-        )
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__    
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py
deleted file mode 100644
index 9a0c637..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-
-import gym
-import numpy as np
-from torch import nn
-import yaml
-
-# from ithor_arm.bring_object_sensors import RGBSensorThorNoNan
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.models.objnav_model_rgb_only import ObjNavOnlyRGBModel
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import ProcTHORObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID
-# from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-# from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-# from allenact.base_abstractions.preprocessor import Preprocessor
-# from allenact.utils.experiment_utils import Builder
-# from allenact_plugins.navigation_plugin.objectnav.models import (
-#     ResnetTensorNavActorCritic,
-# )
-
-
-
-class ProcTHORObjectNavRGBOnly(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
-        OBJECT_TYPES=yaml.safe_load(f)
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    SENSORS = [
-        RGBSensorThor(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 10
-
-    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
-    TASK_TYPE = ProcTHORObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-
-    NUM_PROCESSES = 56
-
-    TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train
-    if platform.system() == "Darwin":
-        TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)]
-
-    TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)]
-
-
-    # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list]))
-
-    random.shuffle(TRAIN_SCENES)
-
-    if platform.system() == "Darwin":
-        MAX_STEPS = 10
-
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-
-    @classmethod
-    def create_model(cls, **kwargs) -> nn.Module:
-        return ObjNavOnlyRGBModel(
-            action_space=gym.spaces.Discrete(
-                len(cls.TASK_TYPE.class_action_names())
-            ),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            hidden_size=512,
-            visualize=cls.VISUALIZE
-        )
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py
deleted file mode 100644
index 46f4626..0000000
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import platform
-import random
-from typing import Sequence, Union
-from typing_extensions import final
-
-import gym
-import numpy as np
-from torch import nn
-import copy
-
-from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
-
-
-from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
-from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask
-from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS
-from manipulathor_utils.debugger_util import ForkedPdb
-
-from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.experiment_utils import Builder
-from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic
-
-from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL
-from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
-from ithor_arm.ithor_arm_viz import BringObjImageVisualizer, TestMetricLogger
-
-
-class RoboTHORObjectNavClipResnet50RGBOnly(
-    ProcTHORObjectNavBaseConfig
-):
-    """An Object Navigation experiment configuration in iThor with RGB
-    input."""
-
-
-    TRAIN_SCENES = ROBOTHOR_TRAIN
-    TEST_SCENES = ROBOTHOR_VAL
-    OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor']))
-    OBJECT_TYPES.sort()
-    OBJECT_TYPES = ['AlarmClock', 'Apple', 'BaseballBat', 'BasketBall', 'Bed', 'Bowl', 'Chair', 
-                    'GarbageCan', 'HousePlant', 'Laptop', 'Mug', 'Sofa', 'SprayBottle', 'Television', 
-                    'Toilet', 'Vase'] # habitat challenge object - just get it to eval bc trained with goal sensor.
-    # ForkedPdb().set_trace()
-
-    random.shuffle(TRAIN_SCENES)
-    random.shuffle(TEST_SCENES)
-
-    NOISE_LEVEL = 0
-    distance_thr = 1.0 # match procthor config
-    mean = np.array([0.48145466, 0.4578275, 0.40821073])
-    stdev = np.array([0.26862954, 0.26130258, 0.27577711])
-    SENSORS = [
-        RGBSensorThor(
-            height=224,
-            width=224,
-            use_resnet_normalization=True,
-            mean=mean,
-            stdev=stdev,
-            uuid="rgb_lowres",
-        ),
-        GoalObjectTypeThorSensor(
-            object_types=OBJECT_TYPES,
-        ),
-    ]
-
-    MAX_STEPS = 500
-    if platform.system() == "Darwin":
-        MAX_STEPS = 50
-
-    TASK_SAMPLER = AllRoomsObjectNavTaskSampler
-    TASK_TYPE = StretchObjectNavTask
-    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
-
-    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
-
-    NUM_PROCESSES = 56
-
-    # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train
-    # if platform.system() == "Darwin":
-    #     TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)]
-
-    # TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)]
-    # random.shuffle(TRAIN_SCENES)
-
-    def __init__(self):
-        super().__init__() 
-        self.REWARD_CONFIG['goal_success_reward'] = 10.0 
-        self.REWARD_CONFIG['step_penalty'] = -0.01 
-        self.REWARD_CONFIG['failed_stop_reward'] = 0.0 
-        self.REWARD_CONFIG['reached_horizon_reward'] = 0.0
-        self.REWARD_CONFIG['shaping_weight'] = 1.0
-
-        self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS)
-        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
-        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
-        # self.ENV_ARGS['scene'] = 'Procedural'
-        self.ENV_ARGS['renderInstanceSegmentation'] = 'False'
-        self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID
-        self.ENV_ARGS['allow_flipping'] = True
-
-
-    @classmethod
-    @final
-    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
-        preprocessors = []
-        rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None)
-
-        if rgb_sensor is not None:
-            preprocessors.append(
-                ClipResNetPreprocessor(
-                    rgb_input_uuid=rgb_sensor.uuid,
-                    clip_model_type="RN50",
-                    pool=False,
-                    output_uuid="rgb_clip_resnet",
-                    visualize=cls.VISUALIZE
-                )
-            )
-
-        return preprocessors
-
-
-    @classmethod
-    @final
-    def create_model(cls, **kwargs) -> nn.Module:
-        has_rgb = any(isinstance(s, RGBSensorThor) for s in cls.SENSORS)
-        # has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS)
-        has_depth=False
-
-        goal_sensor_uuid = next(
-            (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)),
-            None,
-        )
-
-        return ResnetTensorNavActorCritic(
-            action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())),
-            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
-            goal_sensor_uuid=goal_sensor_uuid,
-            rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None,
-            depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None,
-            hidden_size=512,
-            goal_dims=32,
-            add_prev_actions=True,
-        )
-
-    @classmethod
-    def tag(cls):
-        return cls.__name__
-
-    
diff --git a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py b/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py
deleted file mode 100644
index d7bcfc2..0000000
--- a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py
+++ /dev/null
@@ -1,183 +0,0 @@
-from typing import Any, Dict, List, Optional, cast
-from typing_extensions import Literal
-
-import clip
-import gym
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from allenact.base_abstractions.preprocessor import Preprocessor
-from allenact.utils.misc_utils import prepare_locals_for_super
-from manipulathor_utils.debugger_util import ForkedPdb
-from utils.hacky_viz_utils import hacky_visualization
-from datetime import datetime
-
-
-class ClipResNetEmbedder(nn.Module):
-    def __init__(self, resnet, pool=True):
-        super().__init__()
-        self.model = resnet
-        self.pool = pool
-        self.eval()
-
-    def forward(self, x):
-        m = self.model.visual
-        with torch.no_grad():
-
-            def stem(x):
-                for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]:
-                    x = m.relu(bn(conv(x)))
-                x = m.avgpool(x)
-                return x
-
-            x = x.type(m.conv1.weight.dtype)
-            x = stem(x)
-            x = m.layer1(x)
-            x = m.layer2(x)
-            x = m.layer3(x)
-            x = m.layer4(x)
-            if self.pool:
-                x = F.adaptive_avg_pool2d(x, (1, 1))
-                x = torch.flatten(x, 1)
-            return x
-
-class ClipResNetPreprocessor(Preprocessor):
-    """Preprocess RGB or depth image using a ResNet model with CLIP model
-    weights."""
-
-    def __init__(
-        self,
-        rgb_input_uuid: str,
-        clip_model_type: Literal["RN50", "RN50x16"],
-        pool: bool,
-        device: Optional[torch.device] = None,
-        device_ids: Optional[List[torch.device]] = None,
-        visualize=False, # TODO this is such a hack job
-        **kwargs: Any,
-    ):
-        assert clip_model_type in clip.available_models()
-
-        if clip_model_type == "RN50":
-            output_shape = (2048, 7, 7)
-        elif clip_model_type == "RN50x16":
-            output_shape = (3072, 7, 7)
-        else:
-            raise NotImplementedError(
-                f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'"
-            )
-
-        if pool:
-            output_shape = output_shape[:1]
-
-        self.clip_model_type = clip_model_type
-
-        self.pool = pool
-
-        self.device = torch.device("cpu") if device is None else device
-        self.device_ids = device_ids or cast(
-            List[torch.device], list(range(torch.cuda.device_count()))
-        )
-        self._resnet: Optional[ClipResNetEmbedder] = None
-
-        low = -np.inf
-        high = np.inf
-        shape = output_shape
-
-        input_uuids = [rgb_input_uuid]
-        assert (
-            len(input_uuids) == 1
-        ), "resnet preprocessor can only consume one observation type"
-
-        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)
-        self.starting_time = datetime.now().strftime("{}_%m_%d_%Y_%H_%M_%S_%f".format(self.__class__.__name__))
-        self.visualize = visualize
-
-        super().__init__(**prepare_locals_for_super(locals()))
-
-    @property
-    def resnet(self) -> ClipResNetEmbedder:
-        import clip
-
-        if self._resnet is None:
-            self._resnet = ClipResNetEmbedder(
-                clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool
-            ).to(self.device)
-        return self._resnet
-
-    def to(self, device: torch.device) -> "ClipResNetPreprocessor":
-        self._resnet = self.resnet.to(device)
-        self.device = device
-        return self
-
-    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
-        x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2)  # bhwc -> bchw
-        # if self.visualize:
-        #     obs['rgb_lowres'] = x.permute(0,2,3,1).unsqueeze(0)
-        #     hacky_visualization(obs, base_directory_to_right_images=self.starting_time)
-        # If the input is depth, repeat it across all 3 channels
-        if x.shape[1] == 1:
-            x = x.repeat(1, 3, 1, 1)
-        x = self.resnet(x).float()
-        return x
-
-
-class ClipTextPreprocessor(Preprocessor):
-    def __init__(
-        self,
-        goal_sensor_uuid: str,
-        object_types: List[str],
-        device: Optional[torch.device] = None,
-        device_ids: Optional[List[torch.device]] = None,
-        **kwargs: Any,
-    ):
-        try:
-            import clip
-
-            self.clip = clip
-        except ImportError as _:
-            raise ImportError(
-                "Cannot `import clip` when instatiating `CLIPResNetPreprocessor`."
-                " Please install clip from the openai/CLIP git repository:"
-                "\n`pip install git+https://github.com/openai/CLIP.git@3b473b0e682c091a9e53623eebc1ca1657385717`"
-            )
-
-        output_shape = (1024,)
-
-        self.object_types = object_types
-
-        self.device = torch.device("cpu") if device is None else device
-        self.device_ids = device_ids or cast(
-            List[torch.device], list(range(torch.cuda.device_count()))
-        )
-
-        low = -np.inf
-        high = np.inf
-        shape = output_shape
-
-        observation_space = gym.spaces.Box(low=low, high=high, shape=shape)
-
-        input_uuids = [goal_sensor_uuid]
-
-        super().__init__(**prepare_locals_for_super(locals()))
-
-    @property
-    def text_encoder(self):
-        if self._clip_model is None:
-            self._clip_model = self.clip.load("RN50", device=self.device)[0]
-            self._clip_model.eval()
-        return self._clip_model.encode_text
-
-    def to(self, device: torch.device):
-        self.device = device
-        self._clip_model = None
-        return self
-
-    def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
-        object_inds = obs[self.input_uuids[0]]
-        object_types = [self.object_types[ind] for ind in object_inds]
-        x = self.clip.tokenize([f"navigate to the {obj}" for obj in object_types]).to(
-            self.device
-        )
-        with torch.no_grad():
-            return self.text_encoder(x).float()
diff --git a/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py b/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py
deleted file mode 100644
index 91ef13c..0000000
--- a/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py
+++ /dev/null
@@ -1,199 +0,0 @@
-"""Baseline models for use in the object navigation task.
-
-Object navigation is currently available as a Task in AI2-THOR and
-Facebook's Habitat.
-"""
-import platform
-from datetime import datetime
-from typing import Tuple, Optional
-
-import gym
-import torch
-from allenact.algorithms.onpolicy_sync.policy import (
-    ActorCriticModel,
-    LinearCriticHead,
-    DistributionType,
-    Memory,
-    ObservationType,
-)
-from allenact.base_abstractions.distributions import CategoricalDistr
-from allenact.base_abstractions.misc import ActorCriticOutput
-from allenact.embodiedai.models.basic_models import RNNStateEncoder
-from allenact.utils.model_utils import make_cnn, compute_cnn_output
-from gym.spaces.dict import Dict as SpaceDict
-from torch import nn
-
-from manipulathor_utils.debugger_util import ForkedPdb
-from utils.model_utils import LinearActorHeadNoCategory
-from utils.hacky_viz_utils import hacky_visualization
-
-
-class ObjNavOnlyRGBModel(ActorCriticModel[CategoricalDistr]):
-    """Baseline recurrent actor critic model for preddistancenav task.
-
-    # Attributes
-    action_space : The space of actions available to the agent. Currently only discrete
-        actions are allowed (so this space will always be of type `gym.spaces.Discrete`).
-    observation_space : The observation space expected by the agent. This observation space
-        should include (optionally) 'rgb' images and 'depth' images.
-    hidden_size : The hidden size of the GRU RNN.
-    object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal
-        object type.
-    """
-
-    def __init__(
-        self,
-        action_space: gym.spaces.Discrete,
-        observation_space: SpaceDict,
-        hidden_size=512,
-        obj_state_embedding_size=512,
-        trainable_masked_hidden_state: bool = False,
-        num_rnn_layers=1,
-        rnn_type="GRU",
-        teacher_forcing=1,
-        visualize=False,
-    ):
-        """Initializer.
-
-        See class documentation for parameter definitions.
-        """
-        super().__init__(action_space=action_space, observation_space=observation_space)
-        self.visualize = visualize
-
-        self._hidden_size = hidden_size
-        self.object_type_embedding_size = obj_state_embedding_size
-
-        # sensor_names = self.observation_space.spaces.keys()
-        network_args = {'input_channels': 3, 'layer_channels': [32, 64, 32], 'kernel_sizes': [(8, 8), (4, 4), (3, 3)], 'strides': [(4, 4), (2, 2), (1, 1)], 'paddings': [(0, 0), (0, 0), (0, 0)], 'dilations': [(1, 1), (1, 1), (1, 1)], 'output_height': 24, 'output_width': 24, 'output_channels': 512, 'flatten': True, 'output_relu': True}
-        self.full_visual_encoder = make_cnn(**network_args)
-
-        self.state_encoder = RNNStateEncoder(
-            512,
-            self._hidden_size,
-            trainable_masked_hidden_state=trainable_masked_hidden_state,
-            num_layers=num_rnn_layers,
-            rnn_type=rnn_type,
-        )
-
-        self.actor_pickup = LinearActorHeadNoCategory(self._hidden_size, action_space.n)
-        self.critic_pickup = LinearCriticHead(self._hidden_size)
-
-        self.train()
-        # self.detection_model.eval()
-
-        self.starting_time = datetime.now().strftime("{}_%m_%d_%Y_%H_%M_%S_%f".format(self.__class__.__name__))
-
-
-
-    @property
-    def recurrent_hidden_state_size(self) -> int:
-        """The recurrent hidden state size of the model."""
-        return self._hidden_size
-
-    @property
-    def num_recurrent_layers(self) -> int:
-        """Number of recurrent hidden layers."""
-        return self.state_encoder.num_recurrent_layers
-
-    def _recurrent_memory_specification(self):
-        return dict(
-            rnn=(
-                (
-                    ("layer", self.num_recurrent_layers),
-                    ("sampler", None),
-                    ("hidden", self.recurrent_hidden_state_size),
-                ),
-                torch.float32,
-            )
-        )
-
-    def forward(  # type:ignore
-        self,
-        observations: ObservationType,
-        memory: Memory,
-        prev_actions: torch.Tensor,
-        masks: torch.FloatTensor,
-    ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
-        """Processes input batched observations to produce new actor and critic
-        values. Processes input batched observations (along with prior hidden
-        states, previous actions, and masks denoting which recurrent hidden
-        states should be masked) and returns an `ActorCriticOutput` object
-        containing the model's policy (distribution over actions) and
-        evaluation of the current state (value).
-
-        # Parameters
-        observations : Batched input observations.
-        memory : `Memory` containing the hidden states from initial timepoints.
-        prev_actions : Tensor of previous actions taken.
-        masks : Masks applied to hidden states. See `RNNStateEncoder`.
-        # Returns
-        Tuple of the `ActorCriticOutput` and recurrent hidden state.
-        """
-
-        #we really need to switch to resnet now that visual features are actually important
-
-        # pickup_bool = observations["pickedup_object"]
-        # after_pickup = pickup_bool == 1
-
-        visual_observation = observations['rgb_lowres']
-
-        visual_observation_encoding = compute_cnn_output(self.full_visual_encoder, visual_observation)
-
-
-        # arm_distance_to_obj_source = observations['arm_point_nav_source']
-        # arm_distance_to_obj_destination = observations['arm_point_nav_destination']
-
-        # arm_distance_to_obj_source_embedding = self.pointnav_embedding(arm_distance_to_obj_source)
-        # arm_distance_to_obj_destination_embedding = self.pointnav_embedding(arm_distance_to_obj_destination)
-        # pointnav_embedding = arm_distance_to_obj_source_embedding
-        # pointnav_embedding[after_pickup] = arm_distance_to_obj_destination_embedding[after_pickup]
-
-        #TODO remove as soon as the bug is resolved
-        # assert not torch.any(torch.isinf(pointnav_embedding) + torch.isnan(pointnav_embedding)), 'pointnav_embedding is nan'
-        assert not torch.any(torch.isinf(visual_observation) + torch.isnan(visual_observation)), 'visual_observation is nan'
-
-        # arm_distance_to_obj = arm_distance_to_obj_source
-        # arm_distance_to_obj[after_pickup] = arm_distance_to_obj_destination[after_pickup]
-        # pointnav_embedding = self.pointnav_embedding(arm_distance_to_obj)
-
-        visual_observation_encoding = torch.cat([visual_observation_encoding], dim=-1)
-
-
-        x_out, rnn_hidden_states = self.state_encoder(
-            visual_observation_encoding, memory.tensor("rnn"), masks
-        )
-
-
-        # I think we need two model one for pick up and one for drop off
-
-        actor_out_pickup = self.actor_pickup(x_out)
-        critic_out_pickup = self.critic_pickup(x_out)
-
-
-        actor_out_final = actor_out_pickup
-        critic_out_final = critic_out_pickup
-
-        actor_out = CategoricalDistr(logits=actor_out_final)
-
-        actor_critic_output = ActorCriticOutput(
-            distributions=actor_out, values=critic_out_final, extras={}
-        )
-
-        memory = memory.set_tensor("rnn", rnn_hidden_states)
-
-        #TODO remove as soon as bug is resolved
-        actor_is_nan = torch.isinf(actor_out_final) + torch.isnan(actor_out_final)
-        if torch.any(actor_is_nan):
-            print('actor is nan', actor_is_nan.sum())
-            print('scene number', observations['scene_number'])
-
-        # TODO really bad design
-        if self.visualize:
-            hacky_visualization(observations, base_directory_to_right_images=self.starting_time)
-
-
-        return (
-            actor_critic_output,
-            memory,
-        )
-
diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py
new file mode 100644
index 0000000..6170c31
--- /dev/null
+++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py
@@ -0,0 +1,158 @@
+import platform
+import random
+
+import gym
+import torch
+from torch import nn
+
+
+from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_ithor_base import \
+    StretchBringObjectiThorBaseConfig
+from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_ddppo import \
+    StretchBringObjectMixInPPOConfig
+from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_simplegru import \
+    StretchBringObjectMixInSimpleGRUConfig
+from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_pointnav_emul_model import StretchPointNavEmulModel
+from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_real_pointnav_model import StretchRealPointNavModel
+from manipulathor_utils.debugger_util import ForkedPdb
+from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, KITCHEN_TRAIN, LIVING_ROOM_TRAIN, \
+    BEDROOM_TRAIN, ROBOTHOR_TRAIN, ROBOTHOR_VAL, BATHROOM_TEST, BATHROOM_TRAIN, BEDROOM_TEST, LIVING_ROOM_TEST, \
+    KITCHEN_TEST
+from utils.stretch_utils.stretch_bring_object_task_samplers import StretchDiverseBringObjectTaskSampler
+from utils.stretch_utils.stretch_bring_object_tasks import StretchExploreWiseRewardTask, \
+    StretchExploreWiseRewardTaskOnlyPickUp, StretchObjectNavTask
+from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS, STRETCH_MANIPULATHOR_COMMIT_ID, INTEL_CAMERA_WIDTH
+from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchIntel, DepthSensorStretchIntel, \
+    RGBSensorStretchKinect, DepthSensorStretchKinect, AgentBodyPointNavSensor, AgentBodyPointNavEmulSensor, \
+    RGBSensorStretchKinectZero, \
+    DepthSensorStretchKinectZero, IntelRawDepthSensor, ArmPointNavEmulSensor, KinectRawDepthSensor, \
+    KinectNoisyObjectMask, IntelNoisyObjectMask, StretchPickedUpObjSensor
+from utils.stretch_utils.stretch_visualizer import StretchBringObjImageVisualizer
+
+
+class PointNavEmulStretchAllRooms(
+    StretchBringObjectiThorBaseConfig,
+    StretchBringObjectMixInPPOConfig,
+    StretchBringObjectMixInSimpleGRUConfig,
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+    desired_screen_size = INTEL_CAMERA_WIDTH
+    NOISE_LEVEL = 0
+    distance_thr = 1.5 # is this a good number?
+
+    source_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True)
+    destination_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True)
+    depth_sensor_intel = IntelRawDepthSensor()
+
+    source_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True)
+    destination_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True)
+    depth_sensor_kinect = KinectRawDepthSensor()
+
+
+    SENSORS = [
+        RGBSensorStretchIntel(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_resnet_normalization=True,
+            uuid="rgb_lowres",
+        ),
+        DepthSensorStretchIntel(height=desired_screen_size,width=desired_screen_size,use_normalization=True,uuid="depth_lowres",),
+        RGBSensorStretchKinect(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_resnet_normalization=True,
+            uuid="rgb_lowres_arm",
+        ),
+        DepthSensorStretchKinect(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_normalization=True,
+            uuid="depth_lowres_arm",
+        ),
+        StretchPickedUpObjSensor(),
+        AgentBodyPointNavEmulSensor(type='source', mask_sensor=source_mask_sensor_intel, depth_sensor=depth_sensor_intel),
+        AgentBodyPointNavEmulSensor(type='destination', mask_sensor=destination_mask_sensor_intel, depth_sensor=depth_sensor_intel),
+        ArmPointNavEmulSensor(type='source', mask_sensor=source_mask_sensor_kinect, depth_sensor=depth_sensor_kinect),
+        ArmPointNavEmulSensor(type='destination', mask_sensor=destination_mask_sensor_kinect, depth_sensor=depth_sensor_kinect),
+        source_mask_sensor_intel,
+        destination_mask_sensor_intel,
+        source_mask_sensor_kinect,
+        destination_mask_sensor_kinect,
+
+    ]
+
+    MAX_STEPS = 200
+
+    TASK_SAMPLER = StretchDiverseBringObjectTaskSampler
+    TASK_TYPE = StretchExploreWiseRewardTask
+
+    NUM_PROCESSES = 20
+
+
+    TRAIN_SCENES = KITCHEN_TRAIN + LIVING_ROOM_TRAIN + BEDROOM_TRAIN + BATHROOM_TRAIN
+    TEST_SCENES = KITCHEN_TEST + LIVING_ROOM_TEST + BEDROOM_TEST + BATHROOM_TEST
+    OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ != 'robothor']))
+
+
+    random.shuffle(TRAIN_SCENES)
+
+    def __init__(self):
+        super().__init__()
+        self.REWARD_CONFIG['exploration_reward'] = 0.1
+        self.REWARD_CONFIG['object_found'] = 1
+        # self.ENV_ARGS = STRETCH_ENV_ARGS
+        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
+        self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID
+        self.ENV_ARGS['renderInstanceSegmentation'] = True
+        
+        self.ENV_ARGS['motion_noise_type'] = 'habitat'
+        self.ENV_ARGS['motion_noise_args'] = dict()
+        self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1]
+        self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01]
+        self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25
+
+    def test_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, AgentBodyPointNavEmulSensor):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, ArmPointNavEmulSensor):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+
+        return sampler_args
+
+    def train_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavEmulStretchAllRooms, self).train_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, AgentBodyPointNavEmulSensor):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, ArmPointNavEmulSensor):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+        return sampler_args
+
+
+    @classmethod
+    def create_model(cls, **kwargs) -> nn.Module:
+        return StretchPointNavEmulModel(
+            action_space=gym.spaces.Discrete(
+                len(cls.TASK_TYPE.class_action_names())
+            ),
+            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
+            hidden_size=512,
+            visualize=cls.VISUALIZE
+        )
+
+    @classmethod
+    def tag(cls):
+        return cls.__name__
diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py
new file mode 100644
index 0000000..8f9ecec
--- /dev/null
+++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py
@@ -0,0 +1,159 @@
+import platform
+import random
+
+import gym
+import torch
+from torch import nn
+
+
+from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_ithor_base import \
+    StretchBringObjectiThorBaseConfig
+from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_ddppo import \
+    StretchBringObjectMixInPPOConfig
+from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_simplegru import \
+    StretchBringObjectMixInSimpleGRUConfig
+from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_pointnav_emul_model import StretchPointNavEmulModel
+from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_real_pointnav_model import StretchRealPointNavModel
+from manipulathor_utils.debugger_util import ForkedPdb
+from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, KITCHEN_TRAIN, LIVING_ROOM_TRAIN, \
+    BEDROOM_TRAIN, ROBOTHOR_TRAIN, ROBOTHOR_VAL, BATHROOM_TEST, BATHROOM_TRAIN, BEDROOM_TEST, LIVING_ROOM_TEST, \
+    KITCHEN_TEST
+from utils.stretch_utils.stretch_bring_object_task_samplers import StretchDiverseBringObjectTaskSampler
+from utils.stretch_utils.stretch_bring_object_tasks import StretchExploreWiseRewardTask, \
+    StretchExploreWiseRewardTaskOnlyPickUp, StretchObjectNavTask
+from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS, STRETCH_MANIPULATHOR_COMMIT_ID, INTEL_CAMERA_WIDTH
+from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchIntel, DepthSensorStretchIntel, \
+    RGBSensorStretchKinect, DepthSensorStretchKinect, AgentBodyPointNavSensor, AgentBodyPointNavEmulSensor, \
+    RGBSensorStretchKinectZero, AgentBodyPointNavEmulSensorDeadReckoning, ArmPointNavEmulSensorDeadReckoning, \
+    DepthSensorStretchKinectZero, IntelRawDepthSensor, ArmPointNavEmulSensor, KinectRawDepthSensor, \
+    KinectNoisyObjectMask, IntelNoisyObjectMask, StretchPickedUpObjSensor
+from utils.stretch_utils.stretch_visualizer import StretchBringObjImageVisualizer
+
+
+class PointNavEmulStretchAllRooms(
+    StretchBringObjectiThorBaseConfig,
+    StretchBringObjectMixInPPOConfig,
+    StretchBringObjectMixInSimpleGRUConfig,
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+    desired_screen_size = INTEL_CAMERA_WIDTH
+    NOISE_LEVEL = 0
+    distance_thr = 1.5 # is this a good number?
+
+    source_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True)
+    destination_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True)
+    depth_sensor_intel = IntelRawDepthSensor()
+
+    source_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True)
+    destination_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True)
+    depth_sensor_kinect = KinectRawDepthSensor()
+
+
+    SENSORS = [
+        RGBSensorStretchIntel(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_resnet_normalization=True,
+            uuid="rgb_lowres",
+        ),
+        DepthSensorStretchIntel(height=desired_screen_size,width=desired_screen_size,use_normalization=True,uuid="depth_lowres",),
+        RGBSensorStretchKinect(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_resnet_normalization=True,
+            uuid="rgb_lowres_arm",
+        ),
+        DepthSensorStretchKinect(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_normalization=True,
+            uuid="depth_lowres_arm",
+        ),
+        StretchPickedUpObjSensor(),
+        AgentBodyPointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor_intel, depth_sensor=depth_sensor_intel),
+        AgentBodyPointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor_intel, depth_sensor=depth_sensor_intel),
+        ArmPointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor_kinect, depth_sensor=depth_sensor_kinect),
+        ArmPointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor_kinect, depth_sensor=depth_sensor_kinect),
+        source_mask_sensor_intel,
+        destination_mask_sensor_intel,
+        source_mask_sensor_kinect,
+        destination_mask_sensor_kinect,
+
+    ]
+
+    MAX_STEPS = 200
+
+    TASK_SAMPLER = StretchDiverseBringObjectTaskSampler
+    TASK_TYPE = StretchExploreWiseRewardTask
+
+    NUM_PROCESSES = 20
+
+
+    TRAIN_SCENES = KITCHEN_TRAIN + LIVING_ROOM_TRAIN + BEDROOM_TRAIN + BATHROOM_TRAIN
+    TEST_SCENES = KITCHEN_TEST + LIVING_ROOM_TEST + BEDROOM_TEST + BATHROOM_TEST
+    OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ != 'robothor']))
+
+
+    random.shuffle(TRAIN_SCENES)
+
+    def __init__(self):
+        super().__init__()
+        self.REWARD_CONFIG['exploration_reward'] = 0.1
+        self.REWARD_CONFIG['object_found'] = 1
+        self.REWARD_CONFIG['failed_action_penalty'] = -0.06 # double the current - stretch really likes to back up into things
+        # self.ENV_ARGS = STRETCH_ENV_ARGS
+        self.ENV_ARGS['visibilityDistance'] = self.distance_thr
+        self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID
+        self.ENV_ARGS['renderInstanceSegmentation'] = True
+        
+        self.ENV_ARGS['motion_noise_type'] = 'habitat'
+        self.ENV_ARGS['motion_noise_args'] = dict()
+        self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1]
+        self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01]
+        self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25
+
+    def test_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, AgentBodyPointNavEmulSensorDeadReckoning):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, ArmPointNavEmulSensorDeadReckoning):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+
+        return sampler_args
+
+    def train_task_sampler_args(self, **kwargs):
+        sampler_args = super(PointNavEmulStretchAllRooms, self).train_task_sampler_args(**kwargs)
+        if platform.system() == "Darwin":
+            pass
+        else:
+
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, AgentBodyPointNavEmulSensorDeadReckoning):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+            for sensor_type in sampler_args['sensors']:
+                if isinstance(sensor_type, ArmPointNavEmulSensorDeadReckoning):
+                    sensor_type.device = torch.device(kwargs["devices"][0])
+        return sampler_args
+
+
+    @classmethod
+    def create_model(cls, **kwargs) -> nn.Module:
+        return StretchPointNavEmulModel(
+            action_space=gym.spaces.Discrete(
+                len(cls.TASK_TYPE.class_action_names())
+            ),
+            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
+            hidden_size=512,
+            visualize=cls.VISUALIZE
+        )
+
+    @classmethod
+    def tag(cls):
+        return cls.__name__
diff --git a/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py b/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py
index 47290bf..0e63d26 100644
--- a/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py
+++ b/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py
@@ -182,6 +182,7 @@ def forward(  # type:ignore
         visual_observation_arm = torch.cat([observations['depth_lowres_arm'], observations['rgb_lowres_arm'], arm_mask], dim=-1).float()
         visual_observation_arm = check_for_nan_visual_observations(visual_observation_arm)
         visual_observation_encoding_arm = compute_cnn_output(self.full_visual_encoder_arm, visual_observation_arm)
+        # ForkedPdb().set_trace()
 
 
         # arm_distance_to_obj_source = observations['point_nav_real_source'].copy()
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py
new file mode 100644
index 0000000..396f0a0
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py
@@ -0,0 +1,142 @@
+import platform
+from typing import Sequence, Union
+
+from torch import nn
+import torch
+import yaml
+
+from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
+from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel, RGBSensorStretchKinectBigFov
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+
+from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig
+from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler
+from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask
+from manipulathor_utils.debugger_util import ForkedPdb
+
+from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \
+    ClipResNetPreprocessNCameraGRUActorCriticMixin
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+from allenact.base_abstractions.preprocessor import Preprocessor
+from allenact.utils.experiment_utils import Builder
+from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
+from ithor_arm.ithor_arm_viz import TestMetricLogger
+
+
+
+class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV(
+    ObjectNavBaseConfig
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+
+    TRAIN_SCENES = [
+        "FloorPlan{}_physics".format(str(i))
+        for i in range(1, 20 + 1)
+    ]
+    VALID_SCENES = [
+        "FloorPlan{}_physics".format(str(i))
+        for i in range(21, 26)
+    ]
+    TEST_SCENES = [
+        "FloorPlan{}_physics".format(str(i))
+        for i in range(26, 31)
+    ]
+
+    ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES
+
+    WHICH_AGENT = 'stretch'
+
+    # OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS))
+    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
+        OBJECT_TYPES=yaml.safe_load(f)
+
+    SENSORS = [
+        RGBSensorThor(
+            height=ObjectNavBaseConfig.SCREEN_SIZE,
+            width=ObjectNavBaseConfig.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres",
+        ),
+        RGBSensorStretchKinectBigFov(
+            height=ObjectNavBaseConfig.SCREEN_SIZE,
+            width=ObjectNavBaseConfig.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres_arm",
+        ),
+        GoalObjectTypeThorSensor(
+            object_types=OBJECT_TYPES,
+        ),
+    ]
+
+    if platform.system() == "Darwin":
+        MAX_STEPS = 100
+        # SENSORS += [ #TODO FIX ORDER HERE
+        #     RGBSensorStretchKinectBigFov(
+        #     height=ObjectNavBaseConfig.SCREEN_SIZE,
+        #     width=ObjectNavBaseConfig.SCREEN_SIZE,
+        #     use_resnet_normalization=True,
+        #     mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+        #     stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+        #         uuid="rgb_lowres_arm_only_viz",
+        #     ),
+        #     RGBSensorThor(
+        #     height=ObjectNavBaseConfig.SCREEN_SIZE,
+        #     width=ObjectNavBaseConfig.SCREEN_SIZE,
+        #     use_resnet_normalization=True,
+        #     mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+        #     stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+        #         uuid="rgb_lowres_only_viz",
+        #     ),
+        # ]
+
+    TASK_SAMPLER = AllRoomsObjectNavTaskSampler
+    TASK_TYPE = StretchObjectNavTask
+    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
+    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
+
+    TEST_GPU_IDS = list(range(torch.cuda.device_count()))
+    NUMBER_OF_TEST_PROCESS = 8
+
+    NUM_PROCESSES = 40
+    CLIP_MODEL_TYPE = "RN50"
+
+
+    def __init__(self):
+        super().__init__() 
+        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
+        
+        self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin(
+            sensors=self.SENSORS,
+            clip_model_type=self.CLIP_MODEL_TYPE,
+            screen_size=self.SCREEN_SIZE,
+        )
+        self.REWARD_CONFIG['shaping_weight'] = 0.0
+        self.REWARD_CONFIG['exploration_reward'] = 0.05
+        self.REWARD_CONFIG['got_stuck_penalty'] = 0.0
+        self.REWARD_CONFIG['failed_action_penalty'] = -0.5
+        self.ENV_ARGS['renderInstanceSegmentation'] = True
+
+    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
+        return self.preprocessing_and_model.preprocessors()
+
+    
+    def create_model(self, **kwargs) -> nn.Module:
+        return self.preprocessing_and_model.create_model(
+            num_actions=len(self.TASK_TYPE.class_action_names()), **kwargs,
+            visualize=self.VISUALIZE
+        )
+
+
+    @classmethod
+    def tag(cls):
+        return cls.TASK_TYPE.__name__ + '-RGB-2Camera-iTHOR' + '-' +  cls.WHICH_AGENT
+
+    
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py
new file mode 100644
index 0000000..9c6ccfe
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py
@@ -0,0 +1,397 @@
+from abc import ABC
+import platform
+from math import ceil
+from typing import Optional, Sequence, Dict, Any, List, Union
+
+import gym
+import numpy as np
+import torch
+import torch.optim as optim
+
+from allenact.base_abstractions.experiment_config import ExperimentConfig
+from allenact.base_abstractions.task import TaskSampler
+import ai2thor.fifo_server
+
+from allenact.utils.experiment_utils import (
+    Builder,
+    PipelineStage,
+    TrainingPipeline,
+    TrainingSettings,
+    evenly_distribute_count_into_bins
+)
+from allenact.base_abstractions.experiment_config import MachineParams
+from allenact.algorithms.onpolicy_sync.losses import PPO
+from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig
+
+from allenact.base_abstractions.sensor import Sensor
+from allenact.base_abstractions.preprocessor import Preprocessor
+from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
+from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor
+
+from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
+from ithor_arm.ithor_arm_viz import TestMetricLogger
+
+from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler
+from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask
+from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS, UPDATED_PROCTHOR_COMMIT_ID
+
+LOCOBOT_ENV_ARGS = dict(
+    gridSize=0.25,
+    width=224,
+    height=224,
+    visibilityDistance=1.0,
+    agentMode="locobot",
+    fieldOfView=63.453048374758716,
+    rotateStepDegrees=30.0,
+    snapToGrid=False,
+    renderInstanceSegmentation=False,
+    renderDepthImage=False,
+    commit_id=UPDATED_PROCTHOR_COMMIT_ID
+)
+
+STRETCH_ENV_ARGS = dict(
+    gridSize=0.25,
+    width=224,
+    height=224,
+    visibilityDistance=3.0,
+    agentMode='stretch',
+    fieldOfView=69,
+    agentControllerType="mid-level",
+    server_class=ai2thor.fifo_server.FifoServer,
+    snapToGrid=False,
+    useMassThreshold=True,
+    massThreshold=10,
+    autoSimulation=False,
+    autoSyncTransforms=True,
+    renderInstanceSegmentation=False,
+    renderDepthImage=False,
+    commit_id=UPDATED_PROCTHOR_COMMIT_ID, #### TODO not sure if this works for stretch
+    horizon_init=15
+)
+
+
+class ObjectNavBaseConfig(ExperimentConfig, ABC):
+    """The base config for ProcTHOR ObjectNav experiments."""
+
+    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
+    SENSORS: Optional[Sequence[Sensor]] = None
+
+    # STEP_SIZE = 0.25
+    # ROTATION_DEGREES = 30.0
+    # VISIBILITY_DISTANCE = 1.0
+    # STOCHASTIC = False
+
+    VISUALIZE = False
+    if platform.system() == "Darwin":
+        VISUALIZE = True
+
+    TRAIN_DEVICES = (
+        tuple(range(torch.cuda.device_count()))
+        if torch.cuda.is_available()
+        else (torch.device("cpu"),)
+    )
+    VAL_DEVICES = (
+        (torch.cuda.device_count() - 1,)
+        if torch.cuda.is_available()
+        else (torch.device("cpu"),)
+    )
+    TEST_DEVICES = (
+        tuple(range(torch.cuda.device_count()))
+        if torch.cuda.is_available()
+        else (torch.device("cpu"),)
+    )
+    distributed_nodes = 1
+
+    NUM_PROCESSES: Optional[int] = None
+    NUMBER_OF_TEST_PROCESS: Optional[int] = None
+    NUMBER_OF_VALID_PROCESS: Optional[int] = 1
+    TRAIN_GPU_IDS = list(range(torch.cuda.device_count()))
+    SAMPLER_GPU_IDS = TRAIN_GPU_IDS
+    VALID_GPU_IDS = []
+    TEST_GPU_IDS = []
+
+    TRAIN_SCENES: str = None
+    VALID_SCENES: str = None
+    TEST_SCENES: str = None
+    VALID_SAMPLES_IN_SCENE = 1
+    TEST_SAMPLES_IN_SCENE = 1
+    OBJECT_TYPES: Optional[Sequence[str]] = None
+
+    WHICH_AGENT = None
+
+    CAMERA_WIDTH = 224
+    CAMERA_HEIGHT = 224
+    SCREEN_SIZE = 224
+
+    POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger]
+
+    TASK_SAMPLER = AllRoomsObjectNavTaskSampler
+    TASK_TYPE = ObjectNavTask
+    DISTANCE_TYPE = "l2"  # "geo"  # Can be "geo" or "l2"
+    MAX_STEPS = 500
+
+    def __init__(self):
+        # super().__init__()
+        self.REWARD_CONFIG = {
+            "step_penalty": -0.01,
+            "goal_success_reward": 10.0,
+            "reached_horizon_reward": 0.0,
+            "failed_stop_reward": 0.0,
+            "shaping_weight": 1.0, 
+            "positive_only_reward": False,
+        } # shaping weight removed for eval in task sampler
+        if self.WHICH_AGENT == 'locobot':
+            self.ENV_ARGS = LOCOBOT_ENV_ARGS
+        elif self.WHICH_AGENT == 'stretch':
+            self.ENV_ARGS = STRETCH_ENV_ARGS
+        else:
+            raise NotImplementedError
+
+    
+    @classmethod
+    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
+        return tuple()
+
+    @staticmethod
+    def _partition_inds(n: int, num_parts: int):
+        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
+            np.int32
+        )
+
+    @classmethod
+    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
+        from datetime import datetime
+
+        now = datetime.now()
+
+        exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f")
+        if cls.VISUALIZE:
+            visualizers = [
+                viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS
+            ]
+            kwargs["visualizers"] = visualizers
+        kwargs["objects"] = cls.OBJECT_TYPES
+        kwargs["task_type"] = cls.TASK_TYPE
+        kwargs["exp_name"] = exp_name_w_time
+        return cls.TASK_SAMPLER(**kwargs)
+
+    def _get_sampler_args_for_scene_split(
+        self,
+        scenes: List[str],
+        process_ind: int,
+        total_processes: int,
+        seeds: Optional[List[int]] = None,
+        deterministic_cudnn: bool = False,
+        devices: Optional[List[int]] = None
+    ) -> Dict[str, Any]:
+        if total_processes > len(scenes):  # oversample some scenes -> bias
+            if total_processes % len(scenes) != 0:
+                print(
+                    "Warning: oversampling some of the scenes to feed all processes."
+                    " You can avoid this by setting a number of workers divisible by the number of scenes"
+                )
+            scenes = scenes * int(ceil(total_processes / len(scenes)))
+            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
+        else:
+            if len(scenes) % total_processes != 0:
+                print(
+                    "Warning: oversampling some of the scenes to feed all processes."
+                    " You can avoid this by setting a number of workers divisor of the number of scenes"
+                )
+        inds = self._partition_inds(len(scenes), total_processes)
+
+        out = {
+            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
+            "env_args": self.ENV_ARGS,
+            "max_steps": self.MAX_STEPS,
+            "sensors": self.SENSORS,
+            "action_space": gym.spaces.Discrete(
+                len(self.TASK_TYPE.class_action_names())
+            ),
+            "seed": seeds[process_ind] if seeds is not None else None,
+            "deterministic_cudnn": deterministic_cudnn,
+            "rewards_config": self.REWARD_CONFIG,
+        }
+        
+        x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None)
+        out['env_args']["x_display"] = x_display
+        return out
+
+    def train_task_sampler_args(
+        self,
+        process_ind: int,
+        total_processes: int,
+        devices: Optional[List[int]] = None,
+        seeds: Optional[List[int]] = None,
+        deterministic_cudnn: bool = False,
+    ) -> Dict[str, Any]:
+        res = self._get_sampler_args_for_scene_split(
+            self.TRAIN_SCENES,
+            process_ind,
+            total_processes,
+            seeds=seeds,
+            deterministic_cudnn=deterministic_cudnn,
+            devices=devices
+        )
+        res["scene_period"] = "manual"
+        res["sampler_mode"] = "train"
+
+        return res
+
+    def valid_task_sampler_args(
+        self,
+        process_ind: int,
+        total_processes: int,
+        devices: Optional[List[int]],
+        seeds: Optional[List[int]] = None,
+        deterministic_cudnn: bool = False,
+    ) -> Dict[str, Any]:
+        res = self._get_sampler_args_for_scene_split(
+            self.VALID_SCENES,
+            process_ind,
+            total_processes,
+            seeds=seeds,
+            deterministic_cudnn=deterministic_cudnn,
+            devices=devices
+        )
+        res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
+        res["sampler_mode"] = "val"
+        res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
+        return res
+
+    def test_task_sampler_args(
+        self,
+        process_ind: int,
+        total_processes: int,
+        devices: Optional[List[int]],
+        seeds: Optional[List[int]] = None,
+        deterministic_cudnn: bool = False,
+    ) -> Dict[str, Any]:
+        res = self._get_sampler_args_for_scene_split(
+            self.TEST_SCENES,
+            process_ind,
+            total_processes,
+            seeds=seeds,
+            deterministic_cudnn=deterministic_cudnn,
+            devices=devices
+        )
+        res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
+        res["sampler_mode"] = "test"
+        return res
+        
+
+    def training_pipeline(self, **kwargs):
+        log_interval_small = (
+            self.distributed_nodes*self.NUM_PROCESSES * 32 * 10
+            if torch.cuda.is_available
+            else 1
+        )
+        log_interval_medium = (
+            self.distributed_nodes*self.NUM_PROCESSES * 64 * 5
+            if torch.cuda.is_available
+            else 1
+        )
+        log_interval_large = (
+            self.distributed_nodes*self.NUM_PROCESSES * 128 * 5
+            if torch.cuda.is_available
+            else 1
+        )
+
+        batch_steps_0 = int(10e6)
+        batch_steps_1 = int(10e6)
+        batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0
+
+        return TrainingPipeline(
+            save_interval=2_000_000,
+            metric_accumulate_interval=10_000,
+            optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)),
+            num_mini_batch=1,
+            update_repeats=4,
+            max_grad_norm=0.5,
+            num_steps=128,
+            named_losses={"ppo_loss": PPO(**PPOConfig)},
+            gamma=0.99,
+            use_gae=True,
+            gae_lambda=0.95,
+            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
+            pipeline_stages=[
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=batch_steps_0,
+                    training_settings=TrainingSettings(
+                        num_steps=32, metric_accumulate_interval=log_interval_small
+                    ),
+                ),
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=batch_steps_1,
+                    training_settings=TrainingSettings(
+                        num_steps=64, metric_accumulate_interval=log_interval_medium
+                    ),
+                ),
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=batch_steps_2,
+                    training_settings=TrainingSettings(
+                        num_steps=128, metric_accumulate_interval=log_interval_large
+                    ),
+                ),
+            ],
+        )
+    
+    def machine_params(self, mode="train", **kwargs):
+        sampler_devices: Sequence[int] = []
+        if mode == "train":
+            workers_per_device = 1
+            gpu_ids = (
+                []
+                if not torch.cuda.is_available()
+                else self.TRAIN_GPU_IDS * workers_per_device
+            )
+            nprocesses = (
+                1
+                if not torch.cuda.is_available()
+                else evenly_distribute_count_into_bins(self.NUM_PROCESSES, len(gpu_ids))
+            )
+            sampler_devices = self.SAMPLER_GPU_IDS
+        elif mode == "valid":
+            nprocesses = self.NUMBER_OF_VALID_PROCESS
+            gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS
+        elif mode == "test":
+            # nprocesses = self.NUMBER_OF_TEST_PROCESS if torch.cuda.is_available() else 1
+            gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS
+            nprocesses = (
+                1
+                if not torch.cuda.is_available()
+                else evenly_distribute_count_into_bins(self.NUMBER_OF_TEST_PROCESS, len(gpu_ids))
+            )
+
+            # print('Test Mode', gpu_ids, 'because cuda is',torch.cuda.is_available(), 'number of workers', nprocesses)
+        else:
+            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")
+
+        sensors = [*self.SENSORS]
+        if mode != "train":
+            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]
+
+        sensor_preprocessor_graph = (
+            SensorPreprocessorGraph(
+                source_observation_spaces=SensorSuite(sensors).observation_spaces,
+                preprocessors=self.preprocessors(),
+            )
+            if mode == "train"
+            or (
+                (isinstance(nprocesses, int) and nprocesses > 0)
+                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
+            )
+            else None
+        )
+
+        # remove
+        # print('MACHINE PARAM', 'nprocesses',nprocesses,'devices',gpu_ids,'sampler_devices',sampler_devices,'mode = train',  mode == "train",'gpu ids', gpu_ids,)  # ignored with > 1 gpu_ids)
+        return MachineParams(nprocesses=nprocesses,
+        devices=gpu_ids,
+        sampler_devices=sampler_devices if mode == "train" else gpu_ids,  # ignored with > 1 gpu_ids
+        sensor_preprocessor_graph=sensor_preprocessor_graph)
+
+
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py
new file mode 100644
index 0000000..ed96bf4
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py
@@ -0,0 +1,62 @@
+import platform
+import yaml
+
+from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \
+    import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+
+
+class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV(
+    ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+
+    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
+        OBJECT_TYPES=yaml.safe_load(f)
+        
+
+    SENSORS = [
+        RGBSensorStretchIntel(
+            height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres",
+        ),
+        RGBSensorStretchKinect(
+            height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres_arm",
+        ),
+        GoalObjectTypeThorSensor(
+            object_types=OBJECT_TYPES,
+        ),
+    ]
+
+    if platform.system() == "Darwin":
+        SENSORS += [
+            RGBSensorStretchKinect(
+            height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+                uuid="rgb_lowres_arm_only_viz",
+            ),
+            RGBSensorStretchIntel(
+            height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+                uuid="rgb_lowres_only_viz",
+            ),
+        ]
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py
new file mode 100644
index 0000000..f25bf23
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py
@@ -0,0 +1,96 @@
+import platform
+import yaml
+
+from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
+from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \
+    import ProcTHORObjectNavClipResnet50RGBOnly
+from utils.stretch_utils.stretch_object_nav_tasks import \
+    StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail, ExploreWiseObjectNavTask
+from manipulathor_utils.debugger_util import ForkedPdb
+
+
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+
+
+
+class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV(
+    ProcTHORObjectNavClipResnet50RGBOnly
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+
+    
+    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
+        OBJECT_TYPES=yaml.safe_load(f)
+    
+    # TASK_TYPE = StretchObjectNavTaskSegmentationSuccess
+    # TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail
+    TASK_TYPE = ExploreWiseObjectNavTask
+
+    SENSORS = [
+        RGBSensorThor(
+            height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+            width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres",
+        ),
+
+        RGBSensorStretchKinectBigFov(
+            height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+            width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres_arm",
+        ),
+
+        GoalObjectTypeThorSensor(
+            object_types=OBJECT_TYPES,
+        ),
+    ]
+
+    # NUM_PROCESSES = 40
+    # NUM_TRAIN_HOUSES = None
+
+    if platform.system() == "Darwin":
+        SENSORS += [
+            RGBSensorStretchKinectBigFov(
+                height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+                width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+                use_resnet_normalization=True,
+                mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+                stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+                uuid="rgb_lowres_arm_only_viz",
+            ),
+            RGBSensorThor(
+                height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+                width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE,
+                use_resnet_normalization=True,
+                mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+                stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+                uuid="rgb_lowres_only_viz",
+            ),
+        ]
+
+    def __init__(self):
+        super().__init__()
+        self.ENV_ARGS['renderInstanceSegmentation'] = True
+        assert (
+                self.WHICH_AGENT == 'stretch' # this only works for stretch
+                and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera
+        )
+        self.REWARD_CONFIG['shaping_weight'] = 0.0
+        self.REWARD_CONFIG['exploration_reward'] = 0.1
+        self.REWARD_CONFIG['got_stuck_penalty'] = 0.0
+        self.REWARD_CONFIG['failed_action_penalty'] = -0.5
+
+    @classmethod
+    def tag(cls):
+        return cls.TASK_TYPE.__name__ + '-RGB-2Camera-ProcTHOR' + '-' +  cls.WHICH_AGENT
+
+    
diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py
similarity index 93%
rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py
rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py
index 2dfc866..9305f61 100644
--- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py
@@ -1,6 +1,6 @@
 import torch
 
-from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_2camera_procthor_wide import \
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \
     ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV
 
 
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py
new file mode 100644
index 0000000..c9b2c40
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py
@@ -0,0 +1,25 @@
+# import platform
+# import yaml
+
+# from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel
+# from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \
+    import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \
+    import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV
+    # from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+
+
+class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy(
+    ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV):
+
+    def __init__(self):
+        super().__init__()
+        self.ENV_ARGS['motion_noise_type'] = 'habitat'
+        self.ENV_ARGS['motion_noise_args'] = dict()
+        self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1]
+        self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01]
+        self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 
+
+        self.ENV_ARGS['returnToStart'] = False # 
\ No newline at end of file
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py
new file mode 100644
index 0000000..4c18670
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py
@@ -0,0 +1,181 @@
+from ast import For
+from typing import Any, Dict, List, Optional, Sequence
+from typing_extensions import Literal, final
+import platform
+
+import datasets
+import numpy as np
+
+from allenact.base_abstractions.preprocessor import (
+    Preprocessor,
+    SensorPreprocessorGraph,
+)
+from allenact.base_abstractions.sensor import (
+    Sensor,
+    Union,
+)
+from allenact.base_abstractions.task import TaskSampler
+from allenact.utils.experiment_utils import (
+    Builder,
+)
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig
+from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
+from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask
+from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES
+from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, UPDATED_PROCTHOR_COMMIT_ID
+
+from manipulathor_utils.debugger_util import ForkedPdb
+
+
+
+class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig):
+    """The base config for ProcTHOR ObjectNav experiments."""
+
+    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
+    TASK_TYPE = ObjectNavTask
+
+    OBJECT_TYPES: Optional[Sequence[str]]
+
+    SENSORS: Sequence[Sensor] = []
+
+    DISTANCE_TYPE = "l2"  # "geo"  # Can be "geo" or "l2"
+
+    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[
+        int
+    ] = 20 # default config/main.yaml
+    RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = (
+        -1
+    )  # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None`
+
+    if platform.system() == "Darwin":
+        RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = 1
+
+    RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # change from 100
+
+    TEST_ON_VALIDATION = False
+
+    HOUSE_DATASET = datasets.load_dataset(
+        "allenai/houses", use_auth_token=True, ignore_verifications=True
+    ) # this loads even for inherited classes where is unused - not necessarily a problem 
+
+    NUM_TRAIN_HOUSES = None # none means all
+
+    @classmethod
+    def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
+        return tuple()
+
+    @classmethod
+    def make_sampler_fn(
+        cls, **kwargs
+    ) -> TaskSampler:
+        from datetime import datetime
+
+        now = datetime.now()
+
+        exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f")
+        if cls.VISUALIZE:
+            visualizers = [
+                viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS
+            ]
+            kwargs["visualizers"] = visualizers
+        kwargs["exp_name"] = exp_name_w_time
+
+        return cls.TASK_SAMPLER(**kwargs)
+
+    @staticmethod
+    def _partition_inds(n: int, num_parts: int):
+        return np.round(
+            np.linspace(start=0, stop=n, num=num_parts + 1, endpoint=True)
+        ).astype(np.int32)
+
+    def _get_sampler_args_for_scene_split(
+        self,
+        houses: datasets.Dataset,
+        mode: Literal["train", "eval"],
+        resample_same_scene_freq: int,
+        process_ind: int,
+        max_tasks: Optional[int],
+        allow_flipping: Optional[bool] = None,
+        devices: Optional[List[int]] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+
+        house_inds = list(range(len(houses)))
+        if mode is "train":
+            scenes = [str(h) for h in house_inds if h not in PROCTHOR_INVALID_SCENES]
+        else:
+            scenes = [str(h) for h in house_inds]
+
+        general_args = super()._get_sampler_args_for_scene_split(scenes=scenes,                                                               # scenes=scenes,
+                                                                process_ind=process_ind,
+                                                                devices=devices,
+                                                                **kwargs)
+        
+        x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None)
+
+        procthor_specific = {
+            "sampler_mode": mode,
+            "houses": houses,
+            "house_inds": list(map(int,general_args['scenes'])),
+            "process_ind": process_ind,
+            "target_object_types": self.OBJECT_TYPES,
+            "max_tasks": max_tasks if max_tasks is not None else len(general_args['scenes']),
+            "distance_type": self.DISTANCE_TYPE,
+            "resample_same_scene_freq": resample_same_scene_freq,
+        }
+        del general_args['scenes']
+        out = {**general_args,**procthor_specific}
+
+        out["task_type"] = self.TASK_TYPE
+
+        out["env_args"]["x_display"] = x_display
+        out["env_args"]['commit_id'] = UPDATED_PROCTHOR_COMMIT_ID#PROCTHOR_COMMIT_ID
+        out["env_args"]['scene'] = 'Procedural'
+        if allow_flipping is not None:
+            out["env_args"]['allow_flipping'] = allow_flipping
+        
+        return out
+
+
+    def train_task_sampler_args(
+        self,
+        **kwargs
+    ) -> Dict[str, Any]:
+        train_houses = self.HOUSE_DATASET["train"]
+
+        if self.NUM_TRAIN_HOUSES:
+            train_houses = train_houses.select(range(self.NUM_TRAIN_HOUSES))
+
+        return self._get_sampler_args_for_scene_split(
+            houses=train_houses,
+            mode="train",
+            max_tasks=float("inf"),
+            resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN,
+            **kwargs,
+        )
+
+    def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]:
+        val_houses = self.HOUSE_DATASET["validation"]
+        return self._get_sampler_args_for_scene_split(
+            houses=val_houses.select(range(100)),
+            mode="eval",
+            max_tasks=10,
+            resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE,
+            allow_flipping=False,
+            **kwargs,
+        )
+
+    def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]:
+        if self.TEST_ON_VALIDATION:
+            return self.valid_task_sampler_args(**kwargs)
+        test_houses = self.HOUSE_DATASET["test"]
+        return self._get_sampler_args_for_scene_split(
+            houses=test_houses.select(range(100)),
+            mode="eval",
+            max_tasks=10,
+            resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE,
+            allow_flipping=False,
+            **kwargs,
+        )
+
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
new file mode 100644
index 0000000..9ecd9d2
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
@@ -0,0 +1,111 @@
+import platform
+import random
+from typing import Sequence, Union
+
+import gym
+import numpy as np
+from torch import nn
+import yaml
+import copy
+
+from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+
+from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig
+from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler
+from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder
+from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS
+from manipulathor_utils.debugger_util import ForkedPdb
+
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \
+    ClipResNetPreprocessNCameraGRUActorCriticMixin
+from allenact.base_abstractions.preprocessor import Preprocessor
+from allenact.utils.experiment_utils import Builder
+
+
+class ProcTHORObjectNavClipResnet50RGBOnly(
+    ProcTHORObjectNavBaseConfig
+):
+    """Single-camera Object Navigation experiment configuration in ProcTHOR, using CLIP preprocessing."""
+
+    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
+        OBJECT_TYPES=yaml.safe_load(f)
+
+    NOISE_LEVEL = 0
+    WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch'
+
+    SENSORS = [
+        RGBSensorThor(
+            height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE,
+            width=ProcTHORObjectNavBaseConfig.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres",
+        ),
+        GoalObjectTypeThorSensor(
+            object_types=OBJECT_TYPES,
+        ),
+    ]
+
+    MAX_STEPS = 500
+    if platform.system() == "Darwin":
+        MAX_STEPS = 200
+        NUM_TRAIN_HOUSES = 100
+        SENSORS += [
+            RGBSensorThor(
+                height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE,
+                width=ProcTHORObjectNavBaseConfig.SCREEN_SIZE,
+                use_resnet_normalization=True,
+                mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+                stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+                uuid="rgb_lowres_only_viz",
+            ),
+        ]
+
+    NUM_PROCESSES = 56
+
+    TASK_SAMPLER = ProcTHORObjectNavTaskSampler
+    TASK_TYPE = StretchObjectNavTask
+    ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
+
+    # NUM_TRAIN_HOUSES = 500
+
+    CLIP_MODEL_TYPE = "RN50"
+
+
+    def __init__(self):
+        super().__init__() 
+
+        self.ENV_ARGS['p_randomize_material'] = 0.8
+        self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice
+        self.ENV_ARGS['renderInstanceSegmentation'] = False
+        self.ENV_ARGS['renderDepthImage'] = False        
+        self.ENV_ARGS['allow_flipping'] = False
+
+        self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin(
+            sensors=self.SENSORS,
+            clip_model_type=self.CLIP_MODEL_TYPE,
+            screen_size=self.SCREEN_SIZE,
+        )
+
+    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
+        return self.preprocessing_and_model.preprocessors()
+
+    
+    def create_model(self, **kwargs) -> nn.Module:
+        return self.preprocessing_and_model.create_model(
+            num_actions=len(self.TASK_TYPE.class_action_names()), **kwargs,
+            visualize=self.VISUALIZE
+        )
+
+    def get_agent(self):
+        return self.ENV_ARGS['agentMode']
+
+    @classmethod
+    def tag(cls):
+        return cls.TASK_TYPE.__name__ + '-RGB-SingleCam-ProcTHOR' + '-' +  cls.WHICH_AGENT
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py
new file mode 100644
index 0000000..763bddd
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py
@@ -0,0 +1,82 @@
+import platform
+import random
+import yaml
+
+from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \
+     ithorObjectNavClipResnet50RGBOnly2CameraWideFOV
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+
+from utils.stretch_utils.stretch_object_nav_tasks import \
+    StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail, ExploreWiseObjectNavTask
+
+from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL
+
+
+
+class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV(
+    ithorObjectNavClipResnet50RGBOnly2CameraWideFOV
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+
+    TRAIN_SCENES = ROBOTHOR_TRAIN
+    TEST_SCENES = ROBOTHOR_VAL
+    # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor']))
+    # OBJECT_TYPES.sort()
+
+    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
+        OBJECT_TYPES=yaml.safe_load(f)
+
+    random.shuffle(TRAIN_SCENES)
+    random.shuffle(TEST_SCENES)
+
+    # SENSORS = [
+    #     RGBSensorStretchIntel(
+    #         height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #         width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #         use_resnet_normalization=True,
+    #         mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+    #         stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+    #         uuid="rgb_lowres",
+    #     ),
+    #     RGBSensorStretchKinect(
+    #         height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #         width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #         use_resnet_normalization=True,
+    #         mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+    #         stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+    #         uuid="rgb_lowres_arm",
+    #     ),
+    #     GoalObjectTypeThorSensor(
+    #         object_types=OBJECT_TYPES,
+    #     ),
+    # ]
+
+    TASK_TYPE = ExploreWiseObjectNavTask
+    MAX_STEPS = 200
+
+    # if platform.system() == "Darwin":
+    #     SENSORS += [
+    #         RGBSensorStretchKinect(
+    #             height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #             width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #             use_resnet_normalization=True,
+    #             mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+    #             stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+    #             uuid="rgb_lowres_arm_only_viz",
+    #         ),
+    #         RGBSensorStretchIntel(
+    #             height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #             width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+    #             use_resnet_normalization=True,
+    #             mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+    #             stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+    #             uuid="rgb_lowres_only_viz",
+    #         ),
+    #     ]
+
+
+    
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py
new file mode 100644
index 0000000..642adae
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py
@@ -0,0 +1,78 @@
+from utils.stretch_utils.stretch_constants import INTEL_CAMERA_WIDTH
+from manipulathor_utils.debugger_util import ForkedPdb
+
+from utils.stretch_utils.real_stretch_sensors import RealRGBSensorStretchIntel, RealRGBSensorStretchKinect
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+
+from utils.stretch_utils.real_stretch_environment import StretchRealEnvironment
+from utils.stretch_utils.all_rooms_object_nav_task_sampler import RealStretchAllRoomsObjectNavTaskSampler
+from utils.stretch_utils.stretch_object_nav_tasks import RealStretchObjectNavTask
+
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \
+     ithorObjectNavClipResnet50RGBOnly2CameraWideFOV
+
+
+class RealStretchObjectNav(
+    ithorObjectNavClipResnet50RGBOnly2CameraWideFOV
+):
+    desired_screen_size = INTEL_CAMERA_WIDTH
+    SENSORS = [
+        RealRGBSensorStretchIntel(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres",
+        ),
+        RealRGBSensorStretchKinect(
+            height=desired_screen_size,
+            width=desired_screen_size,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres_arm",
+        ),
+        GoalObjectTypeThorSensor(
+            object_types=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.OBJECT_TYPES, # doesn't matter for now, it's apples all the way dow
+        ),
+    ]
+
+    # this should only ever be run on darwin anyway - sensors for visualization
+    # SENSORS += [
+    #     RealRGBSensorStretchIntel(
+    #         height=desired_screen_size,
+    #         width=desired_screen_size,
+    #         use_resnet_normalization=True,
+    #         mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+    #         stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+    #         uuid="rgb_lowres_only_viz",
+    #     ),
+    #     RealRGBSensorStretchKinect(
+    #         height=desired_screen_size,
+    #         width=desired_screen_size,
+    #         use_resnet_normalization=True,
+    #         mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+    #         stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+    #         uuid="rgb_lowres_arm_only_viz",
+    #     ),
+    # ]
+
+    MAX_STEPS = 60
+
+    TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler 
+    TASK_TYPE = RealStretchObjectNavTask #
+    ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init
+    VISUALIZE = True
+
+    NUM_PROCESSES = 1
+    NUMBER_OF_TEST_PROCESS = 0
+    NUMBER_OF_VALID_PROCESS = 0
+    VAL_DEVICES = []
+    TEST_DEVICES = []
+
+    TRAIN_SCENES = ['RealRobothor']
+    TEST_SCENES = ['RealRobothor']
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py
new file mode 100644
index 0000000..3066a0c
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py
@@ -0,0 +1,74 @@
+import platform
+import random
+import yaml
+
+from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from utils.stretch_utils.stretch_object_nav_tasks import StretchNeckedObjectNavTask
+
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \
+     ithorObjectNavClipResnet50RGBOnly2CameraWideFOV
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+
+from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL
+
+
+
+class RoboTHORObjectNavTestiTHORstyleSingleCam(
+    ithorObjectNavClipResnet50RGBOnly2CameraWideFOV
+):
+    """An Object Navigation experiment configuration in iThor with RGB
+    input."""
+
+    TRAIN_SCENES = ROBOTHOR_TRAIN
+    TEST_SCENES = ROBOTHOR_VAL
+    # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor']))
+    # OBJECT_TYPES.sort()
+
+    with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f:
+        OBJECT_TYPES=yaml.safe_load(f)
+
+    random.shuffle(TRAIN_SCENES)
+    random.shuffle(TEST_SCENES)
+
+    SENSORS = [
+        RGBSensorThor(
+            height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+            use_resnet_normalization=True,
+            mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+            stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+            uuid="rgb_lowres",
+        ),
+        GoalObjectTypeThorSensor(
+            object_types=OBJECT_TYPES,
+        ),
+    ]
+
+    if platform.system() == "Darwin":
+        SENSORS += [
+            RGBSensorThor(
+                height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+                width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE,
+                use_resnet_normalization=True,
+                mean=ClipResNetPreprocessor.CLIP_RGB_MEANS,
+                stdev=ClipResNetPreprocessor.CLIP_RGB_STDS,
+                uuid="rgb_lowres_only_viz",
+            ),
+        ]
+    
+    TASK_TYPE = StretchNeckedObjectNavTask
+    MAX_STEPS = 200
+
+    def __init__(self):
+        super().__init__() 
+        self.ENV_ARGS['agentMode']='default'
+        # maybe - include updated procthor commit ID? check if it makes a difference
+
+
+
+
+
+    
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py
new file mode 100644
index 0000000..964e652
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py
@@ -0,0 +1,84 @@
+import datasets
+import torch
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \
+    import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \
+    ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only import \
+    ProcTHORObjectNavClipResnet50RGBOnly
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide_stochastic import \
+    ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy
+
+from utils.procthor_utils.procthor_object_nav_task_samplers import RoboThorObjectNavTestTaskSampler
+from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder
+
+from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
+
+
+from manipulathor_utils.debugger_util import ForkedPdb
+
+
+# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly):
+# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV):
+# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV):
+class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy):
+
+    EVAL_TASKS = datasets.load_dataset(
+        f"allenai/robothor-objectnav-eval", use_auth_token=True
+    )
+
+    TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler
+    TEST_ON_VALIDATION = True
+    # TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing
+
+    @classmethod
+    def tag(cls):
+        return super().tag() + "-RoboTHOR-Test"
+    
+    @classmethod
+    def make_sampler_fn(cls, **kwargs):
+        from datetime import datetime
+
+        now = datetime.now()
+
+        exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f")
+        if cls.VISUALIZE:
+            visualizers = [
+                viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS
+            ]
+            kwargs["visualizers"] = visualizers
+        kwargs["exp_name"] = exp_name_w_time
+
+        if kwargs["sampler_mode"] == "train":
+            return cls.TASK_SAMPLER(**kwargs)
+        else:
+            return cls.TEST_TASK_SAMPLER(**kwargs)
+
+    def valid_task_sampler_args(self, **kwargs):
+        out = self._get_sampler_args_for_scene_split(
+            houses=self.EVAL_TASKS["validation"].shuffle(),
+            mode="eval",
+            max_tasks=10,
+            allow_flipping=False,
+            resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE,  # ignored
+            **kwargs,
+        )
+        return out
+
+    def test_task_sampler_args(self, **kwargs):
+        if self.TEST_ON_VALIDATION:
+            houses = self.EVAL_TASKS["validation"]
+        else:
+            houses = self.EVAL_TASKS["test"].shuffle()
+            # return self.valid_task_sampler_args(**kwargs)
+
+        out = self._get_sampler_args_for_scene_split(
+            houses=houses,
+            mode="eval",
+            max_tasks=None,
+            allow_flipping=False,
+            resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE,  # ignored
+            **kwargs,
+        )
+        return out
\ No newline at end of file
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py
new file mode 100644
index 0000000..06910a9
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py
@@ -0,0 +1,143 @@
+from typing import Any, Dict, List, Optional, Sequence
+from typing_extensions import Literal
+import torch
+
+from manipulathor_baselines.stretch_object_nav_baselines.experiments.robothor.test_robothor_procthorstyle import \
+    ObjectNavRoboTHORTestProcTHORstyle
+from allenact.base_abstractions.experiment_config import MachineParams
+from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph
+from allenact.utils.experiment_utils import evenly_distribute_count_into_bins
+from allenact.base_abstractions.sensor import (
+    ExpertActionSensor,
+    SensorSuite,
+)
+
+
+class ObjectNavRoboTHORTestProcTHORstyleDistrib(
+    ObjectNavRoboTHORTestProcTHORstyle
+):
+    NUM_PROCESSES = 56 # one them crashed for space?
+    NUM_TRAIN_PROCESSES = 64
+    NUM_VAL_PROCESSES = 2
+    NUM_TEST_PROCESSES = 60
+
+    TRAIN_DEVICES = (
+        tuple(range(torch.cuda.device_count()))
+        if torch.cuda.is_available()
+        else (torch.device("cpu"),)
+    )
+    VAL_DEVICES = (
+        (torch.cuda.device_count() - 1,)
+        if torch.cuda.is_available()
+        else (torch.device("cpu"),)
+    )
+    TEST_DEVICES = (
+        tuple(range(torch.cuda.device_count()))
+        if torch.cuda.is_available()
+        else (torch.device("cpu"),)
+    )
+    def __init__(
+            self,
+            distributed_nodes: int = 1,
+    ):
+        super().__init__()
+        self.distributed_nodes = distributed_nodes
+        # self.ENV_ARGS['continuousMode']=True
+        # self.ENV_ARGS["returnToStart"]= False # do either of these actually do something
+
+        # self.train_gpu_ids = tuple(range(torch.cuda.device_count())) # should I do this for everyone?, should i add val
+
+
+    # def machine_params(self, mode="train", **kwargs):
+    #     params = super().machine_params(mode, **kwargs)
+
+    #     if mode == "train":
+    #         params.devices = params.devices * self.distributed_nodes
+    #         params.nprocesses = params.nprocesses * self.distributed_nodes
+    #         params.sampler_devices = params.sampler_devices * self.distributed_nodes
+
+    #         if "machine_id" in kwargs:
+    #             machine_id = kwargs["machine_id"]
+    #             assert (
+    #                     0 <= machine_id < self.distributed_nodes
+    #             ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]"
+
+    #             local_worker_ids = list(
+    #                 range(
+    #                     len(self.train_gpu_ids) * machine_id,
+    #                     len(self.train_gpu_ids) * (machine_id + 1),
+    #                     )
+    #             )
+
+    #             params.set_local_worker_ids(local_worker_ids)
+
+    #         # Confirm we're setting up train params nicely:
+    #         print(
+    #             f"devices {params.devices}"
+    #             f"\nnprocesses {params.nprocesses}"
+    #             f"\nsampler_devices {params.sampler_devices}"
+    #             f"\nlocal_worker_ids {params.local_worker_ids}"
+    #         )
+    #     elif mode == "valid":
+    #         # Use all GPUs at their maximum capacity for training
+    #         # (you may run validation in a separate machine)
+    #         params.nprocesses = (0,)
+
+    #     return params 
+
+    def machine_params(self, mode: Literal["train", "valid", "test"], **kwargs):
+        devices: Sequence[torch.device]
+        nprocesses: int
+        if mode == "train":
+            devices = self.TRAIN_DEVICES * self.distributed_nodes
+            nprocesses = self.NUM_TRAIN_PROCESSES * self.distributed_nodes
+        elif mode == "valid":
+            devices = self.VAL_DEVICES
+            nprocesses = self.NUM_VAL_PROCESSES
+        elif mode == "test":
+            devices = self.TEST_DEVICES
+            nprocesses = self.NUM_TEST_PROCESSES
+
+        nprocesses = evenly_distribute_count_into_bins(
+            count=nprocesses, nbins=len(devices)
+        )
+
+        sensors = [*self.SENSORS]
+        if mode != "train":
+            sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]
+
+        sensor_preprocessor_graph = (
+            SensorPreprocessorGraph(
+                source_observation_spaces=SensorSuite(sensors).observation_spaces,
+                preprocessors=self.preprocessors(),
+            )
+            if mode == "train"
+            or (
+                (isinstance(nprocesses, int) and nprocesses > 0)
+                or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
+            )
+            else None
+        )
+
+        params = MachineParams(
+            nprocesses=nprocesses,
+            devices=devices,
+            sampler_devices=devices,
+            sensor_preprocessor_graph=sensor_preprocessor_graph,
+        )
+
+        # NOTE: for distributed setup
+        if mode == "train" and "machine_id" in kwargs:
+            machine_id = kwargs["machine_id"]
+            assert (
+                0 <= machine_id < self.distributed_nodes
+            ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes} - 1]"
+            local_worker_ids = list(
+                range(
+                    len(self.TRAIN_DEVICES) * machine_id,
+                    len(self.TRAIN_DEVICES) * (machine_id + 1),
+                )
+            )
+            params.set_local_worker_ids(local_worker_ids)
+
+        return params
\ No newline at end of file
diff --git a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py
similarity index 92%
rename from manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py
rename to manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py
index 4295c91..36637f8 100644
--- a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py
+++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py
@@ -6,6 +6,8 @@
 import platform
 from datetime import datetime
 from typing import Tuple, Optional, List, Dict, cast
+import platform
+from datetime import datetime
 
 import gym
 import torch
@@ -21,7 +23,6 @@
 from manipulathor_utils.debugger_util import ForkedPdb
 from utils.hacky_viz_utils import hacky_visualization
 
-
 class ResnetTensorNavNCameraActorCritic(VisualNavActorCritic):
     def __init__(
         # base params
@@ -43,6 +44,7 @@ def __init__(
         goal_dims: int = 32,
         resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
         combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
+        visualize=False,
     ):
         super().__init__(
             action_space=action_space,
@@ -52,6 +54,7 @@ def __init__(
             beliefs_fusion=beliefs_fusion,
             auxiliary_uuids=auxiliary_uuids,
         )
+        self.visualize = visualize
 
         self.goal_visual_encoder = ResnetNCameraTensorGoalEncoder(  # type:ignore
             self.observation_space,
@@ -90,10 +93,15 @@ def is_blind(self) -> bool:
 
     def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
         return self.goal_visual_encoder(observations)
-    def forward(self, **kwargs): #TODO NOW remove
-        if platform.system() == "Darwin":
-            hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time)
-        return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs)
+    # def forward(self, **kwargs): #TODO NOW remove
+    #     if platform.system() == "Darwin":
+    #         hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time)
+    #     return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs)
+
+    # def forward(self, **kwargs): #TODO NOW remove
+    #     if self.visualize:
+    #         hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time)
+    #     return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs)
 
 
 class ResnetNCameraTensorGoalEncoder(nn.Module):
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py
new file mode 100644
index 0000000..3aa772e
--- /dev/null
+++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py
@@ -0,0 +1,85 @@
+from typing import Sequence, Union, Type
+
+import attr
+import gym
+import numpy as np
+import torch.nn as nn
+
+from allenact.base_abstractions.preprocessor import Preprocessor
+from allenact.base_abstractions.sensor import Sensor
+from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
+from allenact.utils.experiment_utils import Builder
+from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
+
+from allenact_plugins.navigation_plugin.objectnav.models import (
+    ResnetTensorNavActorCritic,
+)
+
+from manipulathor_baselines.stretch_object_nav_baselines.models.clip_objnav_ncamera_model import \
+    ResnetTensorNavNCameraActorCritic
+from manipulathor_utils.debugger_util import ForkedPdb
+
+
+@attr.s(kw_only=True)
+class ClipResNetPreprocessNCameraGRUActorCriticMixin:
+    sensors: Sequence[Sensor] = attr.ib()
+    clip_model_type: str = attr.ib()
+    screen_size: int = attr.ib()
+    pool: bool = attr.ib(default=False)
+
+    def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]:
+        preprocessors = []
+        self.resnet_preprocessor_uuids = []
+        for camera in [s for s in self.sensors if (isinstance(s, RGBSensor) or isinstance(s,DepthSensor))]:
+            if "_only_viz" not in camera.uuid:
+                if isinstance(camera, RGBSensor):
+                    assert (
+                        np.linalg.norm(
+                            np.array(camera._norm_means)
+                            - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS)
+                        )
+                        < 1e-5
+                    )
+                    assert (
+                        np.linalg.norm(
+                            np.array(camera._norm_sds)
+                            - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS)
+                        )
+                        < 1e-5
+                    )
+                preprocessors.append(
+                    ClipResNetPreprocessor(
+                        rgb_input_uuid=camera.uuid,
+                        clip_model_type=self.clip_model_type,
+                        pool=self.pool,
+                        output_uuid=camera.uuid+"_clip_resnet",
+                    )
+                )
+                self.resnet_preprocessor_uuids.append(camera.uuid+"_clip_resnet")
+
+            else:
+                self.resnet_preprocessor_uuids.append(camera.uuid)
+
+        return preprocessors
+
+    def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module:
+        goal_sensor_uuid = next(
+            (s.uuid for s in self.sensors if isinstance(s, GoalObjectTypeThorSensor)),
+            None,
+        )
+        self.resnet_preprocessor_uuids = [s for s in self.resnet_preprocessor_uuids if "_only_viz" not in s]
+
+        # display or assert sensor order here? possible source for sneaky failure if they're not the same 
+        # as in pretraining when loading weights.
+
+        return ResnetTensorNavNCameraActorCritic(
+            action_space=gym.spaces.Discrete(num_actions),
+            observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
+            goal_sensor_uuid=goal_sensor_uuid,
+            resnet_preprocessor_uuids=self.resnet_preprocessor_uuids,
+            hidden_size=512,
+            goal_dims=32,
+            add_prev_actions=True,
+            visualize=visualize
+        )
\ No newline at end of file
diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py
index 00377e9..9d1e810 100644
--- a/scripts/run_same_commands_servers.py
+++ b/scripts/run_same_commands_servers.py
@@ -267,11 +267,41 @@
 #  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
 #  --seed 10 --machine_id 0 --extra_tag armpointnav_with_ProcTHOR_RGBonly_after_fix_clip '
 
-command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib \
+# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib \
+# --distributed_ip_and_port IP_ADR:6060 \
+#  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
+#  --seed 10 --machine_id 0 --extra_tag prcthor_obj_nav '
+
+# command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOVDistrib/finetune_robothor/2022-05-27_21-00-03/exp_RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOVDistrib_finetune_robothor__stage_02__steps_000160201650.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch/experiments/ithor/kiana_obj_nav_2camera_procthor_wide_distrib \
+# --distributed_ip_and_port IP_ADR:6060 \
+#  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
+#  --seed 10 --machine_id 0 --extra_tag kiana_objnav_outside_room_wide -c ~/exp_ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVDistrib_prcthor_obj_nav__stage_02__steps_000100123710.pt '
+
+# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \
+# --distributed_ip_and_port IP_ADR:6060 \
+#  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
+#  --seed 10 --machine_id 0'
+
+command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-08_21-59-41/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt  ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \
 --distributed_ip_and_port IP_ADR:6060 \
  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
- --seed 10 --machine_id 0 --extra_tag prcthor_obj_nav '
+ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt '
 
+command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-18_15-49-34/exp_ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000273645744.pt ~/; \
+     ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \
+--distributed_ip_and_port IP_ADR:6060 \
+ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
+ --seed 10 --machine_id 0 -c ~/exp_ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000273645744.pt '
+
+# command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \
+# --distributed_ip_and_port IP_ADR:6060 \
+#  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
+#  --seed 10 --machine_id 0 '
+
+# command_aws1 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \
+# --distributed_ip_and_port IP_ADR:6060 \
+#  --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \
+#  --seed 10 --machine_id 0'
 
 # command = command_aws5
 # command = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/no_pointnav_stretch_all_rooms_distrib \
@@ -284,7 +314,7 @@
 server_sets = {
     'aws1':{
         'servers':[f'aws{i}' for i in range(1,5)],
-        'ip_adr': '18.237.24.199',
+        'ip_adr': '52.13.80.54',
         'command': command_aws1,
     },
     'aws5':  {
@@ -309,6 +339,7 @@ def parse_args():
     args.command  = server['command']
     args.command = args.command.replace('IP_ADR', ip_adr)
     args.command = args.command.replace('NUM_MACHINES', str(len(args.servers)))
+    args.command = args.command.replace('MAIN_SERVER', ip_adr) 
     return args
 
 def main(args):
diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py
index cf8d390..2d234ea 100644
--- a/scripts/update_tensorboards.py
+++ b/scripts/update_tensorboards.py
@@ -3,7 +3,7 @@
 import time
 import psutil
 
-servers = [('aws1', 6006), ('aws5', 6007)]
+servers = [('aws1', 6006), ('aws5', 6007),('aws6', 6008)]#, ('aws7', 6009)]
 
 
 while(True):
diff --git a/utils/calculation_utils.py b/utils/calculation_utils.py
index 7b18731..f01693d 100644
--- a/utils/calculation_utils.py
+++ b/utils/calculation_utils.py
@@ -1,7 +1,9 @@
 import torch
-from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import depth_frame_to_world_space_xyz
+from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import depth_frame_to_world_space_xyz, camera_space_xyz_to_world_xyz
+from utils.noise_in_motion_util import squeeze_bool_mask
 import numpy as np
 
+
 def position_distance(s1, s2):
     position1 = s1["position"]
     position2 = s2["position"]
@@ -27,4 +29,34 @@ def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon,
             horizon=camera_horizon,
             fov=fov,
         )
-        return world_space_point_cloud
\ No newline at end of file
+        return world_space_point_cloud
+
+def calc_world_xyz_from_agent_relative(object_xyz_list, agent_xyz, agent_rotation, device):
+    # This might just be a duplication/convenience thing of the above
+    with torch.no_grad():
+        agent_xyz = (torch.from_numpy(agent_xyz).float().to(device))
+
+        object_xyz_matrix = np.ndarray([3,len(object_xyz_list)])
+        for i in range(len(object_xyz_list)):
+            x,y,z = object_xyz_list[i]['position']['x'],object_xyz_list[i]['position']['y'],object_xyz_list[i]['position']['z']
+            object_xyz_matrix[:,i] = [x,y,z]
+        object_xyz_matrix = torch.from_numpy(object_xyz_matrix).to(device)
+
+        world_xyz_matrix = camera_space_xyz_to_world_xyz(object_xyz_matrix, agent_xyz, agent_rotation, horizon=0 ).cpu().numpy()
+        world_positions = []
+        for i in range(len(object_xyz_list)):
+            x,y,z = world_xyz_matrix[:,i]
+            world_positions.append(dict(position=dict(x=x, y=y,z=z), rotation=dict(x=0,y=0, z=0)))
+
+        return world_positions
+
+def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device):
+    mask = squeeze_bool_mask(mask)
+    depth_frame_masked = depth_frame_original.copy()
+    depth_frame_masked[~mask] = -1
+    depth_frame_masked[depth_frame_masked == 0] = -1 # TODO: what is this for? missing values?
+    world_space_point_cloud = calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame_masked)
+    valid_points = (world_space_point_cloud == world_space_point_cloud).sum(dim=-1) == 3
+    point_in_world = world_space_point_cloud[valid_points]
+    midpoint_agent_coord = point_in_world.mean(dim=0)
+    return midpoint_agent_coord
diff --git a/utils/procthor_utils/procthor_helper.py b/utils/procthor_utils/procthor_helper.py
index 505c95a..af8d402 100644
--- a/utils/procthor_utils/procthor_helper.py
+++ b/utils/procthor_utils/procthor_helper.py
@@ -63,7 +63,7 @@ def distance_to_object_id(
     def path_from_point_to_object_id(
         point: Dict[str, float], object_id: str, allowed_error: float
     ) -> Optional[List[Dict[str, float]]]:
-        event = env.step(
+        event = env.controller.step(
             action="GetShortestPath",
             objectId=object_id,
             position=point,
@@ -109,7 +109,7 @@ def retry_dist(position: Dict[str, float], object_id: str) -> float:
             else:
                 break
         if d < 0:
-            get_logger().warning(
+            get_logger().debug(
                 f"In house {house_name}, could not find a path from {position} to {object_id}"
                 f" with {allowed_error} error tolerance. Returning a distance of -1."
             )
diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py
index 07e6319..907aae6 100644
--- a/utils/procthor_utils/procthor_object_nav_task_samplers.py
+++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py
@@ -1,4 +1,4 @@
-import glob
+from typing import Any, Dict, List, Optional
 import platform
 import pickle
 import random
@@ -10,7 +10,6 @@
 import datasets
 import numpy as np
 from ai2thor.controller import Controller
-# from allenact.base_abstractions.misc import RLStepResult
 from allenact.base_abstractions.sensor import Sensor
 from allenact.base_abstractions.task import Task, TaskSampler
 from allenact.utils.cache_utils import DynamicDistanceCache
@@ -18,13 +17,12 @@
 from allenact.utils.system import get_logger
 from ithor_arm.ithor_arm_viz import LoggerVisualizer
 
-# from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
 
 from utils.procthor_utils.procthor_types import AgentPose, Vector3
-from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask
+from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask
 from utils.stretch_utils.stretch_constants import ADITIONAL_ARM_ARGS
 from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
-from scripts.stretch_jupyter_helper import make_all_objects_unbreakable
+from scripts.stretch_jupyter_helper import get_relative_stretch_current_arm_state
 
 
 from manipulathor_utils.debugger_util import ForkedPdb
@@ -34,19 +32,19 @@ class ProcTHORObjectNavTaskSampler(TaskSampler):
 
     def __init__(
         self,
-        scenes: List[str],
         sensors: List[Sensor],
         max_steps: int,
         env_args: Dict[str, Any],
-        action_space: gym.Space,
         rewards_config: Dict,
-        objects: List[str],
         task_type: type,
-        scene_period: Optional[Union[int, str]] = None,
+        houses: datasets.Dataset,
+        house_inds: List[int],
+        target_object_types: List[str],
+        resample_same_scene_freq: int,
+        distance_type: str,
         max_tasks: Optional[int] = None,
         seed: Optional[int] = None,
         deterministic_cudnn: bool = False,
-        fixed_tasks: Optional[List[Dict[str, Any]]] = None,
         visualizers: List[LoggerVisualizer] = [],
         *args,
         **kwargs
@@ -56,26 +54,19 @@ def __init__(
         self.environment_type = env_args['environment_type']
         del env_args['environment_type']
         self.env_args = env_args
-        self.scenes = scenes
+        
         self.grid_size = 0.25
         self.env: Optional[StretchManipulaTHOREnvironment] = None
         self.sensors = sensors
         self.max_steps = max_steps
-        self._action_space = action_space
-        self.objects = objects
-
-        self.scene_counter: Optional[int] = None
-        self.scene_order: Optional[List[str]] = None
-        self.scene_id: Optional[int] = None
-        self.scene_period: Optional[
-            Union[str, int]
-        ] = scene_period  # default makes a random choice
-
+        self._action_space = gym.spaces.Discrete(len(self.TASK_TYPE.class_action_names()))
+        self.resample_same_scene_freq = resample_same_scene_freq
 
         self._last_sampled_task: Optional[Task] = None
-
+        
         self.seed: Optional[int] = None
-        self.set_seed(seed)
+        if seed is not None:
+            self.set_seed(seed)
 
         if deterministic_cudnn:
             set_deterministic_cudnn()
@@ -83,58 +74,40 @@ def __init__(
         
         self.visualizers = visualizers
         self.sampler_mode = kwargs["sampler_mode"]
-        self.cap_training = kwargs["cap_training"]
-
-        # if self.env_args['scene'] == 'Procedural':
-        self.house_dataset = datasets.load_dataset("allenai/houses", use_auth_token=True)
-
-        RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = (
-            -1
-        )  # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None`
-        # RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100
-        # if platform.system() == "Darwin":
-        #     RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1
-
-        RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 50
-        self.resample_same_scene_freq = -1 #RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN
-        if platform.system() == "Darwin":
-            self.resample_same_scene_freq = 1#TODO NOW WHAT SHOULD WE DO? kiana for now
-        # assert self.resample_same_scene_freq == 1 # IMPORTANT IT WON"T WORK FOR 100
+        if self.sampler_mode != "train":
+            self.rewards_config['shaping_weight'] = 0.0
+            self.rewards_config['exploration_reward'] = 0.0
+
         self.episode_index = 0
+        self.houses = houses
+        self.house_inds = house_inds
         self.house_inds_index = 0
-        self.reachable_positions_map = {}
-        self.house_dataset = self.house_dataset['train'] #TODO separately for test and val
-        # self.house_dataset = self.house_dataset.select(range(10))
-        # ForkedPdb().set_trace()
-
-        # ROOMS_TO_USE = [int(scene.replace('ProcTHOR', '')) for scene in self.scenes]
-        # ROOMS_TO_USE = [x for x in range(self.house_dataset.num_rows)]
-        ROOMS_TO_USE = [int(scene.replace('ProcTHOR', '')) for scene in self.scenes]
-
-        # self.args_house_inds = [x for x in range(self.house_dataset.num_rows)]
-        self.args_house_inds = ROOMS_TO_USE
             
         self.valid_rotations = [0,90,180,270]
-        self.distance_type = "l2"
+        self.distance_type = distance_type
         self.distance_cache = DynamicDistanceCache(rounding=1)
-        self.target_object_types_set = set(self.objects)
+        self.target_object_types_set = set(target_object_types)
         self.obj_type_counter = Counter(
-            {obj_type: 0 for obj_type in self.objects}
+            {obj_type: 0 for obj_type in target_object_types}
         )
         self.reachable_positions_map: Dict[int, Vector3] = dict()
         self.objects_in_scene_map: Dict[str, List[str]] = dict()
         self.visible_objects_cache = dict()
-        self.max_tasks = max_tasks if max_tasks is not None else np.Inf # stop when I tell you to stop
+        self.max_tasks = max_tasks 
         self.reset_tasks = self.max_tasks
-        self.max_vis_points=6
-        self.max_agent_positions=6
+        
+        self.max_vis_points = 6
+        self.max_agent_positions = 6         
+        self.p_greedy_target_object = 0.8
+        self.min_raycast_distance = 1.5
+
+        self.success_distance = 1.0
+
         self.reset()
 
 
     def set_seed(self, seed: int):
-        self.seed = seed
-        if seed is not None:
-            set_seed(seed)
+        set_seed(seed)
         
     def _create_environment(self, **kwargs) -> StretchManipulaTHOREnvironment:
         env = self.environment_type(
@@ -165,8 +138,8 @@ def last_sampled_task(self) -> Optional[StretchObjectNavTask]:
         return self._last_sampled_task
 
     def close(self) -> None:
-        if self.env.controller is not None:
-            self.env.controller.stop()
+        if self.env is not None:
+            self.env.stop()
 
     @property
     def all_observation_spaces_equal(self) -> bool:
@@ -190,11 +163,16 @@ def get_nearest_positions(self, world_position: Vector3) -> List[Vector3]:
         ]
 
     def get_nearest_agent_height(self, y_coordinate: float) -> float:
-        return 1.5759992 # from default agent - is guess. TODO check stretch
+        if self.env_args['agentMode'] == 'locobot':
+            return 0.8697997
+        elif self.env_args['agentMode'] == 'stretch':
+            return 1.27 # to intel camera, measured physical
+        else:
+            return 1.5759992 # from default agent - is guess. TODO check stretch
 
     @property
     def house_index(self) -> int:
-        return self.args_house_inds[self.house_inds_index]
+        return self.house_inds[self.house_inds_index]
 
     def is_object_visible(self, object_id: str) -> bool:
         """Return True if object_id is visible without any interaction in the scene.
@@ -237,7 +215,7 @@ def is_object_visible(self, object_id: str) -> bool:
                 if (
                     event.metadata["lastActionSuccess"]
                     and hit["objectId"] == object_id
-                    and hit["hitDistance"] < self.env_args['visibilityDistance']
+                    and hit["hitDistance"] < np.min([self.env_args['visibilityDistance'],self.min_raycast_distance])
                 ):
                     self.visible_objects_cache[self.house_index][object_id] = True
                     return True
@@ -267,10 +245,11 @@ def sample_target_object_ids(self) -> Tuple[str, List[str]]:
         Objects returned will all be of the same objectType. Only considers visible
         objects in the house.
         """
-        if random.random() < 0.8: # p_greedy_target_object
+        if random.random() < self.p_greedy_target_object:
             for obj_type, count in reversed(self.obj_type_counter.most_common()):
                 instances_of_type = self.target_objects_in_scene.get(obj_type, [])
 
+
                 # NOTE: object type doesn't appear in the scene.
                 if not instances_of_type:
                     continue
@@ -319,9 +298,11 @@ def increment_scene(self) -> bool:
         self.increment_scene_index()
 
         # self.env.controller.step(action="DestroyHouse", raise_for_failure=True)
-        self.env.controller.reset()
+        # self.env.controller.reset()
+        self.env.reset(scene_name='Procedural')
+        
         self.env.list_of_actions_so_far = []
-        self.house_entry = self.house_dataset[self.house_index]
+        self.house_entry = self.houses[self.house_index]
         self.house = pickle.loads(self.house_entry["house"])
 
         if platform.system() == "Darwin": #TODO remove
@@ -330,20 +311,15 @@ def increment_scene(self) -> bool:
         self.env.controller.step(
             action="CreateHouse", house=self.house, raise_for_failure=True
         )
-        self.env.controller.step("ResetObjectFilter")
-
-        #TODO dude this is ugly!
-        pose = self.house["metadata"]["agent"].copy()
-        event = self.env.controller.step(action="TeleportFull", **pose)
-        if not event:
-            # get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise
-            return False #TODO this can mess FPS
-        self.env.controller.step(action="MakeAllObjectsMoveable")
-        self.env.controller.step(action="MakeObjectsStaticKinematicMassThreshold")
-        make_all_objects_unbreakable(self.env.controller)
-
-        # NOTE: Set reachable positions
+        
         if self.house_index not in self.reachable_positions_map:
+            pose = self.house["metadata"]["agent"].copy()
+            if self.env_args['agentMode'] == 'locobot':
+                del pose["standing"]
+            event = self.env.controller.step(action="TeleportFull", **pose)
+            if not event:
+                get_logger().warning(f"Initial teleport failing in {self.house_index}.")
+                return False
             rp_event = self.env.controller.step(action="GetReachablePositions")
             if not rp_event:
                 # NOTE: Skip scenes where GetReachablePositions fails
@@ -353,10 +329,17 @@ def increment_scene(self) -> bool:
                 return False
             reachable_positions = rp_event.metadata["actionReturn"]
             self.reachable_positions_map[self.house_index] = reachable_positions
+        
+        # verify the stretch arm is stowed
+        if self.env_args['agentMode'] == 'stretch':
+            arm_pos = get_relative_stretch_current_arm_state(self.env.controller)
+            assert abs(sum(arm_pos.values())) < 0.001
+
         return True
+        
 
     def increment_scene_index(self):
-        self.house_inds_index = (self.house_inds_index + 1) % len(self.args_house_inds)
+        self.house_inds_index = (self.house_inds_index + 1) % len(self.house_inds)
 
     def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]:
         if self.env is None:
@@ -385,8 +368,9 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject
                 while not self.increment_scene():
                     pass
 
-        if random.random() < 0.8: #TODO
+        if random.random() < self.env_args['p_randomize_material']:
             self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True)
+            self.env.controller.step(action="RandomizeLighting", synchronized=True, raise_for_failure=True)
         else:
             self.env.controller.step(action="ResetMaterials", raise_for_failure=True)
 
@@ -397,21 +381,26 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject
         )
 
         # NOTE: Set agent pose
-        starting_pose = AgentPose(
-            position=random.choice(self.reachable_positions),
-            rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0),
-            horizon=0,
-            standing=True,
-        )
-        event = self.env.controller.step(action="TeleportFull", **starting_pose)
-        # if not event:
-        #     get_logger().warning(
-        #         f"Teleport failing in {self.house_index} at {starting_pose}"
-        #     )
+        event = None
+        attempts = 0
+        while not event:
+            attempts+=1
+            starting_pose = AgentPose(
+                position=random.choice(self.reachable_positions),
+                rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0),
+                horizon=0,
+            )
+            if self.env_args['agentMode'] != 'locobot':
+                starting_pose['standing']=True
+                starting_pose['horizon'] = self.env_args['horizon_init']
+            event = self.env.controller.step(action="TeleportFull", **starting_pose)
+            if attempts > 10:
+                get_logger().error(f"Teleport failed {attempts-1} times in house {self.house_index} - something may be wrong")
+            
 
         self.episode_index += 1
         self.max_tasks -= 1
-
+        
         self._last_sampled_task = self.TASK_TYPE(
             env=self.env,
             sensors=self.sensors,
@@ -431,6 +420,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject
                 "object_type": target_object_type,
                 "starting_pose": starting_pose,
                 "mirrored": self.env_args['allow_flipping'] and random.random() > 0.5,
+                'success_distance': self.success_distance
             },
         )
         return self._last_sampled_task
@@ -439,3 +429,88 @@ def reset(self):
         self.episode_index = 0
         self.max_tasks = self.reset_tasks
         self.house_inds_index = 0
+
+
+
+class RoboThorObjectNavTestTaskSampler(ProcTHORObjectNavTaskSampler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.last_scene = None
+
+    def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]:
+        # ForkedPdb().set_trace()
+        while True:
+            # NOTE: Stopping condition
+            if self.env is None:
+                self.env = self._create_environment()
+        
+            # NOTE: Stopping condition
+            if self.max_tasks <= 0:
+                return None
+
+
+            epidx = self.house_inds[self.max_tasks - 1]
+            ep = self.houses[epidx]
+            # ForkedPdb().set_trace()
+
+            if self.last_scene is None or self.last_scene != ep["scene"]:
+                self.last_scene = ep["scene"]
+                # self.env.controller.reset(ep["scene"])
+                self.env.reset(scene_name=ep["scene"])
+
+            # NOTE: not using ep["targetObjectIds"] due to floating points with
+            # target objects moving.
+            event = self.env.controller.step(action="ResetObjectFilter")
+            target_object_ids = [
+                obj["objectId"]
+                for obj in event.metadata["objects"]
+                if obj["objectType"] == ep["targetObjectType"]
+            ]
+            self.env.controller.step(
+                action="SetObjectFilter",
+                objectIds=target_object_ids,
+                raise_for_failure=True,
+            )
+            if self.env_args['agentMode'] != 'locobot':
+                ep["agentPose"]["standing"] = True
+                ep["agentPose"]["horizon"] = self.env_args['horizon_init'] # reset for stretch agent
+            event = self.env.controller.step(action="TeleportFull", **ep["agentPose"])
+            if not event:
+                # NOTE: Skip scenes where TeleportFull fails.
+                # This is added from a bug in the RoboTHOR eval dataset.
+                get_logger().error(
+                    f"Teleport failing {event.metadata['actionReturn']} in {epidx}."
+                )
+                self.max_tasks -= 1
+                self.episode_index += 1
+                continue
+
+            difficulty = {"difficulty": ep["difficulty"]} if "difficulty" in ep else {}
+            self._last_sampled_task = self.TASK_TYPE(
+                # visualize=self.episode_index in self.epids_to_visualize,
+                env=self.env,
+                sensors=self.sensors,
+                max_steps=self.max_steps,
+                reward_config=self.rewards_config,
+                distance_type=self.distance_type,
+                distance_cache=self.distance_cache,
+                visualizers=self.visualizers,
+                task_info={
+                    "mode": self.env_args['agentMode'],
+                    "scene_name": ep["scene"],
+                    "target_object_ids": target_object_ids,
+                    "object_type": ep["targetObjectType"],
+                    "starting_pose": ep["agentPose"],
+                    "mirrored": False,
+                    "id": f"{ep['scene']}__global{epidx}__{ep['targetObjectType']}",
+                    'success_distance': self.success_distance,
+                    **difficulty,
+                },
+            )
+
+            self.max_tasks -= 1
+            self.episode_index += 1
+
+            return self._last_sampled_task
+
+
diff --git a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py
similarity index 73%
rename from utils/procthor_utils/all_rooms_object_nav_task_sampler.py
rename to utils/stretch_utils/all_rooms_object_nav_task_sampler.py
index 76dab88..cc467cf 100644
--- a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py
+++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py
@@ -18,8 +18,14 @@
 from scripts.dataset_generation.find_categories_to_use import ROBOTHOR_TRAIN, KITCHEN_TRAIN, KITCHEN_TEST, KITCHEN_VAL
 from utils.manipulathor_data_loader_utils import get_random_query_image, get_random_query_feature_from_img_adr
 from scripts.stretch_jupyter_helper import get_reachable_positions, transport_wrapper
+from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask
 from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer
 
+from utils.stretch_utils.real_stretch_environment import StretchRealEnvironment
+from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment
+
+
+
 
 class AllRoomsObjectNavTaskSampler(TaskSampler):
 
@@ -136,7 +142,8 @@ def __init__(
         self.reset()
         self.visualizers = visualizers
         self.sampler_mode = kwargs["sampler_mode"]
-        self.cap_training = kwargs["cap_training"]
+
+        self.success_distance = 1.0
 
         possible_initial_locations = (
             "datasets/apnd-dataset/valid_agent_initial_locations.json"
@@ -272,6 +279,13 @@ def put_object_in_location(location_point):
             # if not event_transport_init_obj.metadata['lastActionSuccess']:
                 # print('scene', scene_name, 'init', init_object['object_id'])
                 # print('ERROR: one of transfers fail', 'init', event_transport_init_obj.metadata['errorMessage'])
+
+            if random.random() < 0.8:
+                # self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True)
+                self.env.controller.step(action="RandomizeLighting", synchronized=True, raise_for_failure=True)
+            # else:
+                # self.env.controller.step(action="ResetMaterials", raise_for_failure=True)
+
         else:
             #Object is already at the location it should be
             target_obj_type = data_point['target_obj_type']
@@ -289,7 +303,7 @@ def get_full_object_info(env, object_type):
 
             init_object = get_full_object_info(self.env, target_obj_type)
 
-        agent_state["cameraHorizon"] = 0 # 0 for stretch, 20 for other manipulathor agent
+        agent_state["cameraHorizon"] = self.env_args['horizon_init'] # 0 for stretch, 20 for other manipulathor agent
         event = this_controller.step(
             dict(
                 action="TeleportFull",
@@ -323,7 +337,8 @@ def get_full_object_info(env, object_type):
             'mode': self.env_args['agentMode'],
             'house_name': scene_name,
             'scene_name': scene_name,
-            'mirrored': False #self.env_args['allow_flipping'] and random.random() > 0.5, # not including for non-procthor
+            'mirrored': False, #self.env_args['allow_flipping'] and random.random() > 0.5, # not including for non-procthor
+            'success_distance': self.success_distance
         }
 
 
@@ -340,6 +355,7 @@ def get_full_object_info(env, object_type):
             reward_config=self.rewards_config,
             distance_type=self.distance_type,
             distance_cache=self.distance_cache,
+            additional_visualize=False # not implemented for non-procthor
         )
 
         return self._last_sampled_task
@@ -391,7 +407,7 @@ def get_source_target_indices(self):
                 "name": "agent",
                 "position": dict(x=agent_pose['x'], y=agent_pose['y'], z=agent_pose['z']),
                 "rotation": dict(x=0, y=agent_pose['rotation'], z=0),
-                "cameraHorizon": agent_pose['horizon'],
+                "cameraHorizon": self.env_args['horizon_init'], #agent_pose['horizon'],
                 "isStanding": True,
             }
 
@@ -408,7 +424,7 @@ def get_source_target_indices(self):
                 "name": "agent",
                 "position": dict(x=agent_pose['x'], y=agent_pose['y'], z=agent_pose['z']),
                 "rotation": dict(x=0, y=agent_pose['rotation'], z=0),
-                "cameraHorizon": agent_pose['horizon'],
+                "cameraHorizon": self.env_args['horizon_init'], #agent_pose['horizon'],
                 "isStanding": True,
             }
 
@@ -418,3 +434,106 @@ def get_source_target_indices(self):
 
         return data_point
 
+
+class RealStretchAllRoomsObjectNavTaskSampler(AllRoomsObjectNavTaskSampler):
+
+    def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment:
+        env = StretchRealEnvironment(
+            make_agents_visible=False,
+            object_open_speed=0.05,
+            env_args=self.env_args,
+        )
+        return env
+    
+    def __init__(self, **kwargs) -> None:
+
+        super().__init__(**kwargs)
+
+        self.all_possible_points = {}
+
+        # This can be done more elegantly from datasets/objects/robothor_habitat2022.yaml and exclude toilet
+        self.possible_real_objects = [
+            {'object_id': 'AlarmClock|1|1|1', 'object_type':"AlarmClock"},
+            {'object_id': 'Apple|1|1|1', 'object_type':"Apple"},
+            {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"},
+            {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"},
+            {'object_id': 'Bed|1|1|1', 'object_type':"Bed"},
+            {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"},
+            {'object_id': 'Chair|1|1|1', 'object_type':"Chair"},
+            {'object_id': 'GarbageCan|1|1|1', 'object_type':"GarbageCan"},
+            {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"},
+            {'object_id': 'Laptop|1|1|1', 'object_type':"Laptop"},
+            {'object_id': 'Mug|1|1|1', 'object_type':"Mug"},
+            {'object_id': 'Sofa|1|1|1', 'object_type':"Sofa"},
+            {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"},
+            {'object_id': 'Television|1|1|1', 'object_type':"Television"},
+            {'object_id': 'Vase|1|1|1', 'object_type':"Vase"}
+        ]
+
+        self.preset_easyish_tasks = [
+            {'object_id': 'Apple|1|1|1', 'object_type':"Apple"},
+            {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"},
+            {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"},
+            {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"},
+            {'object_id': 'Chair|1|1|1', 'object_type':"Chair"},
+            {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"},
+            {'object_id': 'Mug|1|1|1', 'object_type':"Mug"},
+            {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"},
+            {'object_id': 'Television|1|1|1', 'object_type':"Television"},
+            {'object_id': 'Vase|1|1|1', 'object_type':"Vase"}
+        ]
+
+        self.real_object_index = 0
+
+        if self.sampler_mode == "test":
+            self.max_tasks = self.reset_tasks = 200
+
+    def next_task(
+            self, force_advance_scene: bool = False
+    ) -> Optional[ObjectNavTask]:
+
+        if self.max_tasks is not None and self.max_tasks <= 0:
+            return None
+
+        if self.sampler_mode != "train" and self.length <= 0:
+            return None
+
+        if self.env is None:
+            self.env = self._create_environment()
+        self.env.reset(scene_name='RealRobothor')
+        
+        # skip_object = True
+        # while skip_object:
+        #     target_object = random.choice(self.possible_real_objects)
+        #     print('I am now seeking a', target_object['object_type'], '. Accept by setting skip_object=False')
+        #     ForkedPdb().set_trace()
+        
+        target_object = self.preset_easyish_tasks[self.real_object_index]
+        print('I am now seeking a', target_object['object_type'], '. Continue when ready.')
+        ForkedPdb().set_trace()
+
+        task_info = {
+            'target_object_ids': [target_object['object_id']],
+            'object_type': target_object['object_type'],
+            'starting_pose': {},
+            'mode': self.env_args['agentMode'],
+            'house_name': 'RealRobothor',
+            'scene_name': 'RealRobothor',
+            'mirrored': False # yeah no
+        }
+
+        self._last_sampled_task = self.TASK_TYPE(
+            env=self.env,
+            sensors=self.sensors,
+            task_info=task_info,
+            max_steps=self.max_steps,
+            action_space=self._action_space,
+            visualizers=self.visualizers,
+            reward_config=self.rewards_config,
+            distance_type="real_world",
+            distance_cache=self.distance_cache,
+            additional_visualize=False # not implemented for non-procthor
+        )
+        self.real_object_index = self.real_object_index + 1
+
+        return self._last_sampled_task
\ No newline at end of file
diff --git a/utils/stretch_utils/real_stretch_environment.py b/utils/stretch_utils/real_stretch_environment.py
index c7dd705..1d06982 100644
--- a/utils/stretch_utils/real_stretch_environment.py
+++ b/utils/stretch_utils/real_stretch_environment.py
@@ -39,7 +39,7 @@ def check_controller_version(self):
 
 
     def create_controller(self):
-        controller = ai2thor.robot_controller.Controller(host="stretch2.corp.ai2", port=9000, width=1280, height=720)
+        controller = ai2thor.robot_controller.Controller(host="stretch2", port=9000, width=1280, height=720)
         return controller
 
     def start(
diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py
index 09ad06d..c9a1410 100644
--- a/utils/stretch_utils/real_stretch_sensors.py
+++ b/utils/stretch_utils/real_stretch_sensors.py
@@ -13,16 +13,17 @@
 import torch
 from PIL import Image
 import matplotlib.pyplot as plt
-from allenact.base_abstractions.sensor import DepthSensor, Sensor, RGBSensor
+from allenact.base_abstractions.sensor import Sensor #, DepthSensor, RGBSensor
+
 from allenact.base_abstractions.task import Task
 from allenact.utils.misc_utils import prepare_locals_for_super
 from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
 from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor
-from detectron2.config import get_cfg
-from detectron2.data import MetadataCatalog
-from detectron2.engine import DefaultPredictor
-from detectron2.model_zoo import model_zoo
-from detectron2.utils.visualizer import Visualizer
+# from detectron2.config import get_cfg
+# from detectron2.data import MetadataCatalog
+# from detectron2.engine import DefaultPredictor
+# from detectron2.model_zoo import model_zoo
+# from detectron2.utils.visualizer import Visualizer
 import matplotlib.pyplot as plt
 from torch.distributions.utils import lazy_property
 
@@ -38,7 +39,10 @@
 from utils.calculation_utils import calc_world_coordinates
 from utils.detection_translator_util import THOR2COCO
 from utils.noise_in_motion_util import squeeze_bool_mask
-from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask
+
+from scipy.interpolate import UnivariateSpline
+
+# from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask
 from utils.stretch_utils.stretch_constants import INTEL_RESIZED_H, INTEL_RESIZED_W, KINECT_REAL_W, KINECT_REAL_H, \
     MAX_INTEL_DEPTH, MIN_INTEL_DEPTH, MAX_KINECT_DEPTH, MIN_KINECT_DEPTH, INTEL_FOV_W, INTEL_FOV_H, KINECT_FOV_W, \
     KINECT_FOV_H
@@ -85,7 +89,7 @@ def frame_from_env(self, env: IThorEnvironment, task: Optional[Task]) -> np.ndar
         return (depth)
 
 def normalize_real_intel_image(image,final_size=224):
-    image = cv2.rotate(image, cv2.cv2.ROTATE_90_CLOCKWISE)
+    image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
     # if len(image.shape) == 3:
     #     image = image[:,:,::-1]
     #     image = cv2.flip(image,1)
@@ -115,6 +119,22 @@ def normalize_real_kinect_image(frame,size=224):
     w,h = (int(current_size[0] * ratio), int(current_size[1] * ratio))
 
     frame = cv2.resize(frame,(h,w))
+
+    spl = UnivariateSpline([0, 64, 128, 192, 255], [0, 30, 80, 160, 255])
+    color_channel_decr = spl(range(256))
+
+    c_r, c_g, c_b = cv2.split(frame)
+    c_r = cv2.LUT(c_r, color_channel_decr).astype('uint8')
+    frame_mod = cv2.merge((c_r,c_g,c_b))
+    hsv=cv2.cvtColor(frame_mod,cv2.COLOR_RGB2HSV).astype('int64')
+    hsv[:,:,2] = hsv[:,:,2] + 25
+    hsv = np.clip(hsv,0,255).astype('uint8')
+    frame_bright = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
+
+    sidebyside=np.hstack((frame,frame_mod,frame_bright))
+    frame=frame_bright
+
+    # ForkedPdb().set_trace()
     if len(frame.shape) == 3:
         result = np.zeros((size, size, frame.shape[2]))
     elif len(frame.shape) == 2:
@@ -129,376 +149,384 @@ def normalize_real_kinect_image(frame,size=224):
         result[result < MIN_KINECT_DEPTH] = 0
     return result.astype(frame.dtype)
 
-class StretchDetectronObjectMask(Sensor):
-    def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any):
-        observation_space = gym.spaces.Box(
-            low=0, high=1, shape=(1,), dtype=np.float32
-        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
-        self.type = type
-        uuid = '{}_{}'.format(uuid, type)
-        self.noise = noise
-        self.cache = {'object_name': '', 'image':None, 'mask':None}
-        super().__init__(**prepare_locals_for_super(locals()))
-        self.source_camera = source_camera
-
-    @lazy_property
-    def predictor(self):
-        self.cfg = get_cfg()
-        if platform.system() == "Darwin":
-            self.cfg.MODEL.DEVICE = "cpu"
-        # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
-        self.cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # is this the model we want? Yes it is a pretty good model
-        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05  # set threshold for this model #TODO good number?
-        # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
-        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
-        # self.predictor = DefaultPredictor(self.cfg)
-        self.class_labels = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).thing_classes
-        return DefaultPredictor(self.cfg)
 
+""" 
+cv2.imshow('window2',cv2.cvtColor(sidebyside,cv2.COLOR_RGB2BGR))
+cv2.waitKey(0)
+cv2.imshow('window',cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR))
+cv2.waitKey(0) 
+
+"""
+# class StretchDetectronObjectMask(Sensor):
+#     def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any):
+#         observation_space = gym.spaces.Box(
+#             low=0, high=1, shape=(1,), dtype=np.float32
+#         )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+#         self.type = type
+#         uuid = '{}_{}'.format(uuid, type)
+#         self.noise = noise
+#         self.cache = {'object_name': '', 'image':None, 'mask':None}
+#         super().__init__(**prepare_locals_for_super(locals()))
+#         self.source_camera = source_camera
+
+#     @lazy_property
+#     def predictor(self):
+#         self.cfg = get_cfg()
+#         if platform.system() == "Darwin":
+#             self.cfg.MODEL.DEVICE = "cpu"
+#         # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
+#         self.cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # is this the model we want? Yes it is a pretty good model
+#         self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05  # set threshold for this model #TODO good number?
+#         # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
+#         self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
+#         # self.predictor = DefaultPredictor(self.cfg)
+#         self.class_labels = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).thing_classes
+#         return DefaultPredictor(self.cfg)
+
+
+#     def get_observation(
+#             self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
+#     ) -> Any:
+
+#         if self.type == 'source':
+#             info_to_search = 'source_object_id'
+#         elif self.type == 'destination':
+#             info_to_search = 'goal_object_id'
+#             #TODO remove
+#             mask = torch.zeros((224,224,1))
+#             return mask
+
+#         original_im = self.source_camera.get_observation(env, task, *args, **kwargs)
+
+#         if info_to_search == self.cache['object_name']:
+#             if np.all(self.cache['image'] == original_im):
+#                 return self.cache['mask']
+#         else:
+#             self.cache = {}
+
+
+#         # TODO VERYYYYYY IMPORTANT
+#         im = original_im[:,:,::-1]
+#         # the detection requires BGR???
+
+#         outputs = self.predictor(im * 255.)
+
+#         #  remove
+#         # outputs = self.predictor(im * 255.)
+#         # cv2.imwrite('my_image.png',im * 255.)
+
+#         #
+#         def visualize_detections(im, outputs ):
+#             v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2)
+#             out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+#             dir = 'experiment_output/visualization_predictions'
+#             timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f.png")
+#             os.makedirs(dir, exist_ok=True)
+#             print('saving all detections in ', os.path.join(dir, timestamp))
+#             plt.imsave(os.path.join(dir, timestamp), out.get_image()[:, :, ::-1])
+
+
+
+#         category = task.task_info[info_to_search].split('|')[0]
+#         assert category in THOR2COCO
+#         class_ind_to_look_for = self.class_labels.index(THOR2COCO[category])
+#         all_predicted_labels = outputs['instances'].pred_classes
+#         all_predicted_bbox = outputs['instances'].pred_boxes #TODO switch to segmentation
+
+
+
+#         mask = torch.zeros((im.shape[0], im.shape[1]))
+#         valid_boxes = [all_predicted_bbox[i] for i in range(len(all_predicted_labels)) if all_predicted_labels[i] == class_ind_to_look_for]
+
+#         for box in valid_boxes:
+#             x1, y1, x2, y2 = [int(x) for x in box.tensor.squeeze()]
+#             mask[y1:y2, x1:x2] = 1
+#         mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0), size=(224, 224)).squeeze(0).squeeze(0)
+
+#         #TODO save all the masks so we don't pass the image through detectyion multiple timesact
+
+#         # print('camera', self.source_camera, 'type', self.type, 'category', category)
+#         # print('observed objects', [self.class_labels[x] for x in all_predicted_labels])
+
+#         #TODO this can be optimized because we are passing this image twice and no point
+
+#         # use these later for visualization purposes?
+
+#         # plt.imsave('something.png', im)
+#         mask = mask.unsqueeze(-1)
+#         self.cache = {'object_name': info_to_search, 'image':original_im, 'mask':mask}
+
+#         return mask
+
+
+# class StretchObjectMask(Sensor):
+#     def __init__(self, type: str,noise,  uuid: str = "object_mask", **kwargs: Any):
+#         observation_space = gym.spaces.Box(
+#             low=0, high=1, shape=(1,), dtype=np.float32
+#         )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+#         self.type = type
+#         uuid = '{}_{}'.format(uuid, type)
+#         self.noise = noise
+#         self.cache = None
+#         super().__init__(**prepare_locals_for_super(locals()))
+
+#     def get_observation(
+#             self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
+#     ) -> Any:
+#         if env.last_image_changed is False and self.cache is not None:
+#             return self.cache
+
+#         # TODO remove
+#         # mask = np.zeros((224, 224, 1))
+#         # return mask
+
+
+#         img = env.controller.last_event.frame
+#         resized_image = cv2.resize(img, dsize=(224,224))
+#         global input_received, center_x, center_y
+#         center_x, center_y = -1, -1
+#         input_received = False
+
+#         def normalize_number(x, w):
+#             if x < 0:
+#                 x = 0
+#             if x > w:
+#                 x = w
+#             return x
+
+#         def clear_plot():
+#             fig.clear(); fig.clf(); plt.clf(); ax.cla(); fig.clear(True)
+#         def close_plot():
+#             clear_plot()
+#             plt.close(fig)
+#             plt.close('all')
+#         def onclick(event):
+#             global center_x, center_y, input_received
+#             if event.button == 3: #Right click
+#                 center_x = -1
+#                 center_y = -1
+#                 input_received = True
+#                 close_plot()
+#             if event.button == 1: #Left click
+#                 center_x = event.xdata
+#                 center_y = event.ydata
+#                 input_received = True
+#                 close_plot()
+
+#         import matplotlib
+#         matplotlib.use('MacOSX')
+#         fig, ax = plt.subplots()
+#         # clear_plot()
+#         ax.imshow(resized_image)
+#         cid = fig.canvas.mpl_connect('button_press_event', onclick)
+#         plt.show()
+#         fig.canvas.mpl_disconnect(cid)
+#         close_plot()
+
+
+#         self.window_size = 20 #TODO do I want to change the size of this one maybe?
+#         mask = np.zeros((224, 224, 1))
+#         if center_y == -1 and center_x == -1:
+#             mask[:,:] = 0.
+#         else:
+#             offset = self.window_size / 2
+#             object_boundaries = center_x - offset, center_y - offset, center_x + offset, center_y + offset
+#             x1, y1, x2, y2 = [int(normalize_number(i, 224)) for i in object_boundaries]
+#             mask[y1:y2, x1:x2] = 1.
+#         self.cache = mask
+#         return mask
+
+
+
+# class RealStretchPickedUpObjSensor(Sensor):
+#     def __init__(self, uuid: str = "pickedup_object", **kwargs: Any):
+#         observation_space = gym.spaces.Box(
+#             low=0, high=1, shape=(1,), dtype=np.float32
+#         )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+#         super().__init__(**prepare_locals_for_super(locals()))
+
+#     def get_observation(
+#             self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
+#     ) -> Any:
+#         return False
+
+# class RealKinectArmPointNavEmulSensor(Sensor):
+
+#     def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, arm_mask_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any):
+#         observation_space = gym.spaces.Box(
+#             low=0, high=1, shape=(1,), dtype=np.float32
+#         )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+#         self.type = type
+#         self.mask_sensor = mask_sensor
+#         self.depth_sensor = depth_sensor
+#         self.arm_mask_sensor = arm_mask_sensor
+#         uuid = '{}_{}'.format(uuid, type)
+
+#         self.min_xyz = np.zeros((3))
+
+#         self.dummy_answer = torch.zeros(3)
+#         self.dummy_answer[:] = 4 # is this good enough?
+#         self.device = torch.device("cpu")
+#         super().__init__(**prepare_locals_for_super(locals()))
+#     def get_camera_int_ext(self,env):
+#         #TODO all these values need to be checked
+#         fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think
+#         agent_state = env.controller.last_event.metadata['agent']
+#         camera_horizon = 45
+#         camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']])
+#         camera_rotation = (agent_state['rotation']['y'] + 90) % 360
+#         return fov, camera_horizon, camera_xyz, camera_rotation
+#     def get_observation(
+#             self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any
+#     ) -> Any:
+#         #TODO remove
+#         if self.type == 'destination':
+#             return self.dummy_answer
+#         mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times?
+#         depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1)
+
+#         if task.num_steps_taken() == 0:
+#             self.pointnav_history_aggr = []
+#             self.real_prev_location = None
+#             self.belief_prev_location = None
+
+
+#         fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env)
+#         arm_world_coord = None
+
+#         if mask.sum() != 0:
+
+#             midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
+#             if not torch.any(torch.isnan(midpoint_agent_coord) + torch.isinf(midpoint_agent_coord)):
+#                 self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken()))
+
+#         arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice
+#         if arm_mask.sum() == 0: #Do we want to do some approximations or no?
+#             arm_world_coord = None #TODO approax for this
+#         else:
+#             arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
+#             # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera
+#             # result = distance_in_agent_coord.cpu()
+#         # if arm_mask.sum() != 0 or mask.sum() != 0:
+#         #
+#         #     import cv2
+#         #     cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1])
+#         #     cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255)
+#         #     cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255)
+#         result = self.history_aggregation(camera_xyz, camera_rotation, arm_world_coord, task.num_steps_taken())
+#         return result
+#     def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number):
+#         if len(self.pointnav_history_aggr) == 0 or arm_world_coord is None:
+#             return self.dummy_answer
+#         else:
+#             weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+#             total_weights = sum(weights)
+#             total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+#             total_sum = sum(total_sum)
+#             midpoint = total_sum / total_weights
+#             agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
+#             midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
+#             midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
+#             midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']])
+
+#             arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0))
+#             arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state)
+#             arm_state_agent_coord = torch.Tensor([arm_state_agent_coord['position'][k] for k in ['x','y','z']])
+
+#             distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord
+
+#             return distance_in_agent_coord.cpu()
+
+
+# class KinectArmMaskSensor(Sensor):
+#     def __init__(self, uuid: str = "arm_mask_kinect", **kwargs: Any):
+#         observation_space = gym.spaces.Box(
+#             low=0, high=1, shape=(1,), dtype=np.float32
+#         )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+#         super().__init__(**prepare_locals_for_super(locals()))
+
+#     def get_observation(
+#             self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
+#     ) -> Any:
+#         kinect_raw_value = env.kinect_raw_frame
+#         mask = get_binary_mask_of_arm(kinect_raw_value)
+#         mask = normalize_real_kinect_image(mask)
+#         return np.expand_dims(mask, axis=-1)
+
+# class RealIntelAgentBodyPointNavEmulSensor(Sensor):
+
+#     def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any):
+#         observation_space = gym.spaces.Box(
+#             low=0, high=1, shape=(1,), dtype=np.float32
+#         )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+#         self.type = type
+#         self.mask_sensor = mask_sensor
+#         self.depth_sensor = depth_sensor
+#         uuid = '{}_{}'.format(uuid, type)
+
+#         self.min_xyz = np.zeros((3))
+#         self.dummy_answer = torch.zeros(3)
+#         self.dummy_answer[:] = 4 # is this good enough?
+#         self.device = torch.device("cpu")
+
+
+#         super().__init__(**prepare_locals_for_super(locals()))
+#     def get_camera_int_ext(self,env):
+#         #TODO all these values need to be checked
+#         fov=max(INTEL_FOV_W, INTEL_FOV_H) #TODO are you sure? it should be smaller one I think
+#         agent_state = env.controller.last_event.metadata['agent']
+#         camera_horizon = 0
+#         camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']])
+#         camera_rotation = agent_state['rotation']['y']
+#         return fov, camera_horizon, camera_xyz, camera_rotation
+
+
+
+
+#     def get_observation(
+#             self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any
+#     ) -> Any:
+
+#         mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs))
+
+#         if task.num_steps_taken() == 0:
+#             self.pointnav_history_aggr = []
+#             self.real_prev_location = None
+#             self.belief_prev_location = None
+
+
+#         fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env)
+
+
+#         if mask.sum() != 0:
+#             depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1)
+#             middle_of_object = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
+#             self.pointnav_history_aggr.append((middle_of_object.cpu(), 1, task.num_steps_taken()))
+
+#             # result = middle_of_object.cpu()
+#         # else:
+#         #     result = self.dummy_answer
+#         result = self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken())
+#         return result
+#     def history_aggregation(self, camera_xyz, camera_rotation, current_step_number):
+#         if len(self.pointnav_history_aggr) == 0:
+#             return self.dummy_answer
+#         else:
+#             weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+#             total_weights = sum(weights)
+#             total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+#             total_sum = sum(total_sum)
+#             midpoint = total_sum / total_weights
+#             agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
+#             midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
+#             midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
+
+#             distance_in_agent_coord = dict(x=midpoint_agent_coord['position']['x'], y=midpoint_agent_coord['position']['y'], z=midpoint_agent_coord['position']['z'])
+
+#             agent_centric_middle_of_object = torch.Tensor([distance_in_agent_coord['x'], distance_in_agent_coord['y'], distance_in_agent_coord['z']])
 
-    def get_observation(
-            self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
-    ) -> Any:
-
-        if self.type == 'source':
-            info_to_search = 'source_object_id'
-        elif self.type == 'destination':
-            info_to_search = 'goal_object_id'
-            #TODO remove
-            mask = torch.zeros((224,224,1))
-            return mask
-
-        original_im = self.source_camera.get_observation(env, task, *args, **kwargs)
-
-        if info_to_search == self.cache['object_name']:
-            if np.all(self.cache['image'] == original_im):
-                return self.cache['mask']
-        else:
-            self.cache = {}
-
-
-        # TODO VERYYYYYY IMPORTANT
-        im = original_im[:,:,::-1]
-        # the detection requires BGR???
-
-        outputs = self.predictor(im * 255.)
-
-        #  remove
-        # outputs = self.predictor(im * 255.)
-        # cv2.imwrite('my_image.png',im * 255.)
-
-        #
-        def visualize_detections(im, outputs ):
-            v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2)
-            out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
-            dir = 'experiment_output/visualization_predictions'
-            timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f.png")
-            os.makedirs(dir, exist_ok=True)
-            print('saving all detections in ', os.path.join(dir, timestamp))
-            plt.imsave(os.path.join(dir, timestamp), out.get_image()[:, :, ::-1])
-
-
-
-        category = task.task_info[info_to_search].split('|')[0]
-        assert category in THOR2COCO
-        class_ind_to_look_for = self.class_labels.index(THOR2COCO[category])
-        all_predicted_labels = outputs['instances'].pred_classes
-        all_predicted_bbox = outputs['instances'].pred_boxes #TODO switch to segmentation
-
-
-
-        mask = torch.zeros((im.shape[0], im.shape[1]))
-        valid_boxes = [all_predicted_bbox[i] for i in range(len(all_predicted_labels)) if all_predicted_labels[i] == class_ind_to_look_for]
-
-        for box in valid_boxes:
-            x1, y1, x2, y2 = [int(x) for x in box.tensor.squeeze()]
-            mask[y1:y2, x1:x2] = 1
-        mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0), size=(224, 224)).squeeze(0).squeeze(0)
-
-        #TODO save all the masks so we don't pass the image through detectyion multiple timesact
-
-        # print('camera', self.source_camera, 'type', self.type, 'category', category)
-        # print('observed objects', [self.class_labels[x] for x in all_predicted_labels])
-
-        #TODO this can be optimized because we are passing this image twice and no point
-
-        # use these later for visualization purposes?
-
-        # plt.imsave('something.png', im)
-        mask = mask.unsqueeze(-1)
-        self.cache = {'object_name': info_to_search, 'image':original_im, 'mask':mask}
-
-        return mask
-
-
-class StretchObjectMask(Sensor):
-    def __init__(self, type: str,noise,  uuid: str = "object_mask", **kwargs: Any):
-        observation_space = gym.spaces.Box(
-            low=0, high=1, shape=(1,), dtype=np.float32
-        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
-        self.type = type
-        uuid = '{}_{}'.format(uuid, type)
-        self.noise = noise
-        self.cache = None
-        super().__init__(**prepare_locals_for_super(locals()))
-
-    def get_observation(
-            self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
-    ) -> Any:
-        if env.last_image_changed is False and self.cache is not None:
-            return self.cache
-
-        # TODO remove
-        # mask = np.zeros((224, 224, 1))
-        return mask
-
-
-        img = env.controller.last_event.frame
-        resized_image = cv2.resize(img, dsize=(224,224))
-        global input_received, center_x, center_y
-        center_x, center_y = -1, -1
-        input_received = False
-
-        def normalize_number(x, w):
-            if x < 0:
-                x = 0
-            if x > w:
-                x = w
-            return x
-
-        def clear_plot():
-            fig.clear(); fig.clf(); plt.clf(); ax.cla(); fig.clear(True)
-        def close_plot():
-            clear_plot()
-            plt.close(fig)
-            plt.close('all')
-        def onclick(event):
-            global center_x, center_y, input_received
-            if event.button == 3: #Right click
-                center_x = -1
-                center_y = -1
-                input_received = True
-                close_plot()
-            if event.button == 1: #Left click
-                center_x = event.xdata
-                center_y = event.ydata
-                input_received = True
-                close_plot()
-
-        import matplotlib
-        matplotlib.use('MacOSX')
-        fig, ax = plt.subplots()
-        # clear_plot()
-        ax.imshow(resized_image)
-        cid = fig.canvas.mpl_connect('button_press_event', onclick)
-        plt.show()
-        fig.canvas.mpl_disconnect(cid)
-        close_plot()
-
-
-        self.window_size = 20 #TODO do I want to change the size of this one maybe?
-        mask = np.zeros((224, 224, 1))
-        if center_y == -1 and center_x == -1:
-            mask[:,:] = 0.
-        else:
-            offset = self.window_size / 2
-            object_boundaries = center_x - offset, center_y - offset, center_x + offset, center_y + offset
-            x1, y1, x2, y2 = [int(normalize_number(i, 224)) for i in object_boundaries]
-            mask[y1:y2, x1:x2] = 1.
-        self.cache = mask
-        return mask
-
-
-
-class RealStretchPickedUpObjSensor(Sensor):
-    def __init__(self, uuid: str = "pickedup_object", **kwargs: Any):
-        observation_space = gym.spaces.Box(
-            low=0, high=1, shape=(1,), dtype=np.float32
-        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
-        super().__init__(**prepare_locals_for_super(locals()))
-
-    def get_observation(
-            self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
-    ) -> Any:
-        return False
-
-class RealKinectArmPointNavEmulSensor(Sensor):
-
-    def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, arm_mask_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any):
-        observation_space = gym.spaces.Box(
-            low=0, high=1, shape=(1,), dtype=np.float32
-        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
-        self.type = type
-        self.mask_sensor = mask_sensor
-        self.depth_sensor = depth_sensor
-        self.arm_mask_sensor = arm_mask_sensor
-        uuid = '{}_{}'.format(uuid, type)
-
-        self.min_xyz = np.zeros((3))
-
-        self.dummy_answer = torch.zeros(3)
-        self.dummy_answer[:] = 4 # is this good enough?
-        self.device = torch.device("cpu")
-        super().__init__(**prepare_locals_for_super(locals()))
-    def get_camera_int_ext(self,env):
-        #TODO all these values need to be checked
-        fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think
-        agent_state = env.controller.last_event.metadata['agent']
-        camera_horizon = 45
-        camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']])
-        camera_rotation = (agent_state['rotation']['y'] + 90) % 360
-        return fov, camera_horizon, camera_xyz, camera_rotation
-    def get_observation(
-            self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any
-    ) -> Any:
-        #TODO remove
-        if self.type == 'destination':
-            return self.dummy_answer
-        mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times?
-        depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1)
-
-        if task.num_steps_taken() == 0:
-            self.pointnav_history_aggr = []
-            self.real_prev_location = None
-            self.belief_prev_location = None
-
-
-        fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env)
-        arm_world_coord = None
-
-        if mask.sum() != 0:
-
-            midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
-            if not torch.any(torch.isnan(midpoint_agent_coord) + torch.isinf(midpoint_agent_coord)):
-                self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken()))
-
-        arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice
-        if arm_mask.sum() == 0: #Do we want to do some approximations or no?
-            arm_world_coord = None #TODO approax for this
-        else:
-            arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
-            # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera
-            # result = distance_in_agent_coord.cpu()
-        # if arm_mask.sum() != 0 or mask.sum() != 0:
-        #
-        #     import cv2
-        #     cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1])
-        #     cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255)
-        #     cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255)
-        result = self.history_aggregation(camera_xyz, camera_rotation, arm_world_coord, task.num_steps_taken())
-        return result
-    def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number):
-        if len(self.pointnav_history_aggr) == 0 or arm_world_coord is None:
-            return self.dummy_answer
-        else:
-            weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
-            total_weights = sum(weights)
-            total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
-            total_sum = sum(total_sum)
-            midpoint = total_sum / total_weights
-            agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
-            midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
-            midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
-            midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']])
-
-            arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0))
-            arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state)
-            arm_state_agent_coord = torch.Tensor([arm_state_agent_coord['position'][k] for k in ['x','y','z']])
-
-            distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord
-
-            return distance_in_agent_coord.cpu()
-
-
-class KinectArmMaskSensor(Sensor):
-    def __init__(self, uuid: str = "arm_mask_kinect", **kwargs: Any):
-        observation_space = gym.spaces.Box(
-            low=0, high=1, shape=(1,), dtype=np.float32
-        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
-        super().__init__(**prepare_locals_for_super(locals()))
-
-    def get_observation(
-            self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
-    ) -> Any:
-        kinect_raw_value = env.kinect_raw_frame
-        mask = get_binary_mask_of_arm(kinect_raw_value)
-        mask = normalize_real_kinect_image(mask)
-        return np.expand_dims(mask, axis=-1)
-
-class RealIntelAgentBodyPointNavEmulSensor(Sensor):
-
-    def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any):
-        observation_space = gym.spaces.Box(
-            low=0, high=1, shape=(1,), dtype=np.float32
-        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
-        self.type = type
-        self.mask_sensor = mask_sensor
-        self.depth_sensor = depth_sensor
-        uuid = '{}_{}'.format(uuid, type)
-
-        self.min_xyz = np.zeros((3))
-        self.dummy_answer = torch.zeros(3)
-        self.dummy_answer[:] = 4 # is this good enough?
-        self.device = torch.device("cpu")
-
-
-        super().__init__(**prepare_locals_for_super(locals()))
-    def get_camera_int_ext(self,env):
-        #TODO all these values need to be checked
-        fov=max(INTEL_FOV_W, INTEL_FOV_H) #TODO are you sure? it should be smaller one I think
-        agent_state = env.controller.last_event.metadata['agent']
-        camera_horizon = 0
-        camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']])
-        camera_rotation = agent_state['rotation']['y']
-        return fov, camera_horizon, camera_xyz, camera_rotation
-
-
-
-
-    def get_observation(
-            self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any
-    ) -> Any:
-
-        mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs))
-
-        if task.num_steps_taken() == 0:
-            self.pointnav_history_aggr = []
-            self.real_prev_location = None
-            self.belief_prev_location = None
-
-
-        fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env)
-
-
-        if mask.sum() != 0:
-            depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1)
-            middle_of_object = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
-            self.pointnav_history_aggr.append((middle_of_object.cpu(), 1, task.num_steps_taken()))
-
-            # result = middle_of_object.cpu()
-        # else:
-        #     result = self.dummy_answer
-        result = self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken())
-        return result
-    def history_aggregation(self, camera_xyz, camera_rotation, current_step_number):
-        if len(self.pointnav_history_aggr) == 0:
-            return self.dummy_answer
-        else:
-            weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
-            total_weights = sum(weights)
-            total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
-            total_sum = sum(total_sum)
-            midpoint = total_sum / total_weights
-            agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
-            midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
-            midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
-
-            distance_in_agent_coord = dict(x=midpoint_agent_coord['position']['x'], y=midpoint_agent_coord['position']['y'], z=midpoint_agent_coord['position']['z'])
-
-            agent_centric_middle_of_object = torch.Tensor([distance_in_agent_coord['x'], distance_in_agent_coord['y'], distance_in_agent_coord['z']])
-
-            agent_centric_middle_of_object = agent_centric_middle_of_object
-            return agent_centric_middle_of_object
+#             agent_centric_middle_of_object = agent_centric_middle_of_object
+#             return agent_centric_middle_of_object
 
 
 
diff --git a/utils/stretch_utils/stretch_constants.py b/utils/stretch_utils/stretch_constants.py
index 5cdbc06..88ad531 100644
--- a/utils/stretch_utils/stretch_constants.py
+++ b/utils/stretch_utils/stretch_constants.py
@@ -16,6 +16,7 @@
 STRETCH_MANIPULATHOR_COMMIT_ID = '09b6ccf74558395a231927dee8be3b8c83b52ef7' #bigger fov
 # PROCTHOR_COMMIT_ID = '0eddca46783a788bfce69b146c496d931c981ae4'
 PROCTHOR_COMMIT_ID = '996a369b5484c7037d3737906be81b84a52473a0' #after the arm destroy bug
+UPDATED_PROCTHOR_COMMIT_ID = 'fd95db6135273a8ab2e35d4d350dd556c8ced655'
 
 STRETCH_ENV_ARGS = dict(
     gridSize=0.25,
diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py
index 51e3394..237b6e9 100644
--- a/utils/stretch_utils/stretch_ithor_arm_environment.py
+++ b/utils/stretch_utils/stretch_ithor_arm_environment.py
@@ -14,6 +14,8 @@
 from torch.distributions.utils import lazy_property
 
 from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment
+from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal
+
 from utils.stretch_utils.stretch_constants import (
     ADITIONAL_ARM_ARGS,
     PICKUP, DONE, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, MOVE_BACK, MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M, MOVE_ARM_Z_P, MOVE_ARM_Z_M, MOVE_WRIST_P, MOVE_WRIST_M, MOVE_WRIST_P_SMALL, MOVE_WRIST_M_SMALL, ROTATE_LEFT_SMALL, ROTATE_RIGHT_SMALL,
@@ -111,6 +113,20 @@ def __init__(
         self._always_return_visible_range = False
         self.simplify_physics = simplify_physics
 
+        if 'motion_noise_type' in env_args.keys():
+            if env_args['motion_noise_type'] == 'habitat':
+                self.noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args'])
+            elif env_args['motion_noise_type'] == 'simple1d':
+                self.noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args'])
+            else:
+                print('Unrecognized motion noise model type')
+                ForkedPdb().set_trace()
+        else:
+            self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model
+
+        self.ahead_nominal = AGENT_MOVEMENT_CONSTANT
+        self.rotate_nominal = AGENT_ROTATION_DEG
+
         # self.start(None)
 
 
@@ -121,6 +137,8 @@ def __init__(
         self.MEMORY_SIZE = 5
         # self.memory_frames = []
 
+        self.list_of_actions_so_far = []
+
 
         if "quality" not in self.env_args:
             self.env_args["quality"] = self._quality
@@ -185,46 +203,55 @@ def reset(
             scene_name = self.controller.last_event.metadata["sceneName"]
         # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem
 
-        # to solve the crash issue
-        # why do we still have this crashing problem?
-        try:
-            reset_environment_and_additional_commands(self.controller, scene_name)
-        except Exception as e:
-            print("RESETTING THE SCENE,", scene_name, 'because of', str(e))
-            self.controller = self.create_controller()
-            reset_environment_and_additional_commands(self.controller, scene_name)
-
-        if self.object_open_speed != 1.0:
-            self.controller.step(
-                {"action": "ChangeOpenSpeed", "x": self.object_open_speed}
-            )
-
-        self._initially_reachable_points = None
-        self._initially_reachable_points_set = None
-        self.controller.step({"action": "GetReachablePositions"})
-        if not self.controller.last_event.metadata["lastActionSuccess"]:
-            warnings.warn(
-                "Error when getting reachable points: {}".format(
-                    self.controller.last_event.metadata["errorMessage"]
+        if self.env_args['agentMode'] is not 'stretch':
+            self.controller.reset(scene_name)
+        elif scene_name=='Procedural':
+            self.controller.reset()
+        else:
+
+            # to solve the crash issue
+            # why do we still have this crashing problem?
+            try:
+                reset_environment_and_additional_commands(self.controller, scene_name)
+            except Exception as e:
+                print("RESETTING THE SCENE,", scene_name, 'because of', str(e))
+                self.controller = self.create_controller()
+                reset_environment_and_additional_commands(self.controller, scene_name)
+
+            if self.object_open_speed != 1.0:
+                self.controller.step(
+                    {"action": "ChangeOpenSpeed", "x": self.object_open_speed}
                 )
-            )
-        self._initially_reachable_points = self.last_action_return
+
+            self._initially_reachable_points = None
+            self._initially_reachable_points_set = None
+            self.controller.step({"action": "GetReachablePositions"})
+            if not self.controller.last_event.metadata["lastActionSuccess"]:
+                warnings.warn(
+                    "Error when getting reachable points: {}".format(
+                        self.controller.last_event.metadata["errorMessage"]
+                    )
+                )
+            self._initially_reachable_points = self.last_action_return
 
         self.list_of_actions_so_far = []
 
+        self.noise_model.reset_noise_model()
+        self.nominal_agent_location = self.get_agent_location()
+
 
     def check_controller_version(self, controller=None):
         controller_to_check = controller
         if controller_to_check is None:
             controller_to_check = self.controller
-        if STRETCH_MANIPULATHOR_COMMIT_ID is not None:
-            assert (
-                self.env_args['commit_id'] in controller_to_check._build.url
-            ), "Build number is not right, {} vs {}, use  pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format(
-                controller_to_check._build.url,
-                STRETCH_MANIPULATHOR_COMMIT_ID,
-                STRETCH_MANIPULATHOR_COMMIT_ID,
-            )
+        # if STRETCH_MANIPULATHOR_COMMIT_ID is not None:
+        #     assert (
+        #         self.env_args['commit_id'] in controller_to_check._build.url
+        #     ), "Build number is not right, {} vs {}, use  pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format(
+        #         controller_to_check._build.url,
+        #         STRETCH_MANIPULATHOR_COMMIT_ID,
+        #         STRETCH_MANIPULATHOR_COMMIT_ID,
+        #     )
 
     def create_controller(self):
         assert 'commit_id' in self.env_args
@@ -292,6 +319,42 @@ def get_pickupable_objects(self):
         object_list = event.metadata["arm"]["pickupableObjects"]
 
         return object_list
+    
+    def update_nominal_location(self, action_dict):
+        # location = {
+        #     "x": metadata["agent"]["position"]["x"],
+        #     "y": metadata["agent"]["position"]["y"],
+        #     "z": metadata["agent"]["position"]["z"],
+        #     "rotation": metadata["agent"]["rotation"]["y"],
+        #     "horizon": metadata["agent"]["cameraHorizon"],
+        #     "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")),
+        # }
+
+        curr_loc = self.nominal_agent_location
+        new_loc = copy.deepcopy(curr_loc)
+
+        if action_dict['action'] is 'RotateLeft':
+            new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360
+        if action_dict['action'] is 'RotateLeftSmall':
+            new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal/5) % 360
+        if action_dict['action'] is 'RotateRight':
+            new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360
+        elif action_dict['action'] is 'RotateRightSmall':
+            new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal/5) % 360
+        elif action_dict['action'] is 'MoveAhead':
+            new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180)
+            new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180)
+        elif action_dict['action'] is 'MoveBack':
+            new_loc["x"] -= self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180)
+            new_loc["z"] -= self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180)
+        elif action_dict['action'] is 'TeleportFull':
+            new_loc["x"] = action_dict['x']
+            new_loc["y"] = action_dict['y']
+            new_loc["z"] = action_dict['z']
+            new_loc["rotation"] = action_dict['rotation']['y']
+            new_loc["horizon"] = action_dict['horizon']
+
+        self.nominal_agent_location = new_loc
 
     def step(
         self, action_dict: Dict[str, Union[str, int, float]]
@@ -299,6 +362,20 @@ def step(
         """Take a step in the ai2thor environment."""
 
         action = typing.cast(str, action_dict["action"])
+        original_action_dict = copy.deepcopy(action_dict)
+
+        if self.env_args['agentMode']=='locobot' or self.env_args['agentMode']=='default':
+            sr = self.controller.step(action_dict)
+            self.list_of_actions_so_far.append(action_dict)
+
+            if self._verbose:
+                print(self.controller.last_event)
+
+            if self.restrict_to_initially_reachable_points:
+                self._snap_agent_to_initially_reachable()
+            
+            return sr
+
 
         skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
         last_frame: Optional[np.ndarray] = None
@@ -329,23 +406,87 @@ def step(
             action_dict = {
                 'action': 'Pass'
             } # we have to change the last action success if the pik up fails, we do that in the task now
-
-        elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_LEFT, ROTATE_RIGHT]:
+                
+        elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, ROTATE_RIGHT_SMALL, ROTATE_LEFT_SMALL]:
             copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS)
+
+            # RH: order matters, nominal action happens last
             action_dict = {**action_dict, **copy_aditions}
-            if action == MOVE_AHEAD:
+            # ForkedPdb().set_trace()
+            if action in [MOVE_AHEAD]:
+                noise = self.noise_model.get_ahead_drift(self.ahead_nominal)
+
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = noise[2]
+                sr = self.controller.step(action_dict)
+
+                action_dict = {**copy_aditions}
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = noise[0] + self.ahead_nominal
+                action_dict["right"] = noise[1]
+
+            elif action in [MOVE_BACK]:
+                noise = self.noise_model.get_ahead_drift(self.ahead_nominal)
+                # TODO revisit - make sense to sample the same for ahead and back? 
+                # inclination is effect matters less than currently unmodeled hysteresis effects
+
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = noise[2]
+                sr = self.controller.step(action_dict)
+
+                action_dict = {**copy_aditions}
                 action_dict["action"] = "MoveAgent"
-                action_dict["ahead"] = AGENT_MOVEMENT_CONSTANT
-            elif action == MOVE_BACK:
+                action_dict["ahead"] = noise[0] - self.ahead_nominal
+                action_dict["right"] = noise[1]
+
+
+            elif action in [ROTATE_RIGHT]:
+                noise = self.noise_model.get_rotate_drift()
                 action_dict["action"] = "MoveAgent"
-                action_dict["ahead"] = -AGENT_MOVEMENT_CONSTANT
-            elif action == ROTATE_RIGHT:
+                action_dict["ahead"] = noise[0]
+                action_dict["right"] = noise[1]
+                sr = self.controller.step(action_dict)
+
+                action_dict = {**copy_aditions}
                 action_dict["action"] = "RotateAgent"
-                action_dict["degrees"] = AGENT_ROTATION_DEG
+                action_dict["degrees"] = noise[2] + self.rotate_nominal
 
-            elif action == ROTATE_LEFT:
+            elif action in [ROTATE_LEFT]:
+                noise = self.noise_model.get_rotate_drift()
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = noise[0]
+                action_dict["right"] = noise[1]
+                sr = self.controller.step(action_dict)
+
+                action_dict = {**copy_aditions}
                 action_dict["action"] = "RotateAgent"
-                action_dict["degrees"] = -AGENT_ROTATION_DEG
+                action_dict["degrees"] = noise[2] - self.rotate_nominal
+            
+            elif action in [ROTATE_RIGHT_SMALL]:
+                # RH: lesser scaling noise is deliberate. Small actions are harder to be accurate
+                noise = self.noise_model.get_rotate_drift()
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = noise[0]/2
+                action_dict["right"] = noise[1]/2
+                sr = self.controller.step(action_dict)
+
+                action_dict = {**copy_aditions}
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = noise[2]/2 + self.rotate_nominal / 5
+
+            elif action in [ROTATE_LEFT_SMALL]:
+                # RH: lesser scaling noise is deliberate. Small actions are harder to be accurate
+                noise = self.noise_model.get_rotate_drift()
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = noise[0]/2
+                action_dict["right"] = noise[1]/2
+                sr = self.controller.step(action_dict)
+
+                action_dict = {**copy_aditions}
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = noise[2]/2 - self.rotate_nominal / 5
+
+
         elif action in [MOVE_ARM_HEIGHT_P,MOVE_ARM_HEIGHT_M,MOVE_ARM_Z_P,MOVE_ARM_Z_M,]:
             base_position = get_relative_stretch_current_arm_state(self.controller)
             change_value = ARM_MOVE_CONSTANT
@@ -367,17 +508,16 @@ def step(
             action_dict = dict(action='RotateWristRelative', yaw=-WRIST_ROTATION / 5)
         elif action == MOVE_WRIST_M_SMALL:
             action_dict = dict(action='RotateWristRelative', yaw=WRIST_ROTATION / 5)
-        elif action == ROTATE_LEFT_SMALL:
-            action_dict["action"] = "RotateAgent"
-            action_dict["degrees"] = -AGENT_ROTATION_DEG / 5
-        elif action == ROTATE_RIGHT_SMALL:
-            action_dict["action"] = "RotateAgent"
-            action_dict["degrees"] = AGENT_ROTATION_DEG / 5
 
 
-        sr = self.controller.step(action_dict)
+        sr_nominal = self.controller.step(action_dict)
         self.list_of_actions_so_far.append(action_dict)
 
+        # RH: Nominal location only updates for successful actions. Note that that drift 
+        # action might succeed even if the "main" action fails
+        if sr_nominal.metadata["lastActionSuccess"]:
+            self.update_nominal_location(original_action_dict)
+
         if self._verbose:
             print(self.controller.last_event)
 
@@ -388,4 +528,18 @@ def step(
             assert last_frame is not None
             self.last_event.frame = last_frame
 
-        return sr
+        if not sr_nominal.metadata["lastActionSuccess"] and action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, ROTATE_RIGHT_SMALL, ROTATE_LEFT_SMALL]:
+            # if the action fails, sample the noise model for a turn 
+            # does this mess up metadata? and is this reasonable? what action failure modes happen in sim vs real?
+            noise = self.noise_model.get_rotate_drift()
+            action_dict = {**copy.deepcopy(ADITIONAL_ARM_ARGS)}
+            action_dict["action"] = "MoveAgent"
+            action_dict["ahead"] = noise[0]
+            action_dict["right"] = noise[1]
+            sr = self.controller.step(action_dict)
+            action_dict = {**copy.deepcopy(ADITIONAL_ARM_ARGS)}
+            action_dict["action"] = "RotateAgent"
+            action_dict["degrees"] = noise[2]
+            sr = self.controller.step(action_dict)
+
+        return sr_nominal
diff --git a/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py b/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py
new file mode 100644
index 0000000..687685f
--- /dev/null
+++ b/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py
@@ -0,0 +1,178 @@
+import typing
+from typing import Dict, Union, Optional
+import numpy as np
+
+import ai2thor.server
+from ai2thor.controller import Controller
+from allenact.utils.system import get_logger
+
+from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment
+from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment
+
+from utils.stretch_utils.stretch_constants import (
+    DONE, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, MOVE_BACK, 
+)
+from scripts.stretch_jupyter_helper import AGENT_MOVEMENT_CONSTANT, AGENT_ROTATION_DEG, remove_nan_inf_for_frames
+from utils.stretch_utils.stretch_sim2real_utils import kinect_reshape, intel_reshape
+
+
+class MinimalStretchManipulaTHOREnvironment(ManipulaTHOREnvironment):
+
+    @property
+    def kinect_frame(self) -> np.ndarray:
+        """Returns rgb image corresponding to the agent's egocentric view."""
+        frame = self.controller.last_event.third_party_camera_frames[0].copy()
+        frame = remove_nan_inf_for_frames(frame, 'kinect_frame')
+        return kinect_reshape(frame)
+    @property
+    def kinect_depth(self) -> np.ndarray:
+        """Returns rgb image corresponding to the agent's egocentric view."""
+        if self.controller.last_event.third_party_depth_frames[0]:
+            return None
+        depth_frame = self.controller.last_event.third_party_depth_frames[0].copy()
+        depth_frame = remove_nan_inf_for_frames(depth_frame, 'depth_kinect')
+
+        # #TODO remove
+        if np.sum(depth_frame != self.controller.last_event.third_party_depth_frames[0].copy()) > 10:
+            raise Exception('Depth is nan again even after removing nan?')
+
+        return kinect_reshape(depth_frame)
+
+    @property
+    def intel_frame(self) -> np.ndarray:
+        """Returns rgb image corresponding to the agent's egocentric view."""
+        frame = self.controller.last_event.frame.copy()
+        frame = remove_nan_inf_for_frames(frame, 'intel_frame')
+        return intel_reshape(frame)
+    @property
+    def intel_depth(self) -> np.ndarray:
+        """Returns rgb image corresponding to the agent's egocentric view."""
+        if self.controller.last_event.depth_frame is None:
+            return None
+        depth_frame = self.controller.last_event.depth_frame.copy()
+        depth_frame = remove_nan_inf_for_frames(depth_frame, 'depth_intel')
+        return intel_reshape(depth_frame)
+
+    def step(
+        self, action_dict: Dict[str, Union[str, int, float]]
+    ) -> ai2thor.server.Event:
+        """Take a step in the ai2thor environment."""
+
+        action = typing.cast(str, action_dict["action"])
+
+        skip_render = "renderImage" in action_dict and not action_dict["renderImage"]
+        last_frame: Optional[np.ndarray] = None
+        if skip_render:
+            last_frame = self.current_frame
+
+        if self.simplify_physics:
+            action_dict["simplifyOPhysics"] = True
+        if action in [DONE]:
+            action_dict = {
+                'action': 'Pass'
+            } # we have to change the last action success if the pik up fails, we do that in the task now
+
+        elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_LEFT, ROTATE_RIGHT]:
+            if action == MOVE_AHEAD:
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = AGENT_MOVEMENT_CONSTANT
+            elif action == MOVE_BACK:
+                action_dict["action"] = "MoveAgent"
+                action_dict["ahead"] = -AGENT_MOVEMENT_CONSTANT
+            elif action == ROTATE_RIGHT:
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = AGENT_ROTATION_DEG
+
+            elif action == ROTATE_LEFT:
+                action_dict["action"] = "RotateAgent"
+                action_dict["degrees"] = -AGENT_ROTATION_DEG
+        
+
+        sr = self.controller.step(action_dict)
+        self.list_of_actions_so_far.append(action_dict)
+
+        if self._verbose:
+            print(self.controller.last_event)
+
+        if self.restrict_to_initially_reachable_points:
+            self._snap_agent_to_initially_reachable()
+
+        if skip_render:
+            assert last_frame is not None
+            self.last_event.frame = last_frame
+
+        return sr
+
+    def create_controller(self):
+        assert 'commit_id' in self.env_args, 'No commit id is specified'
+        controller = Controller(**self.env_args)
+        return controller
+    
+    def start(
+                self,
+                scene_name: Optional[str],
+                move_mag: float = 0.25,
+                **kwargs,
+        ) -> None:
+            """Starts the ai2thor controller if it was previously stopped.
+
+            After starting, `reset` will be called with the scene name and move magnitude.
+
+            # Parameters
+
+            scene_name : The scene to load.
+            move_mag : The amount of distance the agent moves in a single `MoveAhead` step.
+            kwargs : additional kwargs, passed to reset.
+            """
+            if self._started:
+                raise RuntimeError(
+                    "Trying to start the environment but it is already started."
+                )
+
+            self.controller = self.create_controller()
+
+            if (
+                    self._start_player_screen_height,
+                    self._start_player_screen_width,
+            ) != self.current_frame.shape[:2]:
+                self.controller.step(
+                    {
+                        "action": "ChangeResolution",
+                        "x": self._start_player_screen_width,
+                        "y": self._start_player_screen_height,
+                    }
+                )
+
+            self._started = True
+            self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)
+
+    def reset(
+            self,
+            scene_name: Optional[str],
+            move_mag: float = 0.25,
+            **kwargs,
+    ):
+        self._move_mag = move_mag
+        self._grid_size = self._move_mag
+        # self.memory_frames = []
+
+        if scene_name is None:
+            scene_name = self.controller.last_event.metadata["sceneName"]
+
+        if self.object_open_speed != 1.0:
+            self.controller.step(
+                {"action": "ChangeOpenSpeed", "x": self.object_open_speed}
+            )
+
+        self._initially_reachable_points = None
+        self._initially_reachable_points_set = None
+        self.controller.step({"action": "GetReachablePositions"})
+        if not self.controller.last_event.metadata["lastActionSuccess"]:
+            get_logger().warning(
+                "Error when getting reachable points: {}".format(
+                    self.controller.last_event.metadata["errorMessage"]
+                )
+            )
+        self._initially_reachable_points = self.last_action_return
+
+        self.list_of_actions_so_far = []
\ No newline at end of file
diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py
similarity index 64%
rename from utils/procthor_utils/procthor_object_nav_tasks.py
rename to utils/stretch_utils/stretch_object_nav_tasks.py
index e2e334b..13d9ef0 100644
--- a/utils/procthor_utils/procthor_object_nav_tasks.py
+++ b/utils/stretch_utils/stretch_object_nav_tasks.py
@@ -1,10 +1,17 @@
+from ast import For
+from dis import dis
+from email.encoders import encode_noop
 import random
-from collections import Counter
 from typing import Any, Dict, List, Optional, Tuple, Union
 from typing_extensions import Literal
 
+import signal
+import time
+
 import gym
+from importlib_metadata import Lookup
 import numpy as np
+from sklearn.metrics import recall_score
 import torch
 from allenact.base_abstractions.misc import RLStepResult
 from allenact.base_abstractions.sensor import Sensor
@@ -25,9 +32,12 @@
     ROTATE_LEFT_SMALL
 )
 from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment
-# from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
 
 from manipulathor_utils.debugger_util import ForkedPdb
+from datetime import datetime
+
+def handler(signum, frame):
+    raise Exception("Controller call took too long")
 
 class ObjectNavTask(Task[ManipulaTHOREnvironment]):
     _actions = (
@@ -86,6 +96,8 @@ def __init__(
             self.dist_to_target_func = self.min_geo_distance_to_target
         elif distance_type == "l2":
             self.dist_to_target_func = self.min_l2_distance_to_target
+        elif distance_type == "real_world":
+            self.dist_to_target_func = self.dummy_distance_to_target # maybe placeholder here for estimation later
         else:
             raise NotImplementedError
 
@@ -123,7 +135,7 @@ def min_l2_distance_to_target(self) -> float:
         if min_dist == float("inf"):
             get_logger().error(
                 f"No target object {self.task_info['object_type']} found"
-                f" in house {self.task_info['house_name']}."
+                f" in house {self.task_info['scene_name']}."
             )
             return -1.0
         return min_dist
@@ -139,7 +151,7 @@ def min_geo_distance_to_target(self) -> float:
                 env=self.env,
                 distance_cache=self.distance_cache,
                 object_id=object_id,
-                house_name=self.task_info["house_name"],
+                house_name=self.task_info["scene_name"],
             )
             if (min_dist is None and geo_dist >= 0) or (
                 geo_dist >= 0 and geo_dist < min_dist
@@ -149,6 +161,8 @@ def min_geo_distance_to_target(self) -> float:
             return -1.0
         return min_dist
     
+    def dummy_distance_to_target(self) -> float:
+        return float("inf")
     
     def start_visualize(self):
         for visualizer in self.visualizers:
@@ -213,7 +227,40 @@ def _is_goal_in_range(self) -> bool:
             obj
             for obj in self.env.last_event.metadata["objects"]
             if obj["visible"] and obj["objectType"] == self.task_info["object_type"]
+            and self.dist_to_target_func() < self.task_info['success_distance'] 
         )
+    
+    def calc_action_stat_metrics(self) -> Dict[str, Any]:
+        action_stat = {
+            "metric/action_stat/" + action_str: 0.0 for action_str in self._actions
+        }
+        action_success_stat = {
+            "metric/action_success/" + action_str: 0.0 for action_str in self._actions
+        }
+        action_success_stat["metric/action_success/total"] = 0.0
+
+        seq_len = len(self.task_info["taken_actions"])
+
+        for (action_name, action_success) in list(zip(
+                            self.task_info["taken_actions"],
+                            self.task_info["action_successes"])):
+            action_stat["metric/action_stat/" + action_name] += 1.0
+
+            action_success_stat[
+                "metric/action_success/{}".format(action_name)
+            ] += action_success
+            action_success_stat["metric/action_success/total"] += action_success
+
+        action_success_stat["metric/action_success/total"] /= seq_len
+
+        for action_name in self._actions:
+            action_success_stat[
+                "metric/" + "action_success/{}".format(action_name)
+            ] /= (action_stat["metric/action_stat/" + action_name] + 0.000001)
+            action_stat["metric/action_stat/" + action_name] /= seq_len
+
+        result = {**action_stat, **action_success_stat}
+        return result
 
     def shaping(self) -> float:
         if self.reward_config['shaping_weight'] == 0.0:
@@ -235,7 +282,7 @@ def shaping(self) -> float:
                 max_reward_mag = position_dist(p0, p1, ignore_y=True)
 
             if (
-                self.reward_config.positive_only_reward
+                self.reward_config['positive_only_reward']
                 and cur_distance is not None
                 and cur_distance > 0.5
             ):
@@ -251,7 +298,7 @@ def shaping(self) -> float:
                     min(reward, max_reward_mag),
                     -max_reward_mag,
                 )
-                * self.reward_config.shaping_weight
+                * self.reward_config['shaping_weight']
             )
     
     def get_observations(self, **kwargs) -> Any:
@@ -292,6 +339,7 @@ def metrics(self) -> Dict[str, Any]:
         if self.is_done():
             result={} # placeholder for future
             metrics = super().metrics()
+            metrics = {**metrics, **self.calc_action_stat_metrics()}
             metrics["dist_to_target"] = self.dist_to_target_func()
             metrics["total_reward"] = np.sum(self._rewards)
             metrics["spl"] = spl_metric(
@@ -327,8 +375,11 @@ def _step(self, action: int) -> RLStepResult:
             self.task_info["action_successes"].append(True)
         else:
             action_dict = {"action": action_str}
-            self.env.step(action_dict)
-            self.last_action_success = bool(self.env.last_event)
+            sr = self.env.step(action_dict)
+            # RH NOTE: this is an important change from procthor because of how the noise model 
+            # is implemented. the env step return corresponds to the nominal/called 
+            # action, which may or may not be the last thing the controller did.
+            self.last_action_success = bool(sr.metadata["lastActionSuccess"])
 
             position = self.env.last_event.metadata["agent"]["position"]
             self.path.append(position)
@@ -390,7 +441,85 @@ class StretchObjectNavTask(ObjectNavTask):
         DONE,
     )
 
-class ExploreWiseObjectNavTask(ObjectNavTask):
+class StretchNeckedObjectNavTask(ObjectNavTask):
+    _actions = (
+        MOVE_AHEAD,
+        # MOVE_BACK,
+        ROTATE_RIGHT,
+        ROTATE_LEFT,
+        "LookUp",
+        "LookDown",
+        DONE,
+    )
+
+class StretchNeckedObjectNavTaskUpdateOrder(ObjectNavTask):
+    # for evaluating weights from Matt
+    _actions = (
+        MOVE_AHEAD,
+        ROTATE_LEFT,
+        ROTATE_RIGHT,
+        DONE,
+        "LookUp",
+        "LookDown"
+    )
+
+class StretchObjectNavTaskSegmentationSuccess(StretchObjectNavTask):
+    def _is_goal_in_range(self) -> bool:
+        all_kinect_masks = self.env.controller.last_event.third_party_instance_masks[0]
+        for object_id in self.task_info["target_object_ids"]:
+            if object_id in all_kinect_masks and self.dist_to_target_func() < self.task_info['success_distance']:
+                return True
+        
+        return False
+
+class StretchObjectNavTaskSegmentationSuccessActionFail(StretchObjectNavTaskSegmentationSuccess):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.recent_three_strikes = 0
+    
+    def _step(self, action: int) -> RLStepResult:
+        sr = super()._step(action)
+
+        if self.last_action_success or self._took_end_action:
+            self.recent_three_strikes = 0
+            return sr
+        elif self.recent_three_strikes < 2:
+            self.recent_three_strikes += 1
+            return sr
+        else:
+            # print('Task ended for repeated action failure')
+            # ForkedPdb().set_trace()
+            self._took_end_action = True
+            step_result = RLStepResult(
+                observation=sr.observation,
+                reward=sr.reward - self.reward_config['got_stuck_penalty'],
+                done=self.is_done(),
+                info={"last_action_success": self.last_action_success, "action": action},
+            )
+            return step_result
+    
+    def judge(self) -> float:
+        """Judge the last event."""
+        reward = self.reward_config['step_penalty']
+        if not self.last_action_success:
+            reward += self.reward_config['failed_action_penalty']
+
+        reward += self.shaping()
+
+        if self._took_end_action:
+            if self._success:
+                reward += self.reward_config['goal_success_reward']
+            else:
+                reward += self.reward_config['failed_stop_reward']
+        elif self.num_steps_taken() + 1 >= self.max_steps:
+            reward += self.reward_config['reached_horizon_reward']
+
+        self._rewards.append(float(reward))
+        return float(reward)
+
+
+
+class ExploreWiseObjectNavTask(StretchObjectNavTaskSegmentationSuccessActionFail):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         all_locations = [[k['x'], k['y'], k['z']] for k in (self.env.get_reachable_positions())]
@@ -400,7 +529,13 @@ def __init__(self, **kwargs):
 
     def judge(self) -> float:
         """Compute the reward after having taken a step."""
-        reward = self.reward_configs["step_penalty"]
+        reward = self.reward_config["step_penalty"]
+
+        # additional scaling step penalty, thresholds at half max steps at the failed action penalty
+        reward += -0.2 * (1.1)**(np.min([-(self.max_steps/2)+self.num_steps_taken(),0])) 
+
+        if not self.last_action_success:
+            reward += self.reward_config['failed_action_penalty']
 
         current_agent_location = self.env.get_agent_location()
         current_agent_location = torch.Tensor([current_agent_location['x'], current_agent_location['y'], current_agent_location['z']])
@@ -414,8 +549,8 @@ def judge(self) -> float:
         self.has_visited[location_index] = 1
 
         if visited_new_place:
-            reward += self.reward_configs["exploration_reward"]
-        
+            reward += self.reward_config["exploration_reward"]
+                
         reward += self.shaping()
 
         if self._took_end_action:
@@ -427,9 +562,77 @@ def judge(self) -> float:
             reward += self.reward_config['reached_horizon_reward']
         self._rewards.append(float(reward))
         return float(reward)
+    
+    def metrics(self) -> Dict[str, Any]:
+        result = super(ExploreWiseObjectNavTask, self).metrics()
+
+        if self.is_done():
+            result['percent_room_visited'] = self.has_visited.mean().item()
+            result['new_locations_visited'] = self.has_visited.sum().item()
+        return result
         
 
+class RealStretchObjectNavTask(StretchObjectNavTask):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.start_time = datetime.now()
+        self.last_time = None
+        signal.signal(signal.SIGALRM, handler)
+    
+    def _step(self, action: int) -> RLStepResult:
 
+        action_str = self.class_action_names()[action]
+        print('Model Said', action_str, ' as action ', str(self.num_steps_taken()))
+
+        self.manual_action = False
+
+        # add/remove/adjust to allow graceful exit from auto-battlebots
+        end_ep_early = False
+        if self.num_steps_taken() % 10 == 0:
+            print('verify continue? set end_ep_early=True to gracefully fail out')
+            ForkedPdb().set_trace()
+
+        self._last_action_str = action_str
+        action_dict = {"action": action_str}
+        signal.alarm(8) # second timeout - catch missed server connection. THIS IS NOT THREAD SAFE
+        try:
+            self.env.step(action_dict)
+        except:
+            print('Controller call took too long, continue to try again or set end_ep_early to fail out instead')
+            ForkedPdb().set_trace()
+            self.env.step(action_dict)
+        
+        signal.alarm(0)
+
+        if action_str == "Done" or end_ep_early:
+            self._took_end_action = True
+            dt_total = (datetime.now() - self.start_time).total_seconds()/60
+            print('I think I found a ', self.task_info['object_type'], ' after ', str(dt_total), ' minutes.' )
+            print('Was I correct? Set self._success in trace. Default false.')            
+            ForkedPdb().set_trace()
+
+        if self.last_time is not None:
+            dt = (datetime.now() - self.last_time).total_seconds()
+            print('FPS: ', str(1/dt))
+        self.last_time = datetime.now()
+
+        self.last_action_success = self.env.last_action_success
+
+        last_action_name = self._last_action_str
+        self.visualize(last_action_name)
+
+        step_result = RLStepResult(
+            observation=self.get_observations(),
+            reward=self.judge(),
+            done=self.is_done(),
+            info={"last_action_success": self.last_action_success},
+        )
+        return step_result
+    
+    def judge(self) -> float:
+        """Compute the reward after having taken a step."""
+        reward = 0
+        return reward
 
 
 
diff --git a/utils/stretch_utils/stretch_thor_sensors.py b/utils/stretch_utils/stretch_thor_sensors.py
index 4781bbe..f3191b1 100644
--- a/utils/stretch_utils/stretch_thor_sensors.py
+++ b/utils/stretch_utils/stretch_thor_sensors.py
@@ -25,12 +25,13 @@
 # from ithor_arm.ithor_arm_environment import StretchManipulaTHOREnvironment
 # from ithor_arm.ithor_arm_sensors import DepthSensorThor
 # from ithor_arm.near_deadline_sensors import calc_world_coordinates
-from utils.calculation_utils import calc_world_coordinates
+from utils.calculation_utils import calc_world_coordinates, calc_world_xyz_from_agent_relative, get_mid_point_of_object_from_depth_and_mask
 
 from manipulathor_utils.debugger_util import ForkedPdb
 from utils.noise_in_motion_util import squeeze_bool_mask
 from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment
 from utils.stretch_utils.stretch_sim2real_utils import kinect_reshape, intel_reshape
+from scripts.stretch_jupyter_helper import get_relative_stretch_current_arm_state
 
 
 
@@ -428,6 +429,227 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state, current_step_nu
 
             return agent_centric_middle_of_object
 
+
+
+class AgentBodyPointNavEmulSensorDeadReckoning(Sensor):
+
+    def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any):
+        observation_space = gym.spaces.Box(
+            low=0, high=1, shape=(1,), dtype=np.float32
+        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+        self.type = type
+        self.mask_sensor = mask_sensor
+        self.depth_sensor = depth_sensor
+        uuid = '{}_{}'.format(uuid, type)
+
+        self.min_xyz = np.zeros((3))
+        self.dummy_answer = torch.zeros(3)
+        self.dummy_answer[:] = 4 # is this good enough?
+        self.device = torch.device("cpu")
+
+        super().__init__(**prepare_locals_for_super(locals()))
+
+
+    def get_relative_nominal_locations(self,env):
+        if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1:
+            print('Warning multiple cameras')
+
+        true_base = env.get_agent_location()
+        true_base = copy.deepcopy(dict(position=dict(x=true_base['x'], y=true_base['y'],z=true_base['z']), 
+                                          rotation=dict(x=0,y=true_base['rotation'], z=0)))
+
+        nominal_base = env.nominal_agent_location
+        nominal_base_xyz = np.array([nominal_base['x'],nominal_base['y'],nominal_base['z']])
+        nominal_base = copy.deepcopy(dict(position=dict(x=nominal_base['x'], y=nominal_base['y'],z=nominal_base['z']), 
+                                          rotation=dict(x=0,y=nominal_base['rotation'], z=0)))
+
+        m = copy.deepcopy(env.controller.last_event.metadata)
+
+        wrist = env.get_absolute_hand_state()
+        relative_wrist = convert_world_to_agent_coordinate(wrist,true_base)
+
+        camera1 = m["cameraPosition"]
+        camera1 = dict(position=dict(x=camera1['x'], y=camera1['y'],z=camera1['z']), rotation=dict(x=0,y=0, z=0))
+        relative_camera_1 = convert_world_to_agent_coordinate(camera1,true_base)
+
+        camera2 = m['thirdPartyCameras'][0]
+        relative_camera_2 = convert_world_to_agent_coordinate(camera2,true_base)
+
+        relative_list = [relative_wrist,relative_camera_1,relative_camera_2]
+        nominal_list = calc_world_xyz_from_agent_relative(relative_list,nominal_base_xyz,nominal_base['rotation']['y'],self.device)
+
+        return dict(agent=nominal_base, wrist=nominal_list[0],camera1=nominal_list[1],camera2=nominal_list[2])
+
+    def get_agent_belief_state(self,env):
+        fov = env.controller.last_event.metadata['fov'] # assumption: real one is fine here
+        real_agent_state = env.get_agent_location()
+        nominal_positions = self.get_relative_nominal_locations(env)
+
+        belief_camera_horizon = env.controller.last_event.metadata["agent"]["cameraHorizon"] # assumption: real one is fine here
+        belief_camera_xyz = np.array([nominal_positions['camera1']['position'][k] for k in ['x','y','z']])
+        belief_camera_rotation = (nominal_positions['agent']['rotation']['y']) % 360
+
+        real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']])
+
+        self.belief_prev_location.append(belief_camera_xyz)
+        self.real_prev_location.append(real_camera_xyz)
+        arm_state = nominal_positions['wrist']# get_relative_stretch_current_arm_state(env.controller)
+        
+        return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state
+
+    def get_observation(
+            self, env: StretchManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
+    ) -> Any:
+
+        mask = self.mask_sensor.get_observation(env, task, *args, **kwargs)
+        depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs)
+        if task.num_steps_taken() == 0:
+            self.pointnav_history_aggr = []
+            self.real_prev_location = []
+            self.belief_prev_location = []
+
+        fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env)
+
+        if mask.sum() != 0:
+            midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
+            self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken()))
+
+        return self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken())
+    
+    def history_aggregation(self, camera_xyz, camera_rotation, current_step_number):
+        if len(self.pointnav_history_aggr) == 0:
+            return self.dummy_answer
+        else:
+            weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+            total_weights = sum(weights)
+            total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+            total_sum = sum(total_sum)
+            midpoint = total_sum / total_weights
+            agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
+            midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
+            midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
+            midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']])
+
+            return midpoint_agent_coord.cpu()
+
+
+class ArmPointNavEmulSensorDeadReckoning(Sensor):
+
+    def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any):
+        observation_space = gym.spaces.Box(
+            low=0, high=1, shape=(1,), dtype=np.float32
+        )  # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
+        self.type = type
+        self.mask_sensor = mask_sensor
+        self.depth_sensor = depth_sensor
+        uuid = '{}_{}'.format(uuid, type)
+
+        self.min_xyz = np.zeros((3))
+
+        self.dummy_answer = torch.zeros(3)
+        self.dummy_answer[:] = 4 # is this good enough?
+        self.device = torch.device("cpu")
+        super().__init__(**prepare_locals_for_super(locals()))
+    
+    def get_relative_nominal_locations(self,env):
+        if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1:
+            print('Warning multiple cameras')
+
+        true_base = env.get_agent_location()
+        true_base = copy.deepcopy(dict(position=dict(x=true_base['x'], y=true_base['y'],z=true_base['z']), 
+                                          rotation=dict(x=0,y=true_base['rotation'], z=0)))
+
+        nominal_base = env.nominal_agent_location
+        nominal_base_xyz = np.array([nominal_base['x'],nominal_base['y'],nominal_base['z']])
+        nominal_base = copy.deepcopy(dict(position=dict(x=nominal_base['x'], y=nominal_base['y'],z=nominal_base['z']), 
+                                          rotation=dict(x=0,y=nominal_base['rotation'], z=0)))
+
+        m = copy.deepcopy(env.controller.last_event.metadata)
+
+        wrist = env.get_absolute_hand_state()
+        relative_wrist = convert_world_to_agent_coordinate(wrist,true_base)
+
+        camera1 = m["cameraPosition"]
+        camera1 = dict(position=dict(x=camera1['x'], y=camera1['y'],z=camera1['z']), rotation=dict(x=0,y=0, z=0))
+        relative_camera_1 = convert_world_to_agent_coordinate(camera1,true_base)
+
+        camera2 = m['thirdPartyCameras'][0]
+        relative_camera_2 = convert_world_to_agent_coordinate(camera2,true_base)
+
+        relative_list = [relative_wrist,relative_camera_1,relative_camera_2]
+        nominal_list = calc_world_xyz_from_agent_relative(relative_list,nominal_base_xyz,nominal_base['rotation']['y'],self.device)
+
+        return dict(agent=nominal_base, wrist=nominal_list[0],camera1=nominal_list[1],camera2=nominal_list[2])
+
+    def get_agent_belief_state(self,env):
+        real_agent_state = env.get_agent_location()
+        nominal_positions = self.get_relative_nominal_locations(env)
+
+
+        belief_camera_xyz = np.array([nominal_positions['camera2']['position'][k] for k in ['x','y','z']])
+        belief_camera_rotation = (nominal_positions['agent']['rotation']['y'] + 90) % 360
+
+        real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']])
+
+        metadata = copy.deepcopy(env.controller.last_event.metadata['thirdPartyCameras'][0])
+        belief_camera_horizon = metadata['rotation']['x'] # assumption: real one is fine here
+        fov = metadata['fieldOfView'] # assumption: real one is fine here
+        assert abs(metadata['rotation']['z'] - 0) < 0.1 #TODO: why
+
+        self.belief_prev_location.append(belief_camera_xyz)
+        self.real_prev_location.append(real_camera_xyz)
+        arm_state = nominal_positions['wrist']# get_relative_stretch_current_arm_state(env.controller)
+        
+        return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state
+
+    def get_observation(
+            self, env: StretchManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any
+    ) -> Any:
+
+        mask = self.mask_sensor.get_observation(env, task, *args, **kwargs)
+        depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs)
+
+        # catch rare NaN error where tiny mask is lost in 0 missing values
+        squeeze_mask = squeeze_bool_mask(mask)
+        depth_frame_masked = depth_frame.copy()
+        depth_frame_masked[~squeeze_mask] = -1
+        depth_frame_masked[depth_frame_masked == 0] = -1 
+        
+        if task.num_steps_taken() == 0:
+            self.pointnav_history_aggr = []
+            self.real_prev_location = []
+            self.belief_prev_location = []
+
+        fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env)
+
+        if depth_frame_masked[depth_frame_masked>0].sum() != 0:
+            midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device)
+            self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken()))
+
+        return self.history_aggregation(camera_xyz, camera_rotation, arm_state, task.num_steps_taken())
+    
+    def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number):
+        if len(self.pointnav_history_aggr) == 0:
+            return self.dummy_answer
+        else:
+            weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+            total_weights = sum(weights)
+            total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr]
+            total_sum = sum(total_sum)
+            midpoint = total_sum / total_weights
+            agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0))
+            midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0))
+            midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state)
+            midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']])
+
+            arm_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state)
+            arm_state_agent_coord = torch.Tensor([arm_agent_coord['position'][k] for k in ['x','y','z']])
+
+            distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord
+
+            return distance_in_agent_coord.cpu()
+
+
 # TODO we have to rewrite the noisy movement experiment ones?
 # class AgentGTLocationSensor(Sensor):
 #
diff --git a/utils/stretch_utils/stretch_visualizer.py b/utils/stretch_utils/stretch_visualizer.py
index d87969d..2ab6ea6 100644
--- a/utils/stretch_utils/stretch_visualizer.py
+++ b/utils/stretch_utils/stretch_visualizer.py
@@ -183,25 +183,18 @@ def finish_episode(self, environment, episode_info, task_info):
         source_object_id = task_info["target_object_ids"][0]
         episode_success = episode_info._success
 
-        # Put back if you want the images
-        # for i, img in enumerate(self.log_queue):
-        #     image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i)))
-        #     cv2.imwrite(image_dir, img[:,:,[2,1,0]])
-
         episode_success_offset = "succ" if episode_success else "fail"
 
         source_obj_type =  task_info['object_type']
 
         if source_obj_type == 'small':
             source_obj_type = task_info['object_type']
-            # goal_obj_type = task_info['goal_object_type']
 
         room_name = task_info['scene_name']
         gif_name = (
                 f"{time_to_write}_room_{room_name}_to_{source_obj_type}_episode_{episode_success_offset}.gif"
         )
-        # ForkedPdb().set_trace()
-        self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:])
+        # self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:])
         addition_texts = ['xxx'] + [str(x) for x in episode_info.agent_body_dist_to_obj]
         if not addition_texts == ['xxx']:
             self.log_queue = put_additional_text_on_image(self.log_queue, addition_texts)
@@ -209,18 +202,12 @@ def finish_episode(self, environment, episode_info, task_info):
         concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1)
         save_image_list_to_gif(concat_all_images, gif_name, self.log_dir)
         this_controller = environment.controller
-        scene = this_controller.last_event.metadata[
-            "sceneName"
-        ]
-        reset_environment_and_additional_commands(this_controller, scene)
-
-        additional_observation_start = []
-        additional_observation_goal = []
-        if 'target_object_mask' in episode_info.get_observations():
-            additional_observation_start.append('target_object_mask')
-        if 'target_location_mask' in episode_info.get_observations():
-            additional_observation_goal.append('target_location_mask')
+        if room_name == 'RealRobothor':
+            scene = None # reset doesn't do anything
+        else:
+            scene = this_controller.last_event.metadata["sceneName"]
 
+        # reset_environment_and_additional_commands(this_controller, scene) # RH: why is there a reset in the visualizer?
 
         self.log_queue = []
         self.action_queue = []
@@ -229,22 +216,21 @@ def finish_episode(self, environment, episode_info, task_info):
 
     def log(self, environment, action_str, obs):
         image_intel = environment.intel_frame
-        depth_intel = environment.intel_depth
         try:
-            image_kinect = environment.kinect_frame
-            depth_kinect = environment.kinect_depth
-            combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1)
-        except:
-            if depth_intel is None or depth_kinect is None:
-                combined_frame = np.concatenate([image_intel, image_kinect],axis=1)
-            else:
+            depth_intel = environment.intel_depth
+            try:
+                image_kinect = environment.kinect_frame
+                depth_kinect = environment.kinect_depth
+                combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1)
+            except:
                 combined_frame = np.concatenate([image_intel, depth_to_rgb(depth_intel)],axis=1)
+        except:
+            combined_frame = np.concatenate([image_intel],axis=1)
 
         # image_intel = intel_reshape(image_intel)
         # kinect_frame = kinect_reshape(kinect_frame)
         
         self.action_queue.append(action_str)
-        # combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1)
         for o in obs:
             if ("rgb" in o) and isinstance(obs[o], np.ndarray):
                 viz_obs = unnormalize_clip_image(obs[o])*255