In [1]:
import sys
import cv2
import h5py
import json
import numpy as np
import random
import skimage.io
from skimage.transform import resize

ACTION_SIZE = 4
SCREEN_WIDTH = 84
SCREEN_HEIGHT = 84
HISTORY_LENGTH = 4

class THORDiscreteEnvironment(object):

  def __init__(self, config=dict()):

    # configurations
    self.scene_name          = config.get('scene_name', 'bathroom_03')
    self.random_start        = config.get('random_start', True)
    self.n_feat_per_locaiton = config.get('n_feat_per_locaiton', 1) # 1 for no sampling
    self.terminal_state_id   = config.get('terminal_state_id', 0)

    self.h5_file_path = config.get('h5_file_path', 'data/%s.h5'%self.scene_name)
    self.h5_file      = h5py.File(self.h5_file_path, 'r')

    self.locations   = self.h5_file['location'][()]
    self.rotations   = self.h5_file['rotation'][()]
    self.n_locations = self.locations.shape[0]

    self.terminals = np.zeros(self.n_locations)
    self.terminals[self.terminal_state_id] = 1
    self.terminal_states, = np.where(self.terminals)

    self.transition_graph = self.h5_file['graph'][()]
    self.shortest_path_distances = self.h5_file['shortest_path_distance'][()]

    self.history_length = HISTORY_LENGTH
    self.screen_height  = SCREEN_HEIGHT
    self.screen_width   = SCREEN_WIDTH

    # we use pre-computed fc7 features from ResNet-50
    # self.s_t = np.zeros([self.screen_height, self.screen_width, self.history_length])
    self.s_t      = np.zeros([2048, self.history_length])
    self.s_t1     = np.zeros_like(self.s_t)
    self.s_target = self._tiled_state(self.terminal_state_id)

    self.reset()

  # public methods

  def reset(self):
    # randomize initial state
    while True:
      k = random.randrange(self.n_locations)
      min_d = np.inf
      # check if target is reachable
      for t_state in self.terminal_states:
        dist = self.shortest_path_distances[k][t_state]
        min_d = min(min_d, dist)
      # min_d = 0  if k is a terminal state
      # min_d = -1 if no terminal state is reachable from k
      if min_d > 0: break

    # reset parameters
    self.current_state_id = k
    self.s_t = self._tiled_state(self.current_state_id)
    self.s_target = self._tiled_state(self.terminal_state_id)

    self.reward = 0
    self.collided = False
    self.terminal = False
    self.s_t_reshape = self.s_t.reshape(-1, 8192)
    self.s_target_reshape = self.s_target.reshape(-1, 8192)


    # return self.s_t_reshape, self.s_target_reshape
    return self.s_t_reshape, self.s_target_reshape, min_d

  def step(self, action):
    assert not self.terminal, 'step() called in terminal state'
    k = self.current_state_id
    if self.transition_graph[k][action] != -1:
      self.current_state_id = self.transition_graph[k][action]
      if self.terminals[self.current_state_id]:
        self.terminal = True
        self.collided = False
      else:
        self.terminal = False
        self.collided = False
    else:
      self.terminal = False
      self.collided = True

    self.reward = self._reward(self.terminal, self.collided)
    self.s_t1 = np.append(self.s_t[:,1:], self.state, axis=1)

    self.s_t1_reshape = self.s_t1.reshape(-1, 8192)
    return self.s_t1_reshape, self.reward, self.terminal, self.current_state_id, self.collided
    

  def update(self):
    self.s_t = self.s_t1

  # private methods

  def _tiled_state(self, state_id):
    k = random.randrange(self.n_feat_per_locaiton)
    f = self.h5_file['resnet_feature'][state_id][k][:,np.newaxis]
    return np.tile(f, (1, self.history_length))

  def _reward(self, terminal, collided):
    # positive reward upon task completion
    if terminal:
      return 10
    elif collided:
      return -0.1
    else:
      return -0.01
  # properties

  @property
  def action_size(self):
    # move forward/backward, turn left/right for navigation
    return ACTION_SIZE 

  @property
  def action_definitions(self):
    action_vocab = ["MoveForward", "RotateRight", "RotateLeft", "MoveBackward"]
    return action_vocab[:ACTION_SIZE]

  @property
  def observation(self):
    obs = self.h5_file['observation'][self.current_state_id]
    # return self.h5_file['observation'][self.current_state_id]
    return obs

  @property
  def state(self):
    # read from hdf5 cache
    k = random.randrange(self.n_feat_per_locaiton)
    return self.h5_file['resnet_feature'][self.current_state_id][k][:,np.newaxis]

  @property
  def target(self):
    return self.s_target

  @property
  def x(self):
    return self.locations[self.current_state_id][0]

  @property
  def z(self):
    return self.locations[self.current_state_id][1]

  @property
  def r(self):
    return self.rotations[self.current_state_id]

if __name__ == "__main__":

  scene_name = 'bathroom_02'
  env = THORDiscreteEnvironment({
    'random_start': True,
    'scene_name': scene_name,
    'h5_file_path': 'data/%s.h5'%scene_name
  })
  # obs = env.observation()
  # print(type(obs))


In [2]:
import pyglet

class SimpleImageViewer(object):

  def __init__(self, display=None):
    self.window = None
    self.isopen = False
    self.display = display

  def imshow(self, arr):
    if self.window is None:
      height, width, channels = arr.shape
      self.window = pyglet.window.Window(width=width, height=height, display=self.display, caption="THOR Browser")
      self.width = width
      self.height = height
      self.isopen = True

    assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape"
    image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
    self.window.clear()
    self.window.switch_to()
    self.window.dispatch_events()
    image.blit(0,0)
    self.window.flip()

  def close(self):
    if self.isopen:
      self.window.close()
      self.isopen = False

  def __del__(self):
    self.close()


In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function

import sys
import signal
import argparse
import numpy as np
from PIL import Image

from scene_loader import THORDiscreteEnvironment
# from utils.tools import SimpleImageViewer
from tools import SimpleImageViewer

#
# Navigate the scene using your keyboard
#

def key_press(key, mod):

  global human_agent_action, human_wants_restart, stop_requested
  if key == ord('R') or key == ord('r'): # r/R
    human_wants_restart = True
  if key == ord('Q') or key == ord('q'): # q/Q
    stop_requested = True
  if key == 0xFF52: # up
    human_agent_action = 0
  if key == 0xFF53: # right
    human_agent_action = 1
  if key == 0xFF51: # left
    human_agent_action = 2
  if key == 0xFF54: # down
    human_agent_action = 3

def rollout(env):

  global human_agent_action, human_wants_restart, stop_requested
  human_agent_action = None
  human_wants_restart = False
  while True:
    # waiting for keyboard input
    if human_agent_action is not None:
      # move actions
      env.step(human_agent_action)
      human_agent_action = None

    # waiting for reset command
    if human_wants_restart:
      # reset agent to random location
      env.reset()
      human_wants_restart = False

    # check collision
    if env.collided:
      print('Collision occurs.')
      env.collided = False

    # check quit command
    if stop_requested: break

    viewer.imshow(env.observation)

if __name__ == '__main__':

  parser = argparse.ArgumentParser()
  parser.add_argument("-s", "--scene_dump", type=str, default="./data/bedroom_04.h5",
                      help="path to a hdf5 scene dump file")
  args = parser.parse_args()

  print("Loading scene dump {}".format(args.scene_dump))
  env = THORDiscreteEnvironment({
    'h5_file_path': args.scene_dump
  })

  # manually disable terminal states
  env.terminals = np.zeros_like(env.terminals)
  env.terminal_states, = np.where(env.terminals)
  env.reset()

  human_agent_action = None
  human_wants_restart = False
  stop_requested = False

  img = Image.fromarray(env.observation, 'RGB')
  # img.save("10.jpg") 

  viewer = SimpleImageViewer()
  viewer.imshow(env.observation)
  viewer.window.on_key_press = key_press

  print("Use arrow keys to move the agent.")
  print("Press R to reset agent\'s location.")
  print("Press Q to quit.")

  rollout(env)

  print("Goodbye.")


In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


def normalized_columns_initializer(weights, std=1.0):
    out = torch.randn(weights.size())
    out *= std / torch.sqrt(out.pow(2).sum(1, keepdim=True))
    return out

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = np.prod(weight_shape[1:4])
        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        m.bias.data.fill_(0)
    elif classname.find('Linear') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]
        fan_out = weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        m.bias.data.fill_(0)

class ActorCritic(torch.nn.Module):
    def __init__(self):
        super(ActorCritic, self).__init__()

        # FC and embedding
        self.fc1 = nn.Linear(8192, 512)
        self.embedding_layer = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 512)

        self.critic_linear = nn.Linear(512, 1)
        self.actor_linear = nn.Linear(512, 4)

        self.apply(weights_init)
        self.actor_linear.weight.data = normalized_columns_initializer(self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = normalized_columns_initializer(self.critic_linear.weight.data, 0.5)
        self.critic_linear.bias.data.fill_(0)

    def forward(self, state_input, target_input):
        # siamese
        state_input = state_input.reshape(state_input.size(0), -1)
        target_input = target_input.reshape(target_input.size(0), -1)

        h1_s = F.relu(self.fc1(state_input))
        h1_t = F.relu(self.fc1(target_input))

        input_embedding = torch.cat((h1_s, h1_t), -1)
        h2_e = F.relu(self.embedding_layer(input_embedding))

        h3 = F.relu(self.fc2(h2_e))

        value_output = self.critic_linear(h3)
        logit_output = self.actor_linear(h3)

        return value_output, logit_output


In [8]:
import numpy as np
import torch
from network import ActorCritic
from scene_loader import THORDiscreteEnvironment as Environment
import torch.nn.functional as F
scene_scope = 'bathroom_02'
task_scopes = ['26', '37', '43', '53', '69']

'''
    ls_scene_scope = ['bathroom_02', 'bedroom_04', 'kitchen_02', 'living_room_08']
    TASK_LIST = {
                    'bathroom_02'    : ['26', '37', '43', '53', '69'],
                    'bedroom_04'     : ['134', '264', '320', '384', '387'],
                    'kitchen_02'     : ['90', '136', '157', '207', '329'],
                    'living_room_08' : ['92', '135', '193', '228', '254']
    }
'''
lr = 0.0001
num_episodes = 10
ep_len = 100
gamma = 0.99
return_list = []
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ActorCritic().to(device)
# checkpoint = torch.load('./model/%s.pth' % (scene_scope))
# model.load_state_dict(checkpoint['state_dict'])
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for episode in range(num_episodes):
    for task_scope in task_scopes:
        env = Environment({'scene_name': scene_scope, 'terminal_state_id': int(task_scope)})
        state, target, _ = env.reset()
        state = torch.tensor(state).to(device)
        target = torch.tensor(target).to(device)
        episode_length = 0
        done = False
        while not done:
            values =[]
            states = []
            targets = []
            actions = []
            probs = []
            rewards = []
            for step in range(ep_len):
                episode_length += 1
                states.append(state)
                targets.append(targets)
                value, logit = model(state, target)
                prob = F.softmax(logit, dim=-1)
                action = prob.multinomial(num_samples=1).detach()

                state, reward, done, current_state_id, collide = env.step(action)
                done = done or episode_length >= 3000
                reward = max(min(reward, 1), -1)

                state = torch.tensor(state).to(device)


                values.append(value)
                actions.append(action)
                probs.append(prob)
                rewards.append(reward)

                if done:
                    break

            R = 0.0
            if not done:
                R, _ = model(state, target)

            values.reverse()
            rewards.reverse()
            actions.reverse()
            states.reverse()
            probs.reverse()

            batch_si = []
            batch_a = []
            batch_td = []
            batch_R = []
            batch_t = []


            for (ai, ri, vi, si, ti) in zip(actions, rewards, values, states, targets):
                R = ri + gamma * R
                td = R - vi
                a = np.zeros(4)
                a[ai] = 1

                batch_a.append(a)
                batch_R.append(R)
                batch_td.append(td)
                batch_si.append(si)
                batch_t.append(ti)

            batch_a = torch.tensor(batch_a).to(device)
            batch_R = torch.tensor(batch_R).to(device)
            batch_td = torch.tensor(batch_td).to(device)

            pi = torch.cat(probs, dim=0)
            log_pi = torch.log(torch.clamp(pi, 1e-20, 1.0))
            entroy = -torch.sum(pi * log_pi, dim=1)
            policy_loss = -torch.sum(torch.sum(log_pi * batch_a, dim=1) * batch_td + 0.01 * entroy)
            value_loss = 0.5 * torch.sum(batch_td)**2

            optimizer.zero_grad()
            (policy_loss + 0.5 * value_loss).backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 50)
            optimizer.step()

        print('episode:', episode)
        print('epi_length:', episode_length)

# checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
# torch.save(checkpoint, './model/%s.pth' % (scene_scope))

  torch.nn.utils.clip_grad_norm(model.parameters(), 50)


episode: 0
epi_length: 1113
episode: 0
epi_length: 202
episode: 0
epi_length: 1482
episode: 0
epi_length: 210
episode: 0
epi_length: 722
episode: 1
epi_length: 296
episode: 1
epi_length: 1105
episode: 1
epi_length: 236
episode: 1
epi_length: 200
episode: 1
epi_length: 33
episode: 2
epi_length: 947
episode: 2
epi_length: 44
episode: 2
epi_length: 697
episode: 2
epi_length: 110
episode: 2
epi_length: 342
episode: 3
epi_length: 629
episode: 3
epi_length: 250
episode: 3
epi_length: 876
episode: 3
epi_length: 150
episode: 3
epi_length: 800
episode: 4
epi_length: 832
episode: 4
epi_length: 136
episode: 4
epi_length: 331
episode: 4
epi_length: 8
episode: 4
epi_length: 384
episode: 5
epi_length: 75
episode: 5
epi_length: 923
episode: 5
epi_length: 112
episode: 5
epi_length: 191
episode: 5
epi_length: 188
episode: 6
epi_length: 699
episode: 6
epi_length: 1131
episode: 6
epi_length: 26
episode: 6
epi_length: 3000
episode: 6
epi_length: 356
episode: 7
epi_length: 1899
episode: 7
epi_length: 220
e

In [9]:
import torch
from network import ActorCritic
from scene_loader import THORDiscreteEnvironment as Environment
import torch.nn.functional as F
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
scope = 'bedroom_04'

scene_scope = 'bedroom_04'
task_scopes = ['134', '264', '320', '384', '387']

'''
    ls_scene_scope = ['bathroom_02', 'bedroom_04', 'kitchen_02', 'living_room_08']
    TASK_LIST = {
                    'bathroom_02'    : ['26', '37', '43', '53', '69'],
                    'bedroom_04'     : ['134', '264', '320', '384', '387'],
                    'kitchen_02'     : ['90', '136', '157', '207', '329'],
                    'living_room_08' : ['92', '135', '190', '228', '254']
    }
'''
model = ActorCritic().to(device)
checkpoint = torch.load('./model/%s.pth' % (scope))
model.load_state_dict(checkpoint['state_dict'])
model.eval()

NUM_EVAL_EPISODES = 10
MAX_STEP = 500
ep_lengths = []
min_length = []

for i in range(NUM_EVAL_EPISODES):
    for task_scope in task_scopes:
        env = Environment({'scene_name': scene_scope, 'terminal_state_id': int(task_scope)})
        state, target, min_dist = env.reset()
        state = torch.tensor(state).to(device)
        target = torch.tensor(target).to(device)
        min_length.append(min_dist)
        episode_length = 0
        for step in range(MAX_STEP):
            episode_length += 1
            with torch.no_grad():
                value, logit = model(state, target)
            prob = F.softmax(logit, dim=-1)
            action = prob.multinomial(num_samples=1).detach()
            state, reward, done, current_state_id, collide = env.step(action[0, 0])
            #print('current_state_id:', current_state_id)
            state = torch.tensor(state).to(device)
            if done:
                break
        ep_lengths.append(episode_length)


num_fail = 0
for jj in range(NUM_EVAL_EPISODES*5):
    if ep_lengths[jj] == 500:
        num_fail = num_fail + 1
SR = 1 - num_fail / NUM_EVAL_EPISODES/5
print('SR:', SR * 100)



SPL = 0
for ii in range(NUM_EVAL_EPISODES * 5):
    if ep_lengths[ii] != 500:
        SPL = SPL + min_length[ii] / ep_lengths[ii]
SPL = SPL / NUM_EVAL_EPISODES * 100/5
print('SPL:', SPL)



SR: 100.0
SPL: 68.5827070611095
