In [1]:
import pickle
import tensorflow as tf
import numpy as np
import tf_util
import gym
import load_policy
import tempfile
import time

In [2]:
def expert(expert_policy_file = "experts/Humanoid-v1.pkl", 
           envname = "Humanoid-v1", render = True, max_timesteps = None, num_rollouts = 20, verbose = False):
    """Roll out a pre-built expert policy and record the visited (observation, action) pairs.

    Parameters
    ----------
    expert_policy_file : str
        Path handed to ``load_policy.load_policy`` to build the policy function.
        NOTE(review): the default is a ``.pkl`` file; if ``load_policy`` unpickles it,
        only point this at trusted files.
    envname : str
        Environment id passed to ``gym.make``.
    render : bool
        When true, ``env.render()`` is called after every environment step.
    max_timesteps : int or None
        Per-rollout step cap. Any falsy value (``None`` or ``0``) falls back to
        ``env.spec.timestep_limit``.
    num_rollouts : int
        Number of episodes to run.
    verbose : bool
        When true, print loading messages, per-rollout progress every 100 steps
        and summary statistics of the episode returns.

    Returns
    -------
    dict
        ``{'observations': np.ndarray, 'actions': np.ndarray}``: every observation
        that was fed to the policy and the policy output for it, in visit order.
        Each action is stored exactly as the policy returned it for the
        single-row batch ``obs[None, :]``.

    Notes
    -----
    The environment is always closed before returning, including when a rollout
    raises, so repeated calls from a long-lived kernel do not accumulate open
    simulator/viewer handles.
    """
    if verbose:
        print('loading and building expert policy')
    policy_fn = load_policy.load_policy(expert_policy_file)
    if verbose:
        print('loaded and built')

    with tf.Session():
        tf_util.initialize()

        env = gym.make(envname)
        try:
            max_steps = max_timesteps or env.spec.timestep_limit

            returns = []
            observations = []
            actions = []
            for i in range(num_rollouts):
                if verbose:
                    print('-----> iter', i)
                obs = env.reset()
                done = False
                totalr = 0.
                steps = 0
                while not done:
                    # The policy expects a batch, so add a leading axis of size 1.
                    action = policy_fn(obs[None, :])
                    # Record the observation the action was computed from,
                    # before it is overwritten by the next environment step.
                    observations.append(obs)
                    actions.append(action)
                    obs, r, done, _ = env.step(action)
                    totalr += r
                    steps += 1
                    if render:
                        env.render()
                    if verbose and steps % 100 == 0:
                        print("%i/%i"%(steps, max_steps))
                    if steps >= max_steps:
                        break
                returns.append(totalr)

            if verbose:
                print('returns', returns)
                print('mean return', np.mean(returns))
                print('std of return', np.std(returns))

            expert_data = {'observations': np.array(observations),
                           'actions': np.array(actions)}
            return expert_data
        finally:
            # Release the simulator (and any render window) deterministically,
            # even if the policy or the environment raised mid-rollout.
            env.close()

In [3]:
# Collect 100 expert rollouts on Humanoid-v1 without rendering, and time the
# whole collection so the cost of regenerating the dataset is visible.
start = time.time()
expert_data = expert(
    expert_policy_file="experts/Humanoid-v1.pkl",
    envname="Humanoid-v1",
    render=False,
    max_timesteps=None,
    num_rollouts=100,
    verbose=True,
)
end = time.time()

# Wall-clock duration of the call above, in seconds.
print("time taken = ", end - start, "s")

loading and building expert policy
obs (1, 376) (1, 376)
loaded and built
Instructions for updating:
Please use tf.global_variables instead.


[2017-10-22 23:50:30,888] From /media/afakharany93/Common/Online_courses/CS294-berkeleydeeprlcourse/homework/hw1/tf_util.py:91: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.


Instructions for updating:
Use `tf.variables_initializer` instead.


[2017-10-22 23:50:30,909] From /home/afakharany93/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/tf_should_use.py:175: initialize_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.variables_initializer` instead.
[2017-10-22 23:50:30,918] Making new env: Humanoid-v1


-----> iter 0
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 1
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 2
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 3
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 4
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 5
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 6
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 7
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 8
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 9
100/1000
200/1000
300/1000
400/1000
500/1

500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 78
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 79
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 80
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 81
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 82
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 83
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 84
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 85
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1000/1000
-----> iter 86
100/1000
200/1000
300/1000
400/1000
500/1000
600/1000
700/1000
800/1000
900/1000
1

In [4]:
# Sanity-check the size of the collected dataset: one row per recorded step
# in each of the two arrays returned by expert().
obs_shape, action_shape = (
    expert_data[key].shape for key in ("observations", "actions")
)
print(obs_shape, action_shape, sep="\n")

(100000, 376)
(100000, 1, 17)


In [18]:
# Peek at the first recorded action. Each entry is stored as the policy
# returned it for a single-row batch (obs[None, :]), which is why the actions
# array printed earlier has a middle axis of length 1.
expert_data["actions"][0]

array([[ 2.03282475,  0.00647837, -0.3805618 , -0.3180261 ,  0.0308816 ,
         2.72997665,  0.25069666, -0.22025736,  0.32001257,  2.45338678,
         1.06576061,  0.59935224, -1.90577698,  0.57928103, -0.61752319,
        -0.03438504,  0.57197738]], dtype=float32)