In [11]:
# Some boilerplate setup
import sys
sys.path.append("../")

import time
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn import infer_real_valued_columns_from_input
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from helpers import EarlyStoppingCallback

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

EPS = 1e-8 # epsilon constant for numeric stability

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Available agents/environments (one pickle file per environment).
# ENV_NAME is the file name WITHOUT the ".pkl" extension; the extension
# is appended when the expert data file is opened.
#  Ant-v1.pkl
#  HalfCheetah-v1.pkl
#  Hopper-v1.pkl
#  Humanoid-v1.pkl
#  Reacher-v1.pkl
#  Walker2d-v1.pkl

ENV_NAME = "Ant-v1"

## Expert data loading and preprocessing

In [3]:
# Load the expert data for ENV_NAME from the pickle saved in
# 'homework1/expert_data'.
# NOTE(review): pickle.load can execute arbitrary code embedded in the
# file -- only load expert data that was generated locally or comes from
# a trusted source.
with open("../expert_data/{}.pkl".format(ENV_NAME), "rb") as f:
    expert_data = pickle.load(f)

observations, actions = expert_data["observations"], expert_data["actions"]

# Collapse everything after the sample axis into a single action axis.
# Unlike np.squeeze, this never drops the sample axis (single-sample data)
# or the action axis (1-D action spaces), so D_out below stays correct.
# Presumably the raw actions are stored as (num_samples, 1, action_dim)
# -- TODO confirm against the script that recorded the expert data.
actions = np.reshape(actions, (len(actions), -1))

# As a sanity check, print out the shapes of the loaded arrays.
print('observations shape: ', observations.shape)
print('actions shape: ', actions.shape)

# num_total: number of recorded samples, D_in: observation dimension,
# D_out: action dimension.
num_total, D_in = observations.shape
D_out = actions.shape[-1]

observations shape:  (20000, 111)
actions shape:  (20000, 8)


## Train, validation, test split

In [4]:
# Carve the expert data into train / validation / test partitions and
# additionally draw a small development subset out of the training set.
num_train, num_val, num_test, num_dev = 17000, 2000, 1000, 500

# Step 1: hold out the test set; the remainder is a temporary pool.
X_tmp, X_test, y_tmp, y_test = train_test_split(
    observations, actions, test_size=num_test)

# Step 2: divide the temporary pool into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    X_tmp, y_tmp, train_size=num_train)

# Step 3: the development set is a random sample (without replacement)
# of training rows.
mask = np.random.choice(num_train, num_dev, replace=False)
X_dev, y_dev = X_train[mask], y_train[mask]

# Report every partition's shape and verify its number of rows.
shape_checks = (
    ('Train data shape: ', X_train, num_train),
    ('Train labels shape: ', y_train, num_train),
    ('Validation data shape: ', X_val, num_val),
    ('Validation labels shape: ', y_val, num_val),
    ('Test data shape: ', X_test, num_test),
    ('Test labels shape: ', y_test, num_test),
    ('Dev data shape: ', X_dev, num_dev),
    ('Dev labels shape: ', y_dev, num_dev),
)
for caption, split, expected_rows in shape_checks:
    print(caption, split.shape)
    assert split.shape[0] == expected_rows

Train data shape:  (17000, 111)
Train labels shape:  (17000, 8)
Validation data shape:  (2000, 111)
Validation labels shape:  (2000, 8)
Test data shape:  (1000, 111)
Test labels shape:  (1000, 8)
Dev data shape:  (500, 111)
Dev labels shape:  (500, 8)


### Preprocessing: zero mean and unit variance

In [5]:
# Preprocessing: standardize every feature to zero mean and unit variance.
# first: compute the per-feature mean and standard deviation from the
# training set only, so no statistics leak from the val/test data.
# Both statistics ignore NaNs consistently (np.nanmean paired with
# np.nanstd); EPS keeps the division finite for constant features.
mean = np.nanmean(X_train, axis=0)
std = np.nanstd(X_train, axis=0) + EPS

# second: subtract the mean and divide by the standard deviation.
# The arrays are updated IN PLACE, so this cell is not idempotent:
# re-running it without re-creating the splits normalizes the data twice.
for X_split in (X_train, X_val, X_test, X_dev):
    X_split -= mean
    X_split /= std

In [6]:
# Hidden layer widths. These were never defined in the notebook, so the
# cell raised a NameError on a fresh kernel. 20/20 mirrors the HIDDEN_SIZE
# used for the DNNRegressor -- TODO confirm these are the intended values.
N_h1, N_h2 = 20, 20

# tf Graph input: a batch of observations and the matching expert actions
X = tf.placeholder("float", [None, D_in])
y = tf.placeholder("float", [None, D_out])

with tf.name_scope('Network'):
    # Weights and biases of a two-hidden-layer fully connected network,
    # all drawn from tf.random_normal
    params = {
        'W1': tf.Variable(tf.random_normal([D_in, N_h1])),
        'W2': tf.Variable(tf.random_normal([N_h1, N_h2])),
        'Wout': tf.Variable(tf.random_normal([N_h2, D_out])),

        'b1': tf.Variable(tf.random_normal([N_h1])),
        'b2': tf.Variable(tf.random_normal([N_h2])),
        'bout': tf.Variable(tf.random_normal([D_out]))
    }

    # First hidden layer with ReLU non-linearity
    h1 = tf.add(tf.matmul(X, params['W1']), params['b1'])
    z1 = tf.nn.relu(h1)
    # Second hidden layer with ReLU non-linearity
    h2 = tf.add(tf.matmul(z1, params['W2']), params['b2'])
    z2 = tf.nn.relu(h2)
    # Linear output layer: one value per action dimension
    out = tf.add(tf.matmul(z2, params['Wout']), params['bout'])

# Loss and optimizer: mean squared error between predicted and expert
# actions.
# NOTE(review): only the optimizer object is created here; no training op
# (e.g. optimizer.minimize(loss)) is built in this cell.
loss = tf.reduce_mean(tf.squared_difference(out, y))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)

# Op that initializes the tf variables created so far
init = tf.global_variables_initializer()

In [None]:
# Training schedule
NUM_EPOCHS = 20
BATCH_SIZE = 256
NUM_BATCHES = num_train // BATCH_SIZE  # full mini-batches per epoch
DISPLAY_EVERY = 10

# Network architecture
HIDDEN_SIZE = [20, 20]
DROPOUT = 0.0

# Real-valued feature columns inferred from the training observations
features = infer_real_valued_columns_from_input(X_train)

# Checkpoint directory named after the hidden layer sizes,
# e.g. ./checkpoints/DNNRegressor-20-20
model_dir = "./checkpoints/DNNRegressor-{}".format(
    "-".join(map(str, HIDDEN_SIZE)))

model = tf.contrib.learn.DNNRegressor(
    model_dir=model_dir,
    feature_columns=features,
    hidden_units=HIDDEN_SIZE,
    label_dimension=D_out,
    activation_fn=tf.nn.relu,
    dropout=DROPOUT,
    optimizer=tf.train.ProximalAdagradOptimizer(
      learning_rate=1e-2,
      l1_regularization_strength=0.01
    )
)

# Bound the training run. With steps=None, fit() keeps training until the
# cell is interrupted, and BATCH_SIZE / NUM_EPOCHS were never applied.
model.fit(x=X_train,
          y=y_train,
          batch_size=BATCH_SIZE,
          steps=NUM_EPOCHS * NUM_BATCHES)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x13f6d0ef0>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': None}
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving int

  equality = a == b


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ./checkpoints/DNNRegressor-20-20/model.ckpt.
INFO:tensorflow:loss = 0.262501, step = 1
INFO:tensorflow:global_step/sec: 12.277
INFO:tensorflow:loss = 0.0773061, step = 101 (8.146 sec)
INFO:tensorflow:global_step/sec: 12.2575
INFO:tensorflow:loss = 0.0690636, step = 201 (8.159 sec)
INFO:tensorflow:global_step/sec: 15.2339
INFO:tensorflow:loss = 0.0684239, step = 301 (6.564 sec)
INFO:tensorflow:global_step/sec: 14.6122
INFO:tensorflow:loss = 0.0704684, step = 401 (6.844 sec)
INFO:tensorflow:global_step/sec: 17.1918
INFO:tensorflow:loss = 0.0730132, step = 501 (5.816 sec)
INFO:tensorflow:global_step/sec: 15.4213
INFO:tensorflow:loss = 0.0757087, step = 601 (6.484 sec)
