In [53]:
# Some boilerplate setup

import time
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

EPS = 1e-8 # epsilon constant for numeric stability

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [54]:
# Available agents/environments:
#  Ant-v1.pkl
#  HalfCheetah-v1.pkl
#  Hopper-v1.pkl
#  Humanoid-v1.pkl
#  Reacher-v1.pkl
#  Walker2d-v1.pkl

ENV_NAME = "Ant-v1"

## Expert data loading and preprocessing

In [55]:
# Load the expert data from pickle saved in 'homework1/expert_data'
expert_data = None
with open("../expert_data/{}.pkl".format(ENV_NAME), "rb") as f:
    expert_data = pickle.load(f)

observations, actions = expert_data["observations"], expert_data["actions"]
actions = np.squeeze(actions)

# As a sanity check, print out the size of the training and test data.
print('observations shape: ', observations.shape)
print('actions shape: ', actions.shape)

num_total, N_in = observations.shape
N_out = actions.shape[-1]

observations shape:  (20000, 111)
actions shape:  (20000, 8)


## Train, validation, test split

In [56]:
# Split the data into train, val, and test sets. Also, create a
# small dataset for development purposes.
num_train = 17000
num_val   = 2000
num_test  = 1000
num_dev   = 500

# Split the data into test set and temporary set, which will be 
# split into training and validation sets
X_tmp, X_test, y_tmp, y_test = train_test_split(observations,
                                                actions,
                                                test_size=num_test)

# Split X_tmp into training and validation
X_train, X_val, y_train, y_val = train_test_split(X_tmp,
                                                  y_tmp,
                                                  train_size=num_train)
# Create the development set, which is just a small subset of
# the training set.
mask = np.random.choice(num_train, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

print('Train data shape: ', X_train.shape)
assert(X_train.shape[0] == num_train)

print('Train labels shape: ', y_train.shape)
assert(y_train.shape[0] == num_train)

print('Validation data shape: ', X_val.shape)
assert(X_val.shape[0] == num_val)
print('Validation labels shape: ', y_val.shape)
assert(y_val.shape[0] == num_val)

print('Test data shape: ', X_test.shape)
assert(X_test.shape[0] == num_test)
print('Test labels shape: ', y_test.shape)
assert(y_test.shape[0] == num_test)

print('Dev data shape: ', X_dev.shape)
assert(X_dev.shape[0] == num_dev)
print('Dev labels shape: ', y_dev.shape)
assert(y_dev.shape[0] == num_dev)

Train data shape:  (17000, 111)
Train labels shape:  (17000, 8)
Validation data shape:  (2000, 111)
Validation labels shape:  (2000, 8)
Test data shape:  (1000, 111)
Test labels shape:  (1000, 8)
Dev data shape:  (500, 111)
Dev labels shape:  (500, 8)


### Preprocessing: zero mean and unit variance

In [57]:
# Preprocessing: subtract the mean image
# first: compute the mean and standard deviation from training X
mean = np.nanmean(X_train, axis=0)
std = np.std(X_train, axis=0) + EPS

# second: subtract mean and std from datasets
for X in (X_train, X_val, X_test, X_dev):
    X -= mean
    X /= std

In [58]:
# tf Graph input
X = tf.placeholder("float", [None, N_in])
y = tf.placeholder("float", [None, N_out])

N_h1 = 20
N_h2 = 20

# Weights and biases
params = {
    'W1': tf.Variable(tf.random_normal([N_in, N_h1])),
    'W2': tf.Variable(tf.random_normal([N_h1, N_h2])),
    'Wout': tf.Variable(tf.random_normal([N_h2, N_out])),
    
    'b1': tf.Variable(tf.random_normal([N_h1])),
    'b2': tf.Variable(tf.random_normal([N_h2])),
    'bout': tf.Variable(tf.random_normal([N_out]))
}

# Hidden layer with ReLU non-linearity
h1 = tf.add(tf.matmul(X, params['W1']), params['b1'])
z1 = tf.nn.relu(h1)
# Hidden layer with RELU non-linearity
h2 = tf.add(tf.matmul(z1, params['W2']), params['b2'])
z2 = tf.nn.relu(h2)
# Linear output layer
out = tf.matmul(z2, params['Wout']) + params['bout']


# Loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)

# Initialize tf the variables
init = tf.global_variables_initializer()

In [59]:
print(y_batch.shape)

(128, 1, 8)


In [68]:
NUM_EPOCHS = 10
BATCH_SIZE = 128
DISPLAY_EVERY = 1

# Run the graph
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(NUM_EPOCHS):
        avg_cost = 0.0
        num_batches = int(num_train/BATCH_SIZE)

        for i in range(num_batches):
            X_batch = X_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
            y_batch = y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE]

            _, c = sess.run([optimizer, cost],
                            feed_dict={
                                X: X_batch,
                                y: y_batch})
            avg_cost += c / num_batches

        if epoch % DISPLAY_EVERY == 0:
            print("Epoch: {:04d}, cost: {:.9f}".format(epoch+1, avg_cost))

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    accuracy_num = accuracy.eval({X: X_test, y: y_test})
    print("Accuracy: {}".format(accuracy_num))

Epoch: 0001, cost: 13.675855044
Epoch: 0002, cost: 7.500029008
Epoch: 0003, cost: 1.768195194
Epoch: 0004, cost: -3.504271288
Epoch: 0005, cost: -8.303980984
Epoch: 0006, cost: -12.719132734
Epoch: 0007, cost: -16.787406519
Epoch: 0008, cost: -20.574495623
Epoch: 0009, cost: -24.178288713
Epoch: 0010, cost: -27.672805129
Optimization Finished!
Accuracy: 0.20600000023841858
