In [10]:
# Some boilerplate setup

import time
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

EPS = 1e-8 # epsilon constant for numeric stability

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Name of the agent/environment whose expert data is used in this notebook.
# The loading cell formats this value into "../expert_data/{}.pkl", so it
# must be given WITHOUT the ".pkl" extension.
#
# Available expert data files:
#  Ant-v1.pkl
#  HalfCheetah-v1.pkl
#  Hopper-v1.pkl
#  Humanoid-v1.pkl
#  Reacher-v1.pkl
#  Walker2d-v1.pkl

ENV_NAME = "Ant-v1"

## Expert data loading and preprocessing

In [13]:
# Load the expert data from the pickle saved in 'homework1/expert_data'.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only point this at expert data files that you generated or otherwise trust.
with open("../expert_data/{}.pkl".format(ENV_NAME), "rb") as f:
    expert_data = pickle.load(f)

observations, actions = expert_data["observations"], expert_data["actions"]

# Every observation must be paired with exactly one expert action; catch a
# mismatched file here rather than in the split or training code.
assert observations.shape[0] == actions.shape[0], \
    'observations and actions disagree on the number of samples'

# As a sanity check, print out the shapes of the loaded arrays.
print('observations shape: ', observations.shape)
print('actions shape: ', actions.shape)

# Total number of (observation, action) pairs available for splitting.
num_total = observations.shape[0]

observations shape:  (19941, 111)
actions shape:  (19941, 1, 8)


## Train, validation, test split

In [65]:
# Split the data into train, val, and test sets. Also, create a
# small dataset for development purposes.
num_train = 17000
num_val   = 2000
num_test  = 1000
num_dev   = 500

# Fixed seed so that the splits (and anything fitted on them) are
# reproducible after a kernel restart.
SPLIT_SEED = 0

# Fail early, with a readable message, when the requested split sizes do
# not fit into the loaded dataset.
num_requested = num_train + num_val + num_test
num_available = observations.shape[0]
assert num_requested <= num_available, (
    'requested {} samples for train + val + test, but only {} are available'
    .format(num_requested, num_available))
assert num_dev <= num_train, 'the dev set is drawn from the training set'

# Split the data into test set and temporary set, which will be
# split into training and validation sets
X_tmp, X_test, y_tmp, y_test = train_test_split(observations,
                                                actions,
                                                test_size=num_test,
                                                random_state=SPLIT_SEED)

# Split X_tmp into training and validation. Both sizes are passed
# explicitly so the validation set has exactly num_val samples even when
# the dataset holds more than num_train + num_val + num_test samples.
X_train, X_val, y_train, y_val = train_test_split(X_tmp,
                                                  y_tmp,
                                                  train_size=num_train,
                                                  test_size=num_val,
                                                  random_state=SPLIT_SEED)

# Create the development set, which is just a small subset of
# the training set. Indexing with an integer array copies the rows, so
# X_dev / y_dev do not share memory with X_train / y_train.
dev_rng = np.random.RandomState(SPLIT_SEED)
mask = dev_rng.choice(num_train, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

# Report the shape of every split and verify it has the requested size.
for split_name, split_X, split_y, split_size in (
        ('Train', X_train, y_train, num_train),
        ('Validation', X_val, y_val, num_val),
        ('Test', X_test, y_test, num_test),
        ('Dev', X_dev, y_dev, num_dev)):
    print('{} data shape: '.format(split_name), split_X.shape)
    assert split_X.shape[0] == split_size
    print('{} labels shape: '.format(split_name), split_y.shape)
    assert split_y.shape[0] == split_size

Train data shape:  (17000, 111)
Train labels shape:  (17000, 1, 8)
Validation data shape:  (2000, 111)
Validation labels shape:  (2000, 1, 8)
Test data shape:  (1000, 111)
Test labels shape:  (1000, 1, 8)
Dev data shape:  (500, 111)
Dev labels shape:  (500, 1, 8)


### Preprocessing: zero mean and unit variance

In [66]:
# Preprocessing: standardize every observation feature to zero mean and
# unit variance.
# First: compute the per-feature mean and standard deviation from the
# training set only, so no statistics leak in from val/test. The NaN-aware
# variants are used for BOTH statistics so that a NaN entry cannot turn the
# std of a whole feature column into NaN. EPS keeps the divisor non-zero for
# features that are constant in the training data.
mean = np.nanmean(X_train, axis=0)
std = np.nanstd(X_train, axis=0) + EPS

print(mean, std)

# Second: subtract the training mean from, and divide by the training std,
# every dataset. The update is done in place, so re-running this cell
# without re-running the split cell would standardize the data a second time.
for X in (X_train, X_val, X_test, X_dev):
    X -= mean
    X /= std

[  5.64472242e-01   9.39038061e-01   9.80446661e-03  -5.89809492e-02
   3.14805490e-01  -5.80722847e-02   6.59134168e-01  -3.14160695e-01
  -5.23022658e-01   1.20765490e-01  -5.22644439e-01   3.58371705e-01
   7.19398046e-01   4.19138175e+00  -2.09986642e-01  -4.71405981e-03
  -5.49128675e-03  -1.36528247e-02   2.06050783e-02  -4.23728663e-03
   8.81252797e-03   2.44797280e-03  -8.96436481e-03  -8.94596010e-03
  -9.69038608e-03   1.24473021e-02   1.65523978e-03   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   9.37919939e-02
  -7.98819630e-02  -7.19467402e-02   5.73737393e-02  -5.14261427e-04
   9.40267173e-02   0.00000000e+00

  # This is added back by InteractiveShellApp.init_path()


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
