# Training and Testing an MDRNN on Sample Data

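This notebook trains and tests a mixture density recurrent neural network (MDRNN) on sequences of (time delta, position) samples, using either generated toy data or a recorded human performance.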

In [1]:
%matplotlib inline
import random
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

from context import *
import keras

Using TensorFlow backend.


In [2]:
# Model hyperparameters
SEQ_LEN = 100  # time steps per training sequence (seq_len of the training model)
SEQ_STEP = 1  # stride used when slicing the corpus into examples
HIDDEN_UNITS = 32  # hidden_units passed to the model builder
N_LAYERS = 2  # layer count recorded in the checkpoint file name
NUMBER_MIXTURES = 5  # num_mixtures passed to the model builder
TIME_DIST = True  # time_dist flag for the training model

# Training hyperparameters
BATCH_SIZE = 64
EPOCHS = 100
VAL_SPLIT = 0.10  # fraction handed to model.fit as validation_split

# Reproducibility: seed Python's RNG first, then NumPy's, with the same value.
SEED = 2345
for _seed_rng in (random.seed, np.random.seed):
    _seed_rng(SEED)

In [3]:
# Fake data: build a synthetic toy corpus with the project's generator.
# The call also prints the sample count and summary statistics for the
# generated columns `t` and `x`.
# NOTE(review): a later cell reassigns `corpus` from the human-performance CSV,
# so this toy corpus is only what gets trained on if that cell is skipped.
corpus = empi_mdrnn.generate_data()

Generating 50000 toy data samples.
                  t             x
count  50000.000000  50000.000000
mean       0.099998      0.000130
std        0.007111      0.709363
min        0.000100     -1.183651
25%        0.095219     -0.704799
50%        0.099997      0.000395
75%        0.104798      0.706387
max        0.129144      1.163638


## Test Training

In [None]:
# Import Human Data CSV and create dt
# Loads one recorded performance and derives the two-column corpus
# (t = seconds since the previous sample, x = scaled value) used for training.
perf_df = pd.read_csv('../data/2018-01-25T14-04-35-rnnbox.csv', header=0, index_col=0, parse_dates=['date'])

# Copy the timestamp index into two working columns: 'time' becomes the
# per-sample delta, 'seconds' becomes the elapsed time since the first kept row.
perf_df['time'] = perf_df.index
perf_df['seconds'] = perf_df.index
perf_df['time'] = perf_df['time'].diff().dt.total_seconds()

# The first row has no predecessor, so its delta is NaN and it is dropped here.
perf_df = perf_df.dropna()

# Scale the raw value by 1/255 (presumably an 8-bit reading mapped to [0, 1] - confirm).
perf_df['value'] = perf_df['value'] / 255.0

corpus_df = pd.DataFrame({'t': perf_df['time'], 'x': perf_df['value']})
corpus = np.array(corpus_df)
print("Shape of corpus array:", corpus.shape)
# A bare expression in the middle of a cell is discarded, so display it explicitly.
display(corpus_df.describe())

# Use .iloc[0] for the first row: the frame is indexed by timestamp, so an
# integer key would rely on the deprecated positional fallback.
perf_df['seconds'] = (perf_df['seconds'] - perf_df['seconds'].iloc[0]).dt.total_seconds()
display(perf_df.describe())

In [None]:
# Turn corpus into training examples.
# Each slice holds SEQ_LEN + 1 consecutive samples; the overlapping-format
# helper then splits the slices into an (input, target) pair.
slices = empi_mdrnn.slice_sequence_examples(corpus, SEQ_LEN+1, step_size=SEQ_STEP)

# Convert both halves to arrays and apply the project's scale factor in one pass.
X, y = (
    np.array(half) * empi_mdrnn.SCALE_FACTOR
    for half in empi_mdrnn.seq_to_overlapping_format(slices)
)

print(
    "Number of training examples:",
    "X: {}".format(X.shape),
    "y: {}".format(y.shape),
    sep="\n",
)

In [None]:
# Setup Training Model
# The layer count comes from N_LAYERS instead of a literal so the network
# depth always agrees with the "-layers<N>" tag used in checkpoint file names.
model = empi_mdrnn.build_model(
    seq_len=SEQ_LEN,
    hidden_units=HIDDEN_UNITS,
    num_mixtures=NUMBER_MIXTURES,
    layers=N_LAYERS,
    time_dist=TIME_DIST,
    inference=False,
    compile_model=True,
    print_summary=True,
)

In [None]:
# Setup callbacks
# model_path encodes the architecture and data scale; it prefixes every
# checkpoint file, the TensorBoard log directory and the final saved model.
model_path = (
    "empi_mdrnn"
    + "-layers" + str(N_LAYERS)
    + "-units" + str(HIDDEN_UNITS)
    + "-mixtures" + str(NUMBER_MIXTURES)
    + "-scale" + str(empi_mdrnn.SCALE_FACTOR)
)
# Keras fills in {epoch} and {val_loss} each time a checkpoint is written.
filepath = model_path + "-E{epoch:02d}-VL{val_loss:.2f}.hdf5"
# save_best_only with mode='min': only write when val_loss improves.
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
terminateOnNaN = keras.callbacks.TerminateOnNaN()
tboard = keras.callbacks.TensorBoard(log_dir='./logs/'+model_path, histogram_freq=2, batch_size=32, write_graph=True, update_freq='epoch')

# Train
history = model.fit(X, y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=VAL_SPLIT, callbacks=[checkpoint, terminateOnNaN, tboard])

# Save final Model
# Use the model_path variable (not the literal string 'model_path') so the
# final HDF5 file carries the same descriptive prefix as the checkpoints.
model.save(model_path + '-final.hdf5')  # creates a HDF5 file of the model

# Test Running Network

In [None]:
# Checkpoint whose weights are loaded into the decoder.
# Alternative checkpoint: "empi_mdrnn-layers2-units128-mixtures5-scale10-E55-VL-0.71.hdf5"
model_file = "../models-human/empi_mdrnn-layers2-units128-mixtures5-scale10-E84-VL-3.68.hdf5"

# Build a single-step inference network (seq_len=1, time_dist=False,
# inference=True) and load the trained weights into it.
# The layer count comes from N_LAYERS so it stays in step with the config cell.
# NOTE(review): the file name above is tagged "units128" while HIDDEN_UNITS is
# 32 in the config cell; if the tag is accurate, load_weights would presumably
# fail on a shape mismatch - confirm which value is intended.
decoder = empi_mdrnn.build_model(
    seq_len=1,
    hidden_units=HIDDEN_UNITS,
    num_mixtures=NUMBER_MIXTURES,
    layers=N_LAYERS,
    time_dist=False,
    inference=True,
    compile_model=False,
    print_summary=True,
)
decoder.load_weights(model_file)

In [None]:
# Sampling settings for one generated performance.
num_steps = 1500  # handed to generate_performance as steps_limit
mixture_temp = 1.5  # handed to generate_performance as temp
sigma_temp = 0.01  # handed to generate_performance as sigma_temp

# Clear the decoder's recurrent state before generating.
decoder.reset_states()

# Random seed sample: first element drawn from [0.01, 0.015), second from [0, 1).
t = np.array([0.01 + np.random.rand() * 0.005, np.random.rand()])
p = empi_mdrnn.generate_performance(
    decoder,
    NUMBER_MIXTURES,
    t,
    temp=mixture_temp,
    sigma_temp=sigma_temp,
    steps_limit=num_steps,
)

# Rows of p.T become the 't' and 'x' columns; 'time' is the running sum of 't'.
# NOTE: this rebinds perf_df, replacing the human-data frame loaded earlier.
perf_df = pd.DataFrame({'t': p.T[0], 'x': p.T[1]})
perf_df['time'] = perf_df['t'].cumsum()

plt.plot(perf_df['time'], perf_df['x'], '.r-')
plt.show()
print(perf_df.describe())

In [None]:
# Generate slightly nicer plot.
# Wide red line plot of x against cumulative time, written out as PDF and PNG.
ax = perf_df.plot(
    x='time',
    y='x',
    kind="line",
    color='r',
    figsize=(15,4),
    legend=False,
)
ax.set(xlabel="seconds", ylabel="position")
for out_path in ('human_data_output.pdf', 'human_data_output.png'):
    ax.get_figure().savefig(out_path, dpi=300, bbox_inches="tight")

In [None]:
## Investigate Output
# Plot fixed-length excerpts of the generated performance, one figure each.
# Generation above is requested with a steps_limit, so some of these start
# offsets may lie past the end of perf_df; such windows are reported and
# skipped instead of producing an empty figure.
window = 100
for n in [1000,2000,3000,4000,5000,6000]:
    excerpt = perf_df.iloc[n:n+window]  # positional row slice
    if excerpt.empty:
        print("Window:", str(n), 'to', str(n+window), "skipped: only", len(perf_df), "rows available")
        continue
    print("Window:", str(n),'to',str(n+window))
    plt.plot(excerpt['time'], excerpt['x'], '.r-')
    plt.show()