In [None]:
%load_ext autoreload
%autoreload 2

# set a seed to control all randomness: seed both TensorFlow's and NumPy's global
# random number generators with the same fixed value
# NOTE(review): `tensorflow.set_random_seed` is presumably the TF 1.x spelling - confirm
#   the TensorFlow version this notebook targets
# NOTE: the name `seed` is rebound to an integer by the sequence-generation cell near
#   the end of the notebook, so this whole cell (including the import) must be re-run
#   before seed() can be called again
from tensorflow import set_random_seed
from numpy.random import seed

set_random_seed(1)
seed(1)

# Examine Inputs

Let's load some local data and examine its contents. We'll play a small animation that contains the first few temporal frames of the input data below.

In [None]:
import numpy as np

# X.shape = n_body_parts, n_time_intervals, n_dimensions (3)
# each dimension's features are centered 0:1
# (the path is relative to the notebook's working directory)
X = np.load('data/npy/dance.npy')

# store the shapes of X's values: number of vertices (body parts), number of time
# slices, and number of coordinate dimensions
n_vertices, n_time, n_dims = X.shape

# load data from pose estimation output
# (disabled alternative input; it swaps the first two axes after loading)
#X = np.swapaxes(np.load('data/npy/X.npy'), 0, 1)

# labels[i] is a label for the ith body_part
labels = ['AnnaC7.position', 'AnnaRFSH.position', 'AnnaRSHN.position', 'AnnaLIWR.position', 'AnnaRBWT.position', 'AnnaLOHAND.position', 'AnnaRFRM.position', 'AnnaT10.position', 'AnnaLMT5.position', 'AnnaLKNI.position', 'AnnaRIEL.position', 'AnnaRBHD.position', 'AnnaSolvingHips.position', 'AnnaLBHD.position', 'AnnaRIHAND.position', 'AnnaLBWT.position', 'AnnaRUPA.position', 'AnnaLFRM.position', 'AnnaRFHD.position', 'AnnaRIWR.position', 'AnnaLUPA.position', 'AnnaLFHD.position', 'AnnaRKNE.position', 'AnnaLFWT.position', 'AnnaLSHN.position', 'AnnaRTOE.position', 'AnnaLHEL.position', 'AnnaRKNI.position', 'AnnaCLAV.position', 'AnnaRHEL.position', 'AnnaMBWT.position', 'AnnaRMT5.position', 'AnnaARIEL.position', 'AnnaRBSH.position', 'AnnaLIHAND.position', 'AnnaLMT1.position', 'AnnaLTOE.position', 'AnnaRFWT.position', 'AnnaLabelingHips.position', 'AnnaMFWT.position', 'AnnaRANK.position', 'AnnaLOWR.position', 'AnnaLIEL.position', 'AnnaROHAND.position', 'AnnaRMT1.position', 'AnnaRTHI.position', 'AnnaLBSH.position', 'AnnaRELB.position', 'AnnaROWR.position', 'AnnaLANK.position', 'AnnaSTRN.position', 'AnnaLELB.position', 'AnnaLTHI.position', 'AnnaLFSH.position', 'AnnaLKNE.position']

# sanity check: along every coordinate axis the values of X must span exactly [0, 1]
for axis in range(int(X.shape[2])):
  axis_values = X[:, :, axis]
  assert np.min(axis_values) == 0.0
  assert np.max(axis_values) == 1.0

In [None]:
from math import floor

# define functions to flatten and unflatten data

def flatten(df, run_tests=True):
  '''
  Collapse a (vertex, time, dimension) array into one row of coordinates per time slice.

  @param numpy.ndarray df: an array with the following three axes:
    df.shape[0] = the index of a vertex
    df.shape[1] = the index of a time stamp
    df.shape[2] = the index of a dimension (x, y, z)
    So df[1][0][2] is the value for the 1st vertex (0-based) at time 0 in dimension 2 (z).
  @param bool run_tests: if True, check that the input has three axes and that every
    value landed in the expected column of the result.
  @returns numpy.ndarray: an array with the following two axes:
    flattened.shape[0] = time index
    flattened.shape[1] = (vertex_index * n_dimensions) + dimension_index
    i.e. each row reads [x0, y0, z0, x1, y1, z1, ... xn-1, yn-1, zn-1], so with three
    dimensions flattened[1][3] is the x value of the 1st vertex (0-based) at time 1.
  '''
  df = np.asarray(df)
  if run_tests:
    assert len(df.shape) == 3

  n_vertices, n_time, n_dims = df.shape

  # move time to the leading axis, then merge (vertex, dimension) into a single C-ordered
  # axis; the argument itself is reshaped, so any array of the documented layout works
  flattened = df.swapaxes(0, 1).reshape( (n_time, n_vertices * n_dims), order='C' )

  if run_tests: # switch to false to skip tests
    # vectorised form of: flattened[t][v*n_dims + d] == df[v][t][d] for every v, t, d
    assert np.array_equal(flattened.reshape(n_time, n_vertices, n_dims), df.swapaxes(0, 1))

  return flattened

def unflatten(df, run_tests=True, start_time_index=0):
  '''
  Inverse of `flatten`: rebuild the (vertex, time, dimension) layout from per-time rows.

  The number of vertices and the number of dimensions are read from the global array X.

  @param numpy.ndarray df: an array with the following two axes:
    df.shape[0] = time index
    df.shape[1] = (vertex_index * n_dimensions) + dimension_index
  @param bool run_tests: if True, check that df has the expected shape and that the
    result equals the matching time window of X. Set to False for values that are not
    copies of X (e.g. model predictions).
  @param int start_time_index: position within X's time axis of the first row of df;
    only used by the run_tests comparison.
  @returns numpy.ndarray: a float64 array with the following three axes:
    unflattened.shape[0] = the index of a vertex
    unflattened.shape[1] = the index of a time stamp
    unflattened.shape[2] = the index of a dimension (x, y, z)
    So with three dimensions df[2][4] == unflattened[1][2][1]
  @raises ValueError: if df cannot be reshaped to (time, n_vertices, n_dimensions)
  '''
  n_vertices, n_dims = X.shape[0], X.shape[2]

  if run_tests:
    assert (len(df.shape) == 2) and (df.shape[1] == n_vertices * n_dims)

  # each row of df reads [x0, y0, z0, x1, ...]: split the columns into (vertex, dimension)
  # pairs, then move the vertex axis to the front. The result is always a fresh float64
  # array, independent of df's dtype and memory.
  n_rows = df.shape[0]
  unflattened = np.array(df, dtype=np.float64).reshape(n_rows, n_vertices, n_dims).swapaxes(0, 1).copy()

  if run_tests: # set to false to skip tests
    # the rebuilt block must match X over the time window that df was taken from
    start = int(start_time_index)
    expected = X[:, start:start + n_rows, :]
    assert expected.shape == unflattened.shape and np.all(unflattened == expected)

  return unflattened

In [None]:
# flatten the dataset into one row per time slice, then rebuild the 3D layout from it;
# both calls use the default run_tests=True, so their built-in checks run here
flat = flatten(X)
unflat = unflatten(flat)

In [None]:
import mpl_toolkits.mplot3d.axes3d as p3
from mpl_toolkits.mplot3d.art3d import juggle_axes
import matplotlib.pyplot as plt
from IPython.display import HTML
from matplotlib import animation
import matplotlib

# raise matplotlib's cap on the size of animations embedded in the notebook
# (`animation.embed_limit` is a size in MB, not a frame count) to an effectively
# unlimited value so long animations are embedded in full
matplotlib.rcParams['animation.embed_limit'] = 2**128

def update_points(time, points, df):
  '''
  Per-frame animation callback: moves the scatter markers in `points` to the vertex
  positions stored in `df` for one time slice.
  @param int time: the index of the time slice within `df` to display
  @param mpl_toolkits.mplot3d.art3d.Path3DCollection points: the scatter collection
    whose internal 3D offsets are overwritten in place
  @param numpy.ndarray df: a numpy array with the following three axes:
    df.shape[0] = n_vertices
    df.shape[1] = n_time_slices
    df.shape[2] = n_dimensions
  '''
  xs = df[:, time, 0]
  ys = df[:, time, 1]
  zs = df[:, time, 2]
  points._offsets3d = juggle_axes(xs, ys, zs, 'z')

def get_plot(df, axis_min=0, axis_max=1, frames=200, speed=45, start_time_index=0, run_tests=True):
  '''
  Build a 3D scatter animation of the vertices in `df` and return it as an HTML/JS string.
  Accepts numpy arrays in either of two shapes.
  @param numpy.ndarray df: a numpy array with either of the following two shapes:
    Possibility one:
      df.shape[0] = n_vertices
      df.shape[1] = n_time_slices
      df.shape[2] = n_dimensions
    Possibility two:
      df.shape[0] = n_time_slices
      df.shape[1] = [x0, y0, z0, x1, y1, z1, ... xn-1, yn-1, zn-1]
    If the latter is received, we "unflatten" the df into the three dimensional variant
  @param int axis_min: the minimum value of each axis scale
  @param int axis_max: the maximum value of the x and y axis scales; the z axis runs to
    1.5 * axis_max
  @param int frames: the number of time slices to animate. Integer values larger than
    the number of time slices in df are reduced to that number.
  @param int speed: the delay between consecutive frames in milliseconds. Decrease to
    boost fps.
  @param int start_time_index: the index position of the first frame in df within X. In other
    words, if df starts at the nth time frame from X, start_time_index = n.
  @param bool run_tests: boolean indicating whether we'll run the data validation
    tests, should we need to unflatten the array. Should be set to False if we're passing
    in predicted values, as they'll differ from X values.
  @returns str: the animation serialised by `to_jshtml()`, ready to be wrapped in
    IPython.display.HTML
  '''
  if len(df.shape) == 2:
    df = unflatten(df, start_time_index=start_time_index, run_tests=run_tests)
  # update_points reads df[:, time, :], so asking for more frames than df holds would
  # raise an IndexError part-way through rendering; cap integer frame counts instead
  if np.isscalar(frames):
    frames = min(int(frames), df.shape[1])
  fig = plt.figure()
  # add_subplot attaches the 3D axes to the figure; constructing p3.Axes3D(fig) directly
  # is not attached automatically on recent matplotlib releases and would draw nothing
  ax = fig.add_subplot(111, projection='3d')
  ax.set_xlim(axis_min, axis_max)
  ax.set_ylim(axis_min, axis_max)
  ax.set_zlim(axis_min, axis_max*1.5)
  # initial marker positions come from the first time slice
  points = ax.scatter(df[:,0,0], df[:,0,1], df[:,0,2], depthshade=False) # x,y,z vals
  return animation.FuncAnimation(fig,
    update_points,
    frames,
    interval=speed,
    fargs=(points, df),
    blit=False
  ).to_jshtml()

# animate the first 150 time slices of the round-tripped input data
HTML(get_plot(unflat, frames=150))

In [None]:
# Sliding-window training set built from `flat`:
#   train_x has shape: n_samples, look_back, n_vertices*3
#   train_y has shape: n_samples, n_vertices*3
look_back = 10 # number of previous time slices to use to predict the time positions at time `i`

# each target index is a time slice; targets start at idx `look_back` so that a full window
# of `look_back` earlier slices exists, and stop before n_time-1
# NOTE(review): the range end is exclusive, so the final time slice (index n_time-1) is
#   never used as a target - confirm this is intended
target_indices = range(look_back, n_time-1, 1)

# inputs: the `look_back` slices immediately preceding each target
train_x = np.array([ flat[t-look_back:t, :] for t in target_indices ])
# outputs: the target slice itself, kept 2D so the rows concatenate into (n_samples, features)
train_y = np.concatenate([ flat[t:t+1] for t in target_indices ], axis=0)

train_x.shape, train_y.shape

In [None]:
# visually confirm that the train_y is a valid dance sequence
# (train_y's first row is time slice `look_back` of X, hence start_time_index=look_back
# for the validation that unflatten runs against X)
HTML(get_plot(train_y, frames=20, start_time_index=look_back))

In [None]:
# visually confirm the train_x data is properly formatted - (only if look_back is 1)
# With look_back == 1 each sample holds a single time slice, so squeezing train_x yields
# the (time, features) layout that get_plot accepts.
if look_back == 1:
  # a notebook only auto-renders the last top-level expression of a cell; an expression
  # nested inside `if` is silently discarded, so the animation is displayed explicitly
  from IPython.display import display
  display(HTML(get_plot(train_x.squeeze(), frames=20, start_time_index=look_back-1)))

# Build the Model

In [None]:
from utils.mdn import MDN
from keras.models import Sequential, Model
from keras.layers import Dense, LSTM, Dropout, Activation, CuDNNLSTM
from keras.layers.advanced_activations import LeakyReLU
from keras.losses import mean_squared_error
from keras.optimizers import Adam
from keras import backend as K
import keras, os

# config
cells = [32, 32, 32, 32] # units per layer: cells[0..2] size the three LSTM layers, cells[3] sizes the Dense layer after them
output_dims = int(X.shape[0]) * int(X.shape[2]) # number of coordinate values to be predicted by each gaussian model
input_shape = (look_back, output_dims) # shape of each input feature
use_mdn = True # whether to use the MDN final layer or not
n_mixes = 2 # number of gaussian models to build if use_mdn == True

# optimizer params
lr = 0.00001 # the learning rate of the model
optimizer = Adam(lr=lr, clipvalue=0.5)

# use tensorflow backend
# NOTE(review): keras has already been imported by the import lines of this cell, so this
#   assignment presumably comes too late to influence backend selection - confirm
os.environ['KERAS_BACKEND'] = 'tensorflow'

# determine the LSTM cells to use (hinges on whether GPU is available to keras)
# NOTE(review): `_get_available_gpus` is a private helper of the keras tensorflow backend
#   and may not exist in other keras versions - confirm against the pinned version
gpus = K.tensorflow_backend._get_available_gpus()
LSTM_UNIT = CuDNNLSTM if len(gpus) > 0 else LSTM
print('GPUs found:', gpus)

# build the model: two sequence-returning LSTM layers, a third LSTM layer that returns
# only its final output, then a Dense layer with no activation argument
model = Sequential()
model.add(LSTM_UNIT(cells[0], return_sequences=True, input_shape=input_shape, ))
model.add(LSTM_UNIT(cells[1], return_sequences=True, ))
model.add(LSTM_UNIT(cells[2], ))
model.add(Dense(cells[3]), )

# output head: either the mixture density layer trained with its own loss function, or a
# plain Dense regression head trained with mean squared error
# NOTE(review): 'accuracy' is a classification metric; for coordinate regression the
#   reported value is presumably not meaningful - confirm whether it should be dropped
if use_mdn:
  mdn = MDN(output_dims, n_mixes)
  model.add(mdn)
  model.compile(loss=mdn.get_loss_func(), optimizer=optimizer, metrics=['accuracy'])
else:
  # NOTE(review): tanh produces values in [-1, 1] while the inputs are asserted to span
  #   [0, 1] - confirm the activation choice
  model.add(Dense(output_dims, activation='tanh'))
  model.compile(loss=mean_squared_error, optimizer=optimizer, metrics=['accuracy'])

model.summary()

In [None]:
# check untrained (baseline) accuracy
# evaluates the freshly compiled model on only the first `samples` training examples
samples = 1
model.evaluate(train_x[:samples], train_y[:samples])

# Train the Model

In [None]:
from keras.callbacks import TerminateOnNaN
from livelossplot import PlotLossesKeras
from datetime import datetime
import time, keras, os, json
  
class Logger(keras.callbacks.Callback):
  '''
  Save the model and its weights every `self.save_frequency` epochs.

  On construction the callback creates the `snapshots` directory if needed and writes
  the run's hyperparameters (read from the notebook globals) to
  `snapshots/<date>-config.json`. During training each snapshot is written to
  `snapshots/<date>-<epoch>.model` and `snapshots/<date>-<epoch>.weights`, where <epoch>
  is the number of completed epochs, so successive snapshots do not overwrite each other.
  '''
  def __init__(self):
    # let the keras base class initialise the attributes it manages
    super(Logger, self).__init__()
    self.epoch = 0 # stores number of completed epochs
    self.save_frequency = 1 # configures how often we'll save the model and weights
    # timestamp prefix shared by every file this run writes
    # NOTE(review): the ':' in the time portion is not a valid filename character on
    #   Windows - confirm the target platform
    self.date = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M')
    if not os.path.exists('snapshots'): os.makedirs('snapshots')
    self.save_config()

  def save_config(self):
    '''Write the hyperparameters of the current run to a JSON file next to the snapshots'''
    with open('snapshots/' + self.date + '-config.json', 'w') as out:
      json.dump({
        'look_back': look_back,
        'cells': cells,
        'use_mdn': use_mdn,
        'n_mixes': n_mixes,
        'lr': lr,
      }, out)

  def on_batch_end(self, batch, logs=None, shape=train_x.shape):
    '''
    Detect the end of an epoch and snapshot the model when one is due.
    @param int batch: 0-based index of the batch that just finished
    @param dict logs: metrics supplied by keras (unused)
    @param tuple shape: shape of the training inputs; shape[0] is compared with the batch
      count, so the end of an epoch is only detected when every batch holds exactly one
      sample (as in the model.fit call below, which uses batch_size=1)
    '''
    if (batch+1 == shape[0]): # batch value is batch index, which is 0-based
      self.epoch += 1
      if self.epoch % self.save_frequency == 0:
        # name the snapshot after the epoch count: the batch index is identical at the
        # end of every epoch and would make each save overwrite the previous one
        path = 'snapshots/' + self.date + '-' + str(self.epoch)
        # self.model is the model keras attaches to the callback when training starts
        self.model.save(path + '.model')
        self.model.save_weights(path + '.weights')

# (disabled) manual override of the optimizer's learning rate
#K.set_value(optimizer.lr, 0.00001)
# Logger snapshots the model during training; TerminateOnNaN is the keras built-in callback
callbacks = [Logger(), TerminateOnNaN()]
# batch_size=1 matters here: Logger recognises the end of an epoch by comparing the batch
# index with the number of training samples. shuffle=False keeps the samples in their
# original temporal order.
history = model.fit(train_x, train_y, epochs=500, batch_size=1, shuffle=False, callbacks=callbacks)

In [None]:
# grok the layer weights: for every weight array of every layer, print its smallest
# value, its largest value and its shape
for layer in model.layers:
  for block in layer.get_weights():
    lowest, highest = np.min(block), np.max(block)
    print(lowest, highest, block.shape)

In [None]:
# manual checkpoint switches: `save` writes the current model and weights to `model_path`,
# `load` replaces the current weights with the ones stored at `model_path`
# NOTE(review): with load = True the weights trained above are overwritten by
#   snapshots/latest.weights. No visible cell writes that file unless save is True (the
#   Logger callback writes date-prefixed paths), so it presumably has to exist from an
#   earlier run - confirm, otherwise this cell fails on a fresh Run All
save = False
load = True
model_path = 'snapshots/latest'

if save:
  model.save(model_path + '.model')
  model.save_weights(model_path + '.weights')
  
if load:
  model.load_weights(model_path + '.weights')

# Visualize Results

First, let's analyze how well the model learned the input sequence.

### Assess Model Performance on Inputs

In [None]:
# visualize how well the model learned the input sequence
# n frames of time slices to generate; capped at the number of training samples so the
# loop below never indexes past the end of the predictions
n_frames = min(500, len(train_x))
frames = []

test_x = train_x[:n_frames] # data to pass into forward prop through the model
y_pred = model.predict(test_x) # output with shape (n_frames, (output_dims+2) * n_mixes )

# partition out the mus, sigs, and mixture weights
# each prediction is laid out as: n_mixes*output_dims means, then n_mixes sigmas, then
# n_mixes mixture weights
for i in range(n_frames):
  y = y_pred[i].squeeze()
  mus = y[:n_mixes*output_dims]
  sigs = y[n_mixes*output_dims:n_mixes*output_dims + n_mixes]
  alphas = y[-n_mixes:]

  # find the most likely distribution - then disregard that number and use the first Gaussian :)
  # (the argmax is kept as a toggle; delete the override below to use it)
  alpha_idx = np.argmax(alphas)
  alpha_idx = 0

  # pull out the mus that correspond to the selected alpha index
  positions = mus[alpha_idx * output_dims:(alpha_idx+1) * output_dims]
  frames.append(positions)

# stack the per-frame means into shape (n_frames, output_dims), the flattened layout
# that get_plot accepts
frames = np.array(frames)

In [None]:
# animate the predicted frames. Nothing is skipped here: the first prediction already
# corresponds to time slice `look_back` of X, because each input window consumes the
# `look_back` slices before its target. run_tests=False because predictions differ from X.
HTML(get_plot(frames, frames=n_frames, run_tests=False))

### Assess Model's Ability to Generate New Sequences

In [None]:
def softmax(x, axis=None):
  """
  Compute numerically stable softmax values for the scores in x.

  @param array_like x: the scores to normalise
  @param axis: the axis along which each set of scores is normalised. The default
    (None) treats the whole of x as a single set, so the entire result sums to 1.
  @returns numpy.ndarray: float64 values with the same shape as x that sum to 1 along
    `axis`. The arithmetic is done in float64 even for float32 input (e.g. model
    predictions) so the result passes the sum-to-one check of numpy.random.choice.
  """
  scores = np.asarray(x, dtype=np.float64)
  # subtracting the maximum leaves the result unchanged but keeps exp() from overflowing
  shifted = scores - np.max(scores, axis=axis, keepdims=True)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=axis, keepdims=True)

# Autoregressive generation: start from one randomly chosen training window, predict the
# next frame, append it to the window, drop the oldest frame, and repeat.
n_frames = 250 # n frames of time slices to generate
frames = []

# NOTE: this rebinds `seed`, which the first cell imported from numpy.random as a function
# NOTE(review): randint's upper bound is exclusive, so the last index (len(train_x)-1)
#   is never drawn - confirm this is intended
seed = np.random.randint(0, len(train_x)-1)
x = np.expand_dims(train_x[seed], axis=0) # add a leading batch axis of size 1
print(' * seeding with', seed)

for i in range(n_frames):
  y = model.predict(x).squeeze()
  # split the prediction into means, sigmas and mixture weights
  # (presumably n_mixes*output_dims means, n_mixes sigmas, n_mixes weights - confirm
  # against utils.mdn)
  mus = y[:n_mixes*output_dims]
  sigs = y[n_mixes*output_dims:-n_mixes]
  alphas = softmax(y[-n_mixes:])
  
  # select the alpha channel to use
  # the sampled index is immediately overridden: the second mixture (index 1) is always used
  alpha_idx = np.random.choice([idx for idx,_ in enumerate(alphas)], p=alphas)
  alpha_idx = 1
  
  # grab the mus and sigs associated with the selected alpha_idx
  frame_mus = mus.ravel()[alpha_idx*output_dims : (alpha_idx+1)*output_dims]
  frame_sig = sigs[alpha_idx] / 100
  
  # now sample from each Gaussian
  # the sampled positions are immediately overridden by the means, so the generated
  # frames contain no sampling noise; the draw still advances numpy's global RNG
  positions = [np.random.normal(loc=m, scale=frame_sig) for m in frame_mus]
  positions = frame_mus
  
  # add these positions to the results
  frames.append(positions)
  
  # pull out a new training example - stack the new result on
  # all values after the first from the bottom-most value in the x's
  # i.e. slide the window: drop its oldest time slice and append the new frame
  start = x[:,1:,:]
  end = np.expand_dims( np.expand_dims(positions, axis=0), axis=0 )
  x = np.concatenate((start, end), axis=1)
  
# stack the generated frames into shape (n_frames, output_dims)
frames = np.array(frames)

In [None]:
# animate the first 100 generated frames; run_tests=False because generated values are
# not copies of X
HTML(get_plot(frames, frames=100, run_tests=False))