# Training and Testing an MDRNN on Human Data

This script trains and tests a mixture density recurrent neural network (MDRNN) on a 10-minute human-sourced performance.

In [2]:
%matplotlib inline
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import time
from context import *

Using TensorFlow backend.


In [35]:
def transform_log_to_sequence_example(logfile, dimension):
    """Convert a performance log file into an array of time-delta/value rows.

    The log is read as a header-less CSV whose columns are a timestamp, a
    source label and ``dimension - 1`` data values. Only rows whose source is
    ``'interface'`` are kept, and each kept row's timestamp is replaced by the
    number of seconds elapsed since the previous kept row.

    Args:
        logfile: Path of the log file to read.
        dimension: Number of output columns: one time-delta column plus
            ``dimension - 1`` data columns.

    Returns:
        A 2-D numpy array with columns ``[t, x0, ..., x{dimension-2}]``. The
        first 'interface' row is dropped (it has no predecessor to take a
        time difference from), as is any row containing a missing value.
    """
    import os  # local import so the function does not rely on notebook-level names

    data_names = ['x' + str(i) for i in range(dimension - 1)]
    column_names = ['date', 'source'] + data_names
    perf_df = pd.read_csv(logfile,
                          header=None, parse_dates=True,
                          index_col=0, names=column_names)
    # Files whose *name* contains "rnnbox" get x0 divided by 255. Only the
    # basename is tested so that a directory called e.g. "rnnbox_logs" does not
    # trigger the rescale for every file inside it.
    # NOTE(review): presumably these old files store x0 on a 0-255 scale; only
    # x0 is rescaled, so confirm this is right if dimension > 2.
    if "rnnbox" in os.path.basename(logfile):
        print("Old RNNbox file, dividing by 255")
        perf_df['x0'] = perf_df['x0'] / 255
    # Filter out RNN lines, just keep 'interface'. The explicit copy makes the
    # column assignments below act on an independent frame instead of a slice
    # of the original (avoids pandas' SettingWithCopyWarning).
    perf_df = perf_df[perf_df['source'] == 'interface'].copy()
    # Process times: seconds elapsed since the previous kept row.
    perf_df['t'] = perf_df.index
    perf_df['t'] = perf_df['t'].diff().dt.total_seconds()
    # The first row has no previous row, so its delta is NaN and it is removed.
    perf_df = perf_df.dropna()
    return np.array(perf_df[['t'] + data_names])


# Load up the performances: every file in log_location whose name ends with
# log_file_ending is converted to an array and collected in log_arrays.
log_location = "../rnn_box_data/"
log_file_ending = ".log" #"-" + str(args.dimension) + "d-mdrnn.log"
log_arrays = []

# os.listdir returns entries in arbitrary order; sorting makes the order of
# the performances in the dataset reproducible between runs and machines.
for local_file in sorted(os.listdir(log_location)):
    if not local_file.endswith(log_file_ending):
        continue
    print("Processing:", local_file)
    try:
        log = transform_log_to_sequence_example(os.path.join(log_location, local_file), 2)
    except Exception as err:
        # Best-effort: one unreadable log should not abort the whole import,
        # but report why it failed so the problem can be diagnosed.
        print("Processing failed for", local_file, "-", repr(err))
    else:
        log_arrays.append(log)

# Save Performance Data in a compressed numpy file.
dataset_location = 'datasets/'
dataset_filename = 'empi-training-dataset-2d.npz'
dataset_path = "../" + dataset_location + dataset_filename

# Input format is:
# 0. 1. 2. ... n.
# dt x1 x2 ... xn

raw_perfs = []

# Summary statistics accumulated over all loaded performances.
acc = 0  # total number of scalar values
total_time = 0  # named total_time so the imported `time` module is not shadowed
interactions = 0  # total number of rows
for perf in log_arrays:
    acc += perf.shape[0] * perf.shape[1]
    interactions += perf.shape[0]
    total_time += perf[:, 0].sum()  # column 0 holds the time deltas
    raw_perfs.append(perf.astype('float32'))  # dt, x_1, ... , x_n

print("total number of values:", acc)
print("total number of interactions:", interactions)
print("total time represented:", total_time)
print("total number of perfs in raw array:", len(raw_perfs))

# Performances generally have different lengths, so they cannot be stacked
# into one rectangular array. Fill a 1-D object array element by element:
# np.array(list_of_arrays) raises ValueError for ragged input on NumPy >= 1.24.
# Reading the file back therefore needs np.load(..., allow_pickle=True).
perf_array = np.empty(len(raw_perfs), dtype=object)
for i, perf in enumerate(raw_perfs):
    perf_array[i] = perf
raw_perfs = perf_array

np.savez_compressed(dataset_path, perfs=raw_perfs)
print("done saving:", dataset_path)

Processing: 2019-01-24T08-58-07-rnnbox.log
Old RNNbox file, dividing by 255
Processing: 2019-01-24T08-40-36-rnnbox.log
Old RNNbox file, dividing by 255
Processing: 2019-01-24T09-02-01-rnnbox.log
Old RNNbox file, dividing by 255
Processing: 2019-01-24T08-43-02-rnnbox.log
Old RNNbox file, dividing by 255
Processing: 2019-01-24T08-39-29-rnnbox.log
Old RNNbox file, dividing by 255
Processing: 2018-01-25T14-04-35-rnnbox.log
Old RNNbox file, dividing by 255
Processing: 2019-01-24T08-48-51-rnnbox.log
Old RNNbox file, dividing by 255
total number of values: 93828
total number of interactions: 46914
total time represented: 2395.7481309999994
total number of perfs in raw array: 7
done saving: datasets/empi-training-dataset-2d.npz
