In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

In [44]:
data = pd.read_csv("2018-06-21-block-perf1.txt")
data['dt'] = data.time.diff().fillna(0)

In [45]:
data.columns

Index(['time', 'ip', 'message', 'dt'], dtype='object')

In [46]:
data['x'] = data.message.apply((lambda x: int(x.split(' ')[2])))
data['y'] = data.message.apply((lambda x: int(x.split(' ')[3])))
data['z'] = data.message.apply((lambda x: int(x.split(' ')[4])))

In [75]:
cleaned_data = data[['dt', 'x', 'y', 'z']]
cleaned_data.describe()

Unnamed: 0,dt,x,y,z
count,10730.0,10730.0,10730.0,10730.0
mean,0.029337,64.899068,62.779683,55.443989
std,0.08548,32.687723,31.414097,30.857785
min,0.0,0.0,0.0,0.0
25%,0.027402,35.0,37.25,35.0
50%,0.028018,65.0,59.0,49.0
75%,0.028571,93.0,89.0,68.0
max,8.811069,127.0,127.0,127.0


In [76]:
array_version = np.array(data[['x','y','z']])
array_version.shape

(10730, 3)

In [77]:
# Test saving
np.savez('roli-block-session-data.npz', array_version)

In [89]:
# Test loading
with np.load('roli-block-session-data.npz') as data:
    dataset = data['arr_0']
    
dataset.shape

(10730, 3)

In [112]:
# Look at Gesture-RNN version:
# URL: https://github.com/lamtharnhantrakul/GestureRNN-ML4Lightpad/blob/master/data/test_data.txt
# Download (if needed)
import urllib.request
url = 'https://github.com/lamtharnhantrakul/GestureRNN-ML4Lightpad/raw/master/data/test_data.txt'
urllib.request.urlretrieve(url, './gesture-rnn-data.txt')
# Parse the file
grnn_df = pd.read_csv('./gesture-rnn-data.txt', delim_whitespace=True, header=None, lineterminator=';')
grnn_df = grnn_df[[1,2,3]]
grnn_df.columns = ['x','y','z']
# Prove it works
print(grnn_df.head())
# Do some analysis
print(grnn_df.describe())

grnn_array = np.array(grnn_df)
print(grnn_array.shape)
np.savez('grnn-data.npz', grnn_array)
#data['x'] = data.message.apply((lambda x: int(x.split(' ')[2])))
#data['y'] = data.message.apply((lambda x: int(x.split(' ')[3])))
#data['z'] = data.message.apply((lambda x: int(x.split(' ')[4])))

          x         y    z
0  0.461538  0.615385  0.0
1  0.461538  0.615385  0.0
2  0.461538  0.615385  0.0
3  0.461538  0.615385  0.0
4  0.461538  0.615385  0.0
                 x            y            z
count  3805.000000  3805.000000  3805.000000
mean      0.520070     0.480631     0.218433
std       0.297183     0.281762     0.279394
min       0.000000     0.000000     0.000000
25%       0.234676     0.230769     0.000000
50%       0.538462     0.495482     0.113725
75%       0.769231     0.692308     0.305882
max       0.999950     0.999950     1.000000
(3806, 3)


In [113]:
# Test loading
with np.load('grnn-data.npz') as data:
    dataset = data['arr_0']
dataset.shape

(3806, 3)

In [129]:
import keras

# Training Hyperparameters:
SEQ_LEN = 30
BATCH_SIZE = 256
HIDDEN_UNITS = 64
EPOCHS = 30
VAL_SPLIT=0.2

# These settings train for 2.1 epochs which is pretty good!
SEED = 2345  # 2345 seems to be good.
np.random.seed(SEED)
# tf.set_random_seed(5791)  # only works for current graph.

encoder = keras.Sequential()
encoder.add(keras.layers.LSTM(HIDDEN_UNITS, batch_input_shape=(None,SEQ_LEN,3), return_sequences=True))
encoder.add(keras.layers.LSTM(HIDDEN_UNITS))
encoder.add(keras.layers.Dense(3, activation='relu'))
encoder.compile(loss='mse', optimizer=keras.optimizers.Adam())
encoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 30, 64)            17408     
_________________________________________________________________
lstm_6 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 195       
Total params: 50,627
Trainable params: 50,627
Non-trainable params: 0
_________________________________________________________________


In [120]:
def slice_sequence_examples(sequence, num_steps):
    xs = []
    for i in range(len(sequence) - num_steps - 1):
        example = sequence[i: i + num_steps]
        xs.append(example)
    print("Total training examples:", str(len(xs)))
    return xs

def seq_to_singleton_format(examples):
    """
    Return the examples in seq to singleton format.
    """
    xs = []
    ys = []
    for ex in examples:
        xs.append(ex[:-1])
        ys.append(ex[-1])
    return (xs,ys)


X, y = seq_to_singleton_format(slice_sequence_examples(grnn_array, SEQ_LEN+1))
X = np.array(X)
y = np.array(y)

print("X:", X.shape)
print("y:", y.shape)

Total training examples: 3774
X: (3774, 30, 3)
y: (3774, 3)


In [130]:
history = encoder.fit(X, y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=VAL_SPLIT)

Train on 8558 samples, validate on 2140 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [127]:
X, y = seq_to_singleton_format(slice_sequence_examples(array_version, SEQ_LEN+1))
X = np.array(X)
y = np.array(y)

Total training examples: 10698


In [125]:
array_version = array_version

array([[29, 88,  4],
       [29, 30,  1],
       [29, 34,  8],
       ..., 
       [ 2, 12, 48],
       [ 2, 12, 45],
       [ 0, 12, 35]])

In [126]:
array_version = array_version / 128
array_version

array([[ 0.2265625,  0.6875   ,  0.03125  ],
       [ 0.2265625,  0.234375 ,  0.0078125],
       [ 0.2265625,  0.265625 ,  0.0625   ],
       ..., 
       [ 0.015625 ,  0.09375  ,  0.375    ],
       [ 0.015625 ,  0.09375  ,  0.3515625],
       [ 0.       ,  0.09375  ,  0.2734375]])

# Comments!

Interesting, it looks as if the GRNN data is very easy to train, loss below 0.01 in 30 epochs (30 seconds) of training.

What kind of analysis can be done to investigate this?