In [None]:
from __future__ import print_function

import datetime
import os
import pickle
import random
import time
from itertools import permutations

try:
    from urllib import urlretrieve  # Python 2
except ImportError:
    from urllib.request import urlretrieve  # Python 3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

# Remote directory holding the pickled performances dataframe, and the file
# name used both remotely and for the local cached copy.
URL = "https://github.com/anucc/metatone-analysis/raw/master/metadata/"
PICKLE_FILE = "metatone_performances_dataframe.pickle"

# Download once and reuse the cached copy afterwards. The download goes to a
# temporary name first so that an interrupted transfer cannot leave a truncated
# PICKLE_FILE behind that later runs would mistake for a valid cache.
if not os.path.exists(PICKLE_FILE):
    _partial_download = PICKLE_FILE + ".part"
    urlretrieve(URL + PICKLE_FILE, _partial_download)
    os.rename(_partial_download, PICKLE_FILE)

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# This file is fetched over the network, so only run this cell if the source
# repository is trusted.
with open(PICKLE_FILE, 'rb') as f:
    metatone_dataset = pickle.load(f)
        
## Integer code for each gesture label; a label's code is its position in the list.
NUMBER_GESTURES = 9
GESTURE_CODES = {
    label: code
    for code, label in enumerate(
        ['N', 'FT', 'ST', 'FS', 'FSA', 'VSS', 'BS', 'SS', 'C'])}

# Size of the single-performer gesture vocabulary.
vocabulary_size = len(GESTURE_CODES)

In [None]:
"""
Encodes (and decodes) multiple metatone performance gesture codes into single natural numbers.
Given n gesture codes g_1, g_2, ..., g_n, each in the range [0, j-1], these can be encoded as a unique integer:
g_1*j^0 + g_2*j^1 + ... + g_n*j^(n-1)
The integer can subsequently be decoded back into the original ordered list of codes.
"""

# Int values for Gesture codes.
GESTURE_CODES = {
    'N': 0,
    'FT': 1,
    'ST': 2,
    'FS': 3,
    'FSA': 4,
    'VSS': 5,
    'BS': 6,
    'SS': 7,
    'C': 8}
# Derived from the table (rather than hard-coded) so the count cannot drift
# out of sync when a gesture is added or removed.
NUMBER_GESTURES = len(GESTURE_CODES)


def encode_ensemble_gestures(gestures, base=None):
    """Encode an ordered sequence of gesture codes as a single integer.

    The codes are treated as the digits of a base-``base`` number, least
    significant digit first:
    ``gestures[0] * base**0 + gestures[1] * base**1 + ...``

    Args:
        gestures: iterable of integer gesture codes. Each code is expected to
            lie in ``[0, base - 1]``; this is not checked, and codes outside
            that range do not decode back to the same sequence.
        base: number of distinct gesture codes. Defaults to
            ``len(GESTURE_CODES)`` when omitted.

    Returns:
        The encoded integer (0 for an empty sequence).
    """
    if base is None:
        base = len(GESTURE_CODES)
    encoded = 0
    place_value = 1  # base ** position, built up incrementally
    for g in gestures:
        encoded += g * place_value
        place_value *= base
    return encoded


def decode_ensemble_gestures(num_perfs, code, base=None):
    """Decode a single integer back into a list of gesture codes.

    Inverse of ``encode_ensemble_gestures``: reads ``code`` as a base-``base``
    number and returns its ``num_perfs`` lowest digits, least significant
    first (the same order in which the gestures were encoded).

    Args:
        num_perfs: number of gesture codes to extract.
        code: the encoded integer.
        base: number of distinct gesture codes. Defaults to
            ``len(GESTURE_CODES)`` when omitted.

    Returns:
        A list of ``num_perfs`` integers. Digits above position
        ``num_perfs - 1`` are ignored.
    """
    if base is None:
        base = len(GESTURE_CODES)
    gestures = []
    remaining = code
    for _ in range(num_perfs):
        # Peel off the current lowest digit; the quotient carries the rest.
        remaining, gesture = divmod(remaining, base)
        gestures.append(gesture)
    return gestures


In [None]:
## Isolate Individual Performances
## (improvisations that are neither demonstrations nor four-performer sessions)
improvisations = metatone_dataset.loc[
    (metatone_dataset["performance_type"] == "improvisation")
    & (metatone_dataset["performance_context"] != "demonstration")
    & (metatone_dataset["number_performers"] != 4)]
gesture_data = improvisations['gestures']
# Iterating over the transpose walks each gesture array column by column
# (presumably one performer per column -- confirm against the dataset).
individual_improvisations = [
    one_perf
    for perf in gesture_data.tolist()
    for one_perf in perf.T]
print("Number of performances for testing: ", len(individual_improvisations))

## Quartet sizes: four performers are encoded on the input side, three on the output side
num_input_performers = 4
num_output_performers = 3

## Isolate the Interesting Ensemble Performances
## (same filter as above, but keeping only the four-performer sessions)
improvisations = metatone_dataset.loc[
    (metatone_dataset["performance_type"] == "improvisation")
    & (metatone_dataset["performance_context"] != "demonstration")
    & (metatone_dataset["number_performers"] == 4)]
gesture_data = improvisations['gestures']
ensemble_improvisations = gesture_data.tolist()

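## Setup the epochs
## Each example pairs an encoded input sequence (the lead player's next gesture together with the
## other players' current gestures) with an encoded label sequence (the other players' next gestures).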
def generate_epochs(num_epochs, num_steps, batch_size):
    """Build shuffled mini-batches of encoded ensemble gesture sequences.

    Reads the module-level ``ensemble_improvisations`` list; each entry is
    assumed to be a 2-D array indexed ``[time, player]`` (TODO confirm against
    the dataset). For every window of ``num_steps + 1`` consecutive rows, each
    player in turn is treated as the lead, and every ordering of the remaining
    players yields one example:

    * input ``x[t]``: encoding of (lead gesture at ``t + 1``, other players'
      gestures at ``t``)
    * label ``y[t]``: encoding of the other players' gestures at ``t + 1``

    Args:
        num_epochs: number of independently shuffled passes to produce.
        num_steps: length of each input/label sequence.
        batch_size: number of examples per batch.

    Returns:
        A list with ``num_epochs`` entries. Each entry is a list of
        ``(inputs, labels)`` tuples of numpy arrays, each built from
        ``batch_size`` examples. Examples left over after the last full batch
        are dropped.
    """
    ## Setup the inputs and label sets
    imp_xs = []
    imp_ys = []

    for imp in ensemble_improvisations:
        for i in range(len(imp) - num_steps - 1):
            imp_slice = imp[i:i + num_steps + 1]
            players = imp_slice.T  # one row per player
            for j in range(len(players)):
                lead = imp_slice[1:].T[j]  # lead gestures (post steps)
                ensemble = players[np.arange(len(players)) != j]  # rest of the players indexed by player
                for ens_perm in permutations(ensemble):  # consider all permutations of the players
                    ens_by_time = np.array(ens_perm).T  # indexed by time slice
                    ens_pre = ens_by_time[:-1]
                    ens_post = ens_by_time[1:]
                    # list(...) materialises the results: on Python 3 map() is
                    # lazy, and a map object cannot be turned into a numeric array.
                    y = list(map(encode_ensemble_gestures, ens_post))
                    x = list(map(encode_ensemble_gestures, zip(lead, *ens_pre.T)))  # encode ensemble state
                    imp_xs.append(x)  # append the inputs
                    imp_ys.append(y)  # append the outputs
    # A real list is required so the pairs can be measured, shuffled in place and sliced.
    dataset = list(zip(imp_xs, imp_ys))
    print("Total Training Examples: " + str(len(imp_xs)))
    print("Total Training Labels: " + str(len(imp_ys)))
    epochs = []
    for _ in range(num_epochs):
        # shuffle the big list
        np.random.shuffle(dataset)
        batches = []
        # Floor division: range() needs an int, and a partial final batch is skipped.
        for i in range(len(dataset) // batch_size):
            ## Setup the batches
            batch = dataset[i * batch_size:(i + 1) * batch_size]
            bx, by = zip(*batch)
            batches.append((np.array(bx), np.array(by)))
        epochs.append(batches)
    return epochs

In [3]:
import numpy as np
import h5py

# Sequence length of the pre-generated examples; it is part of the file name.
num_steps = 120

# Path of the examples file, one directory above the notebook.
examples_file = "".join(
    ["../MetatoneQuartetExamples-", str(num_steps), "steps", ".h5"])

# Slicing with [:] pulls each HDF5 dataset into memory, so both remain usable
# after the file is closed at the end of the with-block.
with h5py.File(examples_file, 'r') as data_file:
    dataset = data_file['examples'][:]
    validation_set = data_file['validation'][:]

In [42]:
# Every example in the dataset file has shape (2, num_steps): row 0 holds the
# input sequence and row 1 the correct output sequence.
# Adapt this to many-to-one format: a whole input sequence is paired with only
# the final step of its output sequence.
X = dataset[:, 0]  # all input sequences, every time step
y = dataset[:, 1, -1].reshape((-1, 1))  # last output step of each example, as a column of shape (n, 1)
print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (343482, 120)
y shape: (343482, 1)


In [12]:
import keras

Using TensorFlow backend.


In [44]:
# Network hyperparameters
num_layers = 1
batch_size = 64
num_units = 32
num_steps = 120

# Ensemble sizes encoded on the input and output side
num_input_performers = 4
num_output_performers = 3

# One class per possible combination of gesture codes across the encoded performers
vocabulary_size = 9 # len(GESTURE_CODES)
num_input_classes = vocabulary_size ** num_input_performers
num_output_classes = vocabulary_size ** num_output_performers

# Layer stack: embedding of the integer inputs, (num_layers - 1) LSTMs that
# pass full sequences onward, a final LSTM that returns only its last output,
# and a softmax over the output classes.
training_model = keras.models.Sequential(
    [keras.layers.Embedding(num_input_classes, num_units, input_length=num_steps)]
    + [keras.layers.LSTM(num_units, return_sequences=True)
       for n in range(num_layers - 1)]
    + [keras.layers.LSTM(num_units),
       keras.layers.Dense(num_output_classes, activation='softmax')])

# Labels are integer class ids (not one-hot), hence the sparse loss.
training_model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam')
training_model.summary()

# Notes:
# The difficulty of this task is learning from a relatively large input space:
print("Number of input classes:", num_input_classes)
print("Number of output classes", num_output_classes)
# An Embedding layer is handy here because the network can learn directly from
# integer inputs (no one-hot encoding needed).
# The consequence is that, for lower 'num_units', the parameters used for the
# input embedding outnumber those of the LSTM layers.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (None, 120, 32)           209952    
_________________________________________________________________
lstm_23 (LSTM)               (None, 32)                8320      
_________________________________________________________________
dense_12 (Dense)             (None, 729)               24057     
Total params: 242,329
Trainable params: 242,329
Non-trainable params: 0
_________________________________________________________________
Number of input classes: 6561
Number of output classes 729


In [45]:
# Train the many-to-one model on the full example set. Only batch_size is
# passed explicitly; the number of epochs and any validation behaviour are
# left to the defaults of fit().
# NOTE(review): the recorded output for this cell reports a train/validation
# split that this call does not request -- presumably it was produced by an
# earlier version of the cell (e.g. one passing validation_split). Re-run to
# refresh the output, and confirm whether the loaded `validation_set` was
# meant to be supplied here.
training_model.fit(X, y, batch_size=batch_size)

Train on 291959 samples, validate on 51523 samples
Epoch 1/1

KeyboardInterrupt: 