### Development Notebook

This notebook contains development code of the NLP system data pipe for tensor_synth. The system consists of three main pieces:
- Encoding/Cleaning
- Model Training and Sequence Generation
- Decoding


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf


#### Encoding/Cleaning
- load json data / basic cleaning (sort, drop id, reset indices etc.)
- calculate event duration and interarrival times
- convert and bin values
- concatenate rows into strings (words for NLP) and return as a pd.series

In [252]:
# sort, drop id column and reset index (supercollider seems to occasionally drop events )
df_raw = pd.read_json('./data/grain_improv2.json',orient='index').sort_values('noteOn_timestamp').drop('id',axis=1).reset_index(drop=True)

#calculate duration: noteOn[i] - noteOff[i]
df_raw['duration'] = df_raw.apply(lambda row: row['noteOff_timestamp'] - row['noteOn_timestamp'],axis=1)

# calculate interevent duration (wait time between events): noteOn[i] - noteOn[i-1] ...
# shifting and then dividing by -1 
df_raw['inter_event_duration'] = (df_raw['noteOn_timestamp'] - df_raw['noteOn_timestamp'].shift(-1))/ -1

# timestamps no longer needed
df_raw = df_raw.drop(['noteOn_timestamp','noteOff_timestamp'],axis=1)

#new_index = 
df_raw

Unnamed: 0,amp,freq,freq_dev,grain_dur,grain_dur_dev,grain_rate,grain_rate_dev,n_voices,rel,duration,inter_event_duration
0,102,69,0,0,0,30,0,0,0,0.014984,0.734959
1,102,68,0,0,0,30,0,0,0,0.094034,0.847042
2,102,74,0,0,0,30,0,0,0,5.391998,7.369022
3,102,79,0,0,0,30,0,0,0,3.685883,4.939011
4,102,72,0,0,0,30,0,0,0,0.001258,0.869961
5,102,77,0,0,0,30,0,0,0,0.273134,2.703010
6,102,79,0,0,0,30,0,0,0,0.012025,0.037994
7,102,71,0,0,0,30,0,0,0,2.350905,3.969937
8,102,73,0,0,0,30,0,0,0,3.948029,6.248043
9,102,72,0,0,0,30,0,0,0,3.230056,5.568060


In [3]:
import string
#used for amplitude
bin4 = [i for i in range(0,127+32,32)]  # need to add one 'step size' 
label4 = [string.ascii_lowercase[i] for i in range(4)]
# used for voices
bin8 = [i for i in range(0,127+16,16)]
label8 = [string.ascii_lowercase[i] for i in range(8)]

# used for grain rate/dur and deviations
bin16 = [i for i in range(0,127+8,8)]
label16 = [string.ascii_lowercase[i] for i in range(16)]

dur_bins = np.zeros(12)
dur_bins[1:] = np.logspace(-1,1.25,11)  # need to test edge cases for this one.. 
dur_labels = [string.ascii_lowercase[i] for i in range(11)]

In [7]:
binned_df = pd.DataFrame(columns=df_raw.columns)

In [8]:
# load the binned df using cut -- keep freq as integer value (for now.. )
binned_df['amp'] = pd.cut(df_raw['amp'], bin4,labels=label4, include_lowest=True)
binned_df['freq_dev'] = pd.cut(df_raw['freq_dev'], bin16,labels=label16,include_lowest=True)
binned_df['grain_dur'] = pd.cut(df_raw['grain_dur'], bin16,labels=label16,include_lowest=True)
binned_df['grain_dur_dev'] = pd.cut(df_raw['grain_dur_dev'],bin16,labels=label16,include_lowest=True)
binned_df['grain_rate']  = pd.cut(df_raw['grain_rate'],bin16,labels=label16,include_lowest=True)
binned_df['grain_rate_dev'] = pd.cut(df_raw['grain_rate_dev'],bin16,labels=label16,include_lowest=True)
binned_df['n_voices'] = pd.cut(df_raw['n_voices'],bin8,labels=label8,include_lowest=True)
binned_df['rel'] = pd.cut(df_raw['rel'], bin4,labels=label4,include_lowest=True)

binned_df['duration'] = pd.cut(df_raw['duration'],dur_bins,labels=dur_labels,include_lowest=True)
binned_df['inter_event_duration'] = pd.cut(df_raw['inter_event_duration'],dur_bins,labels=dur_labels,include_lowest=True)

binned_df['freq'] = df_raw['freq']
pd.cut(df_raw['amp'], bin4,labels=label4)

binned_df

Unnamed: 0,amp,freq,freq_dev,grain_dur,grain_dur_dev,grain_rate,grain_rate_dev,n_voices,rel,duration,inter_event_duration
0,d,69,a,a,a,d,a,a,a,a,e
1,d,68,a,a,a,d,a,a,a,a,f
2,d,74,a,a,a,d,a,a,a,i,j
3,d,79,a,a,a,d,a,a,a,h,i
4,d,72,a,a,a,d,a,a,a,a,f
5,d,77,a,a,a,d,a,a,a,c,h
6,d,79,a,a,a,d,a,a,a,a,a
7,d,71,a,a,a,d,a,a,a,h,i
8,d,73,a,a,a,d,a,a,a,i,i
9,d,72,a,a,a,d,a,a,a,h,i


In [9]:
cols = list(binned_df)
cols[1], cols[0] = cols[0], cols[1]
binned_df = binned_df.ix[:,cols]  # works for this sample, but may need to reorder if json file inputs differently
binned_df = binned_df.dropna().reset_index(drop=True)

In [10]:

word_series = binned_df.iloc[:,0].astype(str) + '_' # freq first then string of categories 
for i in range(1, len(binned_df.columns)):
    word_series += binned_df.iloc[:,i].astype(str)

In [11]:
# This example code persists the bins used for model inputs - translate.binning_specs.py
# The application will encode this bin list for future decoding -- translate.decoder.py 
word_series

import re
bin_list = pd.cut(df_raw['grain_dur'], bin16).cat.categories.tolist()

test = '[0.1, 0.5)'
re.findall('[-+]?\d+[\.]?\d*',test)
# [i for j in xx for i in k]
bin_list = [re.findall('[-+]?\d+[\.]?\d*',i) for i in bin_list]

for i in range(len(bin_list)):
    for j in range(len(bin_list[i])):
        bin_list[i][j] = int(bin_list[i][j])
    bin_list[i] = tuple(bin_list[i])
bin_list

[(0, 8),
 (8, 16),
 (16, 24),
 (24, 32),
 (32, 40),
 (40, 48),
 (48, 56),
 (56, 64),
 (64, 72),
 (72, 80),
 (80, 88),
 (88, 96),
 (96, 104),
 (104, 112),
 (112, 120),
 (120, 128)]

#### Training a Skip-Gram Tensorflow model and Generating a Sequence
- The word series is passed to an API that trains a Skip-Gram Model
- The final embedded vectors are evaluated and stored as a top level variable
- A string sequence of user specified length is generated using k-nearest neighbors


In [12]:
word_series # this is the format of the word_series input into the training API

0      69_daaadaaaae
1      68_daaadaaaaf
2      74_daaadaaaij
3      79_daaadaaahi
4      72_daaadaaaaf
5      77_daaadaaach
6      79_daaadaaaaa
7      71_daaadaaahi
8      73_daaadaaaii
9      72_daaadaaahi
10     78_daaadaaaaf
11     81_daaadaaaeg
12     75_daaadaaaii
13     76_daaadaaaae
14     82_daaadaaaae
15     68_daaadaaaik
16     78_dadcdaaaae
17     80_dadcdaaaeg
18     74_dadcdaaaag
19     75_dadcdaaagi
20     58_dadcdaaaae
21     63_dadcdaaabf
22     65_dadcdaaaaf
23     70_dadcdaaahh
24     58_dadcdaaaaf
25     63_dadcdaaaaf
26     62_dadcdaaaaf
27     70_dadcdaaahi
28     57_dadcdaaaae
29     63_dadcdaaaaf
           ...      
241    84_dcefddaaeg
242    84_dcefddaaac
243    94_dcefddaabd
244    90_dcefddaaac
245    89_dcefddaace
246    87_dcefddaace
247    96_dcefddaade
248    83_dcefddaaff
249    85_dcefddaafh
250    85_dcigddaaac
251    87_dcigddaabc
252    88_dcigddaabe
253    85_dcigddaaee
254    87_dcigddaaae
255    88_dcigddaacc
256    87_dcigddaace
257    88_dci

In [13]:
# this prepares the batch for processing
def prepare_batch(word_series):
    # join unique ids in order to create a sequence to integer dataframe
    # creates sentences
    word_series.name = None
    sentence_df = pd.DataFrame(word_series,columns=['words'])
    sentence_df['word_counts'] = sentence_df.groupby('words')['words'].transform('count')

    unique_words_df = pd.DataFrame(word_series.unique(),columns=['words']).reset_index(drop=True)
    sentence_df['word_counts'] = sentence_df.groupby('words')['words'].transform('count')

    unique_words_df = pd.merge( unique_words_df, sentence_df, 
        how='inner',on=['words']).drop_duplicates() \
        .sort_values('word_counts',ascending=False).reset_index(drop=True).reset_index()
    
    unique_words_sub = unique_words_df[['index','words']]
    seq_to_integer = pd.merge(sentence_df,unique_words_sub,how='left',on='words').drop('word_counts', axis=1)

    word_ints = np.array(seq_to_integer['index']) 
    return word_ints, seq_to_integer, unique_words_df
    

In [14]:
targets, target_index_map_df, unique_words_df = prepare_batch(word_series)
# This creates the unique words mapping
key_word_dict = unique_words_df['words'].to_dict()
word_key_dict = dict(zip(key_word_dict.values(),key_word_dict.keys()))
unique_words_df

Unnamed: 0,index,words,word_counts
0,0,66_dcdcdaaacc,4
1,1,84_dcdcdaaaeg,3
2,2,72_dcdcdaaaeg,3
3,3,78_dcdcdaaacc,3
4,4,65_dcdcdaaacc,3
5,5,65_dadcdaaaaf,3
6,6,68_dcdcdaaacd,3
7,7,90_dcefddaaac,3
8,8,89_dcccdcaade,2
9,9,64_dcdcdaaacc,2


In [15]:
def generate_batch_labels(targets, span=2):
    ''' 
    generates tuples of context words given a target
    span is the size of the target tuple: 
    ex: span = 2
    [anarchism, originated, as] --> originated, (anarchism,as)
    
    '''    
    batch = np.repeat(targets[span//2:-span//2],span)
    labels = []
    
    if span == 2:
        for i in range(0, len(targets)-span):  
            labels.append(targets[i])           # grabs i
            labels.append(targets[i+span])      # skips over target and grabs that span

    if span == 4:
        for i in range(0, len(targets)-span):
            labels.append(targets[i])
            labels.append(targets[i+1])
            labels.append(targets[i+span-1])
            labels.append(targets[i+span])
            
    labels = np.array(labels).reshape(len(labels),1) # need to add a dimension for nce loss
    return batch, labels

In [16]:
batch, labels = generate_batch_labels(targets,span=2)
len(labels)

538

In [29]:
# Tensorflow training script based on google's graph model in word2vec_basic.py

batch_size = len(batch)   # these all become class variabes
embedding_size = len(batch)
vocabulary_size = len(unique_words_df)
num_sampled = 24  # for nce negative sampling


graph = tf.Graph()
with graph.as_default():

    # Input data.
    train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
    valid_inputs = tf.constant(batch, dtype=tf.int32) # for computing cosine similarity 

    with tf.device("/cpu:0"):
        embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
        embed = tf.nn.embedding_lookup(embeddings, train_inputs)

        # Construct the variables for the NCE loss
        nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, embedding_size], stddev=1.0 / (embedding_size**0.5)))
        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

          # Compute the average NCE loss for the batch.
          # tf.nce_loss automatically draws a new sample of the negative labels each
          # time we evaluate the loss.
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                         biases=nce_biases,
                         labels=train_labels,
                         inputs=embed,
                         num_sampled=num_sampled,
                         num_classes=vocabulary_size))
         # Construct the SGD optimizer using a learning rate of 1.0.
        optimizer = tf.train.GradientDescentOptimizer(2.0).minimize(loss)
        
          # Construct the SGD optimizer using a learning rate of 1.0.
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_inputs)
        similarity = tf.matmul(valid_embeddings, tf.transpose(normalized_embeddings))
        
        init = tf.global_variables_initializer()


In [30]:
with tf.Session(graph=graph) as session:
    init.run()
    print('tf.session initialized')
    
    average_loss = 0
    for step in range(1000):
        _, loss_val = session.run([optimizer,loss],feed_dict={train_inputs:batch, train_labels:labels})
    
        average_loss += loss_val  # this code tracks the loss and training progress
        if step%200 == 0:
            if step > 0:
                average_loss /= 100
            print('Average loss at step ', step,": ",average_loss)
            average_loss = 0
    
    final_embeddings = normalized_embeddings.eval()
    final_cos_similarity = similarity.eval()

tf.session initialized
Average loss at step  0 :  52.0377845764
Average loss at step  200 :  8.41405323625
Average loss at step  400 :  1.61588158429
Average loss at step  600 :  1.58885053158
Average loss at step  800 :  1.56718878686


In [52]:
def get_knn_of_target(target, size=8):
    ''' 
    helper function that generates a list of nearest neighbors for a given input
    target is a word string that gets converted
    size is number of neighbors
    '''
    target_word_key = word_key_dict[target]  # gets key from input string - word_key instance var
    top_k = size
    nearest = (-final_cos_similarity[i,:].argsort()[1:top_k + 1])   # key word instance var
    knn = np.array([key_word_dict[-nearest[k]] for k in range(top_k)])
    return knn


def generate_word_sequence(num_output_words):
    ''' 
    generates a simple word sequence of size n using cosine similarity from each previous word
    num_ouput_words is the length of the output sequence
    '''
    word_seq = [] # the string values
    for i in range(0,num_output_words):
        if i == 0:
            word_seq.append(key_word_dict[np.random.choice(list(key_word_dict.keys()))])
            continue
        knn = get_knn_of_target(word_seq[i-1])
        word_seq.append(key_word_dict[word_key_dict[np.random.choice(knn)]])
        
    return word_seq
    
    

In [53]:
get_knn_of_target('66_dcdcdaaacc')

array(['77_dcdcdaaacd', '75_dcdcdaaacc', '70_dadcdaaahh', '67_dcdcdbaacc',
       '67_dcdcdaaabc', '72_daaadaaahi', '77_dcdcdaaabc', '68_daaadaaaaf'], 
      dtype='<U13')

In [54]:
np.random.choice(get_knn_of_target('66_dcdcdaaacc'))


'77_dcdcdaaacd'

In [55]:
generate_word_sequence(10)  # output word_sequence

['79_dcdcdbaaaa',
 '68_daaadaaaaf',
 '67_dcdcdbaacc',
 '68_daaadaaaaf',
 '77_dcdcdaaacd',
 '67_dcdcdaaabc',
 '77_dcdcdaaacd',
 '77_dcdcdaaacd',
 '67_dcdcdaaabc',
 '68_daaadaaaaf']

#### Decoding

- Specs for the binnings need to be stored in a config file/data structure 
- After tensorflow produces results they need to be decoded back to midi-json format
- midi values are randomized within the binned ranges and returned to supercollider


In [302]:
# split the string up into a list with number, letters

word_seq_output = generate_word_sequence(20)

In [304]:
word_seq_output

['70_dadcdaaagh',
 '82_dcdcdaaade',
 '79_dcdcdaaacd',
 '76_daaadaaaae',
 '66_dcdcdaaadd',
 '75_dcdcdaaabc',
 '75_dcdcdaaabc',
 '76_daaadaaaae',
 '82_dcdcdaaade',
 '79_dcdcdaaacd',
 '80_dcdcdaaaee',
 '75_dcdcdaaabc',
 '82_dcdcdaaade',
 '68_dcdcdbaabe',
 '66_dcdcdaaadd',
 '79_dcdcdaaacd',
 '80_dcdcdaaaee',
 '76_daaadaaaae',
 '68_dcdcdbaabe',
 '79_dcdcdaaacd']

In [305]:

def split_word(words):
    ''' splits up the word strings'''
    words = [i.split('_') for i in words]
    for i in range(len(words)):
        words[i][0] = [words[i][0]]
        words[i][1] = list(words[i][1])
        words[i] = words[i][0] + words[i][1]
    return np.array(words)   

In [306]:
split_arr = split_word(word_seq_output)

output_df = pd.DataFrame(split_arr, columns=[
    'freq','amp','freq_dev','grain_dur',
    'grain_dur_dev','grain_rate','grain_rate_dev',
    'n_voices','rel','duration','inter_event_duration'
])

In [307]:
# unbin the dataframe using the model's config.json file

import json
# first open config file and read in the embeddings
def load_config_json_file(config_file_path):
    with open(config_file_path, 'r') as config_data:
        config = json.load(config_data)
    
    return config

config = load_config_json_file('./data/test_config_output.json')
encoding_dict = config['encodings']    # just the encodings 



In [308]:
import string 

def unbin_int_callback(row, bin_data, col_name):
    inputs = [string.ascii_lowercase[i] for i in range(len(bin_data))]
    bin_indx = inputs.index(row[col_name])  # gets replaced with iterator
    low, high = bin_data[bin_indx][0], bin_data[bin_indx][1]
    if col_name == 'duration' or col_name == 'inter_event_duration':
        return np.random.uniform(low, high)
    return np.random.randint(low,high)


In [309]:
for k,v in encoding_dict.items():
    if k == 'freq':
        continue
    output_df[k] = output_df.apply(unbin_int_callback,args=(v,k,),axis=1)
output_df

Unnamed: 0,freq,amp,freq_dev,grain_dur,grain_dur_dev,grain_rate,grain_rate_dev,n_voices,rel,duration,inter_event_duration
0,70,98,0,26,20,29,6,11,23,2.136798,2.714013
1,82,118,21,29,16,25,1,8,27,0.383234,0.591628
2,79,97,21,24,21,24,2,7,24,0.280902,0.322052
3,76,122,7,0,3,28,0,2,31,0.050013,0.558914
4,66,97,20,24,17,24,4,15,6,0.461238,0.437519
5,75,111,21,29,19,24,1,2,24,0.156919,0.20647
6,75,113,23,30,17,29,3,1,21,0.110496,0.229511
7,76,126,6,6,4,29,6,0,24,0.075901,0.733226
8,82,125,18,26,18,28,0,5,28,0.399129,0.623104
9,79,117,21,27,16,26,5,10,14,0.247583,0.472013


In [310]:
output_df.to_json('./data/tensor_synth_midi_output.json',orient='index')