# Setup

In [1]:
import numpy as np
import tensorflow as tf

# Helper Functions
- One-Hot Encoder

In [2]:
def one_hot(x, depth=78):
    """Turn a batch of score vectors into one-hot vectors with a length-1 time axis.

    Arguments:
    x -- 2-D tensor of shape (batch, n_classes); the arg-max is taken over the last axis
    depth -- length of the produced one-hot vectors (defaults to 78, the value that was
             previously hard-coded here)

    Returns:
    x -- tensor of shape (batch, 1, depth) holding the one-hot encoding of the
         highest-scoring class of every row
    """
    # index of the highest-scoring class along the last axis
    indices = tf.keras.backend.argmax(x)

    # one-hot encode the selected indices
    encoded = tf.one_hot(indices=indices, depth=depth)

    # insert a time axis of length 1: (batch, depth) -> (batch, 1, depth).
    # A plain tensor op is used so that no new Keras layer object is created on each call.
    return tf.expand_dims(encoded, axis=1)

# Data
## Overview
- `X`: $(m, T_x, 78)$ dimensional array.
    - We have m training examples, each of which is a snippet of $T_x =30$ musical values. 
    - At each time step, the input is one of 78 different possible values, represented as a one-hot vector. 
        - For example, X[i,t,:] is a one-hot vector representing the value of the i-th example at time t. 

- `Y`: $(T_y, m, 78)$ dimensional array
    - This is essentially the same as `X`, but shifted one step to the left (to the past). 
    - Notice that the data in `Y` is **reordered** to be dimension $(T_y, m, 78)$, where $T_y = T_x$. This format makes it more convenient to feed into LSTM later.
    - We're using the previous values to predict the next value.
        - So our sequence model will try to predict $y^{\langle t \rangle}$ given $x^{\langle 1\rangle}, \ldots, x^{\langle t \rangle}$. 

- `n_x`: The number of unique values in this dataset. This should be 78. 

- `idx_to_values`: python dictionary mapping integers 0 through 77 to musical values.

In [3]:
# The file unpacks into four heterogeneous Python objects (two arrays, a count and a
# dict), so it has to be unpickled. NumPy >= 1.16.3 refuses to do that unless
# allow_pickle=True is passed explicitly.
# NOTE: unpickling can execute arbitrary code -- only load this file from a trusted source.
X, Y, n_x, idx_to_values = np.load('jazz_data.npy', allow_pickle=True)

# Quick sanity report on what was loaded
print('number of training examples:', X.shape[0])
print('Tx (length of sequence):', X.shape[1])
print('total # of unique values:', n_x)
print('shape of X:', X.shape)
print('Shape of Y:', Y.shape)

number of training examples: 60
Tx (length of sequence): 30
total # of unique values: 78
shape of X: (60, 30, 78)
Shape of Y: (30, 60, 78)


# Model
* $X = (x^{\langle 1 \rangle}, x^{\langle 2 \rangle}, \cdots, x^{\langle T_x \rangle})$ is a window of size $T_x$ scanned over the musical corpus. 
* Each $x^{\langle t \rangle}$ is a one-hot vector encoding the index of a musical value.
* $\hat{y}^{\langle t \rangle}$ is the prediction for the next value.
* We will be training the model on random snippets of 30 values taken from a much longer piece of music. 
    - Thus, we won't bother to set the first input $x^{\langle 1 \rangle} = \vec{0}$, since most of these snippets of audio start somewhere in the middle of a piece of music. 
    - We are setting each of the snippets to have the same length $T_x = 30$ to make vectorization easier.

1. Build model

In [4]:
# Hyperparameters: sequence length (number of time-steps) and LSTM state size
T_x = 30
n_a = 64

# Layer objects are created once here so that every time-step of the training model,
# and later the inference model, can call the very same instances (shared weights).
Reshapor = tf.keras.layers.Reshape(target_shape=(1, n_x))
LSTM = tf.keras.layers.LSTM(n_a, return_state=True)
Densor = tf.keras.layers.Dense(n_x, activation='softmax')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [5]:
def jazz_model(T_x, n_a, n_x):
    """Build the training model: one LSTM cell applied step by step over T_x time-steps.

    The module-level layer objects `Reshapor`, `LSTM` and `Densor` are called at every
    time-step, so all steps share the same weights.

    Arguments:
    T_x -- number of time-steps in each input sequence
    n_a -- size of the hidden state and cell state inputs
    n_x -- number of values per time-step (length of each input vector)

    Returns:
    model -- Keras Model with inputs [X, a0, c0] and a list of T_x outputs,
             one `Densor` output per time-step
    """

    # Input layers:
    X = tf.keras.layers.Input(shape=(T_x, n_x))

    # Initialize hidden state a0 and cell state c0
    a0 = tf.keras.layers.Input(shape=(n_a, ), name='a0')
    c0 = tf.keras.layers.Input(shape=(n_a, ), name='c0')
    a = a0
    c = c0

    # Initialize output
    outputs = []

    # Loop
    for t in range(T_x):

        ## select the t-th time-step vector from X.
        ## `t=t` freezes the current index inside the lambda; a bare closure over `t`
        ## would read the loop variable's final value whenever the Lambda layer is
        ## executed again after the loop has finished.
        x = tf.keras.layers.Lambda(lambda z, t=t: z[:, t, :])(X)

        ## reshape x to shape (1, n_x)
        x = Reshapor(x)

        ## one step of LSTM; the state produced here feeds the next time-step
        a, _, c = LSTM(inputs=x, initial_state=[a, c])

        ## compute output
        output = Densor(a)

        ## record output
        outputs.append(output)

    # Create model instance
    model = tf.keras.models.Model(inputs=[X, a0, c0], outputs=outputs)

    return model

In [6]:
# Instantiate the training model from the hyperparameters defined above
model = jazz_model(T_x=T_x, n_a=n_a, n_x=n_x)

# Print the layer-by-layer overview of the model
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 30, 78)]     0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 78)           0           input_1[0][0]                    
__________________________________________________________________________________________________
reshape (Reshape)               (None, 1, 78)        0           lambda[0][0]                     
                                                                 lambda_1[0][0]                   
                                                                 lambda_2[0][0]                   
                                                                 lambda_3[0][0]               

2. Compile model

In [7]:
# set Adam optimizer
# (`learning_rate` is the current argument name; `lr` is only kept as a deprecated alias)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, decay=0.01)

# compile model: the same loss and metric are applied to each of the model's outputs
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

3. Training

In [8]:
# One zero-filled hidden-state row and cell-state row per training example
m = X.shape[0]
a0 = np.zeros(shape=(m, n_a))
c0 = np.zeros(shape=(m, n_a))

# Fit the model.
## Iterating over Y's first axis yields one target array per model output
## (30 items, each of shape (60, 78) for this dataset).
model.fit(x=[X, a0, c0], y=[y_t for y_t in Y], epochs=100)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100


Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100


Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100


Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100


Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100


Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100


Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100


Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100


Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100


Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7fad07fbcc18>

# Generate Jazz
1. Build model: generate $T_y$ time-steps of music

In [9]:
def generate_music_model(LSTM, Densor, n_x, n_a, T_y):
    """Build the sampling model: each step's prediction becomes the next step's input.

    Arguments:
    LSTM -- LSTM layer object called at every step (must return output and both states)
    Densor -- layer object applied to the hidden state to produce each step's output
    n_x -- length of the input vector fed at the first step
    n_a -- size of the hidden state and cell state inputs
    T_y -- number of steps to generate

    Returns:
    model -- Keras Model with inputs [x0, a0, c0] and a list of T_y outputs
    """
    # Model inputs: first-step value, then initial hidden and cell states
    x_init = tf.keras.layers.Input(shape=(1, n_x))
    a_init = tf.keras.layers.Input(shape=(n_a, ), name='a0')
    c_init = tf.keras.layers.Input(shape=(n_a, ), name='c0')

    # Running values that are updated at every generated step
    step_input = x_init
    hidden = a_init
    cell = c_init

    predictions = []
    for _ in range(T_y):
        # advance the LSTM by one step from the current state
        hidden, _, cell = LSTM(inputs=step_input, initial_state=[hidden, cell])

        # map the new hidden state to this step's output and keep it
        step_output = Densor(hidden)
        predictions.append(step_output)

        # the one-hot encoding of this output is the input of the next step
        step_input = tf.keras.layers.Lambda(one_hot)(step_output)

    return tf.keras.models.Model(inputs=[x_init, a_init, c_init], outputs=predictions)

In [10]:
# Build the sampling model from the trained layer objects; it generates 50 steps
inference_model = generate_music_model(LSTM=LSTM, Densor=Densor, n_x=n_x, n_a=n_a, T_y=50)

# Print the layer-by-layer overview of the sampling model
inference_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 1, 78)]      0                                            
__________________________________________________________________________________________________
a0 (InputLayer)                 [(None, 64)]         0                                            
__________________________________________________________________________________________________
c0 (InputLayer)                 [(None, 64)]         0                                            
__________________________________________________________________________________________________
lstm (LSTM)                     [(None, 64), (None,  36608       input_2[0][0]                    
                                                                 a0[0][0]                   

2. Predict and Sample

In [11]:
def predict_sample(inference_model, x0, a0, c0):
    """Run the inference model and convert its predictions to indices and one-hot vectors.

    Arguments:
    inference_model -- object whose `predict(x=[x0, a0, c0])` returns the per-step predictions
    x0 -- input for the first step
    a0 -- initial hidden state
    c0 -- initial cell state

    Returns:
    results -- one-hot encoding of `indices`; shape (T_y, n_x) when a single sequence is generated
    indices -- index of the highest-probability class at each step; shape (T_y, 1) for a single sequence
    """

    # Predict; stacking the per-step outputs gives an array of shape (T_y, batch, n_x)
    preds = np.asarray(inference_model.predict(x=[x0, a0, c0]))

    # Convert prediction to an ndarray of indices with the max probabilities
    indices = np.argmax(preds, axis=-1)

    # Convert indices to one-hot vectors. The number of classes is read from the
    # predictions themselves instead of relying on a module-level variable, and the
    # first argument is passed positionally because its keyword name is not the same
    # in every Keras version.
    results = tf.keras.utils.to_categorical(indices, num_classes=preds.shape[-1])

    return results, indices

In [12]:
# All-zero inputs for a single generated sequence: first value, hidden state, cell state
x0 = np.zeros(shape=(1, 1, n_x))
a0 = np.zeros(shape=(1, n_a))
c0 = np.zeros(shape=(1, n_a))

# Run the sampling model and keep both the one-hot results and the chosen indices
results, indices = predict_sample(inference_model=inference_model, x0=x0, a0=a0, c0=c0)