In [1]:
import tensorflow as tf 
tf.enable_eager_execution()
import numpy as np
import os
import time
import functools
import util
import matplotlib.pyplot as plt
import __init__ as util1

In [2]:
# Read the whole ABC-notation corpus into memory. The context manager closes
# the file handle as soon as the read finishes instead of leaving it open for
# the lifetime of the kernel.
with open('data/irish.abc') as abc_file:
    text = abc_file.read()

In [3]:
# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))

In [4]:
# Forward lookup: character -> integer id (the character's position in `vocab`).
char2idx = {char: index for index, char in enumerate(vocab)}
# Reverse lookup: integer id -> character, stored as an array so that a whole
# sequence of ids can be decoded with a single indexing operation.
idx2char = np.array(vocab)
# The full corpus encoded as an array of integer ids.
text_as_int = np.array([char2idx[char] for char in text])

In [5]:
# Preview the first 20 character -> id pairs, laid out like a dict literal.
# Slicing the items (rather than zipping against range(20) with a throwaway
# `_` target) avoids binding `_` at notebook top level, which would stop
# IPython from updating its `_` last-output shortcut.
print('{')
for char, idx in list(char2idx.items())[:20]:
    print('  {:4s}: {:3d},'.format(repr(char), idx))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '"' :   3,
  '#' :   4,
  "'" :   5,
  '(' :   6,
  ')' :   7,
  ',' :   8,
  '-' :   9,
  '.' :  10,
  '/' :  11,
  '0' :  12,
  '1' :  13,
  '2' :  14,
  '3' :  15,
  '4' :  16,
  '5' :  17,
  '6' :  18,
  '7' :  19,
  ...
}


In [6]:
# Show the first 13 characters of the corpus next to their integer encoding.
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

'X:1\nT:Alexand' ---- characters mapped to int ---- > [49 22 13  0 45 22 26 67 60 79 56 69 59]


In [7]:
# Number of characters in each training input.
seq_length = 100
# NOTE(review): the chunks built below are seq_length+1 ids long, so this
# slightly overestimates the number of chunks actually produced - confirm
# whether that matters to whatever consumes it.
examples_per_epoch = len(text)//seq_length

# Dataset over the encoded corpus, one integer id per element.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
# Group consecutive ids into chunks of seq_length+1 (one extra id so that each
# chunk can later be split into an input and a one-step-shifted target); a
# final chunk shorter than that is dropped.
sequences = char_dataset.batch(seq_length+1,drop_remainder=True)

In [8]:
def split(chunk):
    """Turn one chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so ``target[i]`` is the element that follows
    ``input[i]`` in the chunk.
    """
    return chunk[:-1], chunk[1:]

# Apply `split` to every chunk so each element becomes an (input, target) pair.
dataset = sequences.map(split)

In [9]:
# Inspect the first (input, target) pair: for its first five positions, print
# the input id/character and the id/character the model is expected to predict.
for input_example, target_example in dataset.take(1):
    for i,(input_idx,target_idx) in enumerate(zip(input_example[:5], target_example[:5] )):
        print("Step {:4d}".format(i))
        print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
        print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step    0
  input: 49 ('X')
  expected output: 22 (':')
Step    1
  input: 22 (':')
  expected output: 13 ('1')
Step    2
  input: 13 ('1')
  expected output: 0 ('\n')
Step    3
  input: 0 ('\n')
  expected output: 45 ('T')
Step    4
  input: 45 ('T')
  expected output: 22 (':')


In [10]:
# Number of (input, target) sequences per training batch.
batch_size = 64
steps_per_epoch = examples_per_epoch//batch_size

# Size of the in-memory buffer that tf.data shuffles from.
buffer_size = 10000
# Build the pipeline from `sequences` instead of re-assigning from `dataset`
# itself: on a clean run the result is the same, but the cell is now
# idempotent - re-running it no longer shuffles and batches data that has
# already been batched. Incomplete final batches are dropped so every batch
# has exactly `batch_size` sequences.
dataset = sequences.map(split).shuffle(buffer_size).batch(batch_size,drop_remainder=True)

# Define RNN (LSTM) Model

In [11]:
# Number of distinct characters; used as both the embedding input size and the
# width of the final Dense layer in build_model.
vocab_size = len(vocab)
# Width of the embedding vector for each character.
embedding_dim = 256
# Number of units in the recurrent layer.
rnn_units = 1024

In [12]:
# Pre-configured recurrent layer factory: a CuDNN-backed LSTM that emits an
# output for every timestep and keeps its state between successive batches.
# Callers only need to supply the number of units.
LSTM = functools.partial(
    tf.keras.layers.CuDNNLSTM,
    return_sequences=True,
    recurrent_initializer='glorot_uniform',
    stateful=True,
)

In [13]:
def build_model(vocab_size,embedding_dim,rnn_units,batch_size):
    """Assemble the character-level model: Embedding -> LSTM -> Dense.

    Args:
        vocab_size: Number of distinct characters; sets the embedding's input
            size and the number of outputs of the final Dense layer.
        embedding_dim: Width of each character's embedding vector.
        rnn_units: Number of units in the recurrent layer.
        batch_size: Fixed batch size baked into the model's input shape (the
            recurrent layer is configured as stateful); the sequence length
            is left unspecified.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    # Layers are created in the same order they are stacked.
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = LSTM(rnn_units)
    projection = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

In [14]:
# Training model, built for batches of `batch_size` sequences.
model = build_model(len(vocab),embedding_dim,rnn_units,batch_size)

In [15]:
# Print the layer stack, output shapes and parameter counts of the training model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           21248     
_________________________________________________________________
cu_dnnlstm (CuDNNLSTM)       (64, None, 1024)          5251072   
_________________________________________________________________
dense (Dense)                (64, None, 83)            85075     
Total params: 5,357,395
Trainable params: 5,357,395
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Run the freshly built model on a single batch to confirm the output shape.
for input_example_batch, target_example_batch in dataset.take(1): 
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 83) # (batch_size, sequence_length, vocab_size)


In [17]:
# Draw one character id per timestep from the logits of the first sequence in
# the batch (sampling from the distribution rather than taking the argmax).
sampled_indices = tf.random.multinomial(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

In [18]:
# Display the sampled character ids.
sampled_indices

array([75, 56, 30, 19, 78, 36, 24, 11, 51, 39, 63, 74,  8,  0, 62, 76, 32,
       25,  8, 16, 18, 33, 25, 30, 79, 42, 19, 62, 78,  0,  4, 21, 37, 16,
       69, 47, 54, 12, 77, 73, 74, 35, 52, 48, 82, 31, 82, 41, 38, 42, 40,
       70, 37, 79, 25, 48, 18, 77, 19, 37, 27, 13, 21,  5, 32, 32, 43,  0,
       51, 74, 45, 13, 42, 36, 19, 34, 13, 31, 50, 58, 60, 73, 34, 27, 35,
       50, 16,  0, 43, 20, 19,  4, 35,  6, 34, 25, 33, 79, 57, 36])

In [19]:
# Decode the first input sequence and the ids sampled for it back into text,
# to compare what the model was given with what it predicted.
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

Input: 
 "2 gfed|cAeA fAed|cdef gfef|!\ngBB2 gBaB|gBB2 gfed|cdef gfec|dfec d2:|!\n\nX:68\nT:Dillon's Fancy\nZ: id:d"

Next Char Predictions: 
 "taE7wK=/ZNhs,\nguG>,46H>ExQ7gw\n#9L4nV^0vrsJ[W|F|PMQOoLx>W6v7LB19'GGR\nZsT1QK7I1FYcerIBJY4\nR87#J(I>HxbK"


In [20]:
def lossi(labels,logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    `logits` are treated as unnormalised scores (``from_logits=True``); the
    result is returned unreduced, exactly as the Keras backend function
    produces it.
    """
    crossentropy = tf.keras.backend.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

# Sanity-check the loss on the example batch; the printed scalar is the mean
# of the unreduced loss values.
example_batch_loss = lossi(target_example_batch,example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)") 
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 83)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.4190216


In [21]:
# Number of full passes over the dataset.
epochs = 10
optimizer = tf.train.AdamOptimizer()


checkpoint_dir = './training_chkponts'
# The "{epoch}" placeholder is filled in by `.format(epoch=epoch)` below, so
# each epoch writes its own checkpoint. Without it the format call is a no-op
# and every epoch overwrites the same "ckpt_" file.
checkpoint_prefix = os.path.join(checkpoint_dir,"ckpt_{epoch}")

# Mean loss of every training batch, in iteration order.
history = []
# plotter = util1.PeriodicPlotter(sec=1,xlabel='Iterations',ylabel='Loss')

for epoch in range(epochs):
    # The recurrent layer is stateful, so clear the state carried over from
    # the previous epoch. reset_states() returns nothing worth keeping.
    model.reset_states()

    # custom_msg = util1.custom_progress_text("Loss : %(loss)2.2f")
    # bar = util1.create_progress_bar(custom_msg)
    for inp, target in dataset:
        # Only the forward pass needs to be recorded on the tape.
        with tf.GradientTape() as tape:
            lossia = lossi(target, model(inp))
        # Differentiate and update the weights outside the recording context.
        grads = tape.gradient(lossia, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        history.append(lossia.numpy().mean())
        # custom_msg.update_mapping(loss=history[-1])
        # plotter.plot(history)
    # One checkpoint per epoch.
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

In [22]:
# Display the per-batch mean training loss recorded by the training loop.
history

[4.418793,
 4.3752522,
 3.780592,
 3.76109,
 3.6996508,
 3.547746,
 3.4951887,
 3.4046812,
 3.373907,
 3.3902133,
 3.464433,
 3.3601978,
 3.4228723,
 3.3167646,
 3.3637993,
 3.4170356,
 3.3419876,
 3.3444624,
 3.3420892,
 3.390226,
 3.3628933,
 3.3801916,
 3.3537023,
 3.2436635,
 3.307915,
 3.2845602,
 3.333327,
 3.2732892,
 3.2140276,
 3.235311,
 3.8831065,
 3.731568,
 3.435525,
 3.088342,
 3.113435,
 3.0615542,
 3.100753,
 3.03721,
 3.067346,
 2.918993,
 2.9774315,
 2.8924835,
 2.880531,
 2.9288635,
 2.8237922,
 2.8320215,
 2.835042,
 2.8036416,
 2.7438953,
 2.7130022,
 2.737503,
 2.7120583,
 2.666442,
 2.63426,
 2.6001081,
 2.6264374,
 2.530113,
 2.516898,
 2.492437,
 2.492936,
 2.4953377,
 2.359719,
 2.4450073,
 2.4236479,
 2.4038906,
 2.3441515,
 2.304076,
 2.303994,
 2.3492954,
 2.2239385,
 2.2206037,
 2.192122,
 2.1843913,
 2.1106339,
 2.0891309,
 2.084842,
 2.1127484,
 2.0266662,
 2.061989,
 2.0371702,
 2.0219824,
 2.0112062,
 1.9472604,
 1.9306308,
 1.9230396,
 1.9193436,
 1.9

In [23]:
# Rebuild the same architecture with a batch size of 1 for text generation.
# `model` now refers to this new model rather than the one that was trained.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the weights from the most recent checkpoint found in checkpoint_dir.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

# Build the model for an input of one sequence of unspecified length.
model.build(tf.TensorShape([1, None]))

In [24]:
# Print the layer stack of the batch-size-1 model to confirm the new shapes.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            21248     
_________________________________________________________________
cu_dnnlstm_1 (CuDNNLSTM)     (1, None, 1024)           5251072   
_________________________________________________________________
dense_1 (Dense)              (1, None, 83)             85075     
Total params: 5,357,395
Trainable params: 5,357,395
Non-trainable params: 0
_________________________________________________________________


In [25]:
def generate_text(model, start_string, generation_length=1000, temperature=1.0):
    """Sample a continuation of `start_string` from the model, one character at a time.

    Args:
        model: Stateful character-level model that takes a batch containing a
            single sequence of character ids and returns logits over the
            vocabulary for every position.
        start_string: Non-empty seed text. Each character is looked up in
            `char2idx`; a character that is not a key there raises KeyError.
        generation_length: Number of characters to generate after the seed.
        temperature: Positive divisor applied to the logits before sampling.
            Values below 1 sharpen the distribution, values above 1 flatten
            it; the default of 1.0 leaves the logits unscaled.

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not positive.
    """
    # Fail early with a clear message; an empty seed would otherwise surface
    # as an opaque error from inside TensorFlow.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Encode the seed and add a leading batch axis of size 1.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
    text_generated = []

    # Start from a clean recurrent state so earlier calls cannot leak in.
    model.reset_states()

    for _ in range(generation_length):
        # Drop the batch axis: one row of logits per input position.
        predictions = tf.squeeze(model(input_eval), 0) / temperature

        # Sample from every row, then keep the draw for the last position -
        # that is the prediction for the next character.
        predicted_id = tf.random.multinomial(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the new character back in; the stateful layer carries the
        # context of everything seen so far.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [26]:
# Generate a tune seeded with "X".
# NOTE(review): this rebinds `text`, which until now held the training corpus;
# re-running the vocabulary/encoding cells after this one would operate on the
# generated text instead - consider a distinct name here and in the cell that
# consumes it.
text = generate_text(model, start_string="X")

In [None]:
# Hand the generated text to the project's `util` helper; presumably it
# renders/plays the ABC notation - the helper is not shown here, confirm.
util.play_generated_song(text)