<a href="https://colab.research.google.com/github/djlancelot/rejtification/blob/master/notebook/Rejtification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
%tensorflow_version 2.x
import tensorflow as tf
from tqdm import tqdm
import numpy as np
import os
# Require TensorFlow to report a GPU under the name '/device:GPU:0';
# any other answer aborts the notebook right here.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [2]:
!wget https://github.com/djlancelot/rejtification/raw/master/data/selection.txt

--2020-12-04 08:02:29--  https://github.com/djlancelot/rejtification/raw/master/data/selection.txt
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/djlancelot/rejtification/master/data/selection.txt [following]
--2020-12-04 08:02:30--  https://raw.githubusercontent.com/djlancelot/rejtification/master/data/selection.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1193731 (1.1M) [text/plain]
Saving to: ‘selection.txt’


2020-12-04 08:02:31 (18.1 MB/s) - ‘selection.txt’ saved [1193731/1193731]



In [3]:
# Read the whole corpus into memory as a single string.
# The text contains accented Hungarian characters, so decode it explicitly as
# UTF-8 instead of relying on the platform's default encoding.
with open("selection.txt", "r", encoding="utf-8") as selection:
  text = selection.read()

In [4]:
# Show the first ten characters of the corpus as a quick sanity check.
text[:10]

'\nRejtő Jen'

In [5]:
# Total number of characters in the corpus.
len(text)

1096489

In [19]:
# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = sorted(set(text))

# Lookup tables in both directions: character -> index and index -> character.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# The corpus as an array of vocabulary indices, one entry per character.
text_vec = np.array([char2idx[symbol] for symbol in text])
text_vec[:5]

array([ 1, 42, 55, 60, 70])

In [20]:
def get_batch(vector, seq_length, batch_size):
  """Sample a random training batch of input/target sequence pairs.

  Args:
    vector: 1-D NumPy array holding the vectorized text.
    seq_length: number of positions in every sampled sequence.
    batch_size: number of sequences to sample (with replacement).

  Returns:
    A tuple (x_batch, y_batch) of arrays, each of shape
    [batch_size, seq_length]. y_batch holds the same windows as x_batch
    shifted one position to the right in `vector`, i.e. the next-character
    targets for x_batch.

  Raises:
    ValueError: if `vector` is too short to hold a window of `seq_length`
      inputs plus the shifted targets.
  """
  # index of the last element of the vectorized text
  n = vector.shape[0] - 1
  num_starts = n - seq_length
  if num_starts <= 0:
    # Fail with an explicit message instead of the unrelated-looking error
    # np.random.choice raises for a non-positive population size.
    raise ValueError(
      'vector of length {} is too short for seq_length={}'.format(
        vector.shape[0], seq_length))

  # randomly choose the starting indices for the examples in the training batch
  idx = np.random.choice(num_starts, batch_size)

  # One row of consecutive positions per starting index, gathered in a single
  # vectorized lookup; the targets are the same positions shifted by one.
  positions = idx[:, None] + np.arange(seq_length)
  x_batch = vector[positions]
  y_batch = vector[positions + 1]
  return x_batch, y_batch

In [21]:
# Draw one short example and print it timestep by timestep: the character fed
# to the network and the character it is expected to predict next.
x_batch, y_batch = get_batch(text_vec, seq_length=5, batch_size=1)

for i, (input_idx, target_idx) in enumerate(zip(np.squeeze(x_batch), np.squeeze(y_batch))):
    print(
        "Step {:3d}".format(i),
        "  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])),
        "  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])),
        sep="\n",
    )

Step   0
  input: 2 (' ')
  expected output: 56 ('f')
Step   1
  input: 56 ('f')
  expected output: 55 ('e')
Step   2
  input: 55 ('e')
  expected output: 62 ('l')
Step   3
  input: 62 ('l')
  expected output: 55 ('e')
Step   4
  input: 55 ('e')
  expected output: 62 ('l')


In [22]:
def LSTM(rnn_units):
  """Create the Keras LSTM layer used by this notebook's model.

  The layer has `rnn_units` units and is configured to return the full output
  sequence (`return_sequences=True`) and to be stateful (`stateful=True`).
  """
  layer_options = dict(
    return_sequences=True,
    recurrent_initializer='glorot_uniform',
    recurrent_activation='sigmoid',
    stateful=True,
  )
  return tf.keras.layers.LSTM(rnn_units, **layer_options)

In [24]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the Embedding -> LSTM -> Dense character model.

  Args:
    vocab_size: number of distinct characters; used as the embedding input
      size and as the width of the final Dense layer.
    embedding_dim: size of the dense vector each character index maps to.
    rnn_units: number of units in the LSTM layer.
    batch_size: batch dimension fixed into the model's input shape.

  Returns:
    A `tf.keras.Sequential` model with input shape [batch_size, None].
  """
  # Embedding: character indices -> dense vectors of size `embedding_dim`.
  embedding = tf.keras.layers.Embedding(
    vocab_size, embedding_dim, batch_input_shape=[batch_size, None])

  # Recurrent layer with `rnn_units` units, created by the LSTM helper.
  recurrent = LSTM(rnn_units)

  # Dense projection of the LSTM output to one value per vocabulary entry.
  projection = tf.keras.layers.Dense(vocab_size)

  return tf.keras.Sequential([embedding, recurrent, projection])

# Build a small demonstration model and print its layer summary. The
#   hyperparameters actually used for training are set in a later cell.
model = build_model(
  vocab_size=len(vocab),
  embedding_dim=300,
  rnn_units=1024,
  batch_size=32,
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (32, None, 300)           31200     
_________________________________________________________________
lstm_1 (LSTM)                (32, None, 1024)          5427200   
_________________________________________________________________
dense_1 (Dense)              (32, None, 104)           106600    
Total params: 5,565,000
Trainable params: 5,565,000
Non-trainable params: 0
_________________________________________________________________


In [27]:
def compute_loss(labels, logits):
  """Sparse categorical cross-entropy between `labels` and `logits`.

  `logits` are passed with `from_logits=True`. This function applies no
  reduction of its own; it returns whatever the Keras loss function returns.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
    labels, logits, from_logits=True)

In [46]:
### Hyperparameter setting and optimization ###

# Where training checkpoints are written.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")

# Model shape.
vocab_size = len(vocab)  # one entry per distinct character
embedding_dim = 512
rnn_units = 2048  # number of LSTM units

# Optimization settings.
num_training_iterations = 1600  # number of training steps; raise to train longer
batch_size = 80  # sequences per training batch
seq_length = 160  # characters per training sequence
learning_rate = 5e-3  # step size handed to the optimizer

In [47]:
### Define optimizer and training operation ###

# Fresh model for training, built from the hyperparameters defined above.
model = build_model(
  vocab_size=vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=batch_size,
)

# Adam optimizer with the configured learning rate. Other optimizers are
#   listed at https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)


@tf.function
def train_step(x, y):
  """Run one optimization step on the batch (x, y) and return its loss.

  Uses the module-level `model`, `optimizer` and `compute_loss`.
  """
  # Record the forward pass so the loss can be differentiated afterwards.
  with tf.GradientTape() as tape:
    loss = compute_loss(y, model(x))

  # Gradient of the loss with respect to every trainable model parameter.
  weights = model.trainable_variables
  grads = tape.gradient(loss, weights)

  # Let the optimizer update the parameters from those gradients.
  optimizer.apply_gradients(zip(grads, weights))
  return loss

##################
# Begin training!#
##################

# Mean loss of every training step, in order.
history = []
if hasattr(tqdm, '_instances'): tqdm._instances.clear() # clear if it exists

# NOTE(review): the LSTM is built with stateful=True and its state is never
#   reset between these randomly sampled batches - confirm this is intended.
# The loop variable is called `step` (not `iter`) so the built-in iter() is
#   not shadowed for the rest of the kernel session.
for step in tqdm(range(num_training_iterations)):

  # Grab a batch and propagate it through the network
  x_batch, y_batch = get_batch(text_vec, seq_length, batch_size)
  loss = train_step(x_batch, y_batch)

  # Reduce the loss tensor to one number once and reuse it below
  mean_loss = loss.numpy().mean()
  history.append(mean_loss)

  # Every 100 steps: report progress and checkpoint the current weights
  if step % 100 == 0:
    print(mean_loss)
    model.save_weights(checkpoint_prefix)

# Save the trained model and the weights
model.save_weights(checkpoint_prefix)

  0%|          | 1/1600 [00:01<50:38,  1.90s/it]

4.644745


  6%|▋         | 101/1600 [01:03<16:44,  1.49it/s]

2.3111217


 13%|█▎        | 201/1600 [02:07<15:36,  1.49it/s]

1.9081161


 19%|█▉        | 301/1600 [03:11<14:34,  1.49it/s]

1.6532043


 25%|██▌       | 401/1600 [04:15<13:32,  1.48it/s]

1.5445389


 31%|███▏      | 501/1600 [05:18<12:19,  1.49it/s]

1.4640526


 38%|███▊      | 601/1600 [06:22<11:13,  1.48it/s]

1.3764997


 44%|████▍     | 701/1600 [07:26<10:05,  1.49it/s]

1.3338128


 50%|█████     | 801/1600 [08:30<08:56,  1.49it/s]

1.2952191


 56%|█████▋    | 901/1600 [09:34<07:53,  1.48it/s]

1.2453682


 63%|██████▎   | 1001/1600 [10:37<06:43,  1.48it/s]

1.2602403


 69%|██████▉   | 1101/1600 [11:41<05:35,  1.49it/s]

1.2120409


 75%|███████▌  | 1201/1600 [12:45<04:29,  1.48it/s]

1.1339723


 81%|████████▏ | 1301/1600 [13:49<03:21,  1.48it/s]

1.1346508


 88%|████████▊ | 1401/1600 [14:53<02:14,  1.48it/s]

1.1049399


 94%|█████████▍| 1501/1600 [15:57<01:06,  1.48it/s]

1.0605714


100%|██████████| 1600/1600 [17:00<00:00,  1.57it/s]


In [48]:
# Rebuild the model with batch_size=1: build_model fixes the batch dimension
# into the input shape, and text generation feeds a single sequence at a time.
pmodel = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the model weights for the last checkpoint after training.
# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint, so fail with a clear message instead of passing None along.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
    'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))
pmodel.load_weights(latest_checkpoint)
pmodel.build(tf.TensorShape([1, None]))

pmodel.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (1, None, 512)            53248     
_________________________________________________________________
lstm_7 (LSTM)                (1, None, 2048)           20979712  
_________________________________________________________________
dense_7 (Dense)              (1, None, 104)            213096    
Total params: 21,246,056
Trainable params: 21,246,056
Non-trainable params: 0
_________________________________________________________________


In [49]:

def generate_text(model, start_string, generation_length=300):
  """Generate text by repeatedly sampling the next character from `model`.

  Relies on the module-level lookup tables `char2idx` and `idx2char`.

  Args:
    model: model that is called on a [1, length] batch of character indices
      and that provides `reset_states()`.
    start_string: non-empty seed text; every character must be a key of
      `char2idx`.
    generation_length: number of characters to sample.

  Returns:
    `start_string` followed by the `generation_length` sampled characters.

  Raises:
    ValueError: if `start_string` is empty.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  if not start_string:
    # An empty seed would only fail later, inside the model call.
    raise ValueError('start_string must contain at least one character')

  # Convert the start string to numbers (vectorize) and add a batch dimension
  input_eval = [char2idx[char] for char in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Characters sampled so far
  text_generated = []

  # Here batch size == 1
  model.reset_states()
  # Same guard as the training cell: only clear if the attribute exists
  if hasattr(tqdm, '_instances'): tqdm._instances.clear()

  for _ in tqdm(range(generation_length)):
    # Evaluate the inputs and generate the next character predictions
    predictions = model(input_eval)

    # Remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # Sample from the predicted distribution; keep the sample for the last
    #   position only
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Pass the prediction along with the previous hidden state
    #   as the next inputs to the model
    input_eval = tf.expand_dims([predicted_id], 0)

    # Map the sampled index back to its character
    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [57]:
# Sample 100 characters from the restored model, seeded with a short prompt.
generated_text = generate_text(
  pmodel,
  start_string="A sziget magas",
  generation_length=100,
)
generated_text

100%|██████████| 100/100 [00:00<00:00, 144.91it/s]


'A sziget magasságában, és az úty áll a mozdulattal, a hajón sokat viszont a kentemet fog a gitárt vigyálati arrafé'

In [45]:
# Display whatever string `generated_text` currently holds.
generated_text

'Cirkáló Fülégykorr lengeres elgy, gyérűnében és eszélán síssalják hőéptról a szébe akarutatjul?\n- Mijgon, mincs'