In [1]:
import tensorflow as tf
import time

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
# Define the model (this must stay in sync with train_and_save_model.py)
class RnnGRUModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense1 = tf.keras.layers.Dense(vocab_size)
    self.dense2 = tf.keras.layers.Dense(vocab_size)
    
  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense1(x, training=training)
    x = self.dense2(x, training=training)
    
    if return_state:
      return x, states
    else:
      return x

In [3]:
# load other needed layers
ids_from_chars = tf.keras.models.load_model('ids_from_chars')
chars_from_ids = tf.keras.models.load_model('chars_from_ids')

Metal device set to: Apple M1


2022-01-17 10:36:55.703914: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-17 10:36:55.704039: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-01-17 10:36:55.722943: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-01-17 10:36:55.723383: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-01-17 10:36:55.723439: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-17 10:36:55.740241: I tensorflow/core/grappler/

In [4]:
# initialize the base model
vocab_size = 28
embedding_dim = 256
rnn_units = 1024

loaded_model = RnnGRUModel(
    # Be sure the vocabulary size matches the `StringLookup` layers.
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

# load the saved weights
loaded_model.load_weights("base_model_saved_weights")

In [11]:
# Define the model wrapper which provides utilities for running the model and doing word evaluation
# (this must stay in sync with train_and_save_model.py)
# TODO actually this should live in its own file
class ModelWrapper():
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=0.5):
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

  def probability_of_letter(this, logits, letter):
    return (tf.squeeze(logits)[this.ids_from_chars(letter).numpy()]).numpy()

  def evaluate_word(this, word, show_work):
      # Seed the model with '**'
      seed_chars = tf.constant(['**'])
      input_chars = tf.strings.unicode_split(seed_chars, 'UTF-8')
      input_ids = this.ids_from_chars(input_chars).to_tensor()
      states = None

      # get the initial probability distribution
      predicted_logits, states = this.model(inputs=input_ids, states=states, return_state=True)
      predicted_logits = predicted_logits[:, -1, :]/this.temperature

      total_prob = 0
      for letter in word:
        # check the log probability of the letter given current state
        probability = this.probability_of_letter(predicted_logits, letter)
        total_prob += probability
        if show_work:
          print("Probability of", letter, "=", probability)

        # feed this letter into the model
        this_char = tf.constant([letter])        
        input_char = tf.strings.unicode_split(this_char, 'UTF-8')
        this_input_id = this.ids_from_chars(input_char).to_tensor()
        predicted_logits, states = this.model(inputs=this_input_id, states=states,
                                              return_state=True)
        predicted_logits = predicted_logits[:, -1, :]/this.temperature

      # check the log probability of '*' given current state
      probability = this.probability_of_letter(predicted_logits, '*')
      # TODO This assumes that the probability of the word is the product of the probability of each letter
      # the problem is that I'm not really training the RNN to generate probabilities for each letter. In otherwords
      # the log likelihoods in the logits is not real, just directionally correct
      total_prob += probability
      if show_work:
        print("Probability of * =", probability)

      # TODO normalize by length
      # TODO actually, shouldn't the model do this automatically if it's well trained?
      return total_prob

# initialize the model wrapper
model_wrapper = ModelWrapper(loaded_model, chars_from_ids, ids_from_chars)

In [12]:
model_wrapper.evaluate_word('test', True)

Probability of t = 0.939692
Probability of e = 7.774877
Probability of s = 3.583509
Probability of t = 10.354789
Probability of * = 5.4483023


28.101169109344482