# Load the Data

https://www.tensorflow.org/text/tutorials/text_generation

In [1]:
import json

import tensorflow as tf

import numpy as np
import os
import time

In [2]:

def format_data(card):
    
    if (card['type'] == 'HERO'):
        return None
    card_string = card['name']
    card_string += ' is a '
    card_string += card['cardClass'] + ' ' + card['type']
    card_string += ' that costs '
    card_string += str(card['cost']) + ' mana. '
    if (card['type'] == 'MINION'):
        card_string += 'It has ' + str(card['attack']) + ' attack and ' + str(card['health']) + ' health. '
    elif (card['type'] == 'WEAPON'):
        card_string += 'It has ' + str(card['attack']) + ' attack and ' + str(card['durability']) + ' durability. '
    if 'text' in card:
        text = card['text']
        text = text.replace('$','')
        text = text.replace('<b>','')
        text = text.replace('</b>','')
        card_string += 'It\'s text reads: \"' + text + '\".'
    
    return card_string

In [3]:
# Do You Want to Print the JSON?
print_data = False

# Openning JSON as a Dictionary
with open('cards.collectible.json', encoding='utf-8') as f:
    data = json.load(f)

if print_data:
    print('data looks like:')
    print(data)

# Store Data in a string
text = ''
for card in data['allCards']:
    card_string = format_data(card) 
    if card_string != None:
        text += card_string + ' '

f.close()    

In [4]:
print(f"Length of text: {len(text)} characters")

Length of text: 764173 characters


In [5]:
print('first 250 characters of text:')
print(text[0:250])
print(text)

first 250 characters of text:
Flame Lance is a MAGE SPELL that costs 5 mana. It's text reads: "Deal 8 damage to a minion.". Effigy is a MAGE SPELL that costs 3 mana. It's text reads: "Secret: When a friendly minion dies, summon a random minion with the same Cost.". Fallen Hero is
Flame Lance is a MAGE SPELL that costs 5 mana. It's text reads: "Deal 8 damage to a minion.". Effigy is a MAGE SPELL that costs 3 mana. It's text reads: "Secret: When a friendly minion dies, summon a random minion with the same Cost.". Fallen Hero is a MAGE MINION that costs 2 mana. It has 3 attack and 2 health. It's text reads: "Your Hero Power deals 1 extra damage.". Arcane Blast is a MAGE SPELL that costs 1 mana. It's text reads: "Deal 2 damage to a minion. This spell gets double bonus from Spell Damage.". Polymorph: Boar is a MAGE SPELL that costs 3 mana. It's text reads: "Transform a minion into a 4/2 Boar with Charge.". Dalaran Aspirant is a MAGE MINION that costs 4 mana. It has 3 attack and 5 health. It

# Vectorize the Text

In [6]:
#vocab = sorted(set(text))
set_list = set(text)
print(set_list)
sorted_list = sorted(set_list)
print(sorted_list)
vocab = sorted_list
print(f'{len(vocab)} unique characters')

{'1', 'T', 'B', 'r', 'g', 'c', 'u', 'W', '/', 'D', '+', 'ñ', '0', '%', '9', 'a', ':', 'v', 'k', 'x', '3', 'p', 'i', 'L', 'n', '6', 'j', 'h', '!', 'S', '@', '-', '(', ' ', 's', "'", 'y', '{', ';', 'P', '.', '2', '>', ',', 'I', '7', 'd', 'Q', 'C', 'E', '"', '5', 'w', 'X', 'b', 'm', '&', 'V', 'J', 'K', 'o', 'f', 'R', '4', 'H', '[', 'U', 'e', 'N', '}', '\n', 't', 'F', 'q', '’', 'O', 'G', 'Z', ')', ']', '<', '#', 'z', 'A', 'M', 'Y', '8', 'l'}
['\n', ' ', '!', '"', '#', '%', '&', "'", '(', ')', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '}', 'ñ', '’']
88 unique characters


In [7]:
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None)
print(ids_from_chars)
print(ids_from_chars('"'))

chars_from_ids = tf.keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)
print(chars_from_ids)
print(chars_from_ids(4))

<keras.src.layers.preprocessing.string_lookup.StringLookup object at 0x00000171ADAA4FA0>
tf.Tensor(4, shape=(), dtype=int64)
<keras.src.layers.preprocessing.string_lookup.StringLookup object at 0x00000171F4E7F670>
tf.Tensor(b'"', shape=(), dtype=string)


In [8]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

# The Prediction Task

In [9]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
print(len(text))
print(all_ids) # Create a tensor with ids instead of chars of text

764173
tf.Tensor([36 70 59 ...  4 14  2], shape=(764173,), dtype=int64)


In [10]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

print(type(ids_dataset))

for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

<class 'tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset'>
F
l
a
m
e
 
L
a
n
c


In [11]:
seq_length = 100

sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

print(type(sequences))

for seq in sequences.take(1):
    print(chars_from_ids(seq))

<class 'tensorflow.python.data.ops.batch_op._BatchDataset'>
tf.Tensor(
[b'F' b'l' b'a' b'm' b'e' b' ' b'L' b'a' b'n' b'c' b'e' b' ' b'i' b's'
 b' ' b'a' b' ' b'M' b'A' b'G' b'E' b' ' b'S' b'P' b'E' b'L' b'L' b' '
 b't' b'h' b'a' b't' b' ' b'c' b'o' b's' b't' b's' b' ' b'5' b' ' b'm'
 b'a' b'n' b'a' b'.' b' ' b'I' b't' b"'" b's' b' ' b't' b'e' b'x' b't'
 b' ' b'r' b'e' b'a' b'd' b's' b':' b' ' b'"' b'D' b'e' b'a' b'l' b' '
 b'8' b' ' b'd' b'a' b'm' b'a' b'g' b'e' b' ' b't' b'o' b' ' b'a' b' '
 b'm' b'i' b'n' b'i' b'o' b'n' b'.' b'"' b'.' b' ' b'E' b'f' b'f' b'i'
 b'g' b'y' b' '], shape=(101,), dtype=string)


In [12]:
import numpy as np

# Define your dataset as a list
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Define the batch size
batch_size = 3

# Calculate the number of batches
num_batches = len(data) // batch_size

# Create an empty list to store batches
batches = []

# Iterate through the data to create batches
for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size
    batch = data[start_idx:end_idx]
    batches.append(batch)

# If there are any remaining data points, create a final batch
if len(data) % batch_size != 0:
    final_batch = data[num_batches * batch_size:]
    batches.append(final_batch)

# Now, batches contains your custom batched data
for batch in batches:
    print(batch)

[1, 2, 3]
[4, 5, 6]
[7, 8, 9]
[10]


In [13]:
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'Flame Lance is a MAGE SPELL that costs 5 mana. It\'s text reads: "Deal 8 damage to a minion.". Effigy '
b'is a MAGE SPELL that costs 3 mana. It\'s text reads: "Secret: When a friendly minion dies, summon a ra'
b'ndom minion with the same Cost.". Fallen Hero is a MAGE MINION that costs 2 mana. It has 3 attack and'
b' 2 health. It\'s text reads: "Your Hero Power deals 1 extra damage.". Arcane Blast is a MAGE SPELL tha'
b't costs 1 mana. It\'s text reads: "Deal 2 damage to a minion. This spell gets double bonus from Spell '


In [14]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [15]:
dataset = sequences.map(split_input_target)

for input_example, target_example in dataset.take(1):
    print('Input :',  text_from_ids(input_example).numpy())
    print('Target:', text_from_ids(target_example).numpy())

Input : b'Flame Lance is a MAGE SPELL that costs 5 mana. It\'s text reads: "Deal 8 damage to a minion.". Effigy'
Target: b'lame Lance is a MAGE SPELL that costs 5 mana. It\'s text reads: "Deal 8 damage to a minion.". Effigy '


# Create Training Batches

In [16]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

print(dataset)

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>


# Build the Model

In [17]:
vocab_size = len(ids_from_chars.get_vocabulary())
embedding_dim = 256
rnn_units = 1024

In [18]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [19]:
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [20]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 89) # (batch_size, sequence_length, vocab_size)


In [21]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  22784     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  91225     
                                                                 
Total params: 4052313 (15.46 MB)
Trainable params: 4052313 (15.46 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
print(sampled_indices)

[ 3 35 25 45 37 57 68 47 49  5 39 37 30  0 76 51 88 43  7 34 21 82 78  1
 81 53 45 60 59 70 74 53 17 84 62 18 46 68 62 59 19 36 52 43 77  5  1 19
 56 51 32 43  7 11 64 55 80 44 59 49 87  2 11  3 52  6 73 57 54 26 59 21
 29 86 41 67 40 58 78 29 85 23  1 72  2 56 63 58 81 54  8 35 81 67 51 55
 78 47 70 29]


In [23]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b') less.". Solar Eclipse is a DRUID SPELL that costs 2 mana. It\'s text reads: "The next spell you cas'

Next Char Predictions:
 b"!E9OG[jQS#IG@[UNK]rU\xe2\x80\x99M&D5xt\nwWObalpW1zd2Pjda3FVMs#\n3ZUBM&+fYvNaS\xc3\xb1 +!V%o[X:a5>}KiJ]t>{7\nn Ze]wX'EwiUYtQl>"


# Train the Model

In [24]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [25]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 89)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.4849944, shape=(), dtype=float32)


In [26]:
tf.exp(example_batch_mean_loss).numpy()

88.67645

In [27]:
model.compile(optimizer='adam', loss=loss)

In [28]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [29]:
EPOCHS = 20

In [30]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Generate Text

In [31]:
class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [32]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [33]:
start = time.time()
states = None
next_char = tf.constant(['Magic'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Magicing Scavenger is a NEUTRAL MINION that costs 5 mana. It has 5 attack and 5 health. It's text reads: "Battlecry: If you're holding a Dragon, destroy a friendly minion.". Celemonsair Coblast is a ROGUE MINION that costs 5 mana. It has 4 attack and 6 health. It's text reads: "[x]Battlecry: Give a friendly
minion "Deathrattle:
Summon a 3/3 Wolf with Stealth.
Lasts 3 turns.". Palkatity is a ROGUE SPELL that costs 2 mana. It's text reads: "[x]Discover a Deathrattle minion. If you play it this turn,
repeat this effect.". Secremord Geder is a DRUID MINION that costs 4 mana. It has 3 attack and 3 health. It's text reads: "Battlecry: Discover a Deathrattle minion from the pore to your hand.". Ghostly Greamus is a DRUID MINION that costs 3 mana. It has 3 attack and 3 health. It's text reads: "[x]At the end of your turn,
summon a Reblect
from your deck.". Azshara Runner is a HUNTER MINION that costs 4 mana. It has 4 attack and 2 health. It's text reads: "[x]Battlecry: Gain +1 Attack for each


In [34]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b'ROMEO: Starnet in your deck.\nReward: Time Warp.". Fajacul Shidment is a PRIEST MINION that costs 5 mana. It has 4 attack and 4 health. It\'s text reads: "At the end of your turn, deal 6 damage to a random enemy minion.". Unhol\'ding Manisaul is a DEATHKNIGHT WEAPON that costs 5 mana. It has 5 attack and 6 health. It\'s text reads: "After a friendly minion dies, deal 5 damage to your hero.". Call of the Boomstuce is a DEMONHUNTER SPELL that costs 2 mana. It\'s text reads: "Summon three 1/1 Silver Hand Rice with Cost to all enemy minions.". Webweaver is a DEMONHUNTER MINION that costs 3 mana. It has 4 attack and 2 health. It\'s text reads: "Whenever another minion takes damage, deal that much damage this turn.". Veweton Mine is a WARRIOR SPELL that costs 2 mana. It\'s text reads: "Give a friendly Murloc Taunt.". Innerve of Kalian is a NEUTRAL MINION that costs 5 mana. It has 5 attack and 5 health. It\'s text reads: "Battlecry: For each card in your hand, deal 6 damage to a

In [35]:
tf.saved_model.save(one_step_model, 'CardGeneratorRNN2')

INFO:tensorflow:Assets written to: CardGeneratorRNN2\assets


INFO:tensorflow:Assets written to: CardGeneratorRNN2\assets


In [36]:
one_step_reloaded = tf.saved_model.load('CardGeneratorRNN2')

In [37]:
states = None
next_char = tf.constant(['Star'])
result = [next_char]

for n in range(250):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

Starget minion into your deck. When drawn, restore #3 Health to
a deads and gain its stats.". Plagierook Vastric is a PALADIN MINION that costs 2 mana. It has 2 attack and 1 health.  Blood Bow is a HUNTER MINION that costs 9 mana. It has costs 3 mana. It


In [38]:
#Fire is a DRUID SPELL that costs 3 mana. It's text reads: "Deal 4 damage to a minion and give it Rush."
#Gidan Jower is a NEUTRAL MINION that costs 5 mana. It has 5 attack and 5 health. It's text reads: "Summon five 2/2 Treants
#Magiked Sletherwing is a NEUTRAL MINION that costs 2 mana. It has 2 attack and 2 health. It's text reads: "Deathrattle: Summon a 3/1 Undead with Taunt.
#Wanted In OG! is a WARRIOR SPELL that costs 2 mana. It's text reads: "Freeze all enemy minions. Honorable Kill: Draw a card.
#Mage Souls is a DRUID SPELL that costs 3 mana. It's text reads: "Freeze a minion and the minions next to it, and deal 2 damage to the enemy hero.
