In [1]:
import tensorflow as tf

import numpy as np
import os
import time
import pandas as pd

df = pd.read_csv('sqllab_untitled_query_7_20230719T124351.csv')

In [3]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [2]:
df_filtered = df[(df['result']=='win')]
skills = list(df_filtered['skills'])

skills_dataset = []
all_skill = []
for s in skills:
    skills_ind = s.split(',')
    skills_ind = [sk.split('_')[-1] for sk in skills_ind if sk.split('_')[-1] != ""]

    if len(skills_ind) >= 30 and len(skills_ind) <= 65:
        skills_dataset.append(skills_ind)
        all_skill += skills_ind
    
all_skill = list(set(all_skill)) + ["end"]

ids_from_chars = lambda skills: [all_skill.index(s) for s in skills]
chars_from_ids = lambda ids: [all_skill[i] for i in ids]

lengths = [len(s) for s in skills_dataset]
max_length = max(lengths)
dataset_combined = []
for skills in skills_dataset:
    if len(skills) <= max_length:
        to_add = ids_from_chars(skills) + [27] * (max_length - len(skills))
        dataset_combined += to_add

In [4]:
ids_dataset = tf.data.Dataset.from_tensor_slices(dataset_combined)
sequences = ids_dataset.batch(max_length)
for seq in sequences.take(1):
    print(chars_from_ids(seq))
    
dataset = sequences.map(split_input_target)

for input_example, target_example in dataset.take(1):
    print(input_example)
    print(target_example)

['Soundwave Field', 'Guard Badge', 'Revolver', 'Laser Beam', 'Soundwave Field', 'Drill', 'Wanted Poster', 'Drill', 'Drill', 'Revolver', 'Drill', 'Energy Drink', 'Guard Badge', 'Soundwave Field', 'Revolver', 'Revolver', 'Drill', 'Energy Core', 'Wanted Poster', 'Guard Badge', 'Pocket Watch', 'Laser Beam', 'Captain Shield', 'Laser Beam', 'Guard Badge', 'Soundwave Field', 'Soundwave Field', 'Guard Badge', 'Laser Beam', 'Captain Boot', 'Laser Beam', 'Ammo Thruster', 'Ammo Thruster', 'Captain Shield', 'Captain Boot', 'Captain Shield', 'Captain Boot', 'Captain Shield', 'Wanted Poster', 'Captain Boot', 'Energy Core', 'Captain Shield', 'Wanted Poster', 'Wanted Poster', 'Energy Core', 'Energy Core', 'Ammo Thruster', 'Ammo Thruster', 'Ammo Thruster', 'Pocket Watch', 'Captain Boot', 'Energy Core', 'Pocket Watch', 'Pocket Watch', 'Pocket Watch', 'Energy Drink', 'Energy Drink', 'Energy Drink', 'Energy Drink', 'end', 'end', 'end', 'end', 'end', 'end']
tf.Tensor(
[ 6 19 16  7  6 11 10 11 11 16 11  1 1

In [5]:
BATCH_SIZE = 128
BUFFER_SIZE = 5000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(128, None), dtype=tf.int32, name=None), TensorSpec(shape=(128, None), dtype=tf.int32, name=None))>

In [9]:
vocab_size = len(all_skill)
embedding_dim = 128
rnn_units = 1024

In [10]:
class RNNModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True, dropout=0.1)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            tates = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        else:
            return x
  
model = RNNModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units
)

In [11]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
model.summary()

(128, 64, 28) # (batch_size, sequence_length, vocab_size)
Model: "rnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  3584      
                                                                 
 gru (GRU)                   multiple                  3545088   
                                                                 
 dense (Dense)               multiple                  28700     
                                                                 
Total params: 3,577,372
Trainable params: 3,577,372
Non-trainable params: 0
_________________________________________________________________


In [12]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
print("Input:\n", chars_from_ids(input_example_batch[0]))
print()
print("Next Char Predictions:\n", chars_from_ids(sampled_indices))

Input:
 ['Revolver', 'Revolver', 'Revolver', 'Drill', 'Drill', 'Soundwave Field', 'Black Hawk', 'Revolver', 'Rocket', 'Drill', 'Guard Badge', 'Power Gauntlet', 'Drill', 'Soundwave Field', 'Revolver', 'Soundwave Field', 'Drill', 'Guard Badge', 'Soundwave Field', 'Soundwave Field', 'Black Hawk', 'Guard Badge', 'Power Gauntlet', 'Guard Badge', 'Guard Badge', 'Captain Boot', 'Black Hawk', 'Black Hawk', 'Black Hawk', 'Power Gauntlet', 'Power Gauntlet', 'Rocket', 'Rocket', 'Rocket', 'Rocket', 'Captain Boot', 'Power Gauntlet', 'Captain Boot', 'Life Insurance', 'Energy Drink', 'Hi-Power Magnet', 'Wanted Poster', 'Life Insurance', 'Energy Drink', 'Energy Drink', 'Energy Drink', 'Energy Drink', 'Captain Boot', 'Captain Boot', 'Life Insurance', 'Wanted Poster', 'Wanted Poster', 'Life Insurance', 'end', 'end', 'end', 'end', 'end', 'end', 'end', 'end', 'end', 'end', 'end']

Next Char Predictions:
 ['Soundwave Field', 'Black Hawk', 'Rocket', 'Energy Drink', 'Lightsaber', 'Black Hawk', 'Captain Boot'

In [14]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

print(tf.exp(example_batch_mean_loss).numpy())

model.compile(optimizer='adam', loss=loss)

Prediction shape:  (128, 64, 28)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(3.3328528, shape=(), dtype=float32)
28.01816


In [15]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_local_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

In [19]:
print(tf.config.list_physical_devices('GPU'))

[]


In [23]:
EPOCHS = 20
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20


KeyboardInterrupt: 

In [22]:
EPOCHS

20