In [None]:
import tensorflow as tf
import numpy as np
import os
import time

# Step 1: Download the Shakespeare corpus; get_file's return value is used below as the local file path
path_to_file = tf.keras.utils.get_file('shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Read raw bytes and decode explicitly as UTF-8. The context manager closes
# the file handle as soon as the read finishes instead of leaving it open
# until garbage collection.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print(f"Length of text: {len(text)} characters")

# Step 2: Create vocabulary
# Every distinct character in the corpus, sorted so that ids are deterministic.
vocab = sorted(set(text))
print(f"{len(vocab)} unique characters")

# Lookup tables in both directions: character -> id and id -> character.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# Vectorize the text: one integer id per character of the corpus.
text_as_int = np.array([char2idx[ch] for ch in text])

# Step 3: Create training sequences
seq_length = 100
# Each chunk carries one extra character so it can later be split into an
# input and a target shifted by one position.
chunk_length = seq_length + 1
examples_per_epoch = len(text) // chunk_length

# Stream of single character ids, regrouped into fixed-size chunks; a
# trailing chunk shorter than chunk_length is dropped.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(chunk_length, drop_remainder=True)

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original chunk.
    """
    input_seq = chunk[:-1]
    target_seq = chunk[1:]
    return input_seq, target_seq

# Shuffle and batch
BATCH_SIZE = 64
BUFFER_SIZE = 10000

# chunk -> (input, target) pairs -> shuffled within a BUFFER_SIZE window ->
# batches of exactly BATCH_SIZE pairs (a short final batch is dropped).
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

# Step 4: Build model (stateless)
vocab_size = len(vocab)
embedding_dim = 256   # width of each character embedding vector
rnn_units = 1024      # number of GRU units

# Character ids -> embeddings -> GRU emitting an output at every timestep ->
# one score per vocabulary entry at every timestep.
# Alternative previously tried here: stacked LSTM layers (200, then 100 units).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(None,)),  # variable-length sequences of ids
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.GRU(rnn_units, return_sequences=True),
    tf.keras.layers.Dense(vocab_size),
])

model.summary()

In [None]:
# Step 5: Compile and train

# Targets are integer character ids, hence the sparse loss; from_logits=True
# tells the loss it receives unnormalized scores rather than probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn)


In [None]:
# Number of full passes over the training dataset.
EPOCHS = 20
history = model.fit(dataset, epochs=EPOCHS)

In [None]:
# Encode a short prompt as a list of vocabulary ids, one per character.
start_string = "ROME"
input_eval = [char2idx[ch] for ch in start_string]
print(input_eval)


In [None]:
# Add a leading batch dimension so the prompt is a batch containing a single
# sequence. The ids are rebuilt from start_string instead of reusing
# input_eval, so re-running this cell does not stack on yet another
# dimension each time.
input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
input_eval

In [None]:
# Forward pass over the encoded prompt. With the model defined earlier
# (GRU with return_sequences=True followed by Dense(vocab_size)) the output
# should hold one vocab-sized score vector per input timestep.
predictions = model.predict(input_eval)
# Show the output shape as a sanity check.
predictions.shape

In [None]:
# Keep only the scores at the final timestep: those are the model's scores
# for the character that follows the prompt.
# NOTE(review): this rebinds `predictions`, so re-running this cell without
# first re-running the predict cell indexes an already-sliced array.
predictions = predictions[:, -1, :]  # Take last timestep
predictions

In [None]:
# Temperature scaling of the scores before sampling: dividing by a value
# below 1 sharpens the sampling distribution, above 1 flattens it.
# NOTE(review): this rebinds `predictions`; re-running this cell on its own
# divides by the temperature a second time.
temperature = 0.8
predictions = predictions / temperature

In [None]:
# Draw one character id at random, weighted by the (scaled) scores.
sampled_ids = tf.random.categorical(predictions, num_samples=1)
# [-1, 0] picks the last row of the batch and its single sample.
predicted_id = sampled_ids[-1, 0].numpy()
predicted_id

In [None]:
# Map the sampled id back to its character through the id -> character array.
predicted_char = idx2char[predicted_id]
print(predicted_char)

In [None]:
# Step 6: Text generation function
def generate_text(model, start_string, num_generate=500, temperature=1.0,
                  max_context=100):
    """Sample text from the model one character at a time.

    The model in this notebook is stateless, so every forward pass starts
    from a fresh recurrent state. The running context (the prompt plus
    everything sampled so far) is therefore fed back in on every step.
    Feeding only the most recently sampled character would make each
    prediction depend on that single character alone.

    Args:
        model: Callable taking a batch of id sequences with shape (1, time)
            and returning scores indexed as [batch, time, vocab].
        start_string: Non-empty prompt. Every character must be a key of
            ``char2idx``.
        num_generate: Number of characters to sample.
        temperature: Positive divisor applied to the scores before sampling.
            Values below 1 make the output more predictable, values above 1
            more surprising.
        max_context: Maximum number of trailing characters passed to the
            model on each step. The default of 100 matches the sequence
            length this notebook trains on and bounds the per-step cost.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty, ``temperature`` is not
            positive, or ``max_context`` is less than 1.
        KeyError: If ``start_string`` contains a character that is not in
            ``char2idx``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")
    if max_context < 1:
        raise ValueError("max_context must be at least 1")

    # Ids of the prompt; sampled ids are appended as generation proceeds.
    context_ids = [char2idx[s] for s in start_string]
    text_generated = []

    for _ in range(num_generate):
        # Batch of one sequence holding the most recent max_context ids.
        input_eval = tf.expand_dims(context_ids[-max_context:], 0)

        predictions = model(input_eval)
        predictions = predictions[:, -1, :]  # Take last timestep
        predictions = predictions / temperature
        sampled = tf.random.categorical(predictions, num_samples=1)
        # Plain int so the context list stays homogeneous.
        predicted_id = int(sampled[-1, 0].numpy())

        context_ids.append(predicted_id)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)



In [None]:
# Step 7: Try it!
# Default length, sampled slightly more conservatively than temperature 1.0.
sample_text = generate_text(model, start_string="ROMEO: ", temperature=0.8)
print(sample_text)

In [None]:
# Shorter sample from a different prompt, at the default temperature.
short_sample = generate_text(model, start_string="ROME", num_generate=100)
print(short_sample)

In [None]:
import plotly.express as px
from sklearn.manifold import TSNE
import pandas as pd

# Get the embedding layer weights.
# The layer is located by type rather than by position so the lookup keeps
# working if other layers are ever placed in front of it.
embedding_layer = next(
    (layer for layer in model.layers
     if isinstance(layer, tf.keras.layers.Embedding)),
    None,
)
if embedding_layer is None:
    raise ValueError("model has no Embedding layer to visualize")
# First (and only) weight array of the layer: one row per vocabulary entry.
embedding_weights = embedding_layer.get_weights()[0]

# Reduce dimensionality using t-SNE.
# t-SNE requires perplexity to be smaller than the number of samples, so it
# is capped for small vocabularies; 30 is used whenever the vocabulary allows.
tsne_perplexity = min(30, len(embedding_weights) - 1)
tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
embedding_2d = tsne.fit_transform(embedding_weights)

# Create a pandas DataFrame for Plotly: one row per character.
df_embeddings = pd.DataFrame(embedding_2d, columns=['TSNE-1', 'TSNE-2'])
df_embeddings['character'] = idx2char # Add character labels

# Create an interactive scatter plot using Plotly, labelling each point with its character.
fig = px.scatter(df_embeddings, x='TSNE-1', y='TSNE-2', text='character', title='Character Embeddings Visualization (t-SNE)')
fig.update_traces(textposition='top center')
fig.show()