In [8]:
import numpy as np
import io
import tensorflow as tf
from tensorflow.keras import layers, models

# Load the data
with io.open('shakespeare_train.txt', 'r', encoding='utf8') as f:
    train_text = f.read()

with io.open('shakespeare_valid.txt', 'r', encoding='utf8') as f:
    valid_text = f.read()

# Create vocabulary and mappings
vocab = sorted(set(train_text))
vocab_size = len(vocab)
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))


# Convert text to integers
train_data = np.array([vocab_to_int[c] for c in train_text], dtype=np.int32)
valid_data = np.array([vocab_to_int[c] for c in valid_text], dtype=np.int32)
print(train_data)
# temp
temp_size = 10000
temp_size_v = 1000
train_data = train_data[:temp_size]  # Use the first 100,000 characters
valid_data = valid_data[:temp_size_v] 



[18 49 58 ... 52  2  0]


In [9]:
import matplotlib.pyplot as plt

def plot_loss(history):
    plt.plot(history.history['loss'], label='Training BPC')
    plt.plot(history.history['val_loss'], label='Validation BPC')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Bits Per Character')
    plt.title('Learning Curve')
    plt.show()

In [10]:
def bpc_loss(y_true, y_pred):
    y_true = tf.cast(y_true, tf.int32)
    scce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
    loss = scce(y_true, y_pred)
    loss_in_bits = loss / tf.math.log(2.0)
    return tf.reduce_mean(loss_in_bits)


In [None]:
def create_tf_dataset(data, seq_length, batch_size):
    # Convert data to TensorFlow Dataset
    dataset = tf.data.Dataset.from_tensor_slices(data)
    
    # Create sequences
    sequences = dataset.window(seq_length + 1, shift=1, drop_remainder=True)
    sequences = sequences.flat_map(lambda window: window.batch(seq_length + 1))
    # Split sequences into input and target
    def split_input_target(seq):
        input_seq = seq[:-1]
        target_seq = seq[1:]
        input_seq = tf.one_hot(input_seq, depth=vocab_size)
        return input_seq, target_seq
    
    dataset = sequences.map(split_input_target,  num_parallel_calls=tf.data.AUTOTUNE)
    # Shuffle, batch, and prefetch
    dataset = dataset.shuffle(10000).batch(batch_size, drop_remainder=True).prefetch(tf.data.AUTOTUNE)
    
    return dataset

seq_length = 100
batch_size = 64

train_dataset = create_tf_dataset(train_data, seq_length, batch_size)
valid_dataset = create_tf_dataset(valid_data, seq_length, batch_size)


In [12]:


# Define a custom callback to reset hidden states at the start of each epoch
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='model_epoch_{epoch}',
    save_weights_only=True,
    save_freq='epoch'
)

def generate_text(model, start_string, num_generate=100):
    # Convert start string to integer indices
    input_indices = [vocab_to_int[c] for c in start_string]
    input_indices = tf.expand_dims(input_indices, 0)  # Add batch dimension

    text_generated = []
    model.reset_states()

    for _ in range(num_generate):
        predictions = model(input_indices)
        predictions = tf.squeeze(predictions, 0)  # Remove batch dimension

        # Use the last prediction
        predictions = predictions[-1]

        # Sample from the distribution
        predicted_id = tf.random.categorical(predictions[None, :], num_samples=1)[0, 0].numpy()

        # Add predicted character
        text_generated.append(int_to_vocab[predicted_id])

        # Update input
        input_indices = tf.expand_dims([predicted_id], 0)

    return start_string + ''.join(text_generated)


def experiment(optimizer, epochs):
    hidden_sizes = [128, 256, 512]
    sequence_lengths = [50, 100, 200]

    for hidden_units in hidden_sizes:
        for seq_length in sequence_lengths:
            # Build and compile the model
            model = build_rnn_model(vocab_size, hidden_units)
            model.compile(optimizer=optimizer, loss=bpc_loss)
            # Train the model
            history = model.fit(
                train_dataset,validation_data=valid_dataset, epochs=epochs,callbacks=[checkpoint_callback]
            )
            # Record the final training loss
            final_loss = history.history['loss'][-1]
            print(f'Hidden Units: {hidden_units}, Seq Length: {seq_length}, Final Loss: {final_loss}')


In [None]:

def build_rnn_model(vocab_size):
    embedding_dim = 256  
    hidden_units = 1024
    model = models.Sequential([
        # layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        layers.Input(shape=(seq_length, vocab_size)),
        layers.SimpleRNN(hidden_units, return_sequences=True),
        layers.Dense(vocab_size) 
    ])
    return model

def build_lstm_model(vocab_size):
    hidden_units = 512  
    model = models.Sequential([
        layers.Embedding(input_dim=vocab_size, output_dim=hidden_units),
        layers.LSTM(hidden_units, return_sequences=True, input_shape=(None, vocab_size)),
        layers.Dense(vocab_size, activation='softmax')
    ])
    return model

# Build and compile the LSTM model
model = build_rnn_model(vocab_size)

# model = build_rnn_model(vocab_size)

In [None]:

model.compile(optimizer='adam', loss=bpc_loss)
epochs = 5
# Fit the model
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=[checkpoint_callback]
)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
# Load model weights from a specific epoch
model.load_weights('model_epoch_5')  # For example, epoch 5

# Generate text
start_string = "JULIET"
generated_text = generate_text(model, start_string, num_generate=500)
print(generated_text)
# Load model weights from a specific epoch
model.load_weights('model_epoch_10')  # For example, epoch 5

# Generate text
start_string = "JULIET"
generated_text = generate_text(model, start_string, num_generate=500)
print(generated_text)

JULIETh sthithpin:
Eeowhare lngitillithe th;ales ake thithe nonth bathe sth the thous rstingore thid t mene co pulloun:
A s:
The d as thenon:
MOrtheratheran:
Firfis hest y sthenth y s wize mous y wenesthesers arsth fize m reve ounous, t, thisthen:
Thid sthe blalicy yourersthat-
Thimar th histhesthinglunonof the win:
Whathar sthatheren:
MWhed wed wes be oustocon blilin:
US:
whener piwigat nof odat then thesCake, ond, f fenorelid whe hollll ay
Avesth.
I thatherath,
Wistenoullllillis ther y,
Anous the la


NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for model_epoch_10