# Import libraries

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM,GRU, Dense


# Load Shakespeare dataset





In [None]:
import requests

# Download the Tiny Shakespeare corpus as plain text
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url, timeout=30)  # bounded wait instead of hanging forever on a stalled connection
response.raise_for_status()  # fail loudly rather than saving an HTTP error page as the dataset
data = response.text

# Save the data to a file; explicit UTF-8 so the result does not depend on the platform default encoding
with open("shakespeare.txt", "w", encoding="utf-8") as f:
    f.write(data)


In [None]:
# Read the text from the file. A relative path is used so it resolves to the
# same file the download cell writes ("shakespeare.txt" in the working directory).
with open("shakespeare.txt", "r", encoding="utf-8") as f:
    text_data = f.read()

# Split the text into whitespace-separated words
words = text_data.split()

# Take only the first 20000 words
words = words[:20000]

# Tokenize the words. The tokenizer is given a list of words (not a raw string),
# so each whitespace-separated word becomes one vocabulary entry.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([words])

# Convert words to a single sequence of integer ids
sequences = tokenizer.texts_to_sequences([words])[0]

# Number of context words the model sees for each prediction
max_sequence_length = 50  # Set a smaller sequence length

# Create input and output sequences for training the model.
# Only the last `max_sequence_length` tokens before position i are kept:
# pad_sequences would truncate longer prefixes to exactly that window anyway,
# and storing every full prefix needs memory quadratic in the corpus length.
input_sequences = []
output_sequences = []
for i in range(1, len(sequences)):
    input_sequences.append(sequences[max(0, i - max_sequence_length):i])
    output_sequences.append(sequences[i])

# Left-pad the shorter windows with zeros so every row has the same length
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')
output_sequences = np.array(output_sequences)

# Prepare the data for training (+1 because id 0 is reserved for padding)
vocab_size = len(tokenizer.word_index) + 1
X_train = input_sequences
y_train = to_categorical(output_sequences, num_classes=vocab_size)



# Train the model

In [None]:
# Next-word predictor: embedding -> GRU -> softmax over the vocabulary
model = Sequential([
    Embedding(vocab_size, 256, input_length=max_sequence_length),
    GRU(128),
    Dense(vocab_size, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Fit on the prepared windows; the returned History object is the cell's output
model.fit(X_train, y_train, epochs=10, batch_size=8)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ed0a8eaa1d0>

# Generate text

In [None]:
import numpy as np

# Function to generate text using the trained model
def _sample_next_index(probs, temperature, p_val):
    """Pick one vocabulary index using temperature scaling and top-p (nucleus) filtering."""
    probs = np.array(probs, dtype=np.float64)  # float64 copy; the caller's array is left untouched
    probs[0] = 0.0  # id 0 is the padding slot and has no entry in tokenizer.index_word
    if not np.any(probs > 0):
        raise ValueError("model assigned no probability to any vocabulary word")

    # Apply temperature in log-space (equivalent to p ** (1 / T), but stable for small T)
    with np.errstate(divide="ignore"):  # log(0) -> -inf is intended for zero-probability entries
        logits = np.log(probs) / temperature
    logits -= logits.max()
    scaled = np.exp(logits)
    scaled /= scaled.sum()

    # Keep the smallest set of most-likely words whose cumulative mass reaches p_val
    order = np.argsort(-scaled)
    cumulative = np.cumsum(scaled[order])
    nucleus_size = int(np.searchsorted(cumulative, p_val)) + 1
    candidates = order[:nucleus_size]
    weights = scaled[candidates] / scaled[candidates].sum()
    return int(np.random.choice(candidates, p=weights))


def generate_text(seed_text, temperature=1.0, p_val=0.8, max_length=100):
    """Extend `seed_text` word by word by sampling from the trained model.

    Relies on the notebook-level names `tokenizer`, `model`, `pad_sequences`
    and `max_sequence_length`.

    Args:
        seed_text: Prompt to continue; it is lower-cased before use.
        temperature: Sampling temperature, must be > 0. Values below 1 sharpen
            the distribution, values above 1 flatten it.
        p_val: Nucleus (top-p) threshold. Sampling is restricted to the most
            likely words whose cumulative probability reaches this value.
            Values <= 0 reduce to greedy choice, values >= 1 keep every word.
        max_length: Maximum number of words to append.

    Returns:
        The lower-cased seed followed by the generated words, space-separated.

    Raises:
        ValueError: If `temperature` is not positive, or the model gives zero
            probability to every real word.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    generated_text = seed_text.lower()

    for _ in range(max_length):
        # Encode as a list of words, not a raw string. Given a string, the
        # tokenizer strips punctuation first, so tokens such as "so," would not
        # map back to the ids they were given when it was fitted on a word list.
        sequences = tokenizer.texts_to_sequences([generated_text.split()])
        input_sequence = pad_sequences(sequences, maxlen=max_sequence_length, padding='pre')

        # Predict the next-word distribution (verbose=0 avoids a progress bar per word)
        next_word_probs = model.predict(input_sequence, verbose=0)[0]

        # Select the next word using temperature and the p_val nucleus
        next_word_idx = _sample_next_index(next_word_probs, temperature, p_val)
        next_word = tokenizer.index_word[next_word_idx]

        # Add the next word to the generated text
        generated_text += " " + next_word

        # Stop early if the sampled token is exactly a punctuation mark or line break.
        # NOTE(review): the tokenizer is fitted on whitespace-split words, so punctuation
        # normally stays attached to a word and this rarely fires - confirm the intended rule.
        if next_word in [".", ",", ":", ";", "!", "?", "\n"]:
            break

    return generated_text
# Produce one sample continuation of a short prompt; the temperature, p_val
# and max_length settings are forwarded to generate_text.
seed_text = "he fall in love"
generated_text = generate_text(
    seed_text,
    temperature=0.3,
    p_val=0.9,
    max_length=30,
)
print(generated_text)

he fall in love him or so, dishonour'd that he that end: like to find you any thing, cominius: with the other lose, never was never be the motive of our so frank donation.


In [None]:
# Print the same sample again, this time without a trailing newline
print(generated_text, end="")

he fall in love him or so, dishonour'd that he that end: like to find you any thing, cominius: with the other lose, never was never be the motive of our so frank donation.