In [1]:
from urllib.parse import urljoin

import nltk
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize, word_tokenize

In [2]:
# Collecting data
# Landing page of the online text source (comparable to the Project Gutenberg
# dataset); the links found on this page are followed in the cells below.
url = "http://shakespeare.mit.edu/"

In [3]:
# Download the landing page. The timeout keeps a stalled connection from
# hanging the kernel forever, and raise_for_status() stops the notebook on an
# HTTP error instead of parsing an error page as if it were the index.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

In [4]:
# Collect every anchor (<a>) tag found on the landing page.
links = soup.find_all('a')

In [5]:
# Filtering: keep only the anchors that point at .html pages.
# link.get() is used because an anchor without an href attribute would raise
# KeyError with link['href']; urljoin resolves relative hrefs against the
# landing page and leaves absolute ones intact.
# NOTE(review): the play links on this site appear to point at per-play index
# pages rather than the full texts - confirm the fetched pages contain the
# text you expect to train on.
link1 = [
    urljoin(url, href)
    for href in (link.get('href') for link in links)
    if href and href.endswith('.html')
]

In [6]:
# Function that downloads one page and extracts its text.
def text_play(play_url, timeout=30):
    """Download ``play_url`` and return the text content of the page.

    Parameters
    ----------
    play_url : str
        Address of the HTML page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot block the notebook indefinitely.

    Returns
    -------
    str
        The page text with markup removed, as produced by
        ``BeautifulSoup.get_text()``.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or the server answers with an
        HTTP error status.
    """
    play_response = requests.get(play_url, timeout=timeout)
    # Fail loudly on 4xx/5xx so an error page never ends up in the corpus.
    play_response.raise_for_status()
    play_soup = BeautifulSoup(play_response.content, 'html.parser')
    return play_soup.get_text()

In [7]:
# Extract the text of every collected page.
# The iteration variable gets its own name so that `link1` stays bound to the
# list of URLs; reusing `link1` as the loop variable would leave it holding a
# single URL string, and re-running this cell would then iterate over the
# characters of that string.
all_texts = [text_play(play_url) for play_url in link1]

In [8]:
# Combine the text of all pages into a single string, separated by spaces.
combined_text = ' '.join(all_texts)

In [9]:
# Download NLTK's 'punkt' package: the tokenizer data that sent_tokenize /
# word_tokenize in the next cell rely on.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vkvis\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [10]:
# Split the corpus into sentences, then turn each sentence into a list of
# lower-cased tokens, discarding anything that is not purely alphabetic
# (punctuation, numbers, mixed tokens).
sentences = sent_tokenize(combined_text)
words = [
    [token.lower() for token in word_tokenize(sentence) if token.isalpha()]
    for sentence in sentences
]

In [11]:
# Import the libraries and functions used to build and train the model
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense




In [12]:
# Flatten the per-sentence word lists into one continuous stream of words.
all_words = [word for sentence in words for word in sentence]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_words)
# texts_to_sequences() returns one list per input text, so feeding it single
# words yields [[i1], [i2], ...]. Flatten that into a plain list of integer
# indices so the training windows built from it are 2-D
# (samples, sequence_length) instead of carrying a stray trailing axis of
# size 1.
sequences = [
    index
    for encoded in tokenizer.texts_to_sequences(all_words)
    for index in encoded
]

In [13]:
# Number of output classes: one per word in the tokenizer's word_index, plus
# one because Keras Tokenizer word indices start at 1 (index 0 is unused).
vocab_size = len(tokenizer.word_index) + 1
# Number of preceding words used as context when predicting the next word.
sequence_length = 50

In [14]:
def create_sequences(sequences, length):
    """Slice a token stream into (context window, next token) training pairs.

    For every position ``i`` from ``length`` up to the end of ``sequences``,
    the ``length`` items before ``i`` form one input window and the item at
    ``i`` is the matching target.

    Parameters
    ----------
    sequences : sequence
        The encoded token stream.
    length : int
        Number of items in each input window.

    Returns
    -------
    tuple of (list, list)
        ``X`` holds the windows and ``y`` the targets; both are empty when
        ``sequences`` has no more than ``length`` items.
    """
    target_positions = range(length, len(sequences))
    X = [sequences[pos - length:pos] for pos in target_positions]
    y = [sequences[pos] for pos in target_positions]
    return X, y

In [15]:
# Build the (context window, next word) pairs, one-hot encode the targets over
# the whole vocabulary, and hand both arrays to TensorFlow as tensors.
X, y = create_sequences(sequences, sequence_length)
y = to_categorical(y, num_classes=vocab_size)
X, y = tf.convert_to_tensor(X), tf.convert_to_tensor(y)

In [16]:
# Model: word embedding -> two stacked LSTM layers -> dense hidden layer ->
# softmax over the vocabulary (probability of each possible next word).
model = Sequential([
    Embedding(vocab_size, 50, input_length=sequence_length),
    LSTM(100, return_sequences=True),  # pass the full sequence on to the next LSTM
    LSTM(100),
    Dense(100, activation='relu'),
    Dense(vocab_size, activation='softmax'),
])




In [17]:
# Compile and train the model.
# categorical_crossentropy matches the one-hot encoded targets in y.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# epochs=10 is a deliberately small value chosen to keep the run time short.
model.fit(X, y, epochs=10, batch_size=256)


Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x24839c97490>

In [27]:
# Text generation: temperature-based sampling helper
from tensorflow.keras.preprocessing.sequence import pad_sequences

def sample(preds, temperature=1.0):
    """Draw one class index from a probability vector, reshaped by temperature.

    Parameters
    ----------
    preds : array-like of float
        Probabilities for each class (e.g. one softmax output row).
    temperature : float, optional
        Values below 1 sharpen the distribution towards the most likely
        class, values above 1 flatten it. ``0`` selects the most likely class
        deterministically.

    Returns
    -------
    numpy integer
        Index of the selected class.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of temperature -> 0 is greedy decoding; handling it here
        # avoids a division by zero that would turn every probability to NaN.
        return np.argmax(preds)
    # Small epsilon keeps log() finite for zero probabilities.
    scaled = np.log(preds + 1e-10) / temperature
    # Shift by the maximum before exponentiating: the normalised result is
    # unchanged, but the largest term becomes exp(0) == 1, so very small
    # temperatures can no longer underflow every term to 0.
    exp_preds = np.exp(scaled - np.max(scaled))
    probs = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, probs, 1)
    return np.argmax(probas)

In [45]:
def text(model, tokenizer, seed_text, num_words, sequence_length, temperature=2.0):
    """Extend ``seed_text`` by repeatedly sampling the model's next-word prediction.

    Parameters
    ----------
    model
        Trained model; ``model.predict`` is called on one padded window at a
        time and its first output row is treated as next-word probabilities.
    tokenizer
        Tokenizer providing ``texts_to_sequences`` and ``index_word``.
    seed_text : str
        Starting text.
    num_words : int
        Number of sampling steps to run.
    sequence_length : int
        Window length the encoded text is padded/truncated to.
    temperature : float, optional
        Sampling temperature forwarded to ``sample`` (default 2.0).

    Returns
    -------
    str
        ``seed_text`` followed by the generated words, separated by single
        spaces. Fewer than ``num_words`` words are added when a sampled index
        has no entry in ``tokenizer.index_word``.
    """
    for _ in range(num_words):
        # Re-encode the text generated so far and fit it to the model's window.
        tlist = tokenizer.texts_to_sequences([seed_text])[0]
        tlist = pad_sequences([tlist], maxlen=sequence_length, padding='pre')
        predicted = model.predict(tlist, verbose=0)[0]
        word_index = sample(predicted, temperature)
        word = tokenizer.index_word.get(word_index, '')
        # An index with no word (e.g. the unused index 0) is skipped rather
        # than appended as an empty string, which would leave stray spaces.
        if word:
            seed_text += " " + word
    return seed_text

In [46]:
# Generate text from a seed phrase.
# The result is stored under its own name: assigning it to `text` would
# replace the text() function with a string, and running this cell a second
# time would fail with "'str' object is not callable".
seed_text = "To be or not to be"
generated_text = text(model, tokenizer, seed_text, 100, sequence_length, temperature=0.7)
print(generated_text)

To be or not to be made whose beauty is else to with be fall and his tide look with woe say for the stone would would come for with my heart and strife his garment but the bird doth to not talk o i up to his eyes his day of flatter whose painter do these in my fixed cxxvi of another field of you began and me her end of fearfully from till being comfortable pride and be yet the spectacle of frozen in i then like lives i more thou but he be give for let quoth this love hath my other still
