In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Bidirectional, GRU, Dense

# Seed Python, NumPy and TensorFlow RNGs so that Restart & Run All yields the
# same initial weights (and therefore the same predictions) on every run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Toy corpus the next-word models are trained on.
corpus = [
    "I love machine learning",
    "I love deep learning",
    "deep learning is fun",
    "I enjoy learning new things",
    "machine learning is powerful"
]

# Word -> integer-id vocabulary. Tokenizer ids start at 1, so one extra slot
# is added for id 0, which pad_sequences uses as the padding value.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# Every sentence contributes all of its prefixes of length >= 2; the last
# token of each prefix becomes the label, the preceding tokens the input.
input_sequences = [
    token_list[:end]
    for token_list in tokenizer.texts_to_sequences(corpus)
    for end in range(2, len(token_list) + 1)
]

# Left-pad to a common length so the label is always the final column.
max_len = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

X = input_sequences[:, :-1]
y = input_sequences[:, -1]
# One-hot labels to match the categorical_crossentropy loss used by the models.
y = to_categorical(y, num_classes=total_words)

In [None]:
# Baseline next-word model: embedding -> vanilla RNN -> softmax over the vocabulary.
# The input shape is declared with an explicit Input layer rather than
# Embedding's `input_length` argument, which is deprecated in Keras 3.
model_rnn = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Embedding(total_words, 10),
    SimpleRNN(64),
    Dense(total_words, activation='softmax')
])
model_rnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_rnn.fit(X, y, epochs=200, verbose=0)



<keras.src.callbacks.history.History at 0x7f482498a2d0>

In [None]:
# Next-word model: embedding -> LSTM -> softmax over the vocabulary.
# The input shape is declared with an explicit Input layer rather than
# Embedding's `input_length` argument, which is deprecated in Keras 3.
model_lstm = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Embedding(total_words, 10),
    LSTM(64),
    Dense(total_words, activation='softmax')
])
model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_lstm.fit(X, y, epochs=200, verbose=0)

<keras.src.callbacks.history.History at 0x7f48247b82d0>

In [None]:
# Next-word model: embedding -> bidirectional LSTM -> softmax over the vocabulary.
# The input shape is declared with an explicit Input layer rather than
# Embedding's `input_length` argument, which is deprecated in Keras 3.
model_bilstm = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Embedding(total_words, 10),
    Bidirectional(LSTM(64)),
    Dense(total_words, activation='softmax')
])
model_bilstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_bilstm.fit(X, y, epochs=200, verbose=0)


<keras.src.callbacks.history.History at 0x7f4821837e90>

In [None]:
# Next-word model: embedding -> GRU -> softmax over the vocabulary.
# The input shape is declared with an explicit Input layer rather than
# Embedding's `input_length` argument, which is deprecated in Keras 3.
model_gru = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Embedding(total_words, 10),
    GRU(64),
    Dense(total_words, activation='softmax')
])
model_gru.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_gru.fit(X, y, epochs=200, verbose=0)

<keras.src.callbacks.history.History at 0x7f481b41b110>

In [None]:
from tensorflow.keras.layers import Attention, Input, Concatenate, Lambda
from tensorflow.keras.models import Model

# Functional-API model: embedding -> LSTM emitting one vector per timestep ->
# self-attention (the LSTM output is passed as both query and value) ->
# mean over the time axis -> softmax over the vocabulary.
inputs = Input(shape=(X.shape[1],))
x = Embedding(input_dim=total_words, output_dim=10)(inputs)
lstm_out = LSTM(units=64, return_sequences=True)(x)
attn = Attention()([lstm_out, lstm_out])
# Collapse the per-timestep attention outputs into one vector per sample.
x = Lambda(lambda attended: tf.reduce_mean(attended, axis=1))(attn)
output = Dense(units=total_words, activation='softmax')(x)

model_attention = Model(inputs=inputs, outputs=output)
model_attention.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_attention.fit(x=X, y=y, epochs=200, verbose=0)

<keras.src.callbacks.history.History at 0x7f4821491450>

In [None]:
def predict_next_word(model, tokenizer, text, max_len):
    """Return the word the model considers most likely to follow ``text``.

    Args:
        model: Trained model exposing ``predict``; it receives a single row of
            ``max_len - 1`` token ids.
        tokenizer: Fitted Keras ``Tokenizer`` used to encode ``text`` and to
            decode the predicted id.
        text: Seed text to continue.
        max_len: Length of the padded training n-grams; the model input is
            one token shorter because the last token was the label.

    Returns:
        The predicted word, or ``None`` when the highest-scoring id has no
        vocabulary entry (for example the padding id 0).
    """
    token_seq = tokenizer.texts_to_sequences([text])[0]
    # Left-pad short inputs; inputs longer than the window keep only their
    # last max_len-1 tokens (pad_sequences truncates from the front by default).
    token_seq = pad_sequences([token_seq], maxlen=max_len-1, padding='pre')
    predicted_probs = model.predict(token_seq, verbose=0)
    # A single row was submitted, so the flat argmax is the vocabulary id.
    predicted_id = int(np.argmax(predicted_probs))
    # index_word is the tokenizer's own id -> word map; a direct lookup
    # replaces the linear scan over word_index.
    return tokenizer.index_word.get(predicted_id)

In [None]:
# Print each model's predicted continuation of "I love", one per line, in the
# order LSTM, BiLSTM, SimpleRNN, attention, GRU.
for trained_model in (model_lstm, model_bilstm, model_rnn, model_attention, model_gru):
    print(predict_next_word(trained_model, tokenizer, "I love", max_len))

machine
deep
machine
learning




deep


In [None]:
def generate_sentence(model, tokenizer, seed_text, max_len, n_words):
    """Greedily extend ``seed_text`` by up to ``n_words`` predicted words.

    Each step feeds the text generated so far to ``predict_next_word`` and
    appends the returned word, separated by a single space.

    Args:
        model: Trained model exposing ``predict``.
        tokenizer: Fitted Keras ``Tokenizer`` shared with the model.
        seed_text: Text to start from; returned unchanged if nothing is added.
        max_len: Length of the padded training n-grams.
        n_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words.
    """
    output_text = seed_text

    for _ in range(n_words):
        next_word = predict_next_word(model, tokenizer, output_text, max_len)
        if next_word is None:
            # No word for the predicted id: the text would stay unchanged, so
            # every remaining step would repeat this exact prediction. Stop
            # instead of re-running the model for nothing.
            break
        output_text += " " + next_word

    return output_text

In [None]:
# Let each model greedily extend "I love" by 7 words and print the result,
# in the order BiLSTM, LSTM, attention, GRU, SimpleRNN.
for trained_model in (model_bilstm, model_lstm, model_attention, model_gru, model_rnn):
    sentence = generate_sentence(trained_model, tokenizer, seed_text="I love", max_len=max_len, n_words=7)
    print(sentence)

I love deep learning things new new things learning
I love machine learning new fun things things things
I love learning is powerful things things things powerful
I love deep learning new things things things powerful
I love machine learning things powerful love love love
