In [None]:
import random
import pickle

import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [None]:
text_data = pd.read_csv("Text-Gen/fake_or_real.csv")

In [None]:
text = list(text_data.text.values)
combined_text = " ".join(text)

In [None]:
partial_text = combined_text[:10000]

In [None]:
tokenizer = RegexpTokenizer(r"\w+")
tokens = tokenizer.tokenize(partial_text.lower())

In [None]:
unique_tokens = np.unique(tokens)
unique_token_index = {token : idx for idx, token in enumerate(unique_tokens)}

In [None]:
initial_words = 10
input = []
next_words = []

for i in range(len(tokens) - initial_words):
    input.append(tokens[i:i + initial_words])
    next_words.append(tokens[i + initial_words])

In [None]:
X = np.zeros((len(input), initial_words, len(unique_tokens)), dtype=bool)
y = np.zeros((len(next_words), len(unique_tokens)), dtype=bool)

In [None]:
for i, words in enumerate(input):
    for j, word in enumerate(words):
        X[i, j, unique_token_index[word]] = 1
    y[i, unique_token_index[next_words[i]]] = 1

In [None]:
model = Sequential()
model.add(LSTM(128, input_shape=(initial_words, len(unique_tokens)), return_sequences=True))
model.add(LSTM(128))
model.add(Dense(len(unique_tokens)))
model.add(Activation("softmax"))

In [25]:
model.compile(loss="categorical_crossentropy", optimizer=RMSprop(learning_rate=0.01), metrics = ["accuracy"])
model.fit(X, y, batch_size=128, epochs = 30, shuffle=True)

Epoch 1/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.0285 - loss: 6.3396 
Epoch 2/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.0587 - loss: 5.8317
Epoch 3/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0615 - loss: 5.7197
Epoch 4/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0529 - loss: 5.7684
Epoch 5/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0588 - loss: 5.7278
Epoch 6/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0558 - loss: 5.6797
Epoch 7/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0641 - loss: 5.6183
Epoch 8/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.0657 - loss: 5.5301
Epoch 9/30
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17b71e8a0>

In [None]:
model.save("text_gen_model.h5")

In [None]:
model = load_model("text_gen_model.h5")

In [None]:
def predict_next_word(input, best):
    input = input.lower()
    X = np.zeros((1, initial_words, len(unique_tokens)))
    for i, word in enumerate(input.split()):
        X[0, i, unique_token_index[word]] = 1

    predictions = model.predict(X)[0]
    return np.argpartition(predictions, -best)[-best:]

In [23]:
possible = predict_next_word("He will have to look into this thing and he", 5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step


In [24]:
print([unique_tokens[idx] for idx in possible])

['principles', 'which', 'with', 'where', 'elect']


In [None]:
def generate_text(input, length, choices=3):
    sequence = input.split()
    current_pos = 0
    for i in range(length):
        sub_sequence = " ".join(tokenizer.tokenize(" ".join(sequence).lower())[current_pos:current_pos+initial_words])
        try:
            choice = unique_tokens[random.choice(predict_next_word(sub_sequence, creativity))]
        except:
            choice = random.choice(unique_tokens)
        sequence.append(choice)
        current_pos+=1
    return " ".join(sequence)
    

In [22]:
generate_text("I want to see what is possible in this world", 20, 5)

'I want to see what is possible in this world procedural very pocket tenth made too google election gasoline aboutface federal information original career tape did savage act fairly instant'