In [None]:
import pickle
import random

import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer
from tensorflow.keras.layers import LSTM, Activation, Dense, Input
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import RMSprop

In [None]:
# Read the news dataset into a DataFrame. The /content/ prefix suggests a
# Colab runtime; adjust the path when running elsewhere.
text_df = pd.read_csv(filepath_or_buffer="/content/fake_or_real_news.csv")

In [None]:
# Collect every article body and merge them into one corpus string.
# pandas reads empty CSV fields as NaN (a float), which would make
# " ".join() raise TypeError, so missing rows are dropped and the remaining
# values are coerced to str before joining.
text = text_df["text"].dropna().astype(str).tolist()
joined_text = " ".join(text)

In [None]:
# Work on the first 100,000 characters of the corpus only (presumably to keep
# the one-hot arrays built further down at a manageable size).
partial_text = joined_text[:100_000]

In [None]:
# Word-level tokenizer: the pattern matches runs of \w characters, so anything
# else (punctuation, whitespace) never becomes part of a token.
tokenizer = RegexpTokenizer(pattern=r"\w+")
# Lower-case first so that "The" and "the" map to the same token.
tokens = tokenizer.tokenize(
    partial_text.lower()
)

In [None]:
# Vocabulary: the sorted, de-duplicated tokens (np.unique returns them sorted).
unique_tokens = np.unique(tokens)
# Reverse lookup from token -> its position in `unique_tokens`.
unique_token_index = dict(zip(unique_tokens, range(len(unique_tokens))))

In [None]:
# Sliding window over the token stream: each sample is `n_words` consecutive
# tokens, and its label is the token that immediately follows that window.
n_words = 10
input_words = [tokens[start:start + n_words] for start in range(len(tokens) - n_words)]
next_words = [tokens[start + n_words] for start in range(len(tokens) - n_words)]

In [None]:
# All-False boolean tensors that will hold the one-hot encoding:
#   X is (samples, n_words, vocabulary size) for the context windows,
#   y is (samples, vocabulary size) for the following word.
X = np.zeros(shape=(len(input_words), n_words, len(unique_tokens)), dtype=np.bool_)
y = np.zeros(shape=(len(next_words), len(unique_tokens)), dtype=np.bool_)

In [None]:
# Switch on exactly one vocabulary slot per context position and one per label.
for sample, context in enumerate(input_words):
  for position, token in enumerate(context):
    X[sample, position, unique_token_index[token]] = True
  y[sample, unique_token_index[next_words[sample]]] = True


In [None]:
# Two stacked 128-unit LSTM layers followed by a softmax over the vocabulary.
# The input shape is declared with an explicit Input layer: current Keras
# warns when `input_shape=` is passed to the first layer of a Sequential model
# and recommends an `Input(shape)` object instead.
model = Sequential()
model.add(Input(shape=(n_words, len(unique_tokens))))
# return_sequences=True hands the full sequence to the second LSTM.
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128))
# One output per vocabulary entry, normalised to probabilities by the softmax.
model.add(Dense(len(unique_tokens)))
model.add(Activation("softmax"))


  super().__init__(**kwargs)


In [None]:
# Categorical cross-entropy matches the one-hot labels in `y`; accuracy is
# reported alongside the loss for each epoch.
model.compile(
    loss="categorical_crossentropy",
    optimizer=RMSprop(learning_rate=0.01),
    metrics=["accuracy"],
)
# Left as the cell's last expression so the returned History object is shown.
model.fit(x=X, y=y, batch_size=128, epochs=25, shuffle=True)

Epoch 1/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 326ms/step - accuracy: 0.2026 - loss: 4.6143
Epoch 2/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 324ms/step - accuracy: 0.2626 - loss: 4.1497
Epoch 3/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 332ms/step - accuracy: 0.2979 - loss: 3.8779
Epoch 4/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 322ms/step - accuracy: 0.3440 - loss: 3.5635
Epoch 5/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 313ms/step - accuracy: 0.3841 - loss: 3.2771
Epoch 6/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 336ms/step - accuracy: 0.4383 - loss: 2.9814
Epoch 7/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 321ms/step - accuracy: 0.4858 - loss: 2.6971
Epoch 8/25
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 312ms/step - accuracy: 0.5371 - loss: 2.4457
Epoch 9/25
[1m1

<keras.src.callbacks.history.History at 0x7a381a373b80>

In [None]:
# Write the trained network to disk, then rebind `model` to the copy read back
# from that file, so later cells use the persisted model.
model.save(filepath="mymodel.h5")
model = load_model(filepath="mymodel.h5")




In [None]:
def predict_next_word(input_text, n_best):
  """Return the vocabulary indices of the `n_best` most likely next words.

  Args:
    input_text: Whitespace-separated context words. The text is lower-cased;
      if it holds more than `n_words` words only the last `n_words` are used.
    n_best: Number of candidate indices wanted. Values larger than the
      vocabulary are clamped; values <= 0 yield an empty result.

  Returns:
    A 1-D integer array of at most `n_best` indices into `unique_tokens`,
    in no particular order.

  Raises:
    KeyError: If a context word is not a key of `unique_token_index`.
  """
  words = input_text.lower().split()
  # Keep only the most recent n_words words so a long prompt cannot index past
  # the second axis of X.
  context = words[max(0, len(words) - n_words):]
  X = np.zeros((1, n_words, len(unique_tokens)))
  for i, word in enumerate(context):
    X[0, i, unique_token_index[word]] = 1
  predictions = model.predict(X)[0]
  n_best = min(n_best, len(predictions))
  if n_best <= 0:
    return np.array([], dtype=int)
  # argpartition orders ascending around kth, so the n_best LARGEST scores sit
  # in the last n_best slots. Positive kth/slice (the previous code) returned
  # everything except the n_best least likely words instead.
  return np.argpartition(predictions, -n_best)[-n_best:]

In [None]:
# Ask for five candidate continuations of a ten-word prompt.
possible = predict_next_word(
    input_text="He will have to look into this thing and he",
    n_best=5,
)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 557ms/step


In [None]:
# Map the candidate indices back to their words and show them.
print(list(unique_tokens[possible]))




In [None]:
def generate_text(input_text, text_length, creativity=3):
  """Extend `input_text` with `text_length` generated words.

  Each step feeds the most recent `n_words` tokens to `predict_next_word` and
  appends one of the returned candidates, picked at random.

  Args:
    input_text: Seed text; its whitespace-separated words start the output.
    text_length: Number of words to append.
    creativity: Number of candidates requested per step (passed as `n_best`).

  Returns:
    The seed words followed by the generated words, joined by single spaces.
  """
  word_sequence = input_text.split()
  # Token history used as model context. The seed is tokenized once; generated
  # words are vocabulary tokens already, so they are appended as-is.
  history = tokenizer.tokenize(input_text.lower())
  for _ in range(text_length):
    # Always condition on the latest n_words tokens. The previous fixed-offset
    # window never grew beyond the seed length and, for seeds longer than
    # n_words, never reached the newly generated words.
    context = " ".join(history[max(0, len(history) - n_words):])
    try:
      candidates = predict_next_word(context, creativity)
    except (KeyError, IndexError):
      # Context the predictor cannot encode (e.g. a word outside the
      # vocabulary). Other errors are allowed to surface.
      candidates = []
    # Plain lists keep random.choice from having to evaluate a NumPy array's
    # truth value.
    if len(candidates) > 0:
      choice = unique_tokens[random.choice(list(candidates))]
    else:
      # Best-effort fallback: any vocabulary word.
      choice = random.choice(list(unique_tokens))
    word_sequence.append(choice)
    history.append(choice)
  return " ".join(word_sequence)

In [None]:
# Generate 100 words after the seed, sampling among 5 candidates per step.
generate_text(input_text="The president will now", text_length=100, creativity=5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30

'The president will now value representatives fundamentally artist names country california un updates developers pursuit recruited troops mindedness charges fest saved reflected ways ralph trash advantage farm die perceived fear denial site each reprehensible road trafficking decisionmaking unity skews spectrum ready path feehery w overlook energize commitment period comeback swers clam count effect services rise compromise indeed plead has nuns youtube charting assaults herself very repealing engine threshold anthony meaning leading acknowledges defending police protect among raghead authorities rate gerster lie confront ideological pride world kaydeeking hand british thing to pushed allegiance palatable parties spectrum matching credited governors back woman voice save convention schoolyard'