In [2]:
from datasets import load_dataset

ds = load_dataset("Pulk17/Fake-News-Detection-dataset")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


train.tsv:   0%|          | 0.00/78.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/30000 [00:00<?, ? examples/s]

In [3]:
print(ds)
print(ds['train'][0])


DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'title', 'text', 'subject', 'date', 'label'],
        num_rows: 30000
    })
})
{'Unnamed: 0': 2619, 'title': "Ex-CIA head says Trump remarks on Russia interference 'disgraceful'", 'text': 'Former CIA director John Brennan on Friday criticized as “disgraceful” President Donald Trump’s efforts to play down U.S. intelligence agencies’ assessment that Russia meddled in the 2016 U.S. election. Trump’s administration has been dogged by investigations into allegations of Russian interference in last year’s U.S. presidential election and possible ties with his campaign team. Speaking one day before his first meeting with Russian President Vladimir Putin in Hamburg earlier this month, Trump said he suspected Russian interference in the election but that no one knows for sure. “These types of comments are just disgraceful ... and the person who said them should be ashamed of himself,” said Brennan, CIA chief under former Presid

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Convert HuggingFace dataset to pandas
df = pd.DataFrame(ds['train'])

# Split manually
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

print(train_df.shape, test_df.shape)


(24000, 6) (6000, 6)


In [5]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'[^a-z\s]', '', text)
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

train_df['clean_text'] = train_df['text'].apply(clean_text)
test_df['clean_text'] = test_df['text'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_words = 10000
max_len = 200

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(train_df['clean_text'])

X_train = tokenizer.texts_to_sequences(train_df['clean_text'])
X_test = tokenizer.texts_to_sequences(test_df['clean_text'])

X_train = pad_sequences(X_train, maxlen=max_len, padding='post', truncating='post')
X_test = pad_sequences(X_test, maxlen=max_len, padding='post', truncating='post')

y_train = train_df['label'].values
y_test = test_df['label'].values


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()




In [8]:
history = model.fit(X_train, y_train,
                    epochs=5,
                    batch_size=64,
                    validation_data=(X_test, y_test))


Epoch 1/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 540ms/step - accuracy: 0.7035 - loss: 0.5631 - val_accuracy: 0.8930 - val_loss: 0.3232
Epoch 2/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 536ms/step - accuracy: 0.8767 - loss: 0.3331 - val_accuracy: 0.8527 - val_loss: 0.3810
Epoch 3/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 556ms/step - accuracy: 0.8662 - loss: 0.3110 - val_accuracy: 0.9817 - val_loss: 0.0726
Epoch 4/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 533ms/step - accuracy: 0.9819 - loss: 0.0712 - val_accuracy: 0.9867 - val_loss: 0.0464
Epoch 5/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 541ms/step - accuracy: 0.9922 - loss: 0.0339 - val_accuracy: 0.9895 - val_loss: 0.0418


In [13]:
model.save("lstm_model.h5")



In [14]:
from tensorflow import keras
model = keras.models.load_model("lstm_model.h5")



In [15]:
from sklearn.metrics import classification_report, accuracy_score

y_pred = (model.predict(X_test) > 0.5).astype("int32")

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


# Modi is china president
# The U.S. Director of National Intelligence declined to comment on Thursday when asked whether Russia or those responsible for hacking the Democratic Party or Democratic party organizations had shared any information with the United States or Americans over the last year and a half. “Sir, I’d rather not respond off the top of my head and in any event this would probably best left to a classified session,” James Clapper told a House intelligence committee hearing.
# modi is the only reason india is behind
# modi is germany president
# Bangladesh is bigger than india in terms of land area
# Pakistani troops shot out the tyres of a vehicle carrying a kidnapped U.S.-Canadian couple and their children in a raid that led to the family s release after five years of being held hostage, a Pakistani security official said on Friday. U.S. drones were hovering near the northwestern Pakistani area where American Caitlan Coleman, her Canadian husband Joshua Boyle and their three children, all born in captivity, were freed, another security official said. Coleman and Boyle were held by the Taliban-linked Haqqani network after being kidnapped while backpacking in Afghanistan, and their rescue marked a rare positive note in often-fraught U.S.-Pakistan relations. The family flew out of Pakistan on Friday, according to a Pakistani airport official who saw them. It was not clear whether they were bound for Canada or the United States. A senior Pakistani security source on Friday detailed how the family were freed following a car chase in the northwestern tribal region bordering Afghanistan. He said Pakistani troops and intelligence agents, acting on a U.S. intelligence tip, zeroed in on a vehicle holding the family as they were being moved into Kurram tribal agency near the town of Kohat, some 60 km (37 miles) inside Pakistan. Agents from Pakistan s Inter Services Intelligence (ISI) spy agency and soldiers attempted to intercept the vehicle, but it sped away, according to the security source. Our troops fired at the vehicle and burst its tyres, he said, declining to be identified because he is not authorised to speak openly to the media. The kidnappers managed to escape, the security official added, saying the troops wouldn t fire at the fleeing captors for fear of harming the hostages. The army recovered the hostages safely from the car. Army spokesman Major General Asif Ghafoor told NBC News that the vehicle s driver and another militant had escaped to a nearby refugee camp. The family s rescue has been hailed by U.S. President Donald Trump as a positive moment for U.S.-Pakistan relations, which have frayed in recent years amid Washington s assertions that Islamabad has not been doing enough to tackle Haqqani militants who are believed to be on Pakistani soil. Trump, in a statement, said the release of the hostages showed Pakistan was acquiescing to America s wishes for it to do more to provide security in the region . A second Pakistani security official, speaking on condition of anonymity, said U.S. drones on Wednesday had been seen circling Kohat, suggesting U.S. co-operation included sophisticated surveillance inside Pakistan. Kohat is deep inside Pakistani territory, next to the eastern edge of Kurram agency in Khyber-Pakhtunkhwa province - outside the Afghan frontier zone where U.S. drones have in the past been tolerated by Pakistan. The U.S. embassy and the Pakistani military did not comment on the drone report. However, a Taliban commander in Pakistan with knowledge of the hostage family said U.S. drones flying in the area prompted their captors to move them. We took care of this family like our own family members and special guests, but after frequent flying of U.S. drones on Kurram tribal region and its adjoining areas, it was decided to move them to a safer place, said the Taliban official on condition of anonymity. They were being shifted to a safer place when captured by the Pakistani forces. Pakistani officials bristle at U.S. claims Islamabad is not doing enough to tackle Islamist militants, particularly the Haqqanis. After the release of the family, they emphasised the importance of co-operation and intelligence sharing by Washington, which has threatened to cut military aid and other punitive measures against Pakistan. Pakistan s military indicated the family were rescued shortly after entering Pakistan from Afghanistan, and a government official repeated that assertion on Friday. We have been taking on the terrorists... So we have taken action based on the intelligence that was provided by the U.S. side, said Foreign Ministry spokesman Nafees Zakaria. He added that he had no other details on the operation maybe because they were abducted in Afghanistan, they were there in Afghanistan, and that could be the reason why you have not heard much about it . However, two Taliban sources with knowledge of the family s captivity said they had been kept in Pakistan in recent years. A U.S. government source in Washington also said there was no indication the family had been in Afghanistan. The Haqqani network operates on both sides of the porous Afghan-Pakistani border but senior militants have acknowledged they moved a major base of operations to Kurram agency in the tribal areas. The United States and Afghanistan say that safe havens inside Pakistan allow the Taliban, including the Haqqani network, to plan and launch attacks against the Western-backed Afghan government and U.S. and other foreign troops that support them.



[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 73ms/step
Accuracy: 0.9895
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3137
           1       0.99      0.99      0.99      2863

    accuracy                           0.99      6000
   macro avg       0.99      0.99      0.99      6000
weighted avg       0.99      0.99      0.99      6000



In [16]:
def predict_fake_news(text):
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')
    pred = model.predict(pad)[0][0]
    return "Fake News" if pred > 0.5 else "Real News"

print(predict_fake_news("Breaking: NASA confirms water on Mars!"))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Real News


In [17]:
sent = ""
while True:
    sent = input("Enter a news (type 'stop' to quit): ")
    if sent.lower() == "stop":
        print("Exiting...")
        break
    print("Prediction:", predict_fake_news(sent))




Enter a news (type 'stop' to quit): Modi is china president
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Prediction: Fake News


KeyboardInterrupt: Interrupted by user