In [1]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Fetch the NLTK stop-word corpus (a no-op message is printed when it is
# already present), then keep the English list as a set because clean_text
# only ever does membership tests against it.
nltk.download('stopwords')
english_stopwords = stopwords.words('english')
STOPWORDS = set(english_stopwords)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\exper\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [10]:
# Load dataset
# Use a forward slash so the relative path resolves on every OS; a backslash
# separator is only understood on Windows.
df_fake = pd.read_csv("dataset/fake_news.csv")  # Fake news


In [12]:
# Load the real-news articles from a path relative to the working directory.
# NOTE(review): the fake-news file is read from a dataset directory while this
# one is read from the working directory itself — confirm the intended layout.
df_real = pd.read_csv("True.csv")  # Real news

In [13]:
# Preview the first rows of the fake-news frame (title, text, subject, date).
df_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [14]:
# Preview the first rows of the real-news frame; it has the same columns as
# the fake-news frame.
df_real.head()


Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [9]:
# Show the full body text of the fake-news row labelled 0.
df_fake['text'][0]

'Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. Instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  The former reality show star had just one job to do and he couldn t do it. As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year,  President Angry Pants tweeted.  2018 will be a great year for America! As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year. 2018 will be a great year for America!  Donald J. Trump (@realDonaldTrump) December 31, 2017Trump s tweet went down about as welll as you d expect.What kind of president sends a New Year s greeting like this despicable, petty, infantile gibberish? Only Trump! His lack of decency won t ev

In [10]:
# Show the full body text of the real-news row labelled 0.
# NOTE(review): the real samples shown in this notebook start with a
# "(Reuters)" dateline, which a classifier can key on — consider stripping it.
df_real['text'][0]

'WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-defense “discretionary” spending on programs that support educat

In [15]:
# Attach the binary target to each source frame before they are merged:
# 1 marks a fake article, 0 marks a real one.
df_fake = df_fake.assign(label=1)  # Fake = 1
df_real = df_real.assign(label=0)  # Real = 0

In [16]:
# Check that the label column is now present on the fake-news frame.
df_fake.head(2)

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",1


In [18]:
# Merge datasets
# Stack fake rows on top of real rows and renumber the index from 0 so row
# labels are unique in the combined frame.
news_frames = [df_fake, df_real]
df = pd.concat(news_frames, ignore_index=True)


In [19]:
# Inspect 10 randomly chosen rows of the merged frame. No random_state is
# passed, so a different set of rows is shown on each run.
df.sample(10)

Unnamed: 0,title,text,subject,date,label
12697,DEMOCRATS ADMIT PLAN To Commit Mass Voter Frau...,,politics,"Oct 18, 2016",1
36645,U.S. defense chief urges Pakistan to redouble ...,ISLAMABAD (Reuters) - U.S. Defense Secretary J...,worldnews,"December 4, 2017",0
4452,Sean Hannity Gets Chewed Out By Fox News For ...,Even Fox News is not pleased about Sean Hannit...,News,"September 28, 2016",1
2457,Donald Trump Responds To Mockery Over Fake Sw...,"Last night, at a rally in Melbourne, Florida, ...",News,"February 19, 2017",1
5858,Support For Banning Assault Rifles Skyrockets...,After the horrific terrorist attack in Orlando...,News,"June 15, 2016",1
13442,SANDERS SUPPORTERS Ready To Raise Hell At DNC ...,Wow! What a week for the Democrats! Nothing li...,politics,"Jul 25, 2016",1
3226,Sarah Palin Gets Her A** Handed To Her For Ca...,Sarah Palin opened her mouth again and people ...,News,"December 30, 2016",1
27107,U.S. intel chief declines comment on any Trump...,WASHINGTON (Reuters) - U.S. Director of Nation...,politicsNews,"May 23, 2017",0
10328,WOW! VIDEO EMERGES Of Hillary Clinton Admittin...,Hey Robert Mueller! Do you care to find out wh...,politics,"Jul 23, 2017",1
15523,"MONEY GRUBBING CLINTON GRIFTER DEMANDS $65,000...",What a group of grifters these Clinton scammer...,politics,"Jun 30, 2015",1


In [13]:


# Preprocessing function
def clean_text(text, stop_words=None):
    """Normalise one article body for tokenisation.

    The text is lower-cased, every run of non-word characters is collapsed
    to a single space, and stop words are dropped.

    Args:
        text: Raw article text. Anything that is not a ``str`` (for example
            the ``NaN``/``None`` pandas uses for a missing cell) is treated
            as an empty document instead of raising ``AttributeError``.
        stop_words: Optional collection of lower-case words to remove.
            Defaults to the module-level ``STOPWORDS`` set.

    Returns:
        The cleaned text as a single space-separated string; ``''`` when
        nothing is left or the input was not a string.
    """
    if not isinstance(text, str):
        return ''
    if stop_words is None:
        stop_words = STOPWORDS
    # \W+ matches runs of anything other than letters, digits and underscore,
    # so this strips punctuation and also normalises whitespace.
    normalised = re.sub(r'\W+', ' ', text.lower())
    return ' '.join(word for word in normalised.split() if word not in stop_words)

# Run every article body through clean_text and overwrite the column with the
# normalised version that the tokenizer consumes.
raw_text = df['text']
df['text'] = raw_text.apply(clean_text)

In [14]:
# Tokenization
# num_words=5000 has to stay equal to the Embedding layer's input_dim, and
# maxlen=300 is the fixed sequence length every sample is padded to.
# NOTE(review): the tokenizer is fitted on the whole corpus before the
# train/test split, so held-out text contributes to the vocabulary.
tokenizer = Tokenizer(num_words=5000)
corpus = df['text']
tokenizer.fit_on_texts(corpus)
sequences = tokenizer.texts_to_sequences(corpus)
X = pad_sequences(sequences, maxlen=300)

In [15]:
# Save tokenizer
# Persist the fitted tokenizer next to the notebook so the same word index
# can be reloaded later without refitting.
import pickle
with open("tokenizer.pkl", "wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [18]:
# Target vector as a NumPy array, row-aligned with the padded sequences in X.
y = df['label'].to_numpy()

In [19]:
# Train-test split
# Hold out 20% of the samples; the fixed random_state keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [20]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [21]:

# Define LSTM model
# Stacked-LSTM binary classifier over padded word-index sequences; the single
# sigmoid unit outputs the probability of label 1 (fake).
model = Sequential([
    # input_dim must equal the Tokenizer's num_words (5000). The deprecated
    # input_length argument is omitted: the sequence length is already fixed
    # by pad_sequences(maxlen=300) and is inferred from the data.
    Embedding(input_dim=5000, output_dim=128),
    # return_sequences=True hands the full sequence to the second LSTM.
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])



In [22]:
# Compile model
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked for the per-epoch progress log.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [23]:
# Train model
# NOTE(review): the held-out split doubles as validation data here, so the
# val_* metrics are the only evaluation this notebook produces.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
)

# Save model
# Write the trained network to disk alongside the pickled tokenizer.
model.save("fake_news_detector.h5")


Epoch 1/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 403ms/step - accuracy: 0.9173 - loss: 0.1988 - val_accuracy: 0.8830 - val_loss: 0.2694
Epoch 2/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 352ms/step - accuracy: 0.9769 - loss: 0.0679 - val_accuracy: 0.9951 - val_loss: 0.0209
Epoch 3/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m453s[0m 362ms/step - accuracy: 0.9968 - loss: 0.0105 - val_accuracy: 0.9947 - val_loss: 0.0218
Epoch 4/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 367ms/step - accuracy: 0.9987 - loss: 0.0050 - val_accuracy: 0.9950 - val_loss: 0.0173
Epoch 5/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m410s[0m 365ms/step - accuracy: 0.9989 - loss: 0.0037 - val_accuracy: 0.9963 - val_loss: 0.0147


