In [1]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Read both source files and label each one as it is loaded:
# class 0 marks rows from Fake.csv, class 1 marks rows from True.csv.
fake = pd.read_csv("Fake.csv").assign(**{"class": 0})
true = pd.read_csv("True.csv").assign(**{"class": 1})

In [3]:
# Stack the fake and real articles into one frame with a fresh 0..n-1 index
data = pd.concat([fake, true], ignore_index=True)

print(data.head())
print("Total rows:", len(data))

                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  class  
0  December 31, 2017      0  
1  December 31, 2017      0  
2  December 30, 2017      0  
3  December 29, 2017      0  
4  December 25, 2017      0  
Total rows: 44898


In [4]:
def clean_text(text):
    """Normalize raw article text into lowercase, space-separated words.

    Steps, in order: lowercase, drop URLs, drop HTML-like tags, drop digit
    runs, replace ASCII punctuation with spaces, collapse whitespace.

    Args:
        text: The raw text. Non-string values are converted with ``str()``;
            ``None`` and float NaN are treated as missing.

    Returns:
        The cleaned string, or ``""`` when ``text`` is missing.
    """
    # Missing values would otherwise be stringified into the bogus tokens
    # "none" / "nan". `text != text` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    text = re.sub(r'http\S+', ' ', text)  # URLs (anything starting with "http")
    text = re.sub(r'<.*?>', ' ', text)  # HTML-like tags, shortest match
    text = re.sub(r'\d+', ' ', text)  # runs of digits
    # Only the ASCII characters in string.punctuation are replaced here.
    text = re.sub(r'[%s]' % re.escape(string.punctuation), ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()  # collapse whitespace, trim the ends
    return text

# Build the model input from title + body. Missing values are replaced with ""
# first, because astype(str) alone would turn NaN into the literal word "nan",
# and such rows would then never be recognised as empty.
# Note: this overwrites the raw "text" column, so re-running this cell on the
# same frame prepends the title a second time.
data["text"] = (
    data["title"].fillna("").astype(str) + " " + data["text"].fillna("").astype(str)
).apply(clean_text)

In [5]:
# Keep only the rows whose cleaned text is non-empty after stripping whitespace
has_content = data["text"].str.strip().ne("")
data = data.loc[has_content]


In [6]:
# Shuffle every row (frac=1 samples the whole frame) with a fixed seed,
# then renumber the index so it no longer reflects the pre-shuffle order.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

In [7]:
# Model input (cleaned text) and target (class label)
x, y = data["text"], data["class"]

In [8]:
# Hold out 25% of the rows for testing. The split is seeded for repeatability
# and stratified on the labels so both splits keep the same class ratio.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42, stratify=y
)

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF feature extractor for the cleaned text
vectorizer = TfidfVectorizer(
    stop_words="english",  # drop scikit-learn's built-in English stop words
    max_df=0.75,  # ignore terms found in more than 75% of documents
    min_df=5,  # ignore terms found in fewer than 5 documents
    ngram_range=(1,2),  # use single words and two-word phrases
)

In [10]:
# Fit the vectorizer on the training split only, then apply that fitted
# vectorizer to the test split so no test data influences the features.
xv_train = vectorizer.fit_transform(x_train)
xv_test = vectorizer.transform(x_test)

In [11]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the TF-IDF training features; max_iter=3000 raises
# the cap on solver iterations.
LR = LogisticRegression(max_iter=3000)
LR.fit(xv_train, y_train)

In [12]:
# Predicted class labels for the held-out test features
pred_lr = LR.predict(xv_test)

In [13]:
# Score the logistic-regression predictions against the held-out labels
lr_accuracy = accuracy_score(y_test, pred_lr)
lr_report = classification_report(y_test, pred_lr)

print("---- Logistic Regression ----")
print("Accuracy:", lr_accuracy)
print(lr_report)

---- Logistic Regression ----
Accuracy: 0.9881493361846209
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5868
           1       0.99      0.99      0.99      5355

    accuracy                           0.99     11223
   macro avg       0.99      0.99      0.99     11223
weighted avg       0.99      0.99      0.99     11223



In [14]:
from sklearn.svm import LinearSVC

# Linear support-vector classifier on the TF-IDF training features.
# The seed is fixed because the solver may draw random numbers while
# training; with it, repeated runs fit (and later pickle) the same model.
SVM = LinearSVC(random_state=42)
SVM.fit(xv_train, y_train)

In [15]:
# Predicted class labels for the held-out test features
pred_svm = SVM.predict(xv_test)

In [16]:
# Score the SVM predictions against the held-out labels
svm_accuracy = accuracy_score(y_test, pred_svm)
svm_report = classification_report(y_test, pred_svm)

print("---- SVM Results ----")
print("Accuracy:", svm_accuracy)
print(svm_report)

---- SVM Results ----
Accuracy: 0.9953666577563931
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5868
           1       0.99      1.00      1.00      5355

    accuracy                           1.00     11223
   macro avg       1.00      1.00      1.00     11223
weighted avg       1.00      1.00      1.00     11223



In [17]:
from sklearn.linear_model import PassiveAggressiveClassifier

# Passive-aggressive classifier on the TF-IDF training features.
# Despite the variable name, this is not a Naive Bayes model; the name `NB`
# is kept because the prediction cell refers to it.
# The seed is fixed because the classifier shuffles the training data between
# epochs by default, which otherwise makes the scores differ from run to run.
NB = PassiveAggressiveClassifier(max_iter=1000, random_state=42)
NB.fit(xv_train, y_train)

In [18]:
# Predicted class labels for the held-out test features
pred_nb = NB.predict(xv_test)

In [19]:
# Score the passive-aggressive predictions against the held-out labels
pa_accuracy = accuracy_score(y_test, pred_nb)
pa_report = classification_report(y_test, pred_nb)

print("---- Passive Aggressive Classifier ----")
print("Accuracy:", pa_accuracy)
print(pa_report)

---- Passive Aggressive Classifier ----
Accuracy: 0.9954557604918471
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5868
           1       1.00      0.99      1.00      5355

    accuracy                           1.00     11223
   macro avg       1.00      1.00      1.00     11223
weighted avg       1.00      1.00      1.00     11223



In [20]:
import pickle

# Persist the fitted vectorizer and the SVM model to disk.
# Each file is opened in a `with` block so it is flushed and closed even if
# pickling raises; a bare open() would leave the handle open.
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

with open("model.pkl", "wb") as model_file:
    pickle.dump(SVM, model_file)


In [21]:
def predict_news(text):
    """Classify a piece of news text as real or fake.

    The text is cleaned with ``clean_text``, vectorized with the notebook-level
    ``vectorizer``, and classified by the notebook-level ``SVM`` model.

    Args:
        text: Raw news text to classify.

    Returns:
        ``"REAL NEWS"`` when the model predicts class 1, otherwise
        ``"FAKE NEWS"``.
    """
    # transform() takes a collection of documents, so the single text is wrapped in a list
    features = vectorizer.transform([clean_text(text)])
    label = SVM.predict(features)[0]
    return "REAL NEWS" if label == 1 else "FAKE NEWS"


In [28]:
# Interactive demo: this cell waits for typed input, so it needs a live kernel
news = input("Enter the news text: ")
verdict = predict_news(news)
print("\nPrediction:", verdict)

Enter the news text:  The Indian Space Research Organisation (ISRO) revealed that aliens contacted them last week and shared advanced technology. According to the fake post, the aliens requested secrecy from world governments.



Prediction: FAKE NEWS
