In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
import re
import string

In [2]:
# Input files, resolved relative to the notebook's working directory.
FAKE_CSV_PATH = 'Fake.csv'
TRUE_CSV_PATH = 'True.csv'

data_fake = pd.read_csv(FAKE_CSV_PATH)
data_true = pd.read_csv(TRUE_CSV_PATH)

In [3]:
# Target encoding: rows from the fake set get 0, rows from the true set get 1.
data_fake["class"], data_true["class"] = 0, 1

In [4]:
# Keep independent copies of the final 10 rows of each frame for manual spot checks.
data_fake_manual_testing = data_fake.iloc[-10:].copy()
data_true_manual_testing = data_true.iloc[-10:].copy()

In [5]:
# Remove the held-out manual-testing rows from the frames used for training.
# The rows are dropped by index label rather than by slicing off "the last 10",
# so re-running this cell is a no-op instead of silently trimming 10 more rows:
# once the labels are gone, errors="ignore" skips them.
# Relies on the default unique RangeIndex produced by pd.read_csv above.
data_fake = data_fake.drop(index=data_fake_manual_testing.index, errors="ignore")
data_true = data_true.drop(index=data_true_manual_testing.index, errors="ignore")

In [6]:
# Stack fake rows on top of true rows and renumber the index 0..n-1.
data_merge = pd.concat((data_fake, data_true), ignore_index=True)

In [7]:
# Discard the metadata columns; the remaining columns feed the classifier.
data = data_merge.drop(columns=['title', 'subject', 'date'])

In [8]:
# Shuffle every row with a fixed seed (the concat left all fake rows first),
# then discard the old index so rows are numbered in their new order.
data = (
    data
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [9]:
def wordopt(text):
    """Normalize a raw news string before it is vectorized.

    Steps, applied in this order:

    1. lowercase;
    2. remove URLs (``http://...``, ``https://...`` and ``www....``);
    3. remove ``<...>`` tags (single-line, non-greedy);
    4. remove every character that is neither a word character nor whitespace;
    5. remove digits;
    6. replace newlines with a single space.

    URLs and tags are stripped before punctuation because their patterns
    rely on ``:``, ``/``, ``.``, ``<`` and ``>`` still being present.

    Parameters
    ----------
    text : str
        Raw text. ``None`` and float NaN (missing cells) are treated as an
        empty string; any other non-string value is converted with ``str()``.

    Returns
    -------
    str
        The cleaned text. Runs of spaces left behind by removed tokens are
        not collapsed.
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        is_missing = text is None or (isinstance(text, float) and text != text)
        text = "" if is_missing else str(text)

    text = text.lower()
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d', '', text)
    # Replace (not delete) newlines: deleting them would fuse the last word of
    # one line with the first word of the next into a single bogus token.
    text = re.sub(r'\n', ' ', text)
    return text

In [10]:
# Run the cleaning function over every article body, overwriting the raw text.
data['text'] = data['text'].map(wordopt)

In [11]:
# Features are the cleaned article text; the target is the 0/1 class column.
X, y = data['text'], data['class']

In [13]:
# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [14]:
# Build TF-IDF features. The vectorizer is fitted on the training text only,
# so the held-out text does not influence what it learns.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
# Transform only (no refit): the test text is encoded with the already-fitted vectorizer.
X_test_vec = vectorizer.transform(X_test)

In [15]:
# Candidate classifiers keyed by display name, each built with the same seed.
# Insertion order is the order in which they are trained and reported.
models = {
    label: estimator_cls(random_state=42)
    for label, estimator_cls in (
        ('Logistic Regression', LogisticRegression),
        ('Decision Tree', DecisionTreeClassifier),
        ('Gradient Boosting', GradientBoostingClassifier),
        ('Random Forest', RandomForestClassifier),
    )
}

In [16]:
# Fit every candidate on the TF-IDF training matrix, then report accuracy and
# the per-class precision/recall/F1 on the held-out matrix.
for name, model in models.items():
    # fit() returns the estimator itself, so prediction can be chained.
    y_pred = model.fit(X_train_vec, y_train).predict(X_test_vec)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{name} Results:\nAccuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))


Logistic Regression Results:
Accuracy: 0.9890
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5900
           1       0.99      0.99      0.99      5320

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220


Decision Tree Results:
Accuracy: 0.9958
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5900
           1       1.00      1.00      1.00      5320

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220


Gradient Boosting Results:
Accuracy: 0.9953
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5900
           1       0.99      1.00      1.00      5320

    accuracy                           1.00     11220

In [17]:
def output_label(n):
    """Translate a numeric class prediction into a display string.

    A value equal to ``0`` yields ``"Fake News"``; every other value yields
    ``"Genuine News"``.
    """
    if n == 0:
        return "Fake News"
    return "Genuine News"

In [18]:
def manual_testing(news):
    """Classify a single piece of news text with every model in ``models``.

    The text is cleaned with ``wordopt`` and encoded with the module-level
    ``vectorizer`` before each model predicts on it.

    Parameters
    ----------
    news : str
        Raw news text.

    Returns
    -------
    dict
        Maps each model's display name to the string produced by
        ``output_label`` for that model's prediction.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([wordopt(news)])
    return {
        label: output_label(classifier.predict(features)[0])
        for label, classifier in models.items()
    }

In [None]:
# Interactive demo: prompt for an article and print each model's verdict.
# input() already returns a str, so no conversion is needed.
news = input("Enter the news text: ")
print(manual_testing(news))