In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

In [7]:
# Load each source file and tag every row with the class it came from.
true_df = pd.read_csv('dataset/True.csv').assign(label='REAL')
fake_df = pd.read_csv('dataset/Fake.csv').assign(label='FAKE')

# Stack both sources, shuffle the rows reproducibly (fixed seed), renumber the
# index, and keep only the model input ('text') and the target ('label').
# NOTE(review): the REAL row shown in the preview starts with a "(Reuters)"
# dateline; if that is typical of True.csv the classifier may learn the
# dateline instead of the content -- TODO confirm and strip if so.
df = (
    pd.concat([true_df, fake_df], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
    [['text', 'label']]
)

df.head()


Unnamed: 0,text,label
0,"Donald Trump s White House is in chaos, and th...",FAKE
1,Now that Donald Trump is the presumptive GOP n...,FAKE
2,Mike Pence is a huge homophobe. He supports ex...,FAKE
3,SAN FRANCISCO (Reuters) - California Attorney ...,REAL
4,Twisted reasoning is all that comes from Pelos...,FAKE


In [9]:
# Features are the raw article text; targets are the REAL/FAKE labels.
X = df['text']
y = df['label']

# Hold out 20% of the rows for evaluation, with a fixed seed for reproducibility.
# Stratifying on y keeps the REAL/FAKE proportions the same in the train and
# test partitions, so the reported metrics are not skewed by a lucky or
# unlucky random draw of one class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [11]:
# Learn the TF-IDF vocabulary and weights from the training text only, so that
# nothing from the held-out articles leaks into the features.
vectorizer = TfidfVectorizer(
    stop_words='english',  # drop the built-in list of common English words
    max_df=0.7,            # ignore terms appearing in more than 70% of documents
)
X_train_tfidf = vectorizer.fit_transform(X_train)

# Project the test text onto the already-fitted vocabulary (no refitting).
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
def evaluate_model(model, name):
    """Fit ``model`` on the training features and print its test-set metrics.

    Reads the notebook-level ``X_train_tfidf``, ``y_train``, ``X_test_tfidf``
    and ``y_test``. The estimator is fitted in place and nothing is returned.

    Args:
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``.
        name: label printed in the report header.
    """
    model.fit(X_train_tfidf, y_train)
    predictions = model.predict(X_test_tfidf)

    print(f"\n🔍 Model: {name}")
    # Each metric is computed immediately before it is printed, in this order.
    for heading, metric in (
        ("Accuracy:", accuracy_score),
        ("Classification Report:\n", classification_report),
        ("Confusion Matrix:\n", confusion_matrix),
    ):
        print(heading, metric(y_test, predictions))

# Evaluate three classifiers on the same TF-IDF features, in a fixed order.
for estimator, title in (
    (LogisticRegression(), "Logistic Regression"),
    (MultinomialNB(), "Multinomial Naive Bayes"),
    (RandomForestClassifier(n_estimators=100, random_state=42), "Random Forest"),
):
    evaluate_model(estimator, title)

In [None]:
# Train the classifier that predict_news() uses for ad-hoc predictions.
# fit() returns the estimator itself, so construction and training can be chained.
final_model = LogisticRegression().fit(X_train_tfidf, y_train)

def predict_news(news_text):
    """Classify a single piece of news text with the fitted model.

    Uses the notebook-level ``vectorizer`` and ``final_model``.

    Args:
        news_text: raw text of one article or headline.

    Returns:
        The first (and only) element of the model's prediction for the text.
    """
    # transform() takes a collection of documents, so wrap the single text in a list.
    features = vectorizer.transform([news_text])
    return final_model.predict(features)[0]

# Smoke-test the prediction helper on a hand-written headline.
sample = "Breaking: The government announces new policy to combat inflation."
print("📰 News:", sample)

predicted_label = predict_news(sample)
print("✅ Prediction:", predicted_label)