In [2]:

## 📰 Fake News Detection – Jupyter Notebook

# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib


# Step 2: Load Datasets
# Each CSV holds one class of article; both paths are relative to the
# current working directory.
fake_df = pd.read_csv("Fake.csv")
true_df = pd.read_csv("True.csv")

# Add labels
fake_df['label'] = 0  # Fake
true_df['label'] = 1  # Real

# Combine datasets, keeping only the two columns the model uses
data = pd.concat([fake_df, true_df], ignore_index=True)[['text', 'label']]

# Drop rows whose text is missing: TfidfVectorizer raises on NaN documents
data = data.dropna(subset=['text'])

# Shuffle the data. The seed is fixed so that every run sees the same row
# order; without it the seeded train/test split further down would still
# produce a different partition on each run.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)


# Step 3: Split Data
# Features are the raw article text; the target is the 0/1 label column.
X, y = data['text'], data['label']

# Hold out 20% of the rows for evaluation. The fixed seed makes the split
# deterministic for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Step 4: Vectorize Text
# TF-IDF features: English stop words are removed and terms that occur in
# more than 70% of the training documents are ignored.
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Fit on the training split only; the test split is transformed with the
# vocabulary and IDF weights learned from the training data.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Save the fitted vectorizer to disk
joblib.dump(vectorizer, filename="vectorizer.jb")


# Step 5: Train Model
# Logistic regression with scikit-learn's default settings, fitted on the
# TF-IDF training matrix (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train_vec, y_train)

# Save the fitted classifier to disk
joblib.dump(model, filename="lr_model.jb")


# Step 6: Evaluate Model
# Score the classifier on the held-out test split.
y_pred = model.predict(X_test_vec)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / F1 as a formatted text table
test_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", test_report)


# Step 7: Predict on New Input
def predict_news(news_text):
    """Classify a single piece of news text as "Real" or "Fake".

    Uses the module-level ``vectorizer`` and ``model`` fitted earlier in
    this notebook.

    Args:
        news_text: The article text to classify.

    Returns:
        "Real" if the model predicts label 1, otherwise "Fake".
    """
    # The vectorizer expects an iterable of documents, so wrap the one text.
    features = vectorizer.transform([news_text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Real"
    return "Fake"



Accuracy: 0.9855233853006682

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      4686
           1       0.98      0.99      0.98      4294

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980

