# 📰 Fake News Detection using NLP & Machine Learning
A beginner-friendly ML project to classify news articles as real or fake using Python and scikit-learn.

In [None]:
# ✅ Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# ✅ Load Dataset
# 'Fake.csv' and 'True.csv' are opened by relative path, so they must be in
# the current working directory.
fake = pd.read_csv("Fake.csv")
true = pd.read_csv("True.csv")

# Label the data: 0 = fake, 1 = real
fake['label'] = 0
true['label'] = 1

# Combine both sources and keep only the article text and its label
data = pd.concat([fake, true], ignore_index=True)
data = data[['text', 'label']]

# Drop rows with missing text: an empty CSV field is read as NaN, and the
# TF-IDF vectorizer used later cannot process NaN documents.
data = data.dropna(subset=['text'])

# Shuffle with a fixed seed so that the row order (and therefore every
# downstream split and metric) is the same on each run.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
data.head()

In [None]:
# ✅ Split Data
# Hold out 20% of the rows for testing. The split is seeded for
# reproducibility and stratified on the label so that the train and test
# sets keep the same fake/real proportion as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
    stratify=data['label'],
)

In [None]:
# ✅ Text Vectorization
# TF-IDF features with English stop words removed and max_df=0.7.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# Fit on the training split only; the test split is merely transformed with
# the already-fitted vectorizer.
X_train_vec = vectorizer.fit_transform(raw_documents=X_train)
X_test_vec = vectorizer.transform(raw_documents=X_test)

In [None]:
# ✅ Train Model
# Logistic regression with the library's default settings, fitted on the
# vectorized training texts and their labels.
model = LogisticRegression()
model.fit(X=X_train_vec, y=y_train)

In [None]:
# ✅ Evaluate Model
# Label ids and their display names, in a fixed order (0 = fake, 1 = real),
# so the report rows and the matrix axes are always labelled consistently.
class_ids = [0, 1]
class_names = ["Fake", "Real"]

y_pred = model.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))

# sep="" stops print() from inserting a space after the heading, which would
# shift the report's first line out of alignment with the rows below it.
print(
    "\nClassification Report:\n",
    classification_report(
        y_test, y_pred, labels=class_ids, target_names=class_names
    ),
    sep="",
)

# Confusion Matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# ✅ Predict on New Article
def predict_news(news_text):
    """Classify a single news article with the trained model.

    The text is vectorized with the module-level ``vectorizer`` and passed to
    the module-level ``model``; both must already be fitted.

    Args:
        news_text: The article text to classify.

    Returns:
        "Real News" if the predicted label is 1, otherwise "Fake News".
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([news_text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Real News"
    return "Fake News"

# Quick check: classify one example headline and print the verdict
print(predict_news(news_text="The government has declared a new lockdown starting Monday."))