In [None]:

# 📌 Fake News Detection using TF-IDF and PassiveAggressiveClassifier

import os
import pickle

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# 📥 Load the dataset
# Each CSV holds the articles of one class; only the `text` column is used
# as a feature further down.
fake = pd.read_csv('../data/Fake.csv')
true = pd.read_csv('../data/True.csv')

# 🏷 Label the data
fake['label'] = 0  # Fake news
true['label'] = 1  # Real news

# 🔀 Combine and shuffle
# One fixed seed drives the shuffle, the split and the classifier so that the
# reported accuracy and the saved artifacts can be regenerated exactly.
# (Seeding only the split is not enough: an unseeded shuffle changes which
# rows the split sees on every run.)
RANDOM_STATE = 42
data = pd.concat([fake, true], axis=0)
data = data.sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)

# 🔍 Features and target
# Empty CSV fields are read as NaN, which TfidfVectorizer rejects with a
# ValueError; treat a missing article body as an empty document instead.
X = data['text'].fillna('')
y = data['label']

# ✂️ Split the data
# 25% of the rows are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE
)

# 🧠 Vectorization
# English stop words are removed and terms occurring in more than 70% of the
# documents are ignored. The vectorizer is fitted on the training split only,
# so the vocabulary and IDF weights never see the held-out rows.
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# 🧪 Model training
# random_state fixes the order in which the training data is shuffled between
# passes, making the fitted weights repeatable.
model = PassiveAggressiveClassifier(max_iter=50, random_state=RANDOM_STATE)
model.fit(X_train_tfidf, y_train)

# ✅ Prediction and evaluation
y_pred = model.predict(X_test_tfidf)
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)  # rows: true label, columns: predicted

print(f'✅ Accuracy: {acc * 100:.2f}%')
print('📊 Confusion Matrix:')
print(cm)

# 💾 Save model and vectorizer
# Create the output directory first so a fresh checkout does not lose the
# trained model to a FileNotFoundError at the very last step.
# The two pickles belong together: the model's feature indices are only
# meaningful with this exact fitted vectorizer.
# NOTE: pickle files execute code when loaded; only unpickle these artifacts
# from a trusted location.
os.makedirs('../model', exist_ok=True)

with open('../model/model.pkl', 'wb') as f:
    pickle.dump(model, f)

with open('../model/vectorizer.pkl', 'wb') as f:
    pickle.dump(tfidf, f)
