In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib

In [2]:
# Load the two source files; each file holds articles of a single class.
true_df = pd.read_csv("True.csv")
fake_df = pd.read_csv("Fake.csv")

# Add labels
true_df["label"] = "real"
fake_df["label"] = "fake"

# Combine and shuffle. The fixed random_state makes the row order identical on
# every run, so the notebook reproduces under Restart Kernel -> Run All.
df = pd.concat([true_df, fake_df], axis=0)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Combine title and text. Missing values are replaced with "" first, because
# concatenating a string with NaN yields NaN, and a NaN entry is not a valid
# text document for the vectorizer used later.
df["combined_text"] = df["title"].fillna("") + " " + df["text"].fillna("")

# Show sample
df.head()

Unnamed: 0,title,text,subject,date,label,combined_text
0,Trump Halts Travel In New Executive Order,21st Century Wire says After President Donald ...,US_News,"March 6, 2017",fake,Trump Halts Travel In New Executive Order 21st...
1,Ex-congressman Ford being considered for U.S. ...,WASHINGTON (Reuters) - Former Democratic Repre...,politicsNews,"November 22, 2016",real,Ex-congressman Ford being considered for U.S. ...
2,“GYNECOLOGY WITHOUT BORDERS” Group Releases GU...,AROUND 70 per cent of female refugees in north...,left-news,"Mar 11, 2017",fake,“GYNECOLOGY WITHOUT BORDERS” Group Releases GU...
3,"Media Says Trump Cannot Use Anonymous Sources,...",21st Century Wire says Is there any wonder peo...,US_News,"March 8, 2017",fake,"Media Says Trump Cannot Use Anonymous Sources,..."
4,Trump Accidentally Says He Wants Single Payer...,It can easily be said that Donald Trump is a f...,News,"May 5, 2017",fake,Trump Accidentally Says He Wants Single Payer...


In [8]:
# Features are the merged title/body strings; targets are the class labels.
X, y = df["combined_text"], df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# TF-IDF settings: drop English stop words and ignore any term whose document
# frequency exceeds 70% of the documents.
tfidf = TfidfVectorizer(
    max_df=0.7,
    stop_words="english",
)

# Fit on the training split only, then reuse the fitted vectorizer for the test
# split so both matrices share one vocabulary.
X_train_vec = tfidf.fit_transform(X_train)
X_test_vec = tfidf.transform(X_test)

In [7]:
# Passive-Aggressive linear classifier. The estimator shuffles the training data
# between epochs; random_state pins that shuffling so repeated runs fit the same
# model and report the same metrics.
model = PassiveAggressiveClassifier(max_iter=1000, random_state=42)
model.fit(X_train_vec, y_train)

In [9]:
# Score the held-out split with the fitted classifier.
y_pred = model.predict(X_test_vec)

acc = accuracy_score(y_test, y_pred)
# Pin the class order explicitly so the matrix is unambiguous: rows are true
# labels, columns are predicted labels, index 0 is "fake" and index 1 is "real".
cm = confusion_matrix(y_test, y_pred, labels=["fake", "real"])

# The emoji in these messages were mis-encoded (UTF-8 bytes decoded with the
# wrong charset); they are restored to the intended characters.
print(f"✅ Accuracy: {acc * 100:.2f}%")
print("📊 Confusion Matrix:\n", cm)

✅ Accuracy: 99.48%
📊 Confusion Matrix:
 [[4649   27]
 [  20 4284]]


In [10]:
# Persist the classifier and the vectorizer as two separate joblib files
# (relative paths, so they land in the current working directory). Both files
# are needed later: the vectorizer turns raw text into the features the
# classifier expects.
joblib.dump(model, "fake_news_model.pkl")
# This is the cell's last expression, so its return value is what the notebook displays.
joblib.dump(tfidf, "vectorizer.pkl")

['vectorizer.pkl']

In [12]:
def predict_news(text):
    """Classify one piece of news text with the notebook's vectorizer and model.

    Args:
        text: The raw text to classify.

    Returns:
        The first (and only) element of ``model.predict`` for the vectorized text.

    Note:
        Uses the notebook-level ``tfidf`` and ``model`` objects, so the cells
        that create them must have been run before calling this function.
    """
    # The vectorizer works on a collection of documents, hence the one-item list.
    return model.predict(tfidf.transform([text]))[0]

SyntaxError: unexpected character after line continuation character (3330995806.py, line 1)