<a href="https://colab.research.google.com/github/abdellahatae/FakeNewsDetection/blob/main/FakeNewsDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# fake_news_detection_starter.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
import joblib

# Load the two local CSV files and tag every row with its class label.
fake = pd.read_csv("fake.csv").assign(label="FAKE")
real = pd.read_csv("real.csv").assign(label="REAL")

# Stack both sources into a single frame with a fresh 0..n-1 index.
df = pd.concat((fake, real), ignore_index=True)

# Model input = title + body; missing values become empty strings so the
# concatenation never yields NaN.
title_text = df["title"].fillna("")
body_text = df["text"].fillna("")
df["full_text"] = title_text + " " + body_text

# Features / labels
X, y = df["full_text"], df["label"]

# Hold out 20% of the samples for evaluation; the fixed seed makes the
# partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Pipeline: TF-IDF features followed by a linear SVM.
pipeline = Pipeline([
    # Drop English stop words and terms whose document frequency exceeds 70%.
    ('tfidf', TfidfVectorizer(stop_words='english', max_df=0.7)),
    # Seed the classifier as well as the split: LinearSVC's solver can shuffle
    # samples, so without a seed repeated runs may yield slightly different
    # models even though the train/test partition is identical.
    ('clf', LinearSVC(random_state=42)),
])

# Training
pipeline.fit(X_train, y_train)

# Evaluation on the held-out split.
# NOTE(review): the run recorded with this notebook reports ~1.00 for every
# metric. Scores that high may point to leakage (e.g. duplicated articles on
# both sides of the split, or source-specific markers in the text) -- confirm
# before trusting the model on other data.
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))

# Persist the fitted pipeline to disk, then confirm on stdout.
joblib.dump(value=pipeline, filename="fake_news_model.pkl")
print("✅ Modèle enregistré dans fake_news_model.pkl")


              precision    recall  f1-score   support

        FAKE       1.00      1.00      1.00      4733
        REAL       1.00      0.99      1.00      4247

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980

✅ Modèle enregistré dans fake_news_model.pkl
