In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, log_loss

# Read each source file and attach its class label in the same step
# (1 = true article, 0 = fake article).
true_df = pd.read_csv('/kaggle/input/fake-newx-detection/true.csv').assign(label=1)
fake_df = pd.read_csv('/kaggle/input/fake-newx-detection/fake.csv').assign(label=0)

# Stack the two frames under a fresh 0..n-1 index, keep only the columns the
# model needs, and discard any row with a missing value in either of them.
data = (
    pd.concat([true_df, fake_df], ignore_index=True)
    .loc[:, ['text', 'label']]
    .dropna()
)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: English stop words are removed and terms that occur in more
# than 70% of the documents are ignored. The vocabulary and IDF weights are
# learned from the training texts only, then reused unchanged on the test texts.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit a logistic regression classifier (library defaults) on the training features.
model = LogisticRegression().fit(X_train_vec, y_train)

# Training split: hard predictions, accuracy, and log loss on predicted probabilities.
y_train_pred = model.predict(X_train_vec)
train_accuracy = accuracy_score(y_train, y_train_pred)
train_loss = log_loss(y_train, model.predict_proba(X_train_vec))

# Test split: the same three quantities on the held-out rows.
y_test_pred = model.predict(X_test_vec)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_loss = log_loss(y_test, model.predict_proba(X_test_vec))

# Print the two summaries in the same layout, training first.
for heading, accuracy, loss in (
    ("=== Training Performance ===", train_accuracy, train_loss),
    ("\n=== Test Performance ===", test_accuracy, test_loss),
):
    print(heading)
    print("Accuracy:", accuracy)
    print("Log Loss:", loss)

# Per-class precision / recall / F1 for the held-out rows.
print("\n=== Classification Report (Test Set) ===")
print(classification_report(y_test, y_test_pred))


=== Training Performance ===
Accuracy: 0.9908402472298012
Log Loss: 0.08676746936447807

=== Test Performance ===
Accuracy: 0.9856347438752784
Log Loss: 0.0953467728494113

=== Classification Report (Test Set) ===
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4650
           1       0.98      0.99      0.99      4330

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [5]:
import joblib

# Persist both fitted objects to the working directory: the classifier and the
# TF-IDF vectorizer it was trained with (both are needed to score new text).
for artifact, filename in (
    (model, 'fake_news_model.pkl'),
    (vectorizer, 'tfidf_vectorizer.pkl'),
):
    joblib.dump(artifact, filename)

print("✅ Model and vectorizer saved successfully!")


✅ Model and vectorizer saved successfully!
