In [2]:
# Import necessary libraries again after code execution environment reset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# ---------------------------------------------------------------------------
# Data: read the news CSV and discard every row containing a missing value.
# ---------------------------------------------------------------------------
df = pd.read_csv("fake_and_real_news.csv")
df = df.dropna()

# The "Text" column is the model input; the "label" column is the target.
X, y = df["Text"], df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Model: TF-IDF features (English stop words removed) feeding a 100-tree
# random forest. Fitting the pipeline fits both steps on the training split.
# ---------------------------------------------------------------------------
model_rf = make_pipeline(
    TfidfVectorizer(stop_words='english'),
    RandomForestClassifier(n_estimators=100, random_state=42),
)
model_rf.fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred_rf = model_rf.predict(X_test)

# ---------------------------------------------------------------------------
# Evaluation: overall accuracy, then precision / recall / F1 computed with
# 'Fake' treated as the positive class, plus the full per-class text report.
# ---------------------------------------------------------------------------
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf, recall_rf, f1_rf = (
    metric(y_test, y_pred_rf, pos_label='Fake')
    for metric in (precision_score, recall_score, f1_score)
)
report_rf = classification_report(y_test, y_pred_rf)

# Final expression of the cell, so the notebook displays it: the four scalar
# scores followed by at most the first 10 lines of the classification report.
accuracy_rf, precision_rf, recall_rf, f1_rf, report_rf.splitlines()[:10]


(0.9974747474747475,
 0.9979423868312757,
 0.9969167523124358,
 0.9974293059125964,
 ['              precision    recall  f1-score   support',
  '',
  '        Fake       1.00      1.00      1.00       973',
  '        Real       1.00      1.00      1.00      1007',
  '',
  '    accuracy                           1.00      1980',
  '   macro avg       1.00      1.00      1.00      1980',
  'weighted avg       1.00      1.00      1.00      1980'])