In [None]:
import pandas as pd
import string

# Load dataset
# The cells visible in this notebook read only these two columns, so they are
# the only ones that must exist and be non-null.
REQUIRED_COLUMNS = ['text', 'label']

try:
    data = pd.read_csv('news.csv')
except FileNotFoundError:
    print("Dataset file 'news.csv' not found. Please add your dataset.")
    raise

# Fail here with a readable message instead of a bare KeyError several cells
# later when a column is first accessed.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
if missing_columns:
    raise KeyError(f"news.csv is missing required column(s): {missing_columns}")

# Remove missing values
# Only a missing article body or label makes a row unusable. A blank in any
# other column is not a reason to throw the row away, so restrict the check
# to the columns that are actually used.
data = data.dropna(subset=REQUIRED_COLUMNS)

# Built once: maps every ASCII punctuation character to None so that
# str.translate deletes it. Rebuilding the table for every row is wasted work.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Lower-case ``text`` and strip ASCII punctuation.

    Parameters
    ----------
    text : object
        The value to normalise. Anything that is not already a ``str``
        (for example a number parsed from an all-digit cell) is converted
        with ``str()`` first instead of raising ``AttributeError``.

    Returns
    -------
    str
        The lower-cased text with every character listed in
        ``string.punctuation`` removed. Whitespace and non-ASCII punctuation
        (e.g. curly quotes) are left in place.
    """
    if not isinstance(text, str):
        text = str(text)
    return text.lower().translate(_PUNCTUATION_TABLE)

# Normalise every article body element-wise with clean_text, then preview
# the first rows of the resulting frame.
data['text'] = data['text'].map(clean_text)
data.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Hold out part of the data so the model can be scored on articles it has
# never seen. Scoring a model on the very rows it was fitted on gives an
# inflated figure that says nothing about generalisation.
TEST_SIZE = 0.2
SEED = 42

y = data['label']
# stratify=y keeps the class proportions equal in both splits; it requires
# at least two rows per class.
text_train, text_test, y_train, y_test = train_test_split(
    data['text'], y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)

# Feature extraction
# Fit the vocabulary and IDF weights on the training split only, so that no
# statistics from the held-out articles leak into the features.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Whole dataset projected into the fitted feature space, kept under its
# original name for cells that refer to X / y.
X = vectorizer.transform(data['text'])

# Compute class weights (from the training labels, since that is what the
# model is fitted on)
classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))

# Model training on the training split
model = RandomForestClassifier(n_estimators=1000, random_state=SEED, class_weight=class_weight_dict, max_depth=None)
model.fit(X_train, y_train)

# Mean accuracy on the rows the model did not see during fitting
print("Held-out accuracy:", model.score(X_test, y_test))

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Model evaluation on all data
# NOTE: X / y cover the whole dataset, which includes the rows the model was
# fitted on, so these figures are optimistic and are not a measure of how the
# model performs on unseen articles.
y_pred_all = model.predict(X)
print("Accuracy on all data:", accuracy_score(y, y_pred_all))
print(classification_report(y, y_pred_all))

# Confusion Matrix on all data
# Take the class labels from the fitted model instead of hard-coding them:
# confusion_matrix raises ValueError when none of the requested labels occur
# in y (e.g. a CSV that spells them 'REAL'/'FAKE') and silently leaves out any
# class that is not listed. 'real'/'fake' keep their original position when
# they are present, so the plot layout is unchanged for such datasets.
preferred_order = ['real', 'fake']
known_labels = model.classes_.tolist()
class_labels = (
    [label for label in preferred_order if label in known_labels]
    + [label for label in known_labels if label not in preferred_order]
)
cm = confusion_matrix(y, y_pred_all, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap='viridis')  # Use a perceptually uniform colormap for better visualization
plt.title('Confusion Matrix (All Data)')
plt.show()

In [None]:
# Example prediction for new/unseen news
sample = ["This is a sample news article to test fake news detection."]
# The vectorizer was fitted on text that had already been passed through
# clean_text, so new articles must get the same normalisation. Otherwise
# punctuation changes the tokens: clean_text turns "don't" into "dont",
# which the raw string would never produce.
sample_clean = [clean_text(article) for article in sample]
sample_vec = vectorizer.transform(sample_clean)
prediction = model.predict(sample_vec)
print("Prediction for sample:", prediction[0])