In [5]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import PassiveAggressiveClassifier

# Load dataset
df = pd.read_csv('fake_real/Fake.csv')
df = pd.read_csv('fake_real/True.csv')

# Preprocess the data
df['text'] = df['text'].str.lower()  # Convert text to lowercase

# For demonstration, create a binary label (1 for real, 0 for fake)
# Ensure the dataset has entries for both classes
# This example assumes you have some way to differentiate between fake and real news
# Adjust the logic based on your dataset
df['label'] = df['subject'].apply(lambda x: 1 if 'real' in x.lower() else 0)

# Check if both classes exist in the dataset
if df['label'].nunique() < 2:
    raise ValueError("The dataset must contain both classes (0 and 1) for training.")

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

# Vectorize the text using TF-IDF
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Initialize classifiers
svm = SVC(kernel='linear', C=1)
nb = MultinomialNB()
lr = LogisticRegression(max_iter=1000)
pac = PassiveAggressiveClassifier(max_iter=1000)

# Train models
svm.fit(X_train_tfidf, y_train)
nb.fit(X_train_tfidf, y_train)
lr.fit(X_train_tfidf, y_train)
pac.fit(X_train_tfidf, y_train)

# Make predictions
y_pred_svm = svm.predict(X_test_tfidf)
y_pred_nb = nb.predict(X_test_tfidf)
y_pred_lr = lr.predict(X_test_tfidf)
y_pred_pac = pac.predict(X_test_tfidf)

# Calculate confusion matrices
cm_svm = confusion_matrix(y_test, y_pred_svm)
cm_nb = confusion_matrix(y_test, y_pred_nb)
cm_lr = confusion_matrix(y_test, y_pred_lr)
cm_pac = confusion_matrix(y_test, y_pred_pac)

# Plotting confusion matrices
def plot_confusion_matrix(cm, title):
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
    plt.title(title)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

# Plot confusion matrices for each classifier
plot_confusion_matrix(cm_svm, "Confusion Matrix - SVM")
plot_confusion_matrix(cm_nb, "Confusion Matrix - Naive Bayes")
plot_confusion_matrix(cm_lr, "Confusion Matrix - Logistic Regression")
plot_confusion_matrix(cm_pac, "Confusion Matrix - PassiveAggressiveClassifier")

# Print accuracy scores
print(f"SVM Accuracy: {accuracy_score(y_test, y_pred_svm):.2f}")
print(f"Naive Bayes Accuracy: {accuracy_score(y_test, y_pred_nb):.2f}")
print(f"Logistic Regression Accuracy: {accuracy_score(y_test, y_pred_lr):.2f}")
print(f"PassiveAggressiveClassifier Accuracy: {accuracy_score(y_test, y_pred_pac):.2f}")


Unique labels in the dataset: [0]


ValueError: The number of classes has to be greater than one; got 1 class