In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the "True.csv" and "Fake.csv" datasets
true_data = pd.read_csv('True.csv')
fake_data = pd.read_csv('Fake.csv')

# Add a label column to indicate whether the news is true (1) or fake (0)
true_data['label'] = 1
fake_data['label'] = 0

# Concatenate the datasets into a single dataframe
data = pd.concat([true_data, fake_data])

# Convert the text data into numerical features using the TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['text'])
y = data['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test)

# Evaluate performance using various metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)


Accuracy: 0.9366369710467706
Precision: 0.9347006129184346
Recall: 0.9314070941977919
F1-score: 0.9330509471702553


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB

# Load the "True.csv" and "Fake.csv" datasets
true_data = pd.read_csv('True.csv')
fake_data = pd.read_csv('Fake.csv')

# Add a label column to indicate whether the news is true (1) or fake (0)
true_data['label'] = 1
fake_data['label'] = 0

# Concatenate the datasets into a single dataframe
data = pd.concat([true_data, fake_data])

# Convert the text data into numerical features using the TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['text'])
y = data['label']

# Train a Naive Bayes classifier using cross-validation
classifier = MultinomialNB()
scores = cross_val_score(classifier, X, y, cv=5)

print('Cross-validation scores:', scores)
print('Mean cross-validation score:', scores.mean())


Cross-validation scores: [0.91046771 0.88964365 0.85111359 0.90878717 0.92805435]
Mean cross-validation score: 0.897613292684167


In [11]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Load the data
true_data = pd.read_csv('true.csv')
fake_data = pd.read_csv('fake.csv')

# Add labels to the data
true_data['label'] = 'real'
fake_data['label'] = 'fake'

# Combine the data into a single DataFrame
data = pd.concat([true_data, fake_data], ignore_index=True)

# Convert the text data into numerical features using the TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['text'])
y = data['label']

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X, y)

# Take user input and classify it
user_input = input("Enter a news article: ")
user_input_vectorized = vectorizer.transform([user_input])
prediction = classifier.predict(user_input_vectorized)

print("The news article is", prediction[0])


Enter a news article: North Korea threatened the US and South Korea in 2023
The news article is real
