In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load and combine data
fake_df = pd.read_csv('Fake.csv')
true_df = pd.read_csv('True.csv')

# Combine data and create labels
X = pd.concat([fake_df['text'], true_df['text']])
y = [0]*len(fake_df) + [1]*len(true_df)  # 0 for fake, 1 for true

# Data preprocessing
X = X.str.lower().str.replace('[^\w\s]', '', regex=True)

# Feature extraction
vectorizer = TfidfVectorizer()
X_vect = vectorizer.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_vect, y, test_size=0.2, random_state=42)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))





Accuracy: 0.9488864142538975
Confusion Matrix:
[[4492  241]
 [ 218 4029]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      4733
           1       0.94      0.95      0.95      4247

    accuracy                           0.95      8980
   macro avg       0.95      0.95      0.95      8980
weighted avg       0.95      0.95      0.95      8980

The news article is classified as Fake.


In [13]:
import re

def classify_text(text):
    # Preprocess the new text
    text_processed = re.sub(r'[^\w\s]', '', text.lower())

    # Vectorize the new text
    text_vect = vectorizer.transform([text_processed])

    # Predict using the trained model
    prediction = model.predict(text_vect)

    # Return result
    if prediction[0] == 0:
        return "The news article is classified as Fake."
    else:
        return "The news article is classified as True."

# Example usage
new_text = "ALiens found on mars! USA on their way to give them freedom."
result = classify_text(new_text)
print(result)

The news article is classified as Fake.
