In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Load dataset (Assuming 'fake_news.csv' contains columns: 'text' and 'label')
df = pd.read_csv('fake_news.csv')

# Handle missing values by removing rows with NaN in the 'text' column
df = df.dropna(subset=['text'])

# Display the first few rows of the dataset
print(df.head())

# Split the dataset into features (X) and target (y)
X = df['text']  # Text of the news article
y = df['label']  # 0 for fake, 1 for real

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the text data into numerical features using TF-IDF vectorizer
tfidf = TfidfVectorizer(stop_words='english')  # Remove common English stopwords
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Initialize the Naive Bayes classifier (Multinomial Naive Bayes is suitable for text classification)
nb_classifier = MultinomialNB()

# Train the classifier
nb_classifier.fit(X_train_tfidf, y_train)

# Predict on the test data
y_pred = nb_classifier.predict(X_test_tfidf)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


   id                                              title              author  \
0   0  House Dem Aide: We Didn’t Even See Comey’s Let...       Darrell Lucus   
1   1  FLYNN: Hillary Clinton, Big Woman on Campus - ...     Daniel J. Flynn   
2   2                  Why the Truth Might Get You Fired  Consortiumnews.com   
3   3  15 Civilians Killed In Single US Airstrike Hav...     Jessica Purkiss   
4   4  Iranian woman jailed for fictional unpublished...      Howard Portnoy   

                                                text  label  
0  House Dem Aide: We Didn’t Even See Comey’s Let...      1  
1  Ever get the feeling your life circles the rou...      0  
2  Why the Truth Might Get You Fired October 29, ...      1  
3  Videos 15 Civilians Killed In Single US Airstr...      1  
4  Print \nAn Iranian woman has been sentenced to...      1  
Accuracy: 0.8661208764748375

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.98     

In [4]:
y_pred

array([1, 1, 1, ..., 0, 0, 0], dtype=int64)