# Fake News Detection using NLP and Machine Learning

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Fetch the labelled news articles (CSV hosted on GitHub) into a DataFrame
DATA_URL = "https://raw.githubusercontent.com/laxmimerit/fake-real-news-dataset/main/fake_or_real_news.csv"
df = pd.read_csv(DATA_URL)
# Preview the first rows as the cell output
df.head()

In [3]:
# Text preprocessing setup
stemmer = PorterStemmer()    # single stemmer instance shared by preprocess()
nltk.download('stopwords')   # make the NLTK stop-word corpus available locally
# Build the stop-word lookup once. Calling stopwords.words('english') inside
# the comprehension re-evaluates it for every token, and a frozenset gives
# constant-time membership tests instead of a linear scan of a list.
_STOP_WORDS = frozenset(stopwords.words('english'))

def preprocess(text):
    """Normalise one document for TF-IDF vectorisation.

    Non-letter characters are replaced by spaces, the text is lower-cased and
    split on whitespace, English stop words are dropped, and every remaining
    token is reduced to its Porter stem.

    Args:
        text: Raw document text. Non-string values (e.g. None, or the NaN
            pandas produces for a missing CSV field) are treated as an
            empty document instead of raising TypeError in re.sub.

    Returns:
        The surviving stems joined by single spaces; '' when nothing survives.
    """
    if not isinstance(text, str):
        return ''
    tokens = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    # Stop words are filtered on the raw (unstemmed) token, then stemmed.
    return ' '.join(stemmer.stem(token) for token in tokens if token not in _STOP_WORDS)
# Clean every article. NOTE: this overwrites the raw 'text' column in place,
# so re-running the cell would re-process already-stemmed text.
df['text'] = df['text'].map(preprocess)

In [4]:
# Convert text to numerical data using TF-IDF (vocabulary capped at 5000 terms).
# The document-term matrix is kept sparse: a dense copy stores
# n_documents x 5000 floats that are almost all zeros, and both
# train_test_split and LogisticRegression accept sparse input directly.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split, so the vocabulary and IDF weights see the test documents;
# consider fitting on the training portion only.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['text'])
# Encode labels without a per-row Python call: 'REAL' -> 1, anything else -> 0.
y = (df['label'] == 'REAL').astype('int64')

In [5]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Fit a logistic-regression classifier (default hyperparameters) on the training split
model = LogisticRegression().fit(X_train, y_train)
model  # show the fitted estimator as the cell output

In [7]:
# Score the classifier on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Report accuracy as a percentage with two decimals
print(f'Accuracy: {accuracy * 100:.2f}%')