<a href="https://colab.research.google.com/github/guptushar27/Fake-News-Detection-project/blob/main/fakeNewsDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# Cell 1: Install and import required libraries
!pip install nltk scikit-learn pandas --quiet

import pandas as pd
import numpy as np
import nltk
import string
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

nltk.download('stopwords')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [10]:
# Cell 2: Load news dataset
df = pd.read_csv('/content/news.csv')  # Your uploaded dataset
df = df[['text', 'label']].dropna()
df.head()


Unnamed: 0,text,label
0,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,It's primary day in New York and front-runners...,REAL


In [11]:
# Cell 3: Text Preprocessing
stop_words = set(stopwords.words('english'))

def preprocess(text):
    text = str(text).lower()
    text = ''.join([char for char in text if char not in string.punctuation])
    words = text.split()
    words = [word for word in words if word not in stop_words]
    return ' '.join(words)

df['clean_text'] = df['text'].apply(preprocess)
df[['text', 'clean_text', 'label']].head()


Unnamed: 0,text,clean_text,label
0,"Daniel Greenfield, a Shillman Journalism Fello...",daniel greenfield shillman journalism fellow f...,FAKE
1,Google Pinterest Digg Linkedin Reddit Stumbleu...,google pinterest digg linkedin reddit stumbleu...,FAKE
2,U.S. Secretary of State John F. Kerry said Mon...,us secretary state john f kerry said monday st...,REAL
3,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",— kaydee king kaydeeking november 9 2016 lesso...,FAKE
4,It's primary day in New York and front-runners...,primary day new york frontrunners hillary clin...,REAL


In [12]:
# Cell 4: Vectorization and train-test split
X = df['clean_text']
y = df['label']

tfidf = TfidfVectorizer(max_features=5000)
X_vec = tfidf.fit_transform(X).toarray()

X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)


In [13]:
# Cell 5: Model training and evaluation
model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("\nDetailed Report:\n", classification_report(y_test, y_pred))


Model Accuracy: 0.9218626677190213

Detailed Report:
               precision    recall  f1-score   support

        FAKE       0.91      0.93      0.92       628
        REAL       0.93      0.91      0.92       639

    accuracy                           0.92      1267
   macro avg       0.92      0.92      0.92      1267
weighted avg       0.92      0.92      0.92      1267



In [14]:
# Cell 6: Try Indian news examples
def predict_news(news_text):
    cleaned = preprocess(news_text)
    vec = tfidf.transform([cleaned]).toarray()
    pred = model.predict(vec)[0]
    return "Fake News ❌" if pred == 1 else "Real News ✅"

# Examples
print(predict_news("PM Modi announces ₹5000 direct benefit transfer to every citizen."))
print(predict_news("Supreme Court bans all diesel cars from Indian cities starting tomorrow."))


Real News ✅
Real News ✅
