In [23]:
import pandas as pd
import numpy as np
import re
import nltk
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [24]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [25]:
df_fake = pd.read_csv('/content/fake.csv')
df_true = pd.read_csv('/content/true.csv')




In [26]:
df_fake['label'] = 0  # Fake
df_true['label'] = 1  # Real
df = pd.concat([df_fake, df_true], axis=0).reset_index(drop=True)

In [27]:

df['text'] = df['title'].astype(str) + " " + df['text'].astype(str)
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    text = text.lower()  # make everything lowercase
    text = re.sub(r'[^a-zA-Z]', ' ', text)  # remove punctuation/numbers
    tokens = word_tokenize(text)  # split into words
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words and len(word) > 2]
    return ' '.join(tokens)


In [33]:
import nltk
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |  

True

In [34]:
df['cleaned_text'] = df['text'].apply(clean_text)
df[['text', 'cleaned_text', 'label']].head()


Unnamed: 0,text,cleaned_text,label
0,Donald Trump Sends Out Embarrassing New Year’...,donald trump sends embarrassing new year eve m...,0
1,Drunk Bragging Trump Staffer Started Russian ...,drunk bragging trump staffer started russian c...,0
2,Sheriff David Clarke Becomes An Internet Joke...,sheriff david clarke becomes internet joke thr...,0
3,Trump Is So Obsessed He Even Has Obama’s Name...,trump obsessed even obama name coded website i...,0
4,Pope Francis Just Called Out Donald Trump Dur...,pope francis called donald trump christmas spe...,0


In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Separate features and labels
X = df['cleaned_text']
y = df['label']

# Convert text to numeric vectors using TF-IDF
tfidf = TfidfVectorizer(max_features=5000)  # keep top 5000 words
X_vec = tfidf.fit_transform(X)

# Split data into train and test sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)


In [41]:
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()
model.fit(X_train, y_train)


In [42]:

!pip install gradio --quiet


In [43]:
import gradio as gr

def predict_news(text):
    cleaned = clean_text(text)
    vector = tfidf.transform([cleaned])
    prediction = model.predict(vector)[0]
    return "🔴 FAKE News" if prediction == 0 else "🟢 REAL News"


In [44]:
interface = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=6, placeholder="Paste news content or headline here..."),
    outputs="text",
    title="📰 Fake News Detector",
    description="Enter a news article or headline. This model will predict if it's real or fake."
)

interface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://094488495f601d550b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


