In [None]:
import numpy as np
import pandas as pd
import string
import nltk
from nltk.corpus import stopwords

# `preprocess` (defined later in this notebook) calls nltk.word_tokenize, which
# needs the Punkt tokenizer models in addition to the stop-word corpus. Fetch
# all of them here so a fresh kernel can run the notebook top to bottom.
# Newer NLTK releases look for 'punkt_tab' while older ones use 'punkt'; with
# the downloader's default settings an unknown resource name is reported in
# the log output rather than raised.
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)

# English stop words used to filter tokens during preprocessing.
stop_words = stopwords.words('english')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
import kagglehub

# Download the fake/real news dataset through kagglehub and keep the returned
# location in `path`, which the loading cell uses to find the CSV files.
path = kagglehub.dataset_download(
    "clmentbisaillon/fake-and-real-news-dataset"
)
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fake-and-real-news-dataset


In [None]:
# Read each CSV and attach its class label in the same expression.
# Label encoding used throughout the notebook: 0 = fake, 1 = real.
true_df = pd.read_csv(path + "/True.csv").assign(label=1)
fake_df = pd.read_csv(path + "/Fake.csv").assign(label=0)

In [None]:
# Stack the fake and real frames (fake rows first) and renumber the rows so the
# combined frame has a clean 0..n-1 index.
df = pd.concat([fake_df, true_df], axis=0, ignore_index=True)

print("Datasets loaded and combined successfully!")
# Show both ends of the combined frame: the head holds fake rows, the tail real ones.
print("First 5 rows of the combined data:", df.head(), sep="\n")
print("\nLast 5 rows of the combined data:", df.tail(), sep="\n")

Datasets loaded and combined successfully!
First 5 rows of the combined data:
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  label  
0  December 31, 2017      0  
1  December 31, 2017      0  
2  December 30, 2017      0  
3  December 29, 2017      0  
4  December 25, 2017      0  

Last 5 rows of the 

In [None]:
# Translation table that deletes every ASCII punctuation character.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)

# Stop words are compared against tokens AFTER punctuation has been removed
# from the text, so remove it from the stop words as well ("don't" -> "dont").
# Without this, stop words containing an apostrophe can never match.
# A set is used so each membership check is a hash lookup, not a list scan.
_STOP_WORD_SET = {word.translate(_PUNCT_TABLE) for word in stop_words}


def preprocess(text):
    """Normalise raw text into a space-separated string of content tokens.

    Steps: lowercase, delete ASCII punctuation, tokenize with
    ``nltk.word_tokenize``, drop English stop words, re-join with spaces.

    Args:
        text: Raw text. Non-string values (e.g. None or NaN from a missing
            cell) are treated as empty.

    Returns:
        The cleaned text as a single string; "" when nothing remains.
    """
    if not isinstance(text, str):
        # A missing value has no tokens; return "" instead of failing on .lower().
        return ""

    text = text.lower().translate(_PUNCT_TABLE)
    tokens = nltk.word_tokenize(text)
    return " ".join(token for token in tokens if token not in _STOP_WORD_SET)

# Run every article body through `preprocess` and store the result next to the
# raw text in a new 'cleaned' column.
df['cleaned'] = df['text'].map(preprocess)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Model inputs are the cleaned article text; targets are the 0/1 labels.
X, y = df['cleaned'], df['label']

# TF-IDF features, limited to at most 5000 terms via max_features.
vectorizer = TfidfVectorizer(max_features=5000)
X_vec = vectorizer.fit_transform(X)

print("Shape of the vectorized data (X_vec):", X_vec.shape)

Shape of the vectorized data (X_vec): (44898, 5000)


In [None]:
from sklearn.model_selection import train_test_split

# Split the cleaned *text* rather than a matrix produced by a vectorizer that
# has already seen every document. Fitting TF-IDF on the full corpus lets the
# test documents influence the vocabulary and IDF weights, which leaks
# test-set information into training and inflates the reported scores.
# test_size and random_state are unchanged, so the row split is the same one
# that splitting the vectorized matrix would produce.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the vocabulary and IDF weights from the training documents only, then
# apply that fitted transformation unchanged to the held-out documents.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)

Shape of X_train: (35918, 5000)
Shape of X_test: (8980, 5000)


In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with scikit-learn's default settings. fit() returns the
# estimator itself, so construction and training chain into one expression.
model = LogisticRegression().fit(X_train, y_train)
print("Model training complete!")

Model training complete!


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out split and report overall accuracy.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1; display names follow the 0/1 label order.
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred, target_names=['Fake (0)', 'Real (1)']),
    sep="\n",
)


Model Accuracy: 98.70%

Classification Report:
              precision    recall  f1-score   support

    Fake (0)       0.99      0.99      0.99      4733
    Real (1)       0.98      0.99      0.99      4247

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [None]:
# Smoke test: push one hand-written headline through the same steps the model
# was trained with (preprocess -> TF-IDF transform -> predict).
news = "NASA gets cancelled by USA"
clean_text = preprocess(news)
vec_text = vectorizer.transform([clean_text])
prediction = model.predict(vec_text)
print("Prediction:", prediction)

# predict() returns an array with one label per input row. Read the single
# label explicitly instead of relying on the truth value of `array == 0`,
# which raises as soon as more than one row is predicted.
if prediction[0] == 0:
    print("Fake News")
else:
    print("Real News")

Prediction: [0]
Fake News


In [None]:
import gradio as gr


def predict_news(text):
    """Classify a piece of news text as real or fake for the Gradio UI.

    Args:
        text: Raw headline or article text entered by the user.

    Returns:
        'Real News' when the model predicts label 1, 'Fake News' for any
        other label, or a short prompt when `text` is None, empty or
        whitespace-only.
    """
    # With no input there is nothing to classify: None would crash inside
    # preprocess, and blank text would still be handed to the model and given
    # a label. Ask for input instead of returning a meaningless verdict.
    if text is None or not text.strip():
        return 'Please enter some news text to classify.'

    cleaned = preprocess(text)
    vectorized = vectorizer.transform([cleaned])
    pred = model.predict(vectorized)[0]
    return 'Real News' if pred == 1 else 'Fake News'

# Wrap predict_news in a minimal web UI (text in, text out) and start it.
print("\nLaunching the Gradio demo app...")
demo = gr.Interface(
    fn=predict_news,
    inputs='text',
    outputs='text',
    title='📰 Fake News Detector',
    description="Paste a news headline or article and check if it's real or fake.",
)
demo.launch()


Launching the Gradio demo app...
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://18b69bcb092a554510.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


