In [1]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [2]:
# Initialize lemmatizer and stopwords
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Preprocess function
def preprocess_text(text):
    text = re.sub(r'[^a-zA-Z\s]', '', str(text), re.I)  # Remove punctuation/numbers
    text = text.lower()
    words = text.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return ' '.join(words)

In [3]:
# Load datasets
fake_df = pd.read_csv('Fake.csv')  # Update path if needed
true_df = pd.read_csv('True.csv')  # Update path if needed

# Add labels: 0 = fake, 1 = true
fake_df['label'] = 0
true_df['label'] = 1

In [4]:
# Preprocess the text
fake_df['cleaned_text'] = fake_df['text'].apply(preprocess_text)
true_df['cleaned_text'] = true_df['text'].apply(preprocess_text)

# Combine datasets
data = pd.concat([fake_df[['cleaned_text', 'label']], true_df[['cleaned_text', 'label']]], ignore_index=True)

In [5]:
# Convert text to numerical features
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(data['cleaned_text'])
y = data['label']

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Train Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

In [7]:
# Predict and evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.98125
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98       415
           1       0.97      0.99      0.98       385

    accuracy                           0.98       800
   macro avg       0.98      0.98      0.98       800
weighted avg       0.98      0.98      0.98       800



In [8]:
# Save model and vectorizer
joblib.dump(model, 'fake_news_model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')

['vectorizer.pkl']

In [9]:
# Load model and vectorizer
model = joblib.load('fake_news_model.pkl')
vectorizer = joblib.load('vectorizer.pkl')

# Function to predict new article
def predict_news(news_text):
    cleaned = preprocess_text(news_text)
    vectorized = vectorizer.transform([cleaned])
    prediction = model.predict(vectorized)[0]
    return "True News" if prediction == 1 else "Fake News"

# Example usage
new_text = "The government has announced new economic reforms today."
print(predict_news(new_text))


True News


In [10]:
import joblib

# Assuming your model is named 'model'
joblib.dump(model, 'fake_news_model.pkl')


['fake_news_model.pkl']

In [11]:
joblib.dump(vectorizer, 'vectorizer.pkl')


['vectorizer.pkl']

In [12]:
import gradio as gr
import joblib

# Load model and vectorizer
model = joblib.load("fake_news_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Define prediction function
def predict_news(news_text):
    news_vector = vectorizer.transform([news_text])
    prediction = model.predict(news_vector)
    return "Fake News" if prediction[0] == 1 else "Real News"

# Create UI
interface = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=10, label="Enter News Content"),
    outputs=gr.Label(label="Prediction"),
    title="Fake News Detector",
    description="Enter a news article to check if it's real or fake."
)

# Launch
interface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2bed6909b3659bb226.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


