# **LOADING DATASET**

In [8]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import zipfile


zip_path = '/content/drive/MyDrive/fake-and-real-news-dataset/fake-and-real-news-dataset.zip'

# Extraction path
extract_to = '/content/fake_news_data/'

# Unzipping
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)


In [None]:
import pandas as pd

# Read CSVs after extraction
true_df = pd.read_csv('/content/fake_news_data/True.csv')
fake_df = pd.read_csv('/content/fake_news_data/Fake.csv')


In [None]:
# Add labels
true_df['label'] = 1
fake_df['label'] = 0

# Combine datasets
df = pd.concat([true_df, fake_df], ignore_index=True)

# Preview
df.head()


Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


# **NLP LIBRARIES**

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer


nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

# **PREPROCESSING**

In [None]:
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = text.split()
    tokens = [w for w in tokens if w not in stop_words]
    tokens = [lemmatizer.lemmatize(stemmer.stem(w)) for w in tokens]
    return " ".join(tokens)

# Apply on the combined dataframe
df['text'] = df['text'].apply(preprocess)


# **TEXT TO FEATURES**

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['text'])
y = df['label']


# **TRAINING NAIVE BAYES MODEL**





In [10]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9356347438752785
              precision    recall  f1-score   support

           0       0.94      0.94      0.94      4650
           1       0.94      0.93      0.93      4330

    accuracy                           0.94      8980
   macro avg       0.94      0.94      0.94      8980
weighted avg       0.94      0.94      0.94      8980



In [11]:
import joblib

# Save locally
joblib.dump(model, '/content/fake_news_model.pkl')
joblib.dump(vectorizer, '/content/vectorizer.pkl')

# Copy to Drive
!cp /content/fake_news_model.pkl /content/drive/MyDrive/fake-and-real-news-dataset/
!cp /content/vectorizer.pkl /content/drive/MyDrive/fake-and-real-news-dataset/


In [12]:
def predict_news(text):
    text = preprocess(text)
    text_vector = vectorizer.transform([text])
    prediction = model.predict(text_vector)[0]
    return "Real News" if prediction == 1 else "Fake News"

# Example:
predict_news("The prime minister held a conference to discuss economic growth...")


'Real News'

# **WEB APP**

In [13]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
model_path = '/content/drive/My Drive/fake-and-real-news-dataset/fake_news_model.pkl'
model = joblib.load(model_path)


In [15]:
!ls


drive  fake_news_data  fake_news_model.pkl  sample_data  vectorizer.pkl


In [None]:
import joblib


model = joblib.load('fake_news_model.pkl')
vectorizer = joblib.load('vectorizer.pkl')


In [16]:
!pip install streamlit pyngrok

Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.4-py3-none-any.whl.metadata (8.7 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.1-py3-none-any.whl (9.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m66.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.4-py3-none-any.whl (23 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m111.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (7

In [17]:
code = """
import streamlit as st
import joblib
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import string

nltk.download('stopwords')

# Load model + vectorizer
model      = joblib.load("/content/drive/MyDrive/fake-and-real-news-dataset/fake_news_model.pkl")
vectorizer = joblib.load("/content/drive/MyDrive/fake-and-real-news-dataset/vectorizer.pkl")

stemmer = PorterStemmer()
def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\d+', '', text)
    tokens = text.split()
    tokens = [stemmer.stem(w) for w in tokens if w not in stopwords.words('english')]
    return ' '.join(tokens)

st.set_page_config(page_title="Fake News Detector", layout="centered")
st.title("Fake News Detection App")
news_input = st.text_area("Paste your news article here:", height=250)

if st.button("Check News"):
    if not news_input.strip():
        st.warning("Please enter a news article.")
    else:
        processed = preprocess_text(news_input)
        vect      = vectorizer.transform([processed])
        pred      = model.predict(vect)[0]
        if pred == 0:
            st.error("This news article is likely **Fake**.")
        else:
            st.success("This news article appears to be **Real**.")
"""
with open("streamlit_app.py", "w") as f:
    f.write(code)

In [18]:
!ngrok config add-authtoken 2vzIoU44gSiCWMfB58bGCz6Tdoj_5nHhSiPnpKhHccMNH4e8A

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [19]:
from pyngrok import ngrok
import os

# Kill any previous Streamlit or ngrok processes
os.system("pkill streamlit")

# Start the Streamlit app in the background
os.system("streamlit run streamlit_app.py &")

# Connect to localhost:8501 using ngrok
public_url = ngrok.connect("http://localhost:8501", bind_tls=True)
print("Streamlit App URL:", public_url)


Streamlit App URL: NgrokTunnel: "https://31a3-34-80-207-47.ngrok-free.app" -> "http://localhost:8501"
