# **Mini Project 01**

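> Train a TF-IDF + Logistic Regression classifier that labels news articles as fake or real, then serve it through a small Streamlit app.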



## Fake News Detection Using Machine Learning & NLP

In [None]:
pip install pandas numpy scikit-learn nltk matplotlib seaborn



In [None]:
import pandas as pd

# Load datasets (both CSV files are read from the current working directory)
fake = pd.read_csv('Fake.csv')
real = pd.read_csv('True.csv')

# Add labels
fake['label'] = 0  # Fake news
real['label'] = 1  # Real news

# Combine datasets, then shuffle the rows.
# The shuffle is seeded: the later train/test split depends on row order, so an
# unseeded shuffle would give different splits and metrics on every run.
data = pd.concat([fake, real], ignore_index=True)
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Explore
print(data.head())
print(data.shape)
print(data['label'].value_counts())


                                               title  \
0  DEADBEATS BEWARE: Trump‚Äôs Food Stamp Reform Is...   
1   Ammon Bundy‚Äôs Own ‚ÄòSafety‚Äô Committee Tells Hi...   
2  FLORIDA DOCTOR Under Fire After Running TV Ad ...   
3  Assad aide says Syria will fight any force, in...   
4  JUST IN: ‚ÄúPit Bull‚Äù Attorney For Special Couns...   

                                                text          subject  \
0  All we re hearing from the left is fear monger...  Government News   
1  If anything should tell the militia terrorists...             News   
2  As a woman, I have to say I agree with this do...         politics   
3  BEIRUT (Reuters) - A top aide to President Bas...        worldnews   
4  Is there a single person left on Robert Muelle...         politics   

                  date  label  
0         May 26, 2017      0  
1      January 9, 2016      0  
2          Oct 3, 2016      0  
3  September 15, 2017       1  
4          Dec 8, 2017      0  
(44898, 5)
label
0  

In [None]:
# Show how many missing values each column contains.
print(data.isna().sum())

# Keep only the rows in which no column is missing.
data = data.dropna(axis=0, how='any')

title      0
text       0
subject    0
date       0
label      0
dtype: int64


In [None]:
import nltk
from nltk.corpus import stopwords
import string

# Fetch the NLTK stop-word corpus used by `preprocess`.
nltk.download('stopwords')

# Store the stop words in a set: `preprocess` tests every word of every article
# for membership, which is a hash lookup on a set but a linear scan on the list
# returned by `stopwords.words`.
stop_words = set(stopwords.words('english'))

# Translation table that deletes every ASCII punctuation character in one pass.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def preprocess(text, stopword_set=None):
    """Normalise one article body before TF-IDF vectorisation.

    The text is lower-cased, every character in ``string.punctuation`` is
    deleted (not replaced by a space), and whitespace-separated words found in
    the stop-word collection are dropped. The surviving words are joined with
    single spaces.

    Punctuation is removed *before* stop-word filtering, so a contraction such
    as "don't" becomes "dont"; stop-word entries that contain an apostrophe
    can therefore never match.

    Args:
        text: Raw article text (``str``).
        stopword_set: Optional collection of words to drop. When omitted, the
            notebook-level ``stop_words`` is used.

    Returns:
        The cleaned text as one space-separated string ("" if nothing is left).
    """
    if stopword_set is None:
        stopword_set = stop_words
    # Guarantee O(1) membership tests even when a list is supplied.
    if not isinstance(stopword_set, (set, frozenset)):
        stopword_set = frozenset(stopword_set)

    stripped = text.lower().translate(_PUNCTUATION_TABLE)
    return ' '.join(word for word in stripped.split() if word not in stopword_set)

# Clean every article body and store the result in a new `clean_text` column.
data['clean_text'] = data['text'].map(preprocess)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


## Split Data

Goal: Separate data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Features are the cleaned article bodies; targets are the 0/1 labels.
X, y = data['clean_text'], data['label']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
# for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Convert Text to Numbers

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# max_df=0.7 ignores terms that occur in more than 70% of the training documents;
# stop_words='english' applies scikit-learn's built-in English stop-word list.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

# Fit on the training split only, then reuse the fitted vectorizer on the test
# split so that no test-set statistics enter the features.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


## Train the Model

Use Logistic Regression (simple and effective for text classification).

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with scikit-learn's default hyper-parameters, fitted on
# the TF-IDF features of the training split.
model = LogisticRegression()
model.fit(X=X_train_tfidf, y=y_train)


## Make Predictions & Evaluate

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict a label for every held-out article.
y_pred = model.predict(X_test_tfidf)

# Evaluate
# NOTE(review): the sample rows printed earlier suggest real articles may carry
# a "(Reuters)" dateline; if so, part of the score may reflect that marker
# rather than the content - confirm before relying on these numbers.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.9854120267260579
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4697
           1       0.98      0.98      0.98      4283

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



## Save Model

Save the trained model and the fitted vectorizer for reuse or deployment:

In [None]:
import pickle

# Save model and vectorizer.
# `with` closes each file as soon as the dump finishes, so the data is flushed
# to disk and no file handle is left open.
with open('fake_news_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# Load
# with open('fake_news_model.pkl', 'rb') as model_file:
#     model = pickle.load(model_file)
# with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
#     vectorizer = pickle.load(vectorizer_file)


## Deployment

In [None]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.0/10.0 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m92.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.49.1


In [None]:
import streamlit as st
import pickle
import re

# Load model and vectorizer.
# `with` closes each file handle once unpickling is done.
# NOTE: pickle.load can execute arbitrary code - only load files you created
# yourself (here: the two files written by the "Save Model" step).
with open("fake_news_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("tfidf_vectorizer.pkl", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

# Text preprocessing
def preprocess(text):
    """Normalise user-entered text the same way the training text was cleaned.

    The model was trained on text that was lower-cased and had every character
    in ``string.punctuation`` *deleted* (so "don't" became "dont"). Replacing
    punctuation with a space instead, as this function used to do, yields
    different tokens ("don", "t") than the ones the vectorizer was fitted on.

    Stop words are not filtered here: words removed before the vectorizer was
    fitted are not in its vocabulary, and ``vectorizer.transform`` ignores
    out-of-vocabulary words.

    Args:
        text: Raw news text entered by the user (``str``).

    Returns:
        Lower-cased text without ASCII punctuation, words separated by single
        spaces and no leading/trailing whitespace.
    """
    # Local import keeps this app code self-contained when saved as a script.
    import string

    without_punctuation = text.lower().translate(
        str.maketrans('', '', string.punctuation)
    )
    # split()/join collapses any run of whitespace, as the training step does.
    return ' '.join(without_punctuation.split())

# Streamlit app
# The emoji in the user-facing strings below were mojibake (UTF-8 bytes decoded
# with the wrong codec); they are restored to the intended characters.
st.set_page_config(page_title="Fake News Detector", layout="centered")

st.title("📰 Fake News Detection App")
st.markdown("Detect whether the news you entered is **Real** or **Fake** using AI!")

user_input = st.text_area("Enter news content below 👇", height=200)

if st.button("Predict"):
    if not user_input.strip():
        # Nothing but whitespace was entered - ask for input instead of predicting.
        st.warning("⚠️ Please enter some news text.")
    else:
        # Clean the text, vectorise it with the fitted TF-IDF vectorizer and
        # classify it; label 1 means real news, anything else is shown as fake.
        clean_text = preprocess(user_input)
        vectorized_input = vectorizer.transform([clean_text])
        prediction = model.predict(vectorized_input)[0]

        if prediction == 1:
            st.success("✅ This looks like *Real News*!")
        else:
            st.error("🚨 This might be *Fake News*!")

st.markdown("---")
st.markdown("Made with ❤️ using Streamlit and Machine Learning")


2025-09-17 18:59:10.212 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-09-17 18:59:10.223 Session state does not function when running a script without `streamlit run`


DeltaGenerator()