<a href="https://colab.research.google.com/github/insominiac21/Fake-News-Detector/blob/main/fake_news_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Install required libraries
!pip install streamlit requests beautifulsoup4 tensorflow pillow spacy validators
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [17]:
# Write the Streamlit app code to a file
app_code = """import streamlit as st
import requests
import os
import numpy as np
from bs4 import BeautifulSoup
from tensorflow.keras.models import load_model
from PIL import Image
import io
import base64
import spacy
import validators

def scrape_website(url, timeout=10):
    '''Scrape the given news website for text and images.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A (text_content, images) tuple. text_content is the text of every
        <p> element joined with single spaces; images is a list of image
        URLs resolved against url, with inline data: URLs and empty src
        attributes left out. Returns (None, None) when the request fails or
        the server does not answer with HTTP 200.
    '''
    # This docstring uses single-quote delimiters on purpose: the app source is
    # embedded in a notebook string that is delimited by triple double quotes.
    from urllib.parse import urljoin  # local import keeps this function self-contained

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network failures are reported the same way as a non-200 status.
        return None, None
    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract text content
    text_content = ' '.join(p.get_text() for p in soup.find_all('p'))

    # Extract image URLs, excluding data URLs
    images = []
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src or src.startswith('data:'):
            continue
        # Relative and protocol-relative sources cannot be fetched or displayed
        # as-is, so resolve them against the page URL.
        images.append(urljoin(url, src))
    return text_content, images

def check_text_fact(text, api_key, timeout=10):
    '''Look up a claim with the Google Fact Check Tools claims:search endpoint.

    Args:
        text: Query string sent as the "query" parameter.
        api_key: API key sent as the "key" parameter.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A (textual_rating, review_title) tuple taken from the first review of
        the first returned claim; missing fields fall back to "Unknown" and
        "No additional details available". Returns
        ("No fact-check available", None) when the response has no claims and
        ("Error accessing Fact Check API", None) when the request fails, the
        status is not 200, or the body is not valid JSON.
    '''
    endpoint = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
    params = {
        "query": text,
        "key": api_key
    }
    try:
        response = requests.get(endpoint, params=params, timeout=timeout)
    except requests.RequestException:
        return "Error accessing Fact Check API", None
    if response.status_code != 200:
        return "Error accessing Fact Check API", None

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON is still an API failure.
        return "Error accessing Fact Check API", None

    claims = data.get('claims') if isinstance(data, dict) else None
    if not claims:
        return "No fact-check available", None

    # "or [{}]" also covers a claim whose claimReview list is present but empty,
    # which would otherwise raise IndexError on the [0] lookup.
    reviews = claims[0].get('claimReview') or [{}]
    claim_review = reviews[0]
    textual_rating = claim_review.get('textualRating', 'Unknown')
    review_text = claim_review.get('title', 'No additional details available')
    return textual_rating, review_text

def check_image_deepfake(image_url, model, timeout=10):
    '''Download an image and classify it with the given model.

    Args:
        image_url: URL of the image to download.
        model: Object with a predict(batch) method; it receives one RGB image
            resized to 128x128, scaled to the 0-1 range, with a leading batch
            axis of size 1.
        timeout: Seconds to wait for the image server before giving up.

    Returns:
        "Deepfake" when the first value of the first prediction row is above
        0.5, otherwise "Real". Returns "Error fetching image" when the download
        fails or the status is not 200, and "Invalid Image" when decoding or
        prediction raises.
    '''
    try:
        # The whole body is needed right away, so the request is not streamed.
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException:
        return "Error fetching image"
    if response.status_code != 200:
        return "Error fetching image"

    try:
        # Convert image to RGB format
        img = Image.open(io.BytesIO(response.content))
        img = img.convert('RGB')
        img = img.resize((128, 128))
        img_array = np.array(img) / 255.0
        img_array = np.expand_dims(img_array, axis=0)
        prediction = model.predict(img_array)
        return "Deepfake" if prediction[0][0] > 0.5 else "Real"
    except Exception:
        return "Invalid Image"

# Streamlit UI
st.title("Fake News Detector")
st.write("Enter a news article URL to check its authenticity.")

url = st.text_input("Enter News URL:")
apikey ="Your-api-key"

if st.button("Check News"):
    if url and apikey:
        # Validate the URL
        if not validators.url(url) or not (url.startswith("http://") or url.startswith("https://")):
            st.error("Invalid URL. Please enter a valid HTTP or HTTPS URL.")
        else:
            st.write("Scraping the website...")
            text, images = scrape_website(url)

            text_flag = False  # Initialize text_flag with a default value

            if text:
                st.subheader("Extracted Text")
                st.write(text[:500] + "...")

                # Extract key sentences for fact-checking
                try:
                    nlp = spacy.load('en_core_web_sm')
                    doc = nlp(text)
                    key_claims = [ent.text for ent in doc.ents if ent.label_ in ['ORG', 'PERSON', 'EVENT']]
                    key_sentences = key_claims[0] if key_claims else ' '.join(text.split('.')[:3])
                except:
                    key_sentences = ' '.join(text.split('.')[:3])  # Extract first 3 sentences

                st.write("Checking text authenticity...")
                text_result, review_details = check_text_fact(key_sentences, apikey)
                first_word = text_result.split()[0].rstrip('.') if text_result else ""  # Remove trailing period
                third_word = review_details.split()[2].rstrip('.') if review_details and len(review_details.split()) > 2 else ""  # Get third word

                if first_word == "No" or third_word == "No":
                    st.write("Could not run text review.")
                    st.write("Reason: The webpage has not been reviewed by Google Claim Review yet.")
                    text_flag = None

                elif first_word or third_word in {"Half true", "False", "Mostly", "Misrepresentation", "Pants", "Fake", "Incorrect", "Misleading", "No", "Out", "Unfounded", "Exaggerated", "Debunked"} or third_word in {"Half true", "False", "Mostly", "Misrepresentation", "Pants", "Fake", "Incorrect", "Misleading", "No", "Out", "Unfounded", "Exaggerated", "Debunked"}:
                    st.write("🚨 This news might be FAKE!")
                    st.write("Fact Check Result: ", text_result)
                    st.write("\n", review_details)
                    text_flag = True
                elif first_word == "Not":  # Not Transcript
                    st.write("Fact Check Result: Independent assessment provided")
                    st.write("\n", review_details)
                    text_flag = False
                else:
                    st.write("Fact Check Result: ", text_result)
                    if review_details:
                        st.write("Supporting Evidence: ", review_details)
                    text_flag = False
            else:
                st.write("No text found on the page.")
                text_result = "Unknown"
                review_details = None
                text_flag = False  # Ensure text_flag is set even if no text is found

            if images:
                st.subheader("Extracted Images")
                model = load_model("deepfake_model.h5")
                deepfake_results = {}

                for img_url in images[:3]:  # Limit to 3 images for performance
                    result = check_image_deepfake(img_url, model)
                    deepfake_results[img_url] = result
                    st.image(img_url, caption=result, use_column_width=True)

                # Calculate fake score
                fake_score = sum(1 for v in deepfake_results.values() if v == "Deepfake") / max(len(deepfake_results), 1)
            else:
                st.write("No images found.")
                fake_score = 0

            # Final Verdict Logic
            st.subheader("Final Verdict")
            st.write("Combining text and image analysis...")
            # Adjust confidence calculation to prioritize text_flag
            if text_flag is True:
                combined_confidence = max(fake_score, 0.7)  # At least 70% if text is flagged as fake
            elif text_flag is None:
                combined_confidence = fake_score  # Use only fake_score if no fact-check is available
            else:
                combined_confidence = fake_score * 0.5  # Reduce weight of fake_score if text is real

            # Display final verdict
            if text_flag is True and combined_confidence > 0.5:
                st.error(f"🚨 This news might be FAKE! Confidence: {combined_confidence * 100:.2f}%")
            elif text_flag is None and combined_confidence > 0.5:
                st.warning(f"⚠️ This news might be PARTIALLY FAKE. Confidence: {combined_confidence * 100:.2f}%")
            elif text_flag is False or combined_confidence <= 0.5:
                st.success(f"✅ This news appears REAL. Confidence: {(1 - combined_confidence) * 100:.2f}%")
    else:
        st.warning("Please enter a valid URL and API Key.")"""

# The app source contains non-ASCII characters (emoji in the verdict messages),
# so write it as UTF-8 explicitly instead of relying on the platform default
# encoding, which can raise UnicodeEncodeError on non-UTF-8 locales.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

In [None]:
# Run the Streamlit app
# Starts Streamlit on the app.py file written by the previous cell. The recorded
# output of this cell lists the local, network and external URLs on port 8501.
# NOTE(review): the server appears to run in the foreground, so this cell
# presumably keeps executing until it is interrupted - confirm.
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.75.0.197:8501[0m
[0m
