In [2]:
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import json
import pandas as pd
import matplotlib.pyplot as plt

# Load the pretrained sentence-embedding model once at import time.
# evaluate_credibility() uses it to embed the user query and the page text
# and compares the two embeddings with cosine similarity.
similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the sentiment classifier once at import time. evaluate_credibility()
# uses the label of its top prediction as a rough proxy for bias.
# NOTE(review): label names depend on the checkpoint's config (this one is
# believed to emit LABEL_0/LABEL_1/LABEL_2) - confirm before matching on
# label strings.
sentiment_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")


# Function to evaluate the credibility of a URL
# Host labels treated as markers of an institutional (trusted) domain.
_TRUSTED_LABELS = frozenset({"gov", "edu", "org"})

# Sentiment label (lower-cased) -> bias score on a 0-100 scale.
# Both the human-readable names and the generic LABEL_n names are accepted.
# NOTE(review): the cardiffnlp checkpoint is believed to map LABEL_0=negative,
# LABEL_1=neutral, LABEL_2=positive - confirm against the model card.
_SENTIMENT_BIAS_SCORES = {
    "positive": 100,
    "label_2": 100,
    "neutral": 50,
    "label_1": 50,
}
_DEFAULT_BIAS_SCORE = 30  # negative or unrecognised label


def _is_trusted_domain(url: str) -> bool:
    """Return True when the URL's host sits under a .gov/.edu/.org suffix.

    Only the host name is inspected, so ".org" appearing in a path or query
    string no longer counts. Country-code variants such as "gov.uk" or
    "edu.au" (trusted label followed by a two-letter TLD) are accepted too.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        return False

    labels = host.lower().rstrip(".").split(".")
    if len(labels) < 2:
        return False
    if labels[-1] in _TRUSTED_LABELS:
        return True
    return len(labels[-1]) == 2 and labels[-2] in _TRUSTED_LABELS


def _bias_score_from_label(label) -> int:
    """Map a sentiment label to a 0-100 bias score (case-insensitive)."""
    return _SENTIMENT_BIAS_SCORES.get(str(label).lower(), _DEFAULT_BIAS_SCORE)


def evaluate_credibility(user_query: str, url: str) -> dict:
    """Score how credible and relevant the page at ``url`` is for a query.

    The page is fetched, its ``<p>`` text extracted, and five component
    scores (each on a 0-100 scale) are combined with fixed weights:
    domain trust (0.3), semantic similarity to ``user_query`` (0.3),
    fact-check lookup (0.2), sentiment-based bias (0.1) and citation
    count (0.1).

    Args:
        user_query: The question or prompt the page should be relevant to.
        url: Address of the page to evaluate.

    Returns:
        A dict with two keys: ``"Final Credibility Score"`` (float in
        0-100, rounded to two decimals; 0.0 when the page could not be
        evaluated) and ``"Explanation"`` (human-readable summary or error
        description). The function never raises for fetch/scoring failures;
        they are reported through the explanation instead.
    """
    # Initialised up front so every exit path has a score to return.
    final_score = 0.0
    explanation = "No valid evaluation."

    try:
        # === Step 1: Fetch Page Content ===
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            explanation = "Could not retrieve webpage."
        else:
            soup = BeautifulSoup(response.text, "html.parser")
            page_text = " ".join(p.text for p in soup.find_all("p"))

            if not page_text.strip():
                # Nothing to embed, fact-check or classify.
                explanation = "No paragraph text found on webpage."
            else:
                # === Step 2: Domain Authority Check ===
                # Expressed on the same 0-100 scale as the other components
                # so its 0.3 weight actually takes effect.
                if _is_trusted_domain(url):
                    domain_score = 100.0
                    explanation = "Trusted domain detected."
                else:
                    domain_score = 0.0
                    explanation = "No trusted domain detected."

                # === Step 3: Content Relevance (Semantic Similarity) ===
                similarity_score = util.pytorch_cos_sim(
                    similarity_model.encode(user_query),
                    similarity_model.encode(page_text),
                ).item() * 100
                # Cosine similarity can be negative; treat that as "unrelated".
                similarity_score = max(0.0, similarity_score)

                # === Step 4: Fact-Checking Validation ===
                fact_check_score = check_facts(page_text)

                # === Step 5: Bias Detection (Sentiment Analysis) ===
                # Only the first 512 characters are classified.
                sentiment_result = sentiment_pipeline(page_text[:512])[0]
                bias_score = _bias_score_from_label(sentiment_result["label"])

                # === Step 6: Citation Count Check ===
                citation_count = check_google_scholar(url)
                citation_score = min(citation_count * 10, 100)  # Normalize

                # === Step 7: Compute Final Credibility Score ===
                weighted_total = (
                    (0.3 * domain_score) +
                    (0.3 * similarity_score) +
                    (0.2 * fact_check_score) +
                    (0.1 * bias_score) +
                    (0.1 * citation_score)
                )
                final_score = round(max(0.0, min(100.0, weighted_total)), 2)

                explanation += f" Content Relevance: {similarity_score:.2f}. Fact-Check: {fact_check_score}. Bias Score: {bias_score}. Citation Score: {citation_score}."

    except Exception as e:
        # Deliberate catch-all boundary: any fetch or scoring failure is
        # reported to the caller through the explanation, not raised.
        final_score = 0.0
        explanation = f"Error fetching URL: {str(e)}"

    return {
        "Final Credibility Score": final_score,
        "Explanation": explanation
    }


# === Helper Function: Fact-Checking ===
def check_facts(text: str) -> int:
    """
    Looks up the start of ``text`` in the fact-check claim-search endpoint.

    Only the first 200 characters are sent. The query is passed through
    ``params`` so that spaces, ``&``, ``#`` and non-ASCII characters are
    URL-encoded instead of corrupting the request URL.

    Returns a score between 0-100 indicating factual reliability:
    80 when the response contains a non-empty "claims" entry,
    40 when the lookup succeeded but returned no claims,
    50 when the lookup itself failed (network error, timeout, HTTP error
    status, or a body that is not valid JSON).
    """
    api_url = "https://toolbox.google.com/factcheck/api/v1/claimsearch"
    try:
        response = requests.get(api_url, params={"query": text[:200]}, timeout=10)
        # An error status says nothing about the claim, so treat it as
        # "uncertain" and not as "no verification found".
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        return 50  # Default uncertainty score

    if isinstance(data, dict) and data.get("claims"):
        return 80  # If found in fact-checking database
    return 40  # No verification found


# === Helper Function: Citation Count Check ===
def check_google_scholar(url: str) -> int:
    """
    Searches Google Scholar for ``url`` through SerpAPI.

    The API key is read from the ``SERPAPI_API_KEY`` environment variable
    when set, falling back to the in-source placeholder otherwise, so a real
    key can be supplied without editing this file.

    Returns the number of entries in the response's "organic_results" list,
    which the caller uses as a citation count. Returns 0 when the request
    fails, times out, or the response is not the expected JSON shape.
    """
    import os

    serpapi_key = os.environ.get("SERPAPI_API_KEY", "YOUR_KEY_HERE")
    params = {"q": url, "engine": "google_scholar", "api_key": serpapi_key}
    try:
        response = requests.get("https://serpapi.com/search", params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        return 0  # Assume no citations found

    if not isinstance(data, dict):
        return 0
    results = data.get("organic_results")
    return len(results) if isinstance(results, list) else 0


# === Example Test ===
# Smoke run: scores one live page against a sample query. This performs real
# network requests (the page fetch plus the helper lookups) every time the
# cell/module is executed.
user_prompt = "What are the latest updates on global politics?"
test_url = "https://www.bbc.com/news"

result = evaluate_credibility(user_prompt, test_url)
# Pretty-print the returned dict (score + explanation) as JSON.
print(json.dumps(result, indent=2))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


{
  "Final Credibility Score": 30.21,
  "Explanation": "No valid evaluation. Content Relevance: 57.37. Fact-Check: 50. Bias Score: 30. Citation Score: 0."
}
