<a href="https://colab.research.google.com/github/ciao-baby/roy_amato/blob/main/roy_amato_revised_deliverable_1_project_1_university_id_u01862158.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================
# URL Credibility Scoring Prototype
# --------------------------------------------
# Demonstrates core functionality:
# 1. Accept URLs as input
# 2. Fetch and process webpage data
# 3. Generate a basic credibility score
# 4. Display results in a simple format
# ============================================

import requests
from bs4 import BeautifulSoup
import tldextract

def get_credibility_score(url: str) -> dict:
    """Fetch *url* and return a heuristic 0-100 credibility score for it.

    The score starts at a neutral 50 and is adjusted by three signals:

    * scheme: +10 when the URL uses HTTPS;
    * domain suffix: +20 for .edu/.gov, +5 for .org/.com,
      -15 for .biz/.info/.xyz;
    * word count of the page text: -10 below 200 words, +5 above 1000.

    The total is clamped to 0-100 and labelled "Low" (< 40),
    "Medium" (< 70) or "High".

    Args:
        url: Absolute URL to fetch, including its scheme.

    Returns:
        On success, a dict with keys "url", "domain", "https", "title",
        "word_count", "score" and "label". On any failure (network error,
        HTTP error status, unexpected parsing problem) a dict with keys
        "url", "error", "score" (always 0) and "label" (always
        "Unavailable"). The function never raises.
    """
    try:
        response = requests.get(url, timeout=5)
        # An error page (404, 500, ...) is not the document the caller asked
        # about, so report it as unavailable instead of scoring it.
        response.raise_for_status()

        score = 50  # Neutral base score

        # --- HTTPS check (URL schemes are case-insensitive) ---
        uses_https = url.lower().startswith("https://")
        if uses_https:
            score += 10

        # --- Domain analysis ---
        parts = tldextract.extract(url)
        # Join only the non-empty parts so hosts without a recognised suffix
        # (e.g. "localhost") do not end up with a trailing dot.
        domain = ".".join(p for p in (parts.domain, parts.suffix) if p)

        if domain.endswith((".edu", ".gov")):
            score += 20
        elif domain.endswith((".org", ".com")):
            score += 5
        elif domain.endswith((".biz", ".info", ".xyz")):
            score -= 15

        # --- Content analysis ---
        soup = BeautifulSoup(response.text, "html.parser")

        # Tag.string is None for an empty <title> or one with nested markup;
        # get_text() always returns a str, so such pages no longer fall into
        # the error path.
        title_tag = soup.title
        title = title_tag.get_text().strip() if title_tag else ""
        title = title[:80] or "No title found"  # Truncate for readability

        word_count = len(soup.get_text(separator=" ", strip=True).split())
        if word_count < 200:
            score -= 10
        elif word_count > 1000:
            score += 5

        # --- Clamp score & label ---
        score = max(0, min(100, score))
        if score < 40:
            label = "Low"
        elif score < 70:
            label = "Medium"
        else:
            label = "High"

        return {
            "url": url,
            "domain": domain,
            "https": "Yes" if uses_https else "No",
            "title": title,
            "word_count": word_count,
            "score": score,
            "label": label,
        }

    except Exception as e:
        # Deliberately broad: callers rely on always getting a result dict
        # back, so every failure is reported through the "error" key.
        return {
            "url": url,
            "error": str(e),
            "score": 0,
            "label": "Unavailable",
        }

# --- Main Program ---
def main():
    """Prompt for whitespace-separated URLs and print a report for each one.

    Reads a single line from standard input, scores every URL with
    get_credibility_score() and prints either the score details or the
    error message returned for that URL.
    """
    print("🔍 URL Credibility Scoring Prototype")
    print("Enter URLs separated by spaces:")
    urls = input("> ").split()

    if not urls:
        # Nothing to score; avoid printing an empty results section.
        print("No URLs entered.")
        return

    print("\n=== Results ===\n")
    for url in urls:
        result = get_credibility_score(url)
        print(f"URL: {result['url']}")

        if "error" in result:
            print(f"  ❌ Error: {result['error']}\n")
            continue

        print(f"  Domain: {result['domain']}")
        print(f"  HTTPS: {result['https']}")
        print(f"  Title: {result['title']}")
        print(f"  Word Count: {result['word_count']}")
        print(f"  Credibility Score: {result['score']} ({result['label']})\n")


if __name__ == "__main__":
    main()
