## 1. Materiality Score Formatted

In [None]:
import json

items = _input.all()
results = []

for item in items:
    try:
        output_data = item.get("json", {}).get("output", [])
        if not output_data:
            continue

        raw_text = output_data[0]["content"][0]["text"]
        raw = json.loads(raw_text)

        meta = raw.get("meta", {})
        title = meta.get("title")
        keyword = meta.get("keyword")
        url = meta.get("url")
        pickup_count = meta.get("pickup_count", 0)
        content = raw.get("content")
        scores = raw.get("scores", {})

        sev = float(scores.get("event_severity", {}).get("score", 0))
        prox = float(scores.get("market_proximity", {}).get("score", 0))
        fwd = float(scores.get("forward_impact", {}).get("score", 0))

        materiality_score = round((sev + prox + fwd) / 12.0, 4)

        sentiment_obj = scores.get("sentiment_score", {})
        senti_score = float(sentiment_obj.get("score", 0))

        entities = raw.get("entities", [])
        if not isinstance(entities, list):
            entities = []

        out_json = {
            "title": title,
            "keyword": keyword,
            "url": url,
            "content": content,
            "pickup_count": pickup_count,
            "materiality_score": materiality_score,
            "event_severity_score": sev,
            "market_proximity_score": prox,
            "forward_impact_score": fwd,
            "sentiment_score": senti_score,
            "mentioned_entities": entities
        }

        results.append({"json": out_json})

    except Exception as e:
        results.append({
            "json": {
                "error": str(e),
                "raw_input": str(item)
            }
        })

return results


## 2. Quality Score 1 (Source Credibility)


In [None]:
from urllib.parse import urlparse

# --- Media Trust List Configuration ---
# Maps a domain name to a credibility score. Lookup is by hostname suffix on
# label boundaries, so "reuters.com" also covers "uk.reuters.com" but never
# "notreuters.com".
source_map = {
    # [Tier 0] Institutional & Academic Authority (0.99)
    "gov": 0.99, "nature.com": 0.99, "science.org": 0.99,
    "worldbank.org": 0.99, "imf.org": 0.99, "iea.org": 0.99,
    "who.int": 0.99, "un.org": 0.99, "bis.org": 0.99,

    # [Tier 1] Global Financial Core (0.96 - 0.98)
    "reuters.com": 0.98, "bloomberg.com": 0.98,
    "wsj.com": 0.98, "ft.com": 0.98,
    "nikkei.com": 0.97, "investors.com": 0.96,

    # [Tier 2] Industry Verticals (0.91 - 0.95)
    "mining.com": 0.95,       "oilprice.com": 0.94,
    "techcrunch.com": 0.95,   "theinformation.com": 0.94,
    "barrons.com": 0.94,      "spglobal.com": 0.93,
    "utilitydive.com": 0.92,  "argusmedia.com": 0.92,
    "pv-magazine.com": 0.92,  "hydrocarbonprocessing.com": 0.91,

    # [Tier 3] High-Quality General News (0.85 - 0.90)
    "nytimes.com": 0.90,      "economist.com": 0.90,
    "apnews.com": 0.90,       "bbc.com": 0.89,
    "washingtonpost.com": 0.88, "axios.com": 0.85,
    "time.com": 0.85,         "forbes.com": 0.85,

    # [Tier 4] Aggregators & Tier 2 (0.75 - 0.85)
    "cnbc.com": 0.85,         "marketwatch.com": 0.82,
    "finance.yahoo.com": 0.80, "businessinsider.com": 0.78,
    "seekingalpha.com": 0.75,

    # [Tier 5] Low Credibility / Cautionary (< 0.6)
    "dailymail.co.uk": 0.5,   "nypost.com": 0.5,
    "twitter.com": 0.2,       "reddit.com": 0.2,
    "zerohedge.com": 0.3,     "thestreet.com": 0.4,
    "benzinga.com": 0.4,      "medium.com": 0.3
}

# Score for missing, unparseable, or unlisted sources.
# Set to 0.5 for strict filtering in dual-stream architecture
_UNKNOWN_SOURCE_SCORE = 0.5


def get_source_score(url):
    """Return the credibility score for the site that hosts *url*.

    Args:
        url: Article URL. A scheme-less value such as ``"reuters.com/x"`` is
            accepted. Empty, ``None`` or non-string values are treated as
            unknown.

    Returns:
        0.99 for government hosts (``*.gov`` or ``*.gov.<two-letter code>``),
        the ``source_map`` value of the longest hostname suffix that is
        listed, and 0.5 otherwise.
    """
    if not url or not isinstance(url, str):
        return _UNKNOWN_SOURCE_SCORE

    try:
        # Without "//" urlparse puts the host into the path, so add it.
        parsed = urlparse(url if "//" in url else "//" + url)
        # .hostname is lower-cased and excludes port and credentials.
        host = (parsed.hostname or "").rstrip(".")
    except ValueError:
        # Raised for malformed authorities such as an unclosed IPv6 bracket.
        return _UNKNOWN_SOURCE_SCORE

    if not host:
        return _UNKNOWN_SOURCE_SCORE

    labels = host.split(".")

    # 1. Dynamic rule: government sites. Matching whole labels keeps
    #    "governing.com" out while still covering "gov.uk"-style hosts.
    if labels[-1] == "gov":
        return 0.99
    if len(labels) >= 2 and labels[-2] == "gov" and len(labels[-1]) == 2:
        return 0.99

    # 2. Dictionary match: try successively shorter suffixes, so the most
    #    specific entry wins ("finance.yahoo.com" before any shorter key).
    for start in range(len(labels)):
        candidate = ".".join(labels[start:])
        if candidate in source_map:
            return source_map[candidate]

    # 3. Default for unknown sources
    return _UNKNOWN_SOURCE_SCORE

# --- Execution Logic ---
items = _input.all()
results = []

for item in items:
    art = item.get("json", item)
    url = art.get("url", "")

    # Calculate score
    score = get_source_score(url)

    # Append score to JSON
    art["source_credibility"] = score

    results.append({"json": art})

return results

## 3. Quality Score 2 (Weighted)

In [None]:
# ==========================================
# Node 2: Weighted Final Scoring (Balanced)
# Formula: Final = (Cred * 0.40) + (Topic * 0.25) + (Pickup * 0.15) + (Search * 0.20)
# ==========================================

# --- Weight Configuration ---
W_CREDIBILITY = 0.40  # Reduced slightly to allow relevance to matter more
W_TOPIC       = 0.25  # Strategic priority
W_PICKUP      = 0.15  # Market Buzz
W_SEARCH      = 0.20  # INCREASED: Penalizes off-topic articles from big media

# --- Pickup Normalization ---
MAX_PICKUP_CAP = 3.0

items = _input.all()
results = []

for item in items:
    art = item.get("json", item)

    # 1. Get Metrics (Strict inputs)

    # A. Source Credibility
    c_score = float(art.get("source_credibility", 0.5))

    # B. Topic Importance
    t_weight = float(art.get("weight", 0.8))
    e_imp = float(art.get("expansion_importance", 1.0))
    t_score = t_weight * e_imp

    # C. Pickup Score
    raw_pickup = float(art.get("pickup_count", 0))
    p_score = min(raw_pickup / MAX_PICKUP_CAP, 1.0)

    # D. Search Relevance
    s_score = float(art.get("tavily_score", 0.6))

    # 2. Calculate Final Weighted Score
    final_score = (c_score * W_CREDIBILITY) + \
                  (t_score * W_TOPIC) + \
                  (p_score * W_PICKUP) + \
                  (s_score * W_SEARCH)

    # 3. Normalize and Round
    final_score = round(min(final_score, 1.0), 4)

    # Save derived metrics
    art["qual_score"] = final_score

    # ==========================================
    # 4. Reorder Output Columns (Strict Order)
    # ==========================================

    target_order = [
        "keyword",
        "weight",
        "expansion_importance",
        "qual_score",
        "title",
        "url",
        "content",
        "source_credibility",
        "tavily_score",
        "pickup_count"
    ]

    ordered_art = {}

    # Ensure keyword exists
    if "keyword" not in art:
        art["keyword"] = art.get("query") or art.get("seed") or "unknown"

    # Insert keys in target order
    for key in target_order:
        if key in art:
            ordered_art[key] = art[key]

    # Append remaining keys
    for key, val in art.items():
        if key not in ordered_art:
            ordered_art[key] = val

    results.append({"json": ordered_art})

return results

## 4. News Score Final

In [None]:
import statistics

items = _input.all()

# Collect scores
m_vals = [i["json"].get("materiality_score") for i in items if i["json"].get("materiality_score") is not None]
q_vals = [i["json"].get("qual_score") for i in items if i["json"].get("qual_score") is not None]

m_median = statistics.median(m_vals) if m_vals else 0.5
q_median = statistics.median(q_vals) if q_vals else 0.5

results = []

for item in items:
    original = item.get("json", {})

    # Copy only known fields to avoid mutation side-effects
    j = dict(original)  # shallow copy

    m_score = j.get("materiality_score", m_median)
    q_score = j.get("qual_score", q_median)

    final_score = float(m_score) * 0.6 + float(q_score) * 0.4

    j["final_score_100"] = round(final_score * 100, 2)

    # preserve everything else including 'content'
    results.append({"json": j})

return results


## 5. Group By Keywords

In [None]:
items = _input.all()
groups = {}

for item in items:
    original_json = item.get("json", {})


    content_check = original_json.get("content")
    if not content_check:
        original_json["content"] = "[WARNING: No Content Found]"


    art = dict(original_json)


    keyword = (art.get("keyword") or "unknown").strip().lower()
    groups.setdefault(keyword, []).append(art)

output_items = []

for keyword, article_list in groups.items():
    #article_list.sort(key=lambda x: x.get("final_score_100", 0), reverse=True)

    output_items.append({
        "json": {
            "keyword": keyword,
            "count": len(article_list),
            "articles": article_list
        }
    })

return output_items