In [34]:
import requests
import pandas as pd
import time
import json
import os

# ========== CONFIG ==========
LLM_ENDPOINT = "http://localhost:11434/api/chat"
LLM_MODEL_NAME = "llama3:70b"
TEMP_OUTPUT_PATH = "annotated_temp_output.csv"
FINAL_OUTPUT_PATH = "news_sample_annotated.csv"
SLEEP_BETWEEN_REQUESTS = 1

# ========== FRAMES & PROMPTS ==========
FRAME_ORDER = [
    "Foreign influence threat",
    "Systemic institutional corruption",
    "Elite collusion",
    "Politicized investigations",
    "Authoritarian reformism",
    "Judicial and institutional accountability failures",
    "Mobilizing anti-corruption"
]

FRAME_PROMPT = """
You are an annotation assistant helping a human coder identify which corruption narrative frames are present in a news article. An article may contain multiple frames or none.

### Frame Definitions:

1. **Foreign influence threat**: Identify any sentences or phrases that frame political corruption as an external attack by foreign actors. Look for references to external meddling, covert financing from abroad, secret deals with foreign entities, propaganda or undue influence by external powers.

2. **Systemic institutional corruption**: Find any passages that describe corruption as a deep‐rooted, system‐wide problem—built into institutions, laws, or culture—and not just one‐off wrongdoing. Look for terms like “endemic,” “deep-rooted,” “fragile institutions,” or metaphors like “weed” or “cancer.”

3. **Elite collusion**: Mark any passages that describe secretive alliances among powerful elites (e.g., businessmen and politicians)—such as backroom deals, undisclosed financing, or informal networks rigging policy in favor of those already in power.

4. **Politicized investigations**: Identify any passages that either:
   1. Depict corruption investigations as partisan tools or “witch hunts” (look for claims of bias, factional motives, selective enforcement),  
   OR  
   2. Show accused politicians publicly denying the allegations—portraying themselves as fair, law‐abiding citizens and claiming the probe is politically motivated.

5. **Authoritarian reformism**: Identify any passages that either:
   1. Describe reforms or institutional changes used to consolidate power, weaken democratic checks and balances, or target political opponents,  
   OR  
   2. Show politicians accusing other politicians or institutions of corruption as a campaign strategy or to gain electoral advantage.

6. **Judicial and institutional accountability failures**: Identify any passages that either:
   1. Describe how legal frameworks are manipulated (e.g., ambiguous laws, loopholes, selective enforcement) in ways that let corruption persist,  
   OR  
   2. Criticize anti-corruption laws, promised reforms, or public pledges as failures—empty rhetoric, broken promises, or poorly implemented measures.

7. **Mobilizing anti-corruption**: Identify any passages that either:
   1. Describe grassroots protests or elite demands calling for action against corruption (e.g., demonstrations, petitions, political speeches urging reform),  
   OR  
   2. Describe real institutional responses to corruption (e.g., new anti-corruption laws, court cases against officials, restructuring of oversight bodies).

---

### Output Format:

Return ONLY a JSON list like this:

[
  {{
    "frame": "Frame Name",
    "highlights": ["Exact sentence", "..."],
    "rationale": "Short explanation of why the frame applies",
    "confidence": 85
  }},
  ...
]

Only include frames that are clearly evidenced.
"""

# ========== LLM REQUEST ==========
def build_prompt(article_text: str) -> str:
    return f"{FRAME_PROMPT}\n\n---\n\nArticle:\n{article_text}"

def query_llm(article_text: str) -> list:
    try:
        response = requests.post(
            LLM_ENDPOINT,
            json={
                "model": LLM_MODEL_NAME,
                "messages": [{"role": "user", "content": build_prompt(article_text)}],
                "stream": False
            },
            timeout=120
        )
        response.raise_for_status()
        result = response.json()
        content = result.get("message", {}).get("content", "").strip()
        json_start = content.find("[")
        json_end = content.rfind("]") + 1
        json_str = content[json_start:json_end]
        return json.loads(json_str)

    except Exception as e:
        print(f"❌ Error querying LLM: {e}")
        return [{
            "frame": "Error",
            "rationale": str(e),
            "confidence": None,
            "highlights": []
        }]

# ========== RESULT FORMAT ==========
def format_llm_output(llm_frames: list) -> dict:
    formatted = {}
    frame_map = {
        frame["frame"].strip().lower(): frame
        for frame in llm_frames if frame.get("frame", "").lower() != "error"
    }

    for i, frame_name in enumerate(FRAME_ORDER, 1):
        key = frame_name.lower()
        match = frame_map.get(key, {})
        formatted[f"frame_{i}_name"] = match.get("frame", "")
        formatted[f"frame_{i}_rationale"] = match.get("rationale", "")
        formatted[f"frame_{i}_confidence"] = match.get("confidence", "")
        formatted[f"frame_{i}_evidence"] = "\n".join(match.get("highlights", []))

    return formatted

# ========== ANNOTATION LOOP ==========
def annotate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    for i in range(1, len(FRAME_ORDER) + 1):
        for field in ["name", "rationale", "confidence", "evidence"]:
            col = f"frame_{i}_{field}"
            if col not in df.columns:
                df[col] = ""

    for idx, row in df.iterrows():
        if pd.notna(row.get("frame_1_name")) and row.get("frame_1_name") != "":
            print(f"⏭️ Article {idx} already annotated. Skipping.")
            continue

        print(f"\n🔍 Annotating article {idx}...\n")
        article_text = row.get("translated_text", "")
        frames = query_llm(article_text)
        formatted = format_llm_output(frames)

        for col, val in formatted.items():
            df.at[idx, col] = val

        df.to_csv(TEMP_OUTPUT_PATH, index=False)
        print(f"✅ Saved progress after article {idx}.")
        time.sleep(SLEEP_BETWEEN_REQUESTS)

    return df

# ========== MAIN ==========
if __name__ == "__main__":
    INPUT_PATH = "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/news_sample_translated_10000_with_llm_annotations.csv"
    df = pd.read_csv(INPUT_PATH)

    # Optional filtering
    df = df[df.get("llm_label", "") == "Yes"].sample(n=15, random_state=12).reset_index(drop=True)

    df = annotate_dataframe(df)

    # ========= Save updated DataFrame ==========
    output_path = os.path.expanduser('~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/news_sample_with_7_frames.csv')
    df.to_csv(output_path, index=False)
    print(f"\n✅ Saved annotated dataframe with up to 7 frames per article to: {output_path}")



🔍 Annotating article 0...

✅ Saved progress after article 0.

🔍 Annotating article 1...

✅ Saved progress after article 1.

🔍 Annotating article 2...

✅ Saved progress after article 2.

🔍 Annotating article 3...

✅ Saved progress after article 3.

🔍 Annotating article 4...

✅ Saved progress after article 4.

🔍 Annotating article 5...

✅ Saved progress after article 5.

🔍 Annotating article 6...

✅ Saved progress after article 6.

🔍 Annotating article 7...

✅ Saved progress after article 7.

🔍 Annotating article 8...

✅ Saved progress after article 8.

🔍 Annotating article 9...

✅ Saved progress after article 9.

🔍 Annotating article 10...

✅ Saved progress after article 10.

🔍 Annotating article 11...

✅ Saved progress after article 11.

🔍 Annotating article 12...

✅ Saved progress after article 12.

🔍 Annotating article 13...

❌ Error querying LLM: Invalid control character at: line 4 column 357 (char 405)
✅ Saved progress after article 13.

🔍 Annotating article 14...

✅ Saved progre

In [31]:
df

Unnamed: 0,combined_text,translated_text,uri,country,dateTime,source.uri,llm_evidence,llm_rationale,llm_confidence,llm_label,...,frame_5_confidence,frame_5_evidence,frame_6_name,frame_6_rationale,frame_6_confidence,frame_6_evidence,frame_7_name,frame_7_rationale,frame_7_confidence,frame_7_evidence
0,Турски банкер обвинен в заговор в полза на Ира...,Turkish banker accused of conspiracy in favor ...,787939780,Bulgaria,2018-01-04 08:12:00+00:00,struma.com,Witnesses described corruption at the highest ...,The article reports on a trial involving a hig...,90.0,Yes,...,,,,,,,,,,
1,България харчи сляпо за европредседателството\...,Bulgaria is blindly spending on the EU Preside...,786664095,Bulgaria,2018-01-01 16:41:00+00:00,segabg.com,"""No one pursues political corruption, there ar...",The article highlights instances of political ...,90.0,Yes,...,,,Judicial and institutional accountability fail...,The article highlights the lack of accountabil...,90.0,"no one pursues political corruption, there are...",,,,
2,Жаблянов: Вотът няма нищо общо с европредседат...,Zhablyanov: The vote has nothing to do with th...,796433765,Bulgaria,2018-01-18 17:09:00+00:00,actualno.com,"""It has become a public disease,"" he added, ci...",The article focuses on accusations of corrupti...,95.0,Yes,...,,,Judicial and institutional accountability fail...,The mention of legal shortcomings in the regul...,80.0,legal shortcomings of the current regulatory f...,Mobilizing anti-corruption,The article describes an institutional respons...,85.0,BSP will introduce a vote of no confidence aga...
3,Колко е приятно да си началник на хора без раб...,How pleasant it is to be in charge of people w...,787120623,Bulgaria,2018-01-02 17:47:00+00:00,segabg.com,Borislav Sarфов spoke about an additional burd...,The article focuses on the investigation of co...,80.0,Yes,...,,,Judicial and institutional accountability fail...,The article criticizes the lack of accountabil...,90.0,the share of newly established cases investiga...,,,,
4,Първият сигнал в антикорупционната комисия е з...,The first signal to the anti-corruption commis...,800836490,Bulgaria,2018-01-25 18:33:00+00:00,segabg.com,The signal regarding Peevski requests an inves...,The article primarily focuses on allegations o...,90.0,Yes,...,,,Judicial and institutional accountability fail...,The article highlights the lack of institution...,90.0,the signal regarding Peevski requests an inves...,Mobilizing anti-corruption,The article describes a civil organization cal...,95.0,the civil association “Boytz” requests an inve...


In [None]:
#### FRIDAY 18 JULY

In [35]:
import requests
import pandas as pd
import time
import json
import os

# ========== CONFIG ==========
LLM_ENDPOINT = "http://localhost:11434/api/chat"
LLM_MODEL_NAME = "llama3:70b"
TEMP_OUTPUT_PATH = "annotated_temp_output.csv"
FINAL_OUTPUT_PATH = "news_sample_annotated.csv"
SLEEP_BETWEEN_REQUESTS = 1

# ========== FRAMES & PROMPTS ==========
FRAME_ORDER = [
    "Foreign influence threat",
    "Systemic institutional corruption",
    "Elite collusion",
    "Politicized investigations",
    "Authoritarian reformism",
    "Judicial and institutional accountability failures",
    "Mobilizing anti-corruption"
]

FRAME_PROMPT = """
You are an annotation assistant helping a human coder identify which corruption narrative frames are present in a news article. An article may contain multiple frames or none. Only identify passages that are explicit instances of the frame. Do not identify implicit instances or make suggestions. Do not code passages that are purely descriptive or factual, such as: straight news reporting of events, verdicts, charges, or basic statistics. Don’t code for a frame when there are only statements of who did what, when, or where, without any interpretation or judgment. 

### Frame Definitions:

1. **Foreign influence threat**: Identify any explicit passages that describe political corruption as an external attack by foreign actors. Look for references to external meddling, covert financing from abroad, secret deals with foreign entities, propaganda or undue influence by external powers. It needs to be explicit that foreign powers infiltrate domestic politics. 

2. **Systemic institutional corruption**: Identify any explicit passages that describe corruption as a deep‐rooted, system‐wide problem, built into institutions, laws, or culture, and not just one‐off wrongdoing. Look for terms like “endemic,” “deep-rooted,” “fragile institutions,” or metaphors like “weed” or “cancer.” Do not code when the passage merely expresses an opinion or doubt about a single court, verdict, or actor’s treatment or lacks any language about long-term, system-wide corruption or institutional rot. 

3. **Elite collusion**: Identify any explicit passages that describe corruption as the result of secret alliances between powerful elites (e.g., businessmen and politicians), such as backroom deals, undisclosed financing, or informal networks rigging policy. It needs to be explicit that elites conspire to manipulate the system in favour of those already in power.

4. **Politicized investigations**: Identify any explicit passages that depict corruption investigations as partisan tools or “witch hunts” (look for claims of bias, factional motives, selective enforcement). 

5. **Authoritarian reformism**: Identify any explicit passages that either:
1. Describe instances where politicians use reforms or institutional changes (e.g., restructuring courts, purging oversight bodies, rewriting checks-and-balances) to concentrate authority and weaken democratic safeguards; 
OR  
2. Describe officials intimidating opponents to consolidate power, weaken institutional checks, or to avoid the possibility of being caught
OR
3. Describe instances of politicians accusing other politicians or institutions of corruption to gain electoral win for themselves; corruption is used as a political tool
6. **Judicial and institutional accountability failures**: Identify any explicit passages that either:
   1. Describe how institutional efforts to combat corruption fail. This is because laws or judicial procedures fall short, are manipulated or outdated (e.g., ambiguous laws, loopholes, selective enforcement, politically controlled courts), allowing corrupt officials to go unpunished or not be adequately punished. 
 OR  
   2. Describe failures of anti-corruption laws, promised reforms, or public pledges by the government or political figures. Look for references to empty rhetoric, broken promises, or poorly implemented measures to combat corruption.

7. **Mobilizing anti-corruption**: Identify any explicit passages that either:
   1. Describe grassroots protests or elite demands calling for action against corruption (e.g., demonstrations, petitions, political speeches urging reform),  
   OR  
   2. Describe real institutional responses to corruption (e.g., new anti-corruption laws, court cases against officials, restructuring of oversight bodies).
---

### Output Format:

Return ONLY a JSON list like this:

[
  {{
    "frame": "Frame Name",
    "highlights": ["Exact sentence", "..."],
    "rationale": "Short explanation of why the frame applies",
    "confidence": 85
  }},
  ...
]

Only include frames that are clearly evidenced. Do not include any frame with empty evidence or empty rationale.
"""

# ========== LLM REQUEST ==========
def build_prompt(article_text: str) -> str:
    return f"{FRAME_PROMPT}\n\n---\n\nArticle:\n{article_text}"

def query_llm(article_text: str) -> list:
    try:
        response = requests.post(
            LLM_ENDPOINT,
            json={
                "model": LLM_MODEL_NAME,
                "messages": [{"role": "user", "content": build_prompt(article_text)}],
                "stream": False
            },
            timeout=120
        )
        response.raise_for_status()
        result = response.json()
        content = result.get("message", {}).get("content", "").strip()
        json_start = content.find("[")
        json_end = content.rfind("]") + 1
        json_str = content[json_start:json_end]
        return json.loads(json_str)

    except Exception as e:
        print(f"❌ Error querying LLM: {e}")
        return [{
            "frame": "Error",
            "rationale": str(e),
            "confidence": None,
            "highlights": []
        }]

# ========== RESULT FORMAT ==========
def format_llm_output(llm_frames: list) -> dict:
    formatted = {}
    # Filter frames to only those with non-empty highlights and rationale
    valid_frames = [
        frame for frame in llm_frames
        if frame.get("frame") and
           frame.get("highlights") and len(frame["highlights"]) > 0 and
           frame.get("rationale") and frame["rationale"].strip() != ""
    ]
    frame_map = {
        frame["frame"].strip().lower(): frame
        for frame in valid_frames
    }

    for i, frame_name in enumerate(FRAME_ORDER, 1):
        key = frame_name.lower()
        match = frame_map.get(key, {})

        if not match:
            # If frame not present or no valid evidence, skip adding fields for this frame
            continue

        confidence = match.get("confidence", "")
        rationale = match.get("rationale", "")
        evidence_list = match.get("highlights", [])

        # Prepare evidence text
        evidence_text = "\n".join(evidence_list)

        # Add warning if confidence is low
        if confidence != "" and isinstance(confidence, (int, float)) and confidence < 80:
            warning = f"\n\n⚠️ Model confidence is only {confidence}%. Please verify carefully."
            evidence_text += warning

        formatted[f"frame_{i}_name"] = match.get("frame", "")
        formatted[f"frame_{i}_rationale"] = rationale
        formatted[f"frame_{i}_confidence"] = confidence
        formatted[f"frame_{i}_evidence"] = evidence_text

    return formatted

# ========== ANNOTATION LOOP ==========
def annotate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    # Ensure all frame columns exist (empty if not present)
    for i in range(1, len(FRAME_ORDER) + 1):
        for field in ["name", "rationale", "confidence", "evidence"]:
            col = f"frame_{i}_{field}"
            if col not in df.columns:
                df[col] = ""

    for idx, row in df.iterrows():
        # Skip if first frame is already annotated (heuristic)
        if pd.notna(row.get("frame_1_name")) and row.get("frame_1_name") != "":
            print(f"⏭️ Article {idx} already annotated. Skipping.")
            continue

        print(f"\n🔍 Annotating article {idx}...\n")
        article_text = row.get("translated_text", "")
        frames = query_llm(article_text)
        formatted = format_llm_output(frames)

        for col, val in formatted.items():
            df.at[idx, col] = val

        df.to_csv(TEMP_OUTPUT_PATH, index=False)
        print(f"✅ Saved progress after article {idx}.")
        time.sleep(SLEEP_BETWEEN_REQUESTS)

    return df

# ========== MAIN ==========
if __name__ == "__main__":
    INPUT_PATH = "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/news_sample_translated_10000_with_llm_annotations.csv"
    df = pd.read_csv(INPUT_PATH)

    # Optional filtering (only articles labeled "Yes" by LLM and sample 15)
    df = df[df.get("llm_label", "") == "Yes"].sample(n=15, random_state=12).reset_index(drop=True)

    df = annotate_dataframe(df)

    # ========= Save updated DataFrame ==========
    output_path = os.path.expanduser('~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/news_sample_with_7_frames_adjusted_prompt.csv')
    df.to_csv(output_path, index=False)
    print(f"\n✅ Saved annotated dataframe with up to 7 frames per article to: {output_path}")



🔍 Annotating article 0...

✅ Saved progress after article 0.

🔍 Annotating article 1...

✅ Saved progress after article 1.

🔍 Annotating article 2...

✅ Saved progress after article 2.

🔍 Annotating article 3...

✅ Saved progress after article 3.

🔍 Annotating article 4...

✅ Saved progress after article 4.

🔍 Annotating article 5...

✅ Saved progress after article 5.

🔍 Annotating article 6...

✅ Saved progress after article 6.

🔍 Annotating article 7...

✅ Saved progress after article 7.

🔍 Annotating article 8...

✅ Saved progress after article 8.

🔍 Annotating article 9...

✅ Saved progress after article 9.

🔍 Annotating article 10...

✅ Saved progress after article 10.

🔍 Annotating article 11...

✅ Saved progress after article 11.

🔍 Annotating article 12...

✅ Saved progress after article 12.

🔍 Annotating article 13...

❌ Error querying LLM: Expecting ',' delimiter: line 4 column 356 (char 393)
✅ Saved progress after article 13.

🔍 Annotating article 14...

✅ Saved progress af

In [None]:
## 21 July

In [None]:
import requests
import pandas as pd
import time
import json
import os

# ========== CONFIG ==========
LLM_ENDPOINT = "http://localhost:11434/api/chat"
LLM_MODEL_NAME = "llama3:70b"
TEMP_OUTPUT_PATH = "annotated_temp_output.csv"
FINAL_OUTPUT_PATH = "news_sample_annotated.csv"
SLEEP_BETWEEN_REQUESTS = 1

# ========== FRAMES & PROMPTS ==========
FRAME_ORDER = [
    "Foreign influence threat",
    "Systemic institutional corruption",
    "Elite collusion",
    "Politicized investigations",
    "Authoritarian reformism",
    "Judicial and institutional accountability failures",
    "Mobilizing anti-corruption"
]

FRAME_PROMPT = """
You are an annotation assistant helping a human coder identify which corruption narrative frames are present in a news article. An article may contain multiple frames or none.

Only identify **explicit instances** of each frame.

**Do not tag**:
- Implicit or inferred meanings
- Purely descriptive or factual reporting (e.g., who/what/when/where, charges filed, investigations launched)
- Neutral or procedural updates without interpretive or evaluative language

### Frame Definitions:

### Frame 1: Foreign influence threat
Identify explicit passages that describe political corruption as an external attack by foreign governments or actors.

**Tag if:**
- A foreign state, intelligence service, or proxy actor is described as meddling in domestic politics
- The passage links covert foreign financing, secret deals, or propaganda to political subversion
- The influence targets elections, party leadership, or national institutions

**Language cues may include:**
- “foreign interference”, “Russian money”, “external financing”, “foreign-linked donors”, “infiltration into political networks”, “secret deals with foreign entities”, “undue influence from abroad”, “foreign power”, “Kremlin”, “EU sanctions”, “financial infiltration”, “propaganda”

These are supporting signals. Do not tag based on keywords alone.

**Do not tag:**
- Routine cross-border bribery or business corruption without political/electoral implications
- Fraud against foreign or supranational bodies (e.g., the EU) by domestic actors, with no foreign direction
- Commercial disputes with no mention of foreign political interference

---

### Frame 2: Systemic institutional corruption
Identify explicit passages that describe corruption as a deep-rooted, pervasive, or structural problem embedded in political institutions.

**Tag if:**
- The passage frames corruption as a normal, systemic part of how power is exercised
- Language includes terms like “endemic,” “deep-rooted,” “pervasive,” “institutionalized,” or metaphorical language suggesting decay (e.g., “cancer,” “disease,” “rot”)

**Language cues may include:**
- “institutional decay”, “longstanding fraud”, “systemic failure”, “outdated governance”, “abuse of power”, “erosion of democracy”, “no cure”, "cancer", "endemic", "deep-rooted"

These phrases are indicative but not definitive on their own.

**Do not tag:**
- Isolated scandals, even large ones, unless explicitly linked to systemic problems
- Criticism of a single case, verdict, or actor without a broader institutional interpretation

---

### Frame 3: Elite collusion
Identify explicit passages that describe corruption as a result of **secret or informal alliances** between political and/or business elites working together for mutual benefit.

**Tag if:**
- The passage alleges backroom deals, undisclosed funding, or elite networks rigging policy
- It describes institutionalized systems of insider advantage or power-sharing among elites
- Experts or sources describe closed political-business circles that enable corruption

**Language cues may include:**
- “elite collusion”, “backroom deals”, “cronyism”, “insider networks”, “in their pocket”, “bought”, “rigging the system”, “donations”, “illegal campaign donations”, “adjust tax rules”, “tax benefit”, “subsidies become welfare for the elite”, “elites push for laws”

Use only when the text explicitly shows or discusses coordinated benefit or influence.

**Do not tag:**
- Nepotism or favoritism without broader collusion
- Single-actor wrongdoing without any coordination or elite benefit

---

### Frame 4: Politicized investigations
Identify explicit passages where legal or investigative tools are described as being used to punish political opponents rather than to uncover wrongdoing impartially.

**Tag if:**
- Investigations or prosecutions are framed as biased, selective, or politically motivated
- Language includes terms like “witch hunt,” “show trial,” “fabrication,” “blackmail,” “defamation”
- The text alleges double standards (e.g., one party is targeted while another is ignored)
- Investigations are used to silence journalists, critics, or opposition actors

**Language cues may include:**
- “witch hunt”, “partisan inquiry”, “denies allegations”, “abuse of power”, “unjust accusations”, “politically motivated”, “claims innocence”

These are indicative but require the framing to be explicit.

**Do not tag:**
- Routine reports of legal proceedings or investigations
- Cases lacking explicit accusations of political misuse or motive

---

### Frame 5: Authoritarian reformism
Identify explicit passages where reforms, surveillance, or legal action are framed as tools to **concentrate political power** and weaken democratic checks and balances.

**Tag if:**
- Institutional reforms (e.g., court restructuring, changes to oversight bodies) are described as centralizing executive power
- State actors (intelligence, prosecutors, police) intimidate, monitor, or neutralize critics or rivals
- Anti-corruption accusations are used selectively to remove or discredit political opponents

**Language cues may include:**
- “accusing established powers”, “accusation of corruption”, “repression”, “silence dissent”, “power consolidation”, “threaten”, “seized power”, “election fraud”, “rigged elections”, “eliminating opposition”

Only tag if these cues accompany an interpretation of **concentrated, repressive political control**.

**Do not tag:**
- Routine or technical reforms without evaluative framing
- Isolated surveillance or legal acts not linked to power consolidation

---

### Frame 6: Judicial and institutional accountability failures
Identify explicit passages that describe how anti-corruption mechanisms fail due to institutional weakness, capture, or design flaws.

**Tag if:**
- Institutions (e.g., courts, commissions, enforcement bodies) are portrayed as ineffective, compromised, or selectively enforced
- The text points to loopholes, lack of follow-through, or inability to punish powerful actors
- Critics describe broken reform promises, politicized protection, or rule-of-law failures

**Language cues may include:**
- “repeal of the offense”, “rules to protect third parties/politicians”, “judicial (loop)holes”, “legal gaps”, “selective enforcement”, “broken promises”, “empty rhetoric”, “ambiguous laws”, “accountability is weak”, “undermining anti-corruption efforts”, “promised to combat corruption”

Use these cues only when they accompany **explicit statements of systemic failure**.

**Do not tag:**
- Neutral updates about procedure (e.g., “trial opened” or “immunity lifted”)
- Cases where accountability mechanisms are functioning as intended

---

### Frame 7: Mobilizing anti-corruption
Identify explicit passages describing public action or genuine reform efforts to fight corruption.

**Tag if:**
- Citizens, activists, or journalists engage in protests, petitions, or calls for justice
- Governments adopt meaningful anti-corruption reforms, pass new laws, or initiate serious prosecutions
- The tone conveys a real effort or progress in tackling corruption

**Language cues may include:**
- “public outrage”, “mass mobilization”, “demand for reform”, “protests”, “sweeping reforms”, “grassroots uprising”, “rooting out corruption”, “new anti-corruption laws”, “accountability now”, “justice for the people”, “we want change”, “political reckoning”, “resignation demands”, “failure of leadership”, “calls for change”, “political backlash”

Use only when framed as genuine mobilization or structural reform, not symbolic gestures.

**Do not tag:**
- Politicians making vague anti-corruption claims without action
- Token or symbolic reforms not framed as serious change



---

### Output Format:

Return ONLY a JSON list like this:

```json
[
  {
    "frame": "Frame Name",
    "highlights": ["Exact sentence", "..."],
    "rationale": "Short explanation of why the frame applies",
    "confidence": 85
  }
]

"""

FEW_SHOT_EXAMPLES = """
### Example 1:
Article:
"Criminal case against Richard de Mos concluded, Prosecutor's Office will not appeal to the Supreme Court. The criminal case against the The Hague politician Richard de Mos has been concluded. The Public Prosecution Office will not appeal to the Supreme Court; the ruling of the Hague Court of Justice is therefore final. The court largely acquitted De Mos, he only received a conditional fine of 2000 euros for passing on confidential information. The Public Prosecution Office suspected the leader of Hart voor Den Haag of, among other things, corruption and bribery."

Output:
[]

### Example 2:
Article:
"Eight charges against former minister in Aruba. The Public Prosecutor's Office in Aruba has charged former Aruban minister Otmar Oduber with eight different offences, it was announced on Friday. These include corruption, accepting bribes, forgery, abuse of power as a minister and fraud. Oduber, who has been one of the most outspoken politicians in Aruba for years, is a suspect in a major fraud investigation called Flamingo. That investigation revolves around possible fraud in the allocation of government land. Last Wednesday, searches were carried out at Oduber's home and at his ex-wife's home. He served as minister from 2009 to the end of 2019, with a brief interruption. First as Minister of Tourism, and from 2017 as Minister of Spatial Development, Infrastructure and Environment. The investigation focuses on the latter period of his ministry."

Output:
[]

### Example 3:
Article:
"members of this parliament and candidates... received money from the Russian government..."

Output:
[
  {
    "frame": "Foreign Influence Threat",
    "highlights": ["This is a clear attack on our European way of life, our Parliament and its democratic mandate by a foreign power."],
    "rationale": "The statement explicitly frames the alleged payments and propaganda activities as a hostile foreign attack targeting democratic institutions.",
    "confidence": 92
  }
]

### Example 4:
Article:
"Corruption became so widespread, as in the Odebrecht case, that it has damaged the political system."

Output:
[
  {
    "frame": "Systemic Institutional Corruption",
    "highlights": ["Corruption became so widespread, as in the Odebrecht case, that it has damaged the political system."],
    "rationale": "Explicitly states that corruption has become widespread and deeply embedded to the point of harming democratic structures, exemplifying systemic decay.",
    "confidence": 90
  }
]

### Example 5:
Article:
"Cronyism appears to be a recurring pattern within the cabinet..."

Output:
[
  {
    "frame": "Elite Collusion",
    "highlights": ["The policy of giving priority to prominent figures"],
    "rationale": "Describes a systemic pattern of cronyism in government procurement where politically connected individuals received preferential treatment, exemplifying elite collusion.",
    "confidence": 91
  }
]

### Example 6:
Article:
"Meanwhile, Saied continues what he started: consolidating power entirely in his own hands..."

Output:
[
  {
    "frame": "Authoritarian Reformism",
    "highlights": ["He introduces a new constitution that makes him virtually untouchable as president, and has prominent political opponents... arrested"],
    "rationale": "Describes both constitutional reform and repression of dissent as a strategy for power consolidation, characteristic of authoritarian reformism.",
    "confidence": 93
  }
]

### Example 7:
Article:
"One of Boris Johnson's closest allies accused a committee... of a 'witch hunt'."

Output:
[
  {
    "frame": "Politicized Investigations",
    "highlights": ["witch hunt"],
    "rationale": "The term 'witch hunt' implies biased legal action driven by political motives, which aligns with the politicized investigations frame.",
    "confidence": 89
  }
]

### Example 8:
Article:
"In 2019, Jokowi oversaw revisions that significantly weakened the KPK’s mandate..."

Output:
[
  {
    "frame": "Judicial and Institutional Accountability Failures",
    "highlights": ["oversaw revisions that significantly weakened the KPK’s mandate"],
    "rationale": "Describes a weakening of Indonesia’s main anti-corruption body, illustrating systemic accountability failure.",
    "confidence": 91
  }
]

### Example 9:
Article:
"Slovakia is working hard to reform its judiciary..."

Output:
[
  {
    "frame": "Mobilizing Anti-corruption",
    "highlights": ["Slovakia is working hard to reform its judiciary. The goal: more independent judges and less corruption"],
    "rationale": "Highlights genuine structural reform efforts framed as a response to corruption, characteristic of mobilizing anti-corruption.",
    "confidence": 92
  }
]
"""

# ========== LLM REQUEST ==========

def build_prompt(article_text: str) -> str:
    return f"{FEW_SHOT_EXAMPLES}\n\n{FRAME_PROMPT}\n\n---\n\nArticle:\n{article_text}"


def query_llm(article_text: str) -> list:
    try:
        response = requests.post(
            LLM_ENDPOINT,
            json={
                "model": LLM_MODEL_NAME,
                "messages": [{"role": "user", "content": build_prompt(article_text)}],
                "stream": False
            },
            timeout=120
        )
        response.raise_for_status()
        result = response.json()
        content = result.get("message", {}).get("content", "").strip()
        json_start = content.find("[")
        json_end = content.rfind("]") + 1
        json_str = content[json_start:json_end]
        return json.loads(json_str)

    except Exception as e:
        print(f"❌ Error querying LLM: {e}")
        return [{
            "frame": "Error",
            "rationale": str(e),
            "confidence": None,
            "highlights": []
        }]

# ========== RESULT FORMAT ==========
def format_llm_output(llm_frames: list) -> dict:
    formatted = {}
    # Filter frames to only those with non-empty highlights and rationale
    valid_frames = [
        frame for frame in llm_frames
        if frame.get("frame") and
           frame.get("highlights") and len(frame["highlights"]) > 0 and
           frame.get("rationale") and frame["rationale"].strip() != ""
    ]
    frame_map = {
        frame["frame"].strip().lower(): frame
        for frame in valid_frames
    }

    for i, frame_name in enumerate(FRAME_ORDER, 1):
        key = frame_name.lower()
        match = frame_map.get(key, {})

        if not match:
            # If frame not present or no valid evidence, skip adding fields for this frame
            continue

        confidence = match.get("confidence", "")
        rationale = match.get("rationale", "")
        evidence_list = match.get("highlights", [])

        # Prepare evidence text
        evidence_text = "\n".join(evidence_list)

        # Add warning if confidence is low
        if confidence != "" and isinstance(confidence, (int, float)) and confidence < 80:
            warning = f"\n\n⚠️ Model confidence is only {confidence}%. Please verify carefully."
            evidence_text += warning

        formatted[f"frame_{i}_name"] = match.get("frame", "")
        formatted[f"frame_{i}_rationale"] = rationale
        formatted[f"frame_{i}_confidence"] = confidence
        formatted[f"frame_{i}_evidence"] = evidence_text

    return formatted

# ========== ANNOTATION LOOP ==========
def annotate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    # Ensure all frame columns exist (empty if not present)
    for i in range(1, len(FRAME_ORDER) + 1):
        for field in ["name", "rationale", "confidence", "evidence"]:
            col = f"frame_{i}_{field}"
            if col not in df.columns:
                df[col] = ""

    for idx, row in df.iterrows():
        # Skip if first frame is already annotated (heuristic)
        if pd.notna(row.get("frame_1_name")) and row.get("frame_1_name") != "":
            print(f"⏭️ Article {idx} already annotated. Skipping.")
            continue

        print(f"\n🔍 Annotating article {idx}...\n")
        article_text = row.get("translated_text", "")
        frames = query_llm(article_text)
        formatted = format_llm_output(frames)

        for col, val in formatted.items():
            df.at[idx, col] = val

        df.to_csv(TEMP_OUTPUT_PATH, index=False)
        print(f"✅ Saved progress after article {idx}.")
        time.sleep(SLEEP_BETWEEN_REQUESTS)

    return df

# ========== MAIN ==========

if __name__ == "__main__":
    # Expand path to raw input CSV
    INPUT_PATH = os.path.expanduser(
        "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/news_sample_translated_10000_with_llm_annotations.csv"
    )
    df_all = pd.read_csv(INPUT_PATH)

    # Filter to only LLM-labeled "Yes" articles
    df_yes = df_all[df_all.get("llm_label", "") == "Yes"]

    # ======= ICR 1 Sample =======
    print("\n🎯 Drawing ICR 1 sample...")
    df_icr1 = df_yes.sample(n=15, random_state=12).reset_index(drop=True)

    # Save raw ICR 1 sample before annotation
    icr1_sample_path = os.path.expanduser(
        "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/icr1_sample_raw.csv"
    )
    df_icr1.to_csv(icr1_sample_path, index=False)
    print(f"📁 Saved ICR 1 raw sample to: {icr1_sample_path}")

    # Annotate
    df_icr1 = annotate_dataframe(df_icr1)

    # Save annotated ICR 1 sample
    icr1_annotated_path = os.path.expanduser(
        "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/icr1_sample_annotated.csv"
    )
    df_icr1.to_csv(icr1_annotated_path, index=False)
    print(f"✅ Annotated ICR 1 saved to: {icr1_annotated_path}")

    # ======= ICR 2 Sample =======
    print("\n🎯 Drawing ICR 2 sample...")
    df_icr2 = df_yes.sample(n=15, random_state=42).reset_index(drop=True)

    # Save raw ICR 2 sample before annotation
    icr2_sample_path = os.path.expanduser(
        "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/icr2_sample_raw.csv"
    )
    df_icr2.to_csv(icr2_sample_path, index=False)
    print(f"📁 Saved ICR 2 raw sample to: {icr2_sample_path}")

    # Annotate
    df_icr2 = annotate_dataframe(df_icr2)

    # Save annotated ICR 2 sample
    icr2_annotated_path = os.path.expanduser(
        "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/icr2_sample_annotated.csv"
    )
    df_icr2.to_csv(icr2_annotated_path, index=False)
    print(f"✅ Annotated ICR 2 saved to: {icr2_annotated_path}")


🎯 Drawing ICR 1 sample...
📁 Saved ICR 1 raw sample to: /home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/icr1_sample_raw.csv

🔍 Annotating article 0...

✅ Saved progress after article 0.

🔍 Annotating article 1...

✅ Saved progress after article 1.

🔍 Annotating article 2...



In [61]:
# Save Excel version of ICR 1
icr1_excel_path = icr1_annotated_path.replace(".csv", ".xlsx")
df_icr1.to_excel(icr1_excel_path, index=False)
print(f"✅ Excel-bestand ICR 1 opgeslagen als: {icr1_excel_path}")

✅ Excel-bestand opgeslagen als: /home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/news_sample_with_7_frames_adjusted_prompt.xlsx


In [None]:
# Save Excel version of ICR 2
icr2_excel_path = icr2_annotated_path.replace(".csv", ".xlsx")
df_icr2.to_excel(icr2_excel_path, index=False)
print(f"✅ Excel-bestand ICR 2 opgeslagen als: {icr2_excel_path}")
