# In [None]:  (notebook cell marker left over from the Jupyter export)
import os
import json
import time
import pandas as pd
import re
from openai import OpenAI

# ==========================
# CONFIGURATION
# ==========================

# Base URL of the OpenAI-compatible chat endpoint used for all requests.
BASE_URL = "https://chat-ai.academiccloud.de/v1"

# API keys to rotate between when rate limits are hit.
# SECURITY: keys are read from the environment (ACAD_API_KEY_1 /
# ACAD_API_KEY_2) instead of being stored in this file, so credentials never
# end up in version control or shared notebooks. Any key that was previously
# committed to this file should be treated as leaked and revoked.
# Unset or blank variables are dropped, so every entry of API_KEYS is usable.
API_KEYS = [
    key
    for key in (
        os.getenv("ACAD_API_KEY_1", "").strip(),
        os.getenv("ACAD_API_KEY_2", "").strip(),
    )
    if key
]

# Fail fast with an actionable message rather than on the first API call.
if not API_KEYS:
    raise RuntimeError(
        "No API keys configured. Set the ACAD_API_KEY_1 / ACAD_API_KEY_2 "
        "environment variables."
    )

# Folder layout: every .xlsx in INPUT_DIR is read (art5.xlsx, art6.xlsx,
# pol23.xlsx, ...) and the classified copies are written to OUTPUT_DIR.
INPUT_DIR = "input_excels"
OUTPUT_DIR = "classified_excels"

# Name of the spreadsheet column that holds the texts to classify.
TEXT_COL = "text"

# Model identifier; the MODEL environment variable overrides the default.
MODEL = os.environ.get("MODEL", "qwen3-30b-a3b-instruct-2507")

# Pauses in seconds: after each successful API call / after each processed row.
PER_CALL_DELAY, BATCH_DELAY = 0.5, 1.0

# Position within API_KEYS of the key currently in use.
current_key_index = 0

# ==========================
# CLIENT MANAGEMENT
# ==========================

def make_client():
    """Build an OpenAI client for the key selected by ``current_key_index``.

    Returns:
        An ``OpenAI`` client bound to ``BASE_URL`` and the selected key.

    Raises:
        RuntimeError: if the selected entry of ``API_KEYS`` is empty.
    """
    active_key = API_KEYS[current_key_index]
    if active_key:
        return OpenAI(api_key=active_key, base_url=BASE_URL)
    raise RuntimeError(f"No API key configured at index {current_key_index}")


# Module-wide client; rebuilt by rotate_api_key() when the key changes.
client = make_client()

def rotate_api_key():
    """Switch the module-wide ``client`` to the next usable API key.

    Walks ``API_KEYS`` cyclically starting after the current position and
    selects the first non-empty entry. Does nothing (apart from printing an
    info line) when there is only one key or no other usable key.

    Side effects:
        Rebinds the globals ``current_key_index`` and ``client``. If building
        the new client fails, ``current_key_index`` is restored so index and
        client never disagree, and the exception is re-raised.
    """
    global current_key_index, client
    num_keys = len(API_KEYS)
    if num_keys <= 1:
        print("[INFO] Only one API key configured; cannot rotate.")
        return

    # Skip empty slots: make_client() raises for them, which previously left
    # current_key_index advanced while `client` still used the old key.
    for step in range(1, num_keys):
        candidate = (current_key_index + step) % num_keys
        if API_KEYS[candidate]:
            break
    else:
        print("[INFO] No other usable API key configured; cannot rotate.")
        return

    previous_index = current_key_index
    current_key_index = candidate  # make_client() reads this global
    try:
        client = make_client()
    except Exception:
        current_key_index = previous_index
        raise
    print(f"[INFO] Switched to API key #{current_key_index + 1}")

# ==========================
# PROMPTS
# ==========================

# Stage-1 prompt: binary decision "evaluative vs. non-evaluative" for the
# TARGET text. Filled with str.format(prev_text=..., target_text=...,
# next_text=...); the template must therefore not contain any other braces.
# Each context text is wrapped in a balanced pair of triple double quotes.
PROMPT_BINARY = """
You are an annotation assistant for Appraisal Theory (Martin & White, 2005).

Task:
Decide whether the TARGET text contains any evaluative content.

You are given:
- PREVIOUS: text from the row above (may be empty)
- TARGET: the current text to label
- NEXT: text from the row below (may be empty)

Use PREVIOUS and NEXT only as context (e.g., to resolve pronouns or topics).
The label must refer ONLY to the TARGET text.

Coding rule “Evaluative” (binary):

- Evaluative = 1
  -> If the TARGET text expresses an emotion, a judgment of people/behaviour,
     or an aesthetic/quality/value evaluation of things, performances or phenomena.

- Evaluative = 0
  -> If the TARGET text is purely factual, descriptive, or neutral, with no clear
     emotional stance, no judgment of people/behaviour, and no quality/value
     evaluation of things or events.

Examples (illustrative only):
- “The room is beautiful.” -> evaluative (1): aesthetic evaluation of a thing.
- “She was very unfair to her team.” -> evaluative (1): judgment of behaviour.
- “The concert made me really happy.” -> evaluative (1): emotional reaction.
- “The meeting starts at 3 pm.” -> non-evaluative (0): purely factual.

Important constraints:
- Do NOT invent or assume information that is not explicitly present in the
  TARGET text or necessary to interpret it in context. No hallucinations.
- If you are unsure, choose the more conservative option and explain briefly.

Output format:
Return ONLY a single valid JSON object (no extra text, no explanations outside JSON).
Use exactly these fields:

- "binary": 0 or 1
    - 0 = non-evaluative TARGET
    - 1 = evaluative TARGET
- "label": "no_eval" or "eval"
- "justification": a short justification for the decision (max 40 words), based ONLY on the given texts.
- "evidence_span": a short quote from the TARGET that supports the decision (or "" if none).

Context:

PREVIOUS:
\"\"\"{prev_text}\"\"\"

TARGET:
\"\"\"{target_text}\"\"\"

NEXT:
\"\"\"{next_text}\"\"\"
"""

# Stage-2 prompt: Affect / Judgment / Appreciation classification for texts
# that stage 1 marked as evaluative. Filled with str.format(prev_text=...,
# target_text=..., next_text=...); the template must therefore not contain
# any other braces. Each context text is wrapped in a balanced pair of
# triple double quotes.
PROMPT_MULTICLASS = """You are an annotation assistant for Appraisal Theory (Martin & White, 2005).

The TARGET text has been identified as evaluative (binary = 1).

Task:
Classify the TARGET text into Appraisal categories. Use the three main types:
- Affect
- Judgment
- Appreciation

You are given:
- PREVIOUS: text from the row above (may be empty)
- TARGET: the current text to label
- NEXT: text from the row below (may be empty)

Use PREVIOUS and NEXT only as context (e.g., to resolve referents).
Multiple labels are allowed ONLY IF more than one label plausibly applies but none clearly dominates; in that case set "label" to "ambiguous" and also mark the candidate labels with 1.

Definitions (paraphrased from Appraisal Theory):

1. Affect (feelings / emotions)
   - The TARGET text expresses emotional states or reactions.
   - This can include:
     - emotional reactions to events or things (e.g., “I’m happy about it”, “That scares me”),
     - more enduring emotional dispositions (e.g., “I’m a nervous person”).
   - Typical linguistic cues:
     - emotion words (happy, sad, afraid, angry, delighted, bored, grateful, etc.),
     - phrases like “makes me feel…”, “I love / hate…”.

2. Judgment (evaluation of people and behaviour)
   - The TARGET text evaluates people or their behaviour/character in relation to social norms
     (e.g., right/wrong, fair/unfair, honest/dishonest, capable/incompetent).
   - Often about:
     - social esteem (e.g., brave, careful, talented, lazy, rude),
     - social sanction (e.g., honest, trustworthy, corrupt, unfair, immoral).
   - Typical linguistic cues:
     - adjectives for character or behaviour (“kind”, “irresponsible”, “unfair”),
     - attributions of praise or blame (“she did the right thing”, “he failed his duty”).

3. Appreciation (evaluation of things, products, events, processes, phenomena)
   - The TARGET text evaluates objects, artefacts, performances, texts, policies, situations,
     or environments in terms of quality, value, design, or aesthetics.
   - Often about:
     - reaction (how appealing or impactful something is: “boring”, “exciting”, “moving”),
     - composition (harmony, complexity, balance: “well-structured”, “chaotic”),
     - valuation (worth, significance: “important”, “valuable”, “pointless”).
   - Typical linguistic cues:
     - evaluations of things or outcomes (“beautiful view”, “poor performance”, “excellent report”).

4. Ambiguous
   - Use “ambiguous” when:
     - more than one type (Affect, Judgment, Appreciation) clearly applies,
       AND none of them is clearly dominant, OR
     - the available text is too short or vague to reliably decide which type it is.
   - In such cases, mark the types you are considering with 1 (e.g. Affect = 1, Judgment = 1)
     and set "label" to "ambiguous".

Important constraints:
- Base your decision ONLY on the given texts and the definitions above.
- Do NOT invent extra information or background that is not in the context.
- You can choose more than one type (e.g., Affect = 1 and Judgement = 1) – multi-label annotation is allowed.
- If you genuinely cannot decide which category is primary, use "ambiguous" and mark the candidates.

Output format:
Return ONLY a single valid JSON object (no extra text).

Use exactly these fields:
- "label": one of "affect", "judgment", "appreciation", "ambiguous"
- "affect": 0 or 1
- "judgment": 0 or 1
- "appreciation": 0 or 1
- "probability_affect": float between 0.0 and 1.0
- "probability_judgment": float between 0.0 and 1.0
- "probability_appreciation": float between 0.0 and 1.0
- "top_spans": list (max 3 items) of short quotes from TARGET
- "explanation": short explanation (max 60 words)

Context:

PREVIOUS:
\"\"\"{prev_text}\"\"\"

TARGET:
\"\"\"{target_text}\"\"\"

NEXT:
\"\"\"{next_text}\"\"\"
"""

# ==========================
# MODEL CALL HELPER
# ==========================

def _is_rate_limit_error(exc):
    """Return True when *exc* looks like an HTTP 429 / rate-limit failure."""
    status = getattr(exc, "status_code", None)
    if status is not None:
        # API status errors carry the HTTP status; trust it over the text.
        return status == 429
    if isinstance(exc, ValueError):
        # JSON/validation problems (json.JSONDecodeError is a ValueError) may
        # contain "429" by accident, e.g. "... (char 429)"; never rate limits.
        return False
    msg = str(exc)
    return "429" in msg or "rate limit" in msg.lower()


def call_model(messages, model=MODEL, max_tokens=300, temperature=0.0, retries=5):
    """Send a chat request and return the JSON object found in the reply.

    Args:
        messages: chat messages passed to ``client.chat.completions.create``.
        model: model identifier.
        max_tokens: completion token limit.
        temperature: sampling temperature.
        retries: maximum number of attempts (must be >= 1).

    Returns:
        A tuple ``(parsed, content)``: the decoded JSON object (a dict) and
        the stripped raw reply text.

    Raises:
        ValueError: if ``retries`` is smaller than 1.
        Exception: the last error encountered once all attempts have failed
            (API error, invalid JSON, or a reply that is not a JSON object).

    On a rate-limit error the API key is rotated and the wait grows linearly
    (5 s, 10 s, ...); other errors back off exponentially (1 s, 2 s, 4 s, ...).
    No wait is performed after the final failed attempt.
    """
    global client
    if retries < 1:
        # Previously this ended in `raise None`, i.e. an unrelated TypeError.
        raise ValueError("retries must be at least 1")

    last_exc = None

    for attempt in range(retries):
        try:
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # `content` can be None (e.g. an empty completion); treat as "".
            content = (resp.choices[0].message.content or "").strip()

            # Take everything from the first "{" to the last "}" so that code
            # fences or stray text around the JSON object are ignored.
            m = re.search(r'(\{[\s\S]*\})', content)
            json_text = m.group(1) if m else content
            parsed = json.loads(json_text)
            if not isinstance(parsed, dict):
                # Callers use parsed.get(...); anything else is a bad reply.
                raise ValueError("Model reply is valid JSON but not a JSON object")

        except Exception as e:
            last_exc = e
            rate_limited = _is_rate_limit_error(e)

            if attempt == retries - 1:
                kind = "Rate limit error" if rate_limited else "Non-rate-limit error"
                print(f"[ERROR] {kind} on final attempt: {e}")
                break

            if rate_limited:
                print(f"[WARN] Rate limit error: {e}")
                try:
                    rotate_api_key()
                except Exception as rotate_exc:
                    # Keep retrying with the current client instead of
                    # letting a rotation problem abort the retry loop.
                    print(f"[WARN] Could not rotate API key: {rotate_exc}")
                wait = 5 * (attempt + 1)
                print(f"[INFO] Sleeping {wait} seconds before retry...")
            else:
                wait = 2 ** attempt
                print(f"[ERROR] Non-rate-limit error: {e}. Sleeping {wait} seconds before retry...")
            time.sleep(wait)

        else:
            time.sleep(PER_CALL_DELAY)
            return parsed, content

    raise last_exc

# ==========================
# MAIN PIPELINE FOR ONE FILE
# ==========================

def _cell_to_text(value):
    """Return a cell value as stripped text; missing cells (None/NaN) give ''."""
    if isinstance(value, str):
        return value.strip()
    if value is None:
        return ""
    try:
        if pd.isna(value):
            return ""
    except (TypeError, ValueError):
        pass  # non-scalar cell content: fall through to str()
    return str(value).strip()


def _binary_from_response(parsed):
    """Derive the 0/1 decision from the binary-stage JSON reply."""
    try:
        return int(parsed.get("binary"))
    except (TypeError, ValueError, OverflowError):
        # Fall back to the textual label. A substring test would be wrong
        # here because "no_eval" contains "eval".
        label = str(parsed.get("label", "")).strip().lower()
        return 1 if label.startswith("eval") else 0


def _appraisal_messages(template, prev_text, target_text, next_text):
    """Build the system + user chat messages for one prompt template."""
    return [
        {
            "role": "system",
            "content": "You are a precise classifier of appraisal theory. Respond only with valid JSON.",
        },
        {
            "role": "user",
            "content": template.format(
                prev_text=prev_text,
                target_text=target_text,
                next_text=next_text,
            ),
        },
    ]


def classify_excel(input_file, text_col=TEXT_COL, output_file=None):
    """
    Classify a single Excel file and write the result to output_file.
    If output_file is None, appends '_classified' before extension.

    Every row is first sent through the binary prompt; rows judged evaluative
    (binary == 1) are then sent through the multiclass prompt. The rows above
    and below are passed as context. Empty/missing cells are not sent to the
    model: they get binary = 0 and the justification "leer".

    Args:
        input_file: path of the Excel file to read.
        text_col: name of the column containing the texts.
        output_file: path of the Excel file to write (optional).

    Raises:
        ValueError: if ``text_col`` is not a column of the input file.

    A failed model call for a row is recorded in that row ("error: ...") and
    processing continues with the next row.
    """
    if output_file is None:
        base, ext = os.path.splitext(input_file)
        output_file = base + "_classified" + ext

    print(f"[INFO] Classifying file: {input_file}")
    df = pd.read_excel(input_file)

    if text_col not in df.columns:
        # Without this check every row would silently be labelled "leer".
        raise ValueError(f"Column {text_col!r} not found in {input_file}")

    # Normalise all texts once; NaN cells become "" rather than "nan".
    texts = [_cell_to_text(value) for value in df[text_col]]
    last_pos = len(texts) - 1

    for column in (
        "binary",
        "binary_justification",
        "binary_evidence_span",
        "multiclass_label",
        "multiclass_probability_affect",
        "multiclass_probability_judgment",
        "multiclass_probability_appreciation",
        "multiclass_spans",
        "multiclass_explanation",
        "binary_raw_output",
        "multiclass_raw_output",
    ):
        df[column] = None

    # Iterate by position so neighbour lookup does not depend on index labels.
    for pos, (idx, text) in enumerate(zip(df.index, texts)):
        if not text:
            df.at[idx, "binary"] = 0
            df.at[idx, "binary_justification"] = "leer"
            continue

        prev_text = texts[pos - 1] if pos > 0 else ""
        next_text = texts[pos + 1] if pos < last_pos else ""

        # ---------- BINARY ----------
        try:
            parsed_bin, raw_bin = call_model(
                _appraisal_messages(PROMPT_BINARY, prev_text, text, next_text)
            )
            if not isinstance(parsed_bin, dict):
                raise TypeError("binary reply is not a JSON object")
        except Exception as e:
            print(f"[ERROR] Binary classification failed at idx={idx} in file {input_file}: {e}")
            df.at[idx, "binary"] = None
            df.at[idx, "binary_justification"] = f"error: {e}"
            df.at[idx, "binary_raw_output"] = ""
            time.sleep(BATCH_DELAY)
            continue

        bin_val = _binary_from_response(parsed_bin)

        df.at[idx, "binary_raw_output"] = raw_bin
        df.at[idx, "binary"] = bin_val
        df.at[idx, "binary_justification"] = parsed_bin.get("justification", "")
        df.at[idx, "binary_evidence_span"] = parsed_bin.get("evidence_span", "")

        # ---------- MULTICLASS ----------
        if bin_val == 1:
            try:
                parsed_multi, raw_multi = call_model(
                    _appraisal_messages(PROMPT_MULTICLASS, prev_text, text, next_text)
                )
                if not isinstance(parsed_multi, dict):
                    raise TypeError("multiclass reply is not a JSON object")
            except Exception as e:
                print(f"[ERROR] Multiclass classification failed at idx={idx} in file {input_file}: {e}")
                df.at[idx, "multiclass_label"] = None
                df.at[idx, "multiclass_explanation"] = f"error: {e}"
                df.at[idx, "multiclass_raw_output"] = ""
                time.sleep(BATCH_DELAY)
                continue

            df.at[idx, "multiclass_raw_output"] = raw_multi
            df.at[idx, "multiclass_label"] = parsed_multi.get("label", None)

            for category in ("affect", "judgment", "appreciation"):
                column = f"multiclass_probability_{category}"
                try:
                    df.at[idx, column] = float(
                        parsed_multi.get(f"probability_{category}", 0.0)
                    )
                except (TypeError, ValueError, OverflowError):
                    df.at[idx, column] = None

            spans = parsed_multi.get("top_spans", [])
            if isinstance(spans, list):
                # str() guards against non-string items, which break join().
                spans = " ||| ".join(str(span) for span in spans)
            elif spans is not None and not isinstance(spans, str):
                spans = str(spans)
            df.at[idx, "multiclass_spans"] = spans

            df.at[idx, "multiclass_explanation"] = parsed_multi.get("explanation", "")

        time.sleep(BATCH_DELAY)

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_excel(output_file, index=False)
    print(f"[INFO] Saved classified Excel to {output_file}")

# ==========================
# BATCH OVER A FOLDER
# ==========================

def classify_all_excels(input_dir=INPUT_DIR, output_dir=OUTPUT_DIR, text_col=TEXT_COL):
    """
    Loop over all .xlsx files in input_dir and classify each one.
    Output is written to output_dir with '<name>_classified.xlsx'.

    Files are processed in sorted name order. Excel lock files ("~$...") are
    ignored, and a file whose output already exists is skipped. If input_dir
    does not exist or contains no .xlsx files, a warning is printed and
    nothing else happens. Exceptions raised by classify_excel propagate.

    Args:
        input_dir: folder containing the Excel files to classify.
        output_dir: folder receiving the classified files (created if needed).
        text_col: name of the text column, passed on to classify_excel.
    """
    # Check first so a wrong path gives a clear message and does not leave an
    # empty output folder behind.
    if not os.path.isdir(input_dir):
        print(f"[WARN] Input folder not found: {input_dir}")
        return

    # os.listdir() order is arbitrary; sort for a reproducible run order.
    files = sorted(
        f for f in os.listdir(input_dir)
        if f.lower().endswith(".xlsx") and not f.startswith("~$")
    )

    if not files:
        print(f"[WARN] No .xlsx files found in {input_dir}")
        return

    os.makedirs(output_dir, exist_ok=True)

    for fname in files:
        input_path = os.path.join(input_dir, fname)
        base, ext = os.path.splitext(fname)
        output_path = os.path.join(output_dir, f"{base}_classified{ext}")

        # Avoid re-processing if output already exists (optional)
        if os.path.exists(output_path):
            print(f"[INFO] Skipping {fname}, already classified at {output_path}")
            continue

        classify_excel(input_file=input_path, text_col=text_col, output_file=output_path)

# ==========================
# ENTRY POINT
# ==========================

# Run the folder batch with the default INPUT_DIR / OUTPUT_DIR / TEXT_COL
# only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    classify_all_excels()
