In [1]:
from openai import OpenAI

import os

# API configuration
# The key is read from the environment so that no credential is stored in the
# notebook (or in its saved outputs / version history). Any key that was
# previously committed in this file should be treated as leaked and revoked.
api_key = os.environ.get("CHAT_AI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the CHAT_AI_API_KEY environment variable before running this cell."
    )

base_url = 'https://chat-ai.academiccloud.de/v1'

# Initialize client (OpenAI-compatible endpoint selected via base_url)
client = OpenAI(
    api_key=api_key,
    base_url=base_url
)

# Ask the endpoint which models this key may use
models = client.models.list()

# Print all available model IDs
for model in models.data:
    print(model.id)


qwen3-coder-30b-a3b-instruct
meta-llama-3.1-8b-instruct
openai-gpt-oss-120b
gemma-3-27b-it
qwen3-30b-a3b-thinking-2507
qwen3-30b-a3b-instruct-2507
qwen3-32b
qwen3-235b-a22b
llama-3.3-70b-instruct
qwen2.5-vl-72b-instruct
medgemma-27b-it
qwq-32b
deepseek-r1
deepseek-r1-distill-llama-70b
mistral-large-instruct
qwen2.5-coder-32b-instruct
internvl2.5-8b
teuken-7b-instruct-research
codestral-22b
llama-3.1-sauerkrautlm-70b-instruct
meta-llama-3.1-8b-rag
qwen2.5-omni-7b


In [7]:
import os
import json
import time
import pandas as pd
import re
from openai import OpenAI  # keeps your existing client style; adjust import if your environment differs

# The API key is taken from the environment so that no credential is stored in
# the notebook. Any key previously committed in this file should be treated as
# leaked and revoked.
API_KEY = os.getenv("CHAT_AI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the CHAT_AI_API_KEY environment variable before running this cell."
    )
BASE_URL = "https://chat-ai.academiccloud.de/v1"

# Recommended model choices:
# Primary: "llama-3.3-70b-instruct"
# Fallback: "qwen3-30b-a3b-instruct-2507"
# NOTE: the default below is the *fallback* model; set the MODEL environment
# variable to switch models without editing the notebook.
MODEL = os.getenv("MODEL", "qwen3-30b-a3b-instruct-2507")

EXCEL_IN = "Art5 (1).xlsx"         # input Excel filename
TEXT_COL = "text"               # column name that contains the texts
EXCEL_OUT = "Art5_classified.xlsx"  # output Excel filename
BATCH_DELAY = 0.2               # seconds to sleep between rows

# === initialize client ===
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# === prompts ===
# Stage-1 prompt template: asks the model whether TARGET is evaluative (1) or
# not (0) and requests a JSON object with the fields "binary", "label",
# "justification" and "evidence_span".
# The template is filled with str.format(), so {prev_text}, {target_text} and
# {next_text} are placeholders; any literal brace added to this text later
# would have to be doubled ({{ }}).
PROMPT_BINARY = """
You are an annotation assistant for Appraisal Theory (Martin & White, 2005).

Task:
Decide whether the TARGET text contains any evaluative content.

You are given:
- PREVIOUS: text from the row above (may be empty)
- TARGET: the current text to label
- NEXT: text from the row below (may be empty)

Use PREVIOUS and NEXT only as context (e.g., to resolve pronouns or topics).
The label must refer ONLY to the TARGET text.

Coding rule “Evaluative” (binary):

- Evaluative = 1
  -> If the TARGET text expresses an emotion, a judgment of people/behaviour,
     or an aesthetic/quality/value evaluation of things, performances or phenomena.

- Evaluative = 0
  -> If the TARGET text is purely factual, descriptive, or neutral, with no clear
     emotional stance, no judgment of people/behaviour, and no quality/value
     evaluation of things or events.

Examples (illustrative only):
- “The room is beautiful.” -> evaluative (1): aesthetic evaluation of a thing.
- “She was very unfair to her team.” -> evaluative (1): judgment of behaviour.
- “The concert made me really happy.” -> evaluative (1): emotional reaction.
- “The meeting starts at 3 pm.” -> non-evaluative (0): purely factual.

Important constraints:
- Do NOT invent or assume information that is not explicitly present in the
  TARGET text or necessary to interpret it in context. No hallucinations.
- If you are unsure, choose the more conservative option and explain briefly.

Output format:
Return ONLY a single valid JSON object (no extra text, no explanations outside JSON).
Use exactly these fields:

- "binary": 0 or 1
    - 0 = non-evaluative TARGET
    - 1 = evaluative TARGET
- "label": "no_eval" or "eval"
- "justification": a short justification for the decision (max 40 words), based ONLY on the given texts.
- "evidence_span": a short quote from the TARGET that supports the decision (or "" if none).

Context:

PREVIOUS:
\"\"\"{prev_text}\"\"\"

TARGET:
\"\"\"{target_text}\"\"\"

NEXT:
\"\"\"{next_text}\"\"\"
"""


# Stage-2 prompt template: sent only for rows whose binary label is 1. It asks
# the model to pick one of "affect", "judgment", "appreciation" or "ambiguous"
# and to return a JSON object with the fields "label", "affect", "judgment",
# "appreciation", "probability", "top_spans" and "explanation".
# The template is filled with str.format(), so {prev_text}, {target_text} and
# {next_text} are placeholders; any literal brace added to this text later
# would have to be doubled ({{ }}).
PROMPT_MULTICLASS = """
You are an annotation assistant for Appraisal Theory (Martin & White, 2005).

The TARGET text has been identified as evaluative (binary = 1).

Task:
Classify the TARGET text into Appraisal categories. Use the three main types:
- Affect
- Judgment
- Appreciation

You are given:
- PREVIOUS: text from the row above (may be empty)
- TARGET: the current text to label
- NEXT: text from the row below (may be empty)

Use PREVIOUS and NEXT only as context (e.g., to resolve referents).
The label must refer ONLY to the TARGET text.

Definitions (paraphrased from Appraisal Theory):

1. Affect (feelings / emotions)
   - The TARGET text expresses emotional states or reactions.
   - This can include:
     - emotional reactions to events or things (e.g., “I’m happy about it”, “That scares me”),
     - more enduring emotional dispositions (e.g., “I’m a nervous person”).
   - Typical linguistic cues:
     - emotion words (happy, sad, afraid, angry, delighted, bored, grateful, etc.),
     - phrases like “makes me feel…”, “I love / hate…”.

2. Judgment (evaluation of people and behaviour)
   - The TARGET text evaluates people or their behaviour/character in relation to social norms
     (e.g., right/wrong, fair/unfair, honest/dishonest, capable/incompetent).
   - Often about:
     - social esteem (e.g., brave, careful, talented, lazy, rude),
     - social sanction (e.g., honest, trustworthy, corrupt, unfair, immoral).
   - Typical linguistic cues:
     - adjectives for character or behaviour (“kind”, “irresponsible”, “unfair”),
     - attributions of praise or blame (“she did the right thing”, “he failed his duty”).

3. Appreciation (evaluation of things, products, events, processes, phenomena)
   - The TARGET text evaluates objects, artefacts, performances, texts, policies, situations,
     or environments in terms of quality, value, design, or aesthetics.
   - Often about:
     - reaction (how appealing or impactful something is: “boring”, “exciting”, “moving”),
     - composition (harmony, complexity, balance: “well-structured”, “chaotic”),
     - valuation (worth, significance: “important”, “valuable”, “pointless”).
   - Typical linguistic cues:
     - evaluations of things or outcomes (“beautiful view”, “poor performance”, “excellent report”).

4. Ambiguous
   - Use “ambiguous” when:
     - more than one type (Affect, Judgment, Appreciation) clearly applies,
       AND none of them is clearly dominant, OR
     - the available text is too short or vague to reliably decide which type it is.
   - In such cases, mark the types you are considering with 1 (e.g. Affect = 1, Judgment = 1)
     and set "label" to "ambiguous".

Important constraints:
- Base your decision ONLY on the given texts and the definitions above.
- Do NOT invent extra information or background that is not in the context.
- If two or more categories are clearly present and one is clearly dominant, choose the dominant one (not ambiguous).
- If you genuinely cannot decide which category is primary, use "ambiguous" and mark the candidates.

Output format:
Return ONLY a single valid JSON object (no extra text).

Use exactly these fields:
- "label": one of "affect", "judgment", "appreciation", "ambiguous"
- "affect": 0 or 1  (1 if affect is present/relevant)
- "judgment": 0 or 1  (1 if judgment is present/relevant)
- "appreciation": 0 or 1  (1 if appreciation is present/relevant)
- "probability": a float between 0.0 and 1.0 for the chosen main label in "label"
- "top_spans": a list (max 3 items) of short quotes from the TARGET that support your decision
- "explanation": a short explanation (max 60 words) of why you chose this label and flags,
                 explicitly linking to the definitions above.

Context:

PREVIOUS:
\"\"\"{prev_text}\"\"\"

TARGET:
\"\"\"{target_text}\"\"\"

NEXT:
\"\"\"{next_text}\"\"\"
"""


# === helper to call model and parse JSON robustly ===
def _first_json_object(text):
    """Return the first decodable JSON object (dict) embedded in ``text``.

    Scans every ``{`` in turn and lets the JSON decoder decide where the
    object ends, so text or stray braces after the object do not break
    parsing (a greedy first-``{``-to-last-``}`` match would).

    Raises:
        ValueError: if no JSON object can be decoded from ``text``.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    raise ValueError("no JSON object found in model output")


def call_model(messages, model=MODEL, max_tokens=300, temperature=0.0, retries=3):
    """Send a chat completion request and parse the JSON object in the reply.

    Args:
        messages: chat messages in the OpenAI ``[{"role": ..., "content": ...}]`` form.
        model: model ID to query.
        max_tokens: completion length limit passed to the API.
        temperature: sampling temperature passed to the API.
        retries: total number of attempts (must be >= 1).

    Returns:
        A ``(parsed, content)`` tuple: the parsed JSON object as a dict and the
        raw (stripped) reply text for debugging.

    Raises:
        ValueError: if ``retries`` is smaller than 1.
        Exception: the exception of the last failed attempt (API error or
            unparsable reply) once all attempts are used up.
    """
    if retries < 1:
        # Previously this fell through to `raise last_exc` with an unbound name.
        raise ValueError("retries must be at least 1")

    last_exc = None
    for attempt in range(retries):
        try:
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens
            )
            # `content` can be None (e.g. empty completion); treat it as empty text
            # so it surfaces as a parse error instead of an AttributeError.
            content = (resp.choices[0].message.content or "").strip()
            parsed = _first_json_object(content)
            return parsed, content  # parsed dict and raw content for debugging
        except Exception as e:
            print(e)
            last_exc = e
            # exponential backoff, but do not wait after the final attempt
            if attempt < retries - 1:
                time.sleep(1.5 ** attempt)
    # all attempts failed: re-raise the last exception
    raise last_exc

# === main pipeline ===
def _cell_text(value):
    """Return a cell value as stripped text; missing values (None/NaN) become ''."""
    if value is None:
        return ""
    if not isinstance(value, str) and pd.api.types.is_scalar(value) and pd.isna(value):
        # str(nan) would be the literal text "nan" and get sent to the model
        return ""
    return str(value).strip()


def _binary_from_response(parsed):
    """Extract the 0/1 decision from the binary-stage JSON reply.

    Uses the numeric "binary" field when it converts to int; otherwise falls
    back to the "label" field.
    """
    try:
        return int(parsed.get("binary"))
    except (TypeError, ValueError):
        label = str(parsed.get("label", "")).strip().lower()
        # "no_eval" contains the substring "eval", so a substring test would
        # mislabel non-evaluative rows as 1; match on the prefix instead.
        return 1 if label.startswith("eval") else 0


def classify_excel(input_file=EXCEL_IN, text_col=TEXT_COL, output_file=EXCEL_OUT):
    """Classify every row of an Excel sheet in two stages and save the result.

    Stage 1 asks the model whether the row's text is evaluative (PROMPT_BINARY).
    Rows labelled 1 go through stage 2 (PROMPT_MULTICLASS). The texts of the
    rows directly above and below are passed as context. Results, error notes
    and raw model outputs are written to new columns.

    Args:
        input_file: path of the Excel file to read.
        text_col: name of the column holding the texts.
        output_file: path of the Excel file to write.

    Raises:
        KeyError: if ``text_col`` is not a column of the input sheet.
    """
    df = pd.read_excel(input_file)
    print(df)
    if text_col not in df.columns:
        # Without this check every row would silently be labelled as empty.
        raise KeyError(f"column {text_col!r} not found in {input_file!r}")

    # prepare columns
    for col in (
        "binary",
        "binary_justification",
        "binary_evidence_span",
        "multiclass_label",
        "multiclass_probability",
        "multiclass_spans",
        "multiclass_explanation",
        "binary_raw_output",
        "multiclass_raw_output",
    ):
        df[col] = None

    # Normalise all texts once; neighbours are then looked up by position, which
    # does not depend on the DataFrame index being 0..n-1.
    texts = [_cell_text(value) for value in df[text_col]]
    last_pos = len(texts) - 1

    for pos, idx in enumerate(df.index):
        text = texts[pos]
        if not text:
            df.at[idx, "binary"] = 0
            df.at[idx, "binary_justification"] = "leer"  # German for "empty"
            continue

        # Binary classification
        prev_text = texts[pos - 1] if pos > 0 else ""
        next_text = texts[pos + 1] if pos < last_pos else ""
        print(prev_text, text, next_text)
        messages_bin = [
            {"role": "system", "content": "You are a precise classifier of appraisal theory. Respond only with valid JSON."},
            {"role": "user", "content": PROMPT_BINARY.format(
                prev_text=prev_text,
                target_text=text,
                next_text=next_text
            )}
        ]

        try:
            parsed_bin, raw_bin = call_model(messages_bin)
            if not isinstance(parsed_bin, dict):
                raise ValueError("model did not return a JSON object")
            print(parsed_bin, raw_bin)
        except Exception as e:
            print(e)
            # on failure mark as unknown and continue
            df.at[idx, "binary"] = None
            df.at[idx, "binary_justification"] = f"error: {e}"
            df.at[idx, "binary_raw_output"] = ""
            continue

        # write binary results
        df.at[idx, "binary_raw_output"] = raw_bin
        bin_val = _binary_from_response(parsed_bin)
        df.at[idx, "binary"] = bin_val
        df.at[idx, "binary_justification"] = parsed_bin.get("justification", "")
        df.at[idx, "binary_evidence_span"] = parsed_bin.get("evidence_span", "")

        # If binary == 1 -> multiclass
        if bin_val == 1:
            messages_multi = [
                {"role":"system", "content":"You are a precise classifier of appraisal theory . Respond only with valid JSON."},
                {"role":"user", "content": PROMPT_MULTICLASS.format(
                    prev_text=prev_text,
                    target_text=text,
                    next_text=next_text
                )}
            ]
            try:
                parsed_multi, raw_multi = call_model(messages_multi)
                if not isinstance(parsed_multi, dict):
                    raise ValueError("model did not return a JSON object")
                print(parsed_multi, raw_multi)
            except Exception as e:
                print(e)
                df.at[idx, "multiclass_label"] = None
                df.at[idx, "multiclass_explanation"] = f"error: {e}"
                df.at[idx, "multiclass_raw_output"] = ""
                time.sleep(BATCH_DELAY)
                continue

            df.at[idx, "multiclass_raw_output"] = raw_multi
            df.at[idx, "multiclass_label"] = parsed_multi.get("label")
            # ensure probability numeric
            try:
                df.at[idx, "multiclass_probability"] = float(parsed_multi.get("probability", 0.0))
            except Exception:
                df.at[idx, "multiclass_probability"] = None
            spans = parsed_multi.get("top_spans", [])
            if isinstance(spans, list):
                # str() each item: a non-string span would make join() raise and
                # abort the whole run
                df.at[idx, "multiclass_spans"] = " ||| ".join(str(span) for span in spans)
            else:
                df.at[idx, "multiclass_spans"] = spans
            df.at[idx, "multiclass_explanation"] = parsed_multi.get("explanation", "")

        # polite delay (tune as needed)
        time.sleep(BATCH_DELAY)

    # save
    df.to_excel(output_file, index=False)
    print(f"Saved classified Excel to {output_file}")

# Entry point: run the full classification with the default file settings
# (EXCEL_IN, TEXT_COL, EXCEL_OUT) when this code is executed as the main module.
if __name__ == "__main__":
    classify_excel()


                                                  text
0                                       I'm a painter.
1    I make large-scale figurative paintings, which...
2    But I'm here tonight to tell you about somethi...
3    It's something we all go through, and my hope ...
4    To give you some background on me, I grew up t...
..                                                 ...
137  And then there's space, and in that space feel...
138  And be curious to connect to what and who is r...
139                             It's what we all want.
140  Let's take the opportunity to find something b...
141                                         Thank you.

[142 rows x 1 columns]
 I'm a painter. I make large-scale figurative paintings, which means I paint people like this.
{'binary': 0, 'label': 'no_eval', 'justification': "The statement 'I'm a painter.' is a factual self-identification without emotional tone, judgment, or evaluation of quality or value.", 'evidence_span': ''} {
  "binary":