In [None]:
# imports

import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import subprocess
from IPython.display import Markdown, display


In [None]:
# Load variables from a local .env file (values there take precedence over the
# current environment), then read one API key per provider.
load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')


def _report_key(provider, key, shown_chars, optional=True):
    """Print whether `key` is set, showing only its first `shown_chars` characters."""
    if not key:
        suffix = " (and this is optional)" if optional else ""
        print(f"{provider} API Key not set{suffix}")
        return
    print(f"{provider} API Key exists and begins {key[:shown_chars]}")


# Only the OpenAI key is reported without the "optional" hint.
_report_key("OpenAI", openai_api_key, 8, optional=False)
_report_key("Anthropic", anthropic_api_key, 7)
_report_key("Google", google_api_key, 2)
_report_key("Groq", groq_api_key, 4)
_report_key("OpenRouter", openrouter_api_key, 6)



In [None]:
# Connect to client libraries.
# Every provider is driven through the OpenAI SDK class: the first client uses
# the SDK defaults, the others are pointed at a provider-specific base URL.

openai = OpenAI()

anthropic_url = "https://api.anthropic.com/v1/"
anthropic = OpenAI(api_key=anthropic_api_key, base_url=anthropic_url)

gemini_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
gemini = OpenAI(api_key=google_api_key, base_url=gemini_url)

groq_url = "https://api.groq.com/openai/v1"
groq = OpenAI(api_key=groq_api_key, base_url=groq_url)

# Local endpoint; the key is a fixed string rather than a value from the environment.
ollama_url = "http://localhost:11434/v1"
ollama = OpenAI(api_key="ollama", base_url=ollama_url)

openrouter_url = "https://openrouter.ai/api/v1"
openrouter = OpenAI(api_key=openrouter_api_key, base_url=openrouter_url)



In [None]:
# Model identifiers offered in the UI dropdown, in display order.
models = [
    "gpt-5-nano",
    "claude-3-5-haiku-latest",
    "gemini-2.5-flash-lite",
    "qwen2.5-coder",
    "deepseek-coder-v2",
    "gpt-oss:20b",
    "qwen/qwen3-coder-30b-a3b-instruct",
    "openai/gpt-oss-120b",
]




In [None]:
from system_info import get_system_info

# Computed once via the project-local `system_info` helper and reused by later cells:
# it is JSON-serialized into the benchmark prompt and passed to the extraction helpers.
# NOTE(review): presumably describes the host machine; the exact contents are defined
# in system_info.py, which is not visible here - confirm it is JSON-serializable.
SYSTEM_INFO = get_system_info()

In [None]:

import pdfplumber

# Extract the text of every PDF page, prefixing each with a "--- PAGE n ---"
# marker (1-based). Pages with no extractable text contribute an empty body.
with pdfplumber.open("Week4_Test.pdf") as pdf:
    pages = [
        f"\n--- PAGE {number} ---\n{page.extract_text() or ''}"
        for number, page in enumerate(pdf.pages, start=1)
    ]

# Single string holding the whole document, one marked block per page.
full_text = "\n".join(pages)

In [None]:
import json

# Descriptive (non-validating) outline of the JSON object the model must return.
# It is serialized verbatim into the prompt below.
schema_hint = {
    "items": [
        {
            "code": "string|null",
            "name": "string|null",
            "qty": "number|null",
            "unit": "string|null",
            "rows": "number[]"
        }
    ],
    "totals": {
        "sum_qty": "number",
        "missing_qty_rows": "number[]",
        "notes": "string[]"
    }
}

# Polish-language extraction prompt sent as a single user message. It embeds the
# schema, the system metadata and - at the very end - the extracted document text.
# The final placeholder must interpolate `full_text`: previously that spot held
# pasted Python source, so the model never received the document itself.
message = f"""
Jesteś narzędziem do EKSTRAKCJI DANYCH z dokumentów technicznych.
Twoim jedynym zadaniem jest zliczenie ilości akcesoriów.

⚠️ KRYTYCZNE OGRANICZENIE ZAKRESU:
- Bierz pod uwagę WYŁĄCZNIE pozycje znajdujące się w sekcji zatytułowanej dokładnie:
  „Einbauteilliste” lub „GBs_0008_Beton_Einbauteilliste_v01”.
- Sekcja zaczyna się bezpośrednio po tym nagłówku.
- Sekcja kończy się PRZED pierwszym wystąpieniem nagłówka:
  „Verbindungsmittelliste”, „Stahlliste”, „Mattenliste”
  lub przed kolejnym dużym blokiem opisowym/rysunkowym.
- Wszystkie wystąpienia kodów (np. F92-…) POZA tą sekcją MUSZĄ zostać zignorowane.
- Jeśli nie jesteś w stanie jednoznacznie wyodrębnić sekcji „Einbauteilliste”,
  zwróć pustą listę `items` i dodaj odpowiednią informację do `totals.notes`.

ZASADY EKSTRAKCJI:
1) Numeruj wiersze w ramach SAMEJ sekcji „Einbauteilliste” (pierwszy wiersz = 1).
2) Każdy wiersz pozycji zawiera zwykle: kod (np. F92-…), jednostkę (np. Stk., lfm.) i ilość.
3) Agreguj po `code` (jeśli brak code, użyj `name`, a `code=null`).
4) Jeśli ten sam `code` występuje kilka razy, zsumuj `qty` i połącz numery wierszy w `rows`.
5) Jeśli ilość jest nieczytelna, brakująca lub nieliczbowa → `qty=null`
   i dodaj numer wiersza do `missing_qty_rows`.
6) Jednostki inne niż „Stk.” nadal wypisz w `unit`,
   ale NIE wliczaj ich do `totals.sum_qty`.

FORMAT WYJŚCIA:
- Zwróć WYŁĄCZNIE poprawny JSON (bez markdown, bez komentarzy).
- JSON MUSI być zgodny z poniższym schematem opisowym.

SCHEMAT:
{json.dumps(schema_hint, ensure_ascii=False)}

METADANE (do ignorowania, tylko dla logów benchmarku):
{json.dumps(SYSTEM_INFO, ensure_ascii=False)}

PEŁNY TEKST DOKUMENTU:
{full_text}
""".strip()

# Chat payload: one user turn carrying the whole prompt.
messages = [{"role": "user", "content": message}]

In [None]:
import json

# Human-readable label of the benchmark variant; interpolated into the system prompt.
task_name = (
    "Einbauteilliste accessory counting "
    "(Variant B: noisy full doc + strict section filter)"
)

# Descriptive outline of the expected JSON answer (type names are hints, not a validator).
schema_hint = dict(
    items=[
        dict(
            code="string|null",
            name="string|null",
            qty="number|null",
            unit="string|null",
            rows="number[]",
        )
    ],
    totals=dict(
        sum_qty="number",
        missing_qty_rows="number[]",
        notes="string[]",
    ),
)

# System message shared by every model call.
system_prompt = "\n".join(
    [
        "You are a strict data-extraction engine for technical documents.",
        "Return ONLY valid JSON. No markdown. No extra text.",
        "Never guess missing values: use null.",
        f"Task: {task_name}.",
    ]
)

def user_prompt_for(full_text: str, system_info: dict):
    """Build the user-turn prompt: fixed extraction instructions followed by the document.

    Args:
        full_text: Document text appended after the "DOCUMENT TEXT:" header.
        system_info: Accepted for signature symmetry with `messages_for`; it is
            not used when building the prompt.

    Returns:
        The prompt string with leading and trailing whitespace stripped (so
        trailing whitespace of `full_text` is removed as well).

    NOTE(review): the TASK line names the single code "F92-3-452" while the scope
    and aggregation rules refer to every code starting with "F92-" - confirm which
    one the benchmark intends.
    """
    instruction_lines = (
        'You are a data extraction engine.',
        '',
        'TASK:',
        'Count ONLY accessories whose code is "F92-3-452".',
        '',
        'SCOPE RULES:',
        '- Consider ONLY rows belonging to the section titled exactly:',
        '  "Einbauteilliste" or "GBs_0008_Beton_Einbauteilliste_v01".',
        '- Within that section, extract ONLY rows where the code starts with "F92-".',
        '- Ignore all other sections and all other codes.',
        '- If the Einbauteilliste section cannot be clearly identified, return empty results.',
        '',
        'RULES:',
        '- Aggregate quantities by full code (e.g. F92-3-52, F92-4-10).',
        '- Sum quantities only if unit == "Stk.".',
        '- If quantity is missing or unclear, set qty=null and report the row.',
        '',
        'OUTPUT:',
        'Return ONLY valid JSON.',
        'No markdown. No explanations.',
        '',
        'DOCUMENT TEXT:',
    )
    prompt = "\n".join(instruction_lines) + f"\n{full_text}\n"
    return prompt.strip()

In [None]:
def messages_for(full_text: str, system_info: dict):
    """Assemble the two-message chat payload (system turn, then user turn).

    Args:
        full_text: Document text forwarded to `user_prompt_for`.
        system_info: Forwarded unchanged to `user_prompt_for`.

    Returns:
        A list of two role/content dicts in the chat-completions message format.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(full_text, system_info)}
    return [system_message, user_message]
 

In [None]:
import json
from pathlib import Path

def write_output(raw_output: str, model_name: str, run_id: str):
    """Persist a model response under ./outputs, raw and (if parseable) as pretty JSON.

    Two files may be written:
      * ``<run_id>_<model_name>_raw.txt`` - always, the response exactly as received;
      * ``<run_id>_<model_name>.json``    - only when the response parses as JSON.

    Args:
        raw_output: Text returned by the model. ``None`` is stored as an empty string.
        model_name: Model identifier used in the file name. Path separators and
            colons (e.g. "org/model:tag") are replaced with "_" so the name can
            never point into a non-existent subdirectory.
        run_id: Run label used as the file-name prefix (sanitized the same way).

    Returns:
        ``{"raw": <path str>, "parsed": <path str or None>}``; "parsed" is None
        when the response is not valid JSON.

    Raises:
        OSError: if the output directory or files cannot be written.
    """
    outdir = Path("outputs")
    outdir.mkdir(exist_ok=True)

    text = "" if raw_output is None else str(raw_output)

    # Names that are already safe pass through unchanged, so existing callers
    # that pre-sanitize the model name get the same paths as before.
    stem = f"{run_id}_{model_name}"
    for unsafe in ("/", "\\", ":"):
        stem = stem.replace(unsafe, "_")

    raw_path = outdir / f"{stem}_raw.txt"
    with open(raw_path, "w", encoding="utf-8") as f:
        f.write(text)

    json_path = None
    try:
        parsed = json.loads(text)
        candidate = outdir / f"{stem}.json"
        with open(candidate, "w", encoding="utf-8") as f:
            json.dump(parsed, f, ensure_ascii=False, indent=2)
        json_path = candidate
    except (ValueError, RecursionError):
        # Not JSON (or not encodable/too deeply nested): keep only the raw file.
        # I/O errors are intentionally not caught here - they are real failures.
        json_path = None

    return {
        "raw": str(raw_path),
        "parsed": str(json_path) if json_path else None
    }

In [None]:
# Routing table: model identifier -> client object used to call it.
# Built from (client, model names) groups so each provider appears once.
MODEL_TO_CLIENT = {
    model_id: provider_client
    for provider_client, model_ids in (
        (openai, ("gpt-5-nano",)),
        (anthropic, ("claude-3-5-haiku-latest",)),
        (gemini, ("gemini-2.5-flash-lite",)),
        (ollama, ("qwen2.5-coder", "deepseek-coder-v2", "gpt-oss:20b")),
        (openrouter, ("qwen/qwen3-coder-30b-a3b-instruct",)),
        (groq, ("openai/gpt-oss-120b",)),
    )
    for model_id in model_ids
}


In [None]:
# Sanity check: every model offered in the UI should have a client mapping;
# the second line lists any that do not (expected: an empty list).
print("Models:", models)
print("Missing mappings:", list(filter(lambda name: name not in MODEL_TO_CLIENT, models)))

In [None]:
import time

def _error_json(message: str) -> str:
    """Serialize an error message as the JSON object string {"error": "<message>"}."""
    # json.dumps escapes quotes, backslashes and newlines, so the result is
    # always valid JSON regardless of what the provider put in the message.
    return json.dumps({"error": message}, ensure_ascii=False)


def run_extraction(model_name: str, full_text: str, system_info: dict):
    """Run one extraction request against the client mapped to `model_name`.

    The request is first sent with ``temperature=0``. If the provider rejects it
    with an error mentioning "temperature", it is retried once without that
    parameter. Models whose name contains "gpt" are additionally sent
    ``reasoning_effort="high"``.

    Args:
        model_name: Key into ``MODEL_TO_CLIENT``; also sent as the model id.
        full_text: Document text, forwarded to ``messages_for``.
        system_info: Forwarded to ``messages_for``.

    Returns:
        The text content of the first completion choice ("" if the provider
        returned no text), or - on any failure - a JSON string of the form
        ``{"error": "..."}``. This function does not raise for provider errors.
    """
    if model_name not in MODEL_TO_CLIENT:
        return _error_json(f"No client mapping for model {model_name}")

    client = MODEL_TO_CLIENT[model_name]

    try:
        request = {
            "model": model_name,
            "messages": messages_for(full_text, system_info),
        }
        if "gpt" in model_name:
            request["reasoning_effort"] = "high"

        try:
            response = client.chat.completions.create(temperature=0, **request)
        except Exception as first_error:
            # Only a complaint about the temperature parameter is worth a retry;
            # everything else goes to the shared error handling below.
            if "temperature" not in str(first_error):
                raise
            response = client.chat.completions.create(**request)

    except Exception as e:
        msg = str(e)
        if "model" in msg and "not found" in msg:
            return _error_json(f"Model not found on provider: {model_name}")
        return _error_json(msg)

    # message.content may be None; callers expect a string.
    return response.choices[0].message.content or ""

In [None]:
import gradio as gr
import json
from datetime import datetime


# Custom stylesheet passed to gr.Blocks; currently empty.
CSS = ""


def validate_json(raw: str):
    """Check whether `raw` parses as JSON.

    Returns:
        A ``(status, text)`` pair: on success the status is "✅ Valid JSON" and
        the text is the re-serialized, 2-space-indented JSON; on any failure the
        status carries the error message and the text is `raw` unchanged.
    """
    try:
        pretty = json.dumps(json.loads(raw), ensure_ascii=False, indent=2)
    except Exception as err:
        return f"❌ Invalid JSON: {err}", raw
    return "✅ Valid JSON", pretty


def extract_ui(model_name: str, full_text_input: str):
    """Gradio callback: run one extraction, save it, and report on the run.

    Args:
        model_name: Model selected in the dropdown.
        full_text_input: Document text taken from the input textbox.

    Returns:
        ``(raw, info, pretty_or_raw)`` - the raw model output, a multi-line run
        summary (model, sizes, wall-clock time, saved paths, JSON status), and
        the pretty-printed JSON or, if parsing failed, the raw output again.
    """
    start = time.perf_counter()
    raw = run_extraction(
        model_name=model_name,
        full_text=full_text_input,
        system_info=SYSTEM_INFO
    )
    elapsed = time.perf_counter() - start

    # Just in case run_extraction returns a tuple: keep its first element,
    # whatever the tuple length.
    if isinstance(raw, tuple):
        raw = raw[0] if raw else None
    # A response without text would break len() and the file write below.
    if raw is None:
        raw = ""

    run_id = "variantB_" + datetime.now().strftime("%Y%m%d_%H%M%S")
    # Model ids may contain "/" or ":", which are not safe in file names.
    safe_model = model_name.replace("/", "_").replace(":", "_")
    paths = write_output(raw_output=raw, model_name=safe_model, run_id=run_id)

    status, pretty_or_raw = validate_json(raw)

    info = (
        f"Model: {model_name}\n"
        f"Chars in input: {len(full_text_input)}\n"
        f"Chars in output: {len(raw)}\n"
        f"Generation time: {elapsed:.3f} s\n"
        f"Saved: {paths}\n"
        f"JSON: {status}"
    )

    return raw, info, pretty_or_raw

def validate_only(raw: str):
    """Gradio callback: re-validate the text currently in the raw-output box.

    Returns:
        ``(status_line, text)`` where the status line is prefixed with "JSON: "
        and the text is whatever `validate_json` produced for `raw`.
    """
    verdict, rendered = validate_json(raw)
    return "JSON: " + verdict, rendered

# Build the benchmark UI. Layout (top to bottom): title, input/output text boxes
# side by side, a controls row, then run log and parsed JSON side by side.
with gr.Blocks(css=CSS, theme=gr.themes.Monochrome(), title="LLM benchmark: Einbauteilliste (Variant B)") as ui:
    gr.Markdown("## LLM benchmark — zliczanie akcesoriów tylko z sekcji *Einbauteilliste* (Variant B)")

    # Row 1: document text (left) and raw model output (right).
    with gr.Row(equal_height=True):
        with gr.Column(scale=6):
            # Pre-filled with the extracted PDF text when the `full_text` global
            # exists; otherwise the box starts empty.
            full_text_box = gr.Textbox(
                label="Full document text (PDF → text)",
                value=full_text if "full_text" in globals() else "",
                lines=26
            )
        with gr.Column(scale=6):
            raw_json_box = gr.Textbox(
                label="Model output (raw)",
                value="",
                lines=26
            )

    # Row 2: model picker (defaults to the first entry of `models`) and action buttons.
    with gr.Row(elem_classes=["controls"]):
        model_dd = gr.Dropdown(models, value=models[0], show_label=False)
        run_btn = gr.Button("Run extraction")
        validate_btn = gr.Button("Validate JSON")

    # Row 3: run summary (left) and pretty-printed JSON or raw fallback (right).
    with gr.Row(equal_height=True):
        with gr.Column(scale=6):
            info_box = gr.TextArea(label="Run info / log", lines=8)
        with gr.Column(scale=6):
            pretty_box = gr.TextArea(label="Parsed JSON (pretty) / raw fallback", lines=8)

    # "Run extraction" calls the model and fills all three output boxes;
    # "Validate JSON" only re-checks whatever is currently in the raw output box.
    run_btn.click(fn=extract_ui, inputs=[model_dd, full_text_box], outputs=[raw_json_box, info_box, pretty_box])
    validate_btn.click(fn=validate_only, inputs=[raw_json_box], outputs=[info_box, pretty_box])

# Start the app; inbrowser=True asks Gradio to open it in a browser tab.
ui.launch(inbrowser=True)


## Results

Generation time per model (as reported in the "Run info / log" box):

- gpt-5-nano: 81.156 s
- gemini-2.5-flash-lite: 1.543 s
- qwen/qwen3-coder-30b-a3b-instruct: 8.108 s

