# CSE 476 Final

## API and Model Check

In [33]:
# %% Minimal setup
# If needed (uncomment in a notebook):
# !pip install requests python-dotenv



import os, json, textwrap, re, time
import requests

API_KEY  = "cse476"
API_BASE = "http://10.4.58.53:41701/v1"
MODEL    = "bens_model"

def call_model_chat_completions(prompt: str,
                                system: str = "You are a helpful assistant. Reply with only the final answer—no explanation.",
                                model: str = MODEL,
                                temperature: float = 0.0,
                                timeout: int = 60,
                                max_tokens: int = 128) -> dict:
    """
    Call an OpenAI-style /v1/chat/completions endpoint with one system and one user message.

    Args:
        prompt: Content of the user message.
        system: Content of the system message.
        model: Model name sent in the request payload.
        temperature: Sampling temperature sent in the request payload.
        timeout: Timeout in seconds passed to ``requests.post``.
        max_tokens: Completion length cap sent in the request payload.

    Returns:
        A dict with the keys
        { 'ok': bool, 'text': str or None, 'raw': dict or None, 'status': int, 'error': str or None, 'headers': dict }.
        'status' is the HTTP status code, or -1 when the request itself failed.
        'text' is None whenever 'ok' is False. On an HTTP 200 whose JSON has no
        usable ``choices[0].message.content``, 'ok' is True and 'text' is "".
    """
    url = f"{API_BASE}/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type":  "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user",   "content": prompt},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        # Transport-level failure (connection error, timeout, ...): no HTTP status exists.
        return {"ok": False, "text": None, "raw": None, "status": -1, "error": str(e), "headers": {}}

    status = resp.status_code
    hdrs = dict(resp.headers)

    if status != 200:
        # Best effort to surface the error body: prefer parsed JSON, fall back to raw text.
        try:
            err_text = resp.json()
        except Exception:
            err_text = resp.text
        return {"ok": False, "text": None, "raw": None, "status": status, "error": str(err_text), "headers": hdrs}

    try:
        data = resp.json()
    except ValueError as e:
        # A 200 with a non-JSON body; report it instead of letting the exception escape.
        return {"ok": False, "text": None, "raw": None, "status": status,
                "error": f"Invalid JSON in response body: {e}", "headers": hdrs}

    # Walk choices[0].message.content defensively: an empty "choices" list or a
    # null "message"/"content" must not raise.
    choices = data.get("choices") if isinstance(data, dict) else None
    first_choice = choices[0] if isinstance(choices, list) and choices else None
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    text = content if content is not None else ""
    return {"ok": True, "text": text, "raw": data, "status": status, "error": None, "headers": hdrs}


In [2]:
# %% Direct call example
demo_prompt = "What is 17 + 28? Answer with just the number."
result = call_model_chat_completions(demo_prompt)
print("OK:", result["ok"], "HTTP:", result["status"])
print("MODEL SAYS:", (result["text"] or "").strip())

# Optional: Inspect rate-limit headers if your provider exposes them.
# result["headers"] is a plain dict, so key lookup is case-sensitive. Fold the
# header names to lowercase first so differently-cased names still match.
headers_by_lower_name = {name.lower(): value for name, value in result["headers"].items()}
for k in ["x-ratelimit-remaining-requests", "x-ratelimit-limit-requests", "x-request-id"]:
    if k in headers_by_lower_name:
        print(f"{k}: {headers_by_lower_name[k]}")


OK: True HTTP: 200
MODEL SAYS: 45


### Functions for the agent

In [None]:
from collections import Counter

# System prompt sent by chain_of_thought: asks the model to explain its steps briefly.
system = (
    "You are a helpful assistant. "
    "Explain the steps you used to solve the problem you were given within 1-3 steps."
)

# Sample question for manually trying the helpers in this cell.
test_prompt = (
    "In a race, you pass the person in second place. What position are you now in? "
    "Answer with a single word like 'first', 'second', 'third'."
)

def chain_of_thought(question):
    """
    Ask the model for up to three bullet points of reasoning followed by the final answer.

    Args:
        question: The question text, embedded verbatim in the prompt.

    Returns:
        The model's reply with surrounding whitespace stripped, or "" when the
        call produced no text (for example because the request failed).
    """
    prompt = f"Give 3 bullet points maximum of concise reasoning and then the final answer: {question}"
    result = call_model_chat_completions(prompt, system=system, model=MODEL, temperature=0.1)
    # "text" is None on a failed request; guard so .strip() cannot raise AttributeError.
    return (result["text"] or "").strip()

#print(chain_of_thought(test_prompt))

def _extract_final_answer(response):
    """Return a lowercase, punctuation-trimmed key for the final answer in *response*."""
    # Prefer the text after the last "Answer:" style label (tolerating markdown
    # emphasis such as "**Answer:** second"); "." stops at the end of the line.
    labelled = re.findall(r"(?i)answer\s*\**\s*[:\-]\s*\**\s*(.*)", response)
    candidate = labelled[-1].strip() if labelled else ""
    if not candidate:
        # No label found (or nothing after it): fall back to the last non-empty line.
        non_empty = [line.strip() for line in response.splitlines() if line.strip()]
        candidate = non_empty[-1] if non_empty else ""
    return candidate.strip("*_`\"'.!?: \t").lower()


def self_consistency(question, steps=5):
    """
    Sample several chain-of-thought replies and return one that carries the majority answer.

    The vote is taken over the final answer extracted from each reply, not over
    the whole reply text, so samples that agree on the answer but word their
    reasoning differently still count as agreeing.

    Args:
        question: The question passed to ``chain_of_thought``.
        steps: Number of samples to draw.

    Returns:
        The first sampled reply whose final answer is the most common one, or ""
        when no non-empty reply was collected (for example ``steps <= 0``).
    """
    replies = []
    for _ in range(steps):
        reply = (chain_of_thought(question) or "").strip()
        if reply:
            replies.append(reply)

    if not replies:
        return ""

    keyed_replies = [(_extract_final_answer(reply), reply) for reply in replies]
    votes = Counter(key for key, _ in keyed_replies)
    # most_common keeps first-seen order among ties, so the result is deterministic.
    winning_key = votes.most_common(1)[0][0]
    return next(reply for key, reply in keyed_replies if key == winning_key)

#print(self_consistency(test_prompt))


- Passing the second-place runner means you take their position.  
- You were originally in first place.  
- Therefore, you are now in second place.  

**Answer:** second


### Agent Loop

In [None]:
def agent_loop():
    """Placeholder for the agent loop; not implemented yet and returns None."""
    return None

### Evaluation

In [None]:
# %% Simple normalization and evaluation helpers
def normalize_text(s: str) -> str:
    """
    Canonicalize an answer string for comparison.

    The input (None is treated as "") is stripped and lowercased. Every
    character that is not a word character, whitespace, hyphen or apostrophe is
    replaced by a space, and runs of whitespace are collapsed. If the whole
    result is one of a few known phrasings, it is mapped to its canonical form.
    """
    # Known alternative phrasings -> canonical answer used in these tests.
    canonical_forms = {
        "unchanged": "stay the same",
        "no change": "stay the same",
        "same": "stay the same",
        "second place": "second",
        "2nd": "second",
        "first place": "first",
        "third place": "third",
    }

    lowered = (s or "").strip().lower()
    # Punctuation becomes a space rather than vanishing, so "a,b" reads as "a b".
    depunctuated = re.sub(r"[^\w\s\-']", " ", lowered)
    cleaned = re.sub(r"\s+", " ", depunctuated).strip()

    if cleaned in canonical_forms:
        return canonical_forms[cleaned]
    return cleaned

def extract_number(s: str):
    """
    Return the first number found in *s* as a string, or None.

    A number is an optional sign, one or more digits, and an optional
    fractional part. None is returned for empty/None input or when no number
    is present.
    """
    found = re.search(r"[-+]?\d+(\.\d+)?", s) if s else None
    return None if found is None else found.group(0)

def grade(expected: str, got: str, kind: str) -> bool:
    """
    Decide whether the answer *got* matches *expected*.

    Args:
        expected: The reference answer.
        got: The answer to grade.
        kind: "numeric" compares the first number found in each string by
            value; any other kind compares the normalized texts for equality.

    Returns:
        True when the answers match. For "numeric", False whenever either
        string contains no number.
    """
    if kind != "numeric":
        return normalize_text(got) == normalize_text(expected)

    # Local import keeps this function self-contained.
    from decimal import Decimal

    exp_num = extract_number(expected)
    got_num = extract_number(got)
    if exp_num is None or got_num is None:
        return False
    # Compare by value, not by spelling, so "45", "45.0" and "+45" all agree.
    # Decimal avoids binary floating-point rounding on decimal text.
    return Decimal(got_num) == Decimal(exp_num)

def evaluate_tests(tests, model=MODEL):
    """
    Run every test case against the model, print a report, and return the per-test rows.

    Args:
        tests: Iterable of dicts read via the keys "id", "prompt", "expected" and "type".
        model: Model name forwarded to ``call_model_chat_completions``.

    Returns:
        A list with one dict per test holding "id", "expected", "got",
        "correct", "status" and "error".
    """
    rows = []
    for case in tests:
        response = call_model_chat_completions(
            case["prompt"],
            system="You are a careful solver. Reply ONLY with the final answer, nothing else.",
            model=model,
            temperature=0.0,
        )
        # A failed call has text None; grade it as an empty answer.
        answer = (response["text"] or "").strip()
        passed = grade(case["expected"], answer, case["type"])
        row = {
            "id": case["id"],
            "expected": case["expected"],
            "got": answer,
            "correct": passed,
            "status": response["status"],
            "error": response["error"],
        }
        rows.append(row)
        # Tiny pacing to be polite to the API
        time.sleep(0.2)

    # Print a small report
    num_correct = len([row for row in rows if row["correct"]])
    print(f"Score: {num_correct}/{len(rows)} correct")
    for row in rows:
        if row["correct"]:
            mark = "✅"
        else:
            mark = "❌"
        print(f"{mark} {row['id']}: expected={row['expected']!r}, got={row['got']!r} (HTTP {row['status']})")
        if row["error"]:
            print("   error:", row["error"])
    return rows

# Run the evaluation and keep the per-test rows for later inspection.
# NOTE(review): `tests` is not defined in the visible part of this notebook —
# confirm an earlier cell creates it. evaluate_tests reads each item via the
# keys "id", "prompt", "expected" and "type".
results = evaluate_tests(tests)


#### References
- Self Consistency: https://www.promptingguide.ai/techniques/consistency