# What needs to be done
- Chain-of-thought prompting
- Self-consistency (5–10 samples → majority vote)
- Reflexion (LLM critiques and fixes its own answer)
- Agent loop

# API and Model Check

In [1]:
# %% Minimal setup
# If needed (uncomment in a notebook):
# !pip install requests python-dotenv



import os, json, textwrap, re, time
import requests

# Endpoint configuration. Each value can be overridden with an environment
# variable so the key and host do not have to be edited into the notebook;
# when a variable is unset the original default is used.
API_KEY  = os.environ.get("CSE476_API_KEY", "cse476")
# Strip a trailing slash so f"{API_BASE}/chat/completions" never contains "//".
API_BASE = os.environ.get("CSE476_API_BASE", "http://10.4.58.53:41701/v1").rstrip("/")
MODEL    = os.environ.get("CSE476_MODEL", "bens_model")

def call_model_chat_completions(prompt: str,
                                system: str = "You are a helpful assistant. Reply with only the final answer—no explanation.",
                                model: str = MODEL,
                                temperature: float = 0.0,
                                timeout: int = 60,
                                max_tokens: int = 128) -> dict:
    """
    Calls an OpenAI-style /v1/chat/completions endpoint and returns:
    { 'ok': bool, 'text': str or None, 'raw': dict or None, 'status': int, 'error': str or None, 'headers': dict }

    Args:
        prompt: Content of the user message.
        system: Content of the system message.
        model: Model name sent in the request payload.
        temperature: Sampling temperature sent in the request payload.
        timeout: Timeout in seconds passed to requests.post.
        max_tokens: Completion token limit sent in the request payload
            (defaults to 128, the value that used to be fixed).

    Returns:
        A dict with the keys listed above. On HTTP 200 with a JSON body,
        'ok' is True and 'text' is the first choice's message content, or ""
        when the response carries no choices or no content. On any other
        status, or a 200 whose body is not JSON, 'ok' is False and 'error'
        holds the details. When the request itself fails, 'status' is -1 and
        'headers' is empty.
    """
    url = f"{API_BASE}/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type":  "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user",   "content": prompt}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return {"ok": False, "text": None, "raw": None, "status": -1, "error": str(e), "headers": {}}

    status = resp.status_code
    hdrs   = dict(resp.headers)

    if status != 200:
        # try best-effort to surface error text
        try:
            err_text = resp.json()
        except ValueError:
            err_text = resp.text
        return {"ok": False, "text": None, "raw": None, "status": status, "error": str(err_text), "headers": hdrs}

    try:
        data = resp.json()
    except ValueError as e:
        # A 200 whose body is not JSON is reported with its real status code
        # instead of escaping as an exception or being labelled a transport failure.
        return {"ok": False, "text": None, "raw": None, "status": status,
                "error": f"Response body is not valid JSON: {e}", "headers": hdrs}

    # An empty "choices" list or a null message/content must not raise or
    # leak None into 'text' on a result marked ok.
    choices = data.get("choices") if isinstance(data, dict) else None
    first_choice = choices[0] if isinstance(choices, list) and choices else {}
    message = (first_choice.get("message") if isinstance(first_choice, dict) else None) or {}
    text = (message.get("content") if isinstance(message, dict) else None) or ""
    return {"ok": True, "text": text, "raw": data, "status": status, "error": None, "headers": hdrs}




In [2]:
# %% Direct call example
demo_prompt = "What is 17 + 28? Answer with just the number."
result = call_model_chat_completions(demo_prompt)
print("OK:", result["ok"], "HTTP:", result["status"])
print("MODEL SAYS:", (result["text"] or "").strip())
if not result["ok"]:
    # Show why the call failed instead of only printing an empty answer.
    print("ERROR:", result["error"])

# Optional: Inspect rate-limit headers if your provider exposes them.
# result["headers"] is a plain dict, so its keys keep whatever casing the
# server sent; lowercase them first so the lookups below are case-insensitive.
headers_lower = {name.lower(): value for name, value in result["headers"].items()}
for k in ["x-ratelimit-remaining-requests", "x-ratelimit-limit-requests", "x-request-id"]:
    if k in headers_lower:
        print(f"{k}: {headers_lower[k]}")


OK: True HTTP: 200
MODEL SAYS: 45


### Chain of Thought Prompting

In [8]:
# System prompt sent with every chain-of-thought request.
system = "You are a helpful assistant. Explain the steps you used to solve the problem you were given."


def chain_of_thought(question):
    """Ask the model to explain its steps for `question` and return the reply text.

    The question is wrapped in an "explain the steps" instruction and sent
    through call_model_chat_completions with the module-level `system` prompt,
    MODEL, and temperature 0.5. The 'text' entry of the result is returned
    as-is.
    """
    cot_prompt = "Explain the steps you used to solve the problem: {}".format(question)
    reply = call_model_chat_completions(
        cot_prompt,
        system=system,
        model=MODEL,
        temperature=0.5,
    )
    return reply["text"]

# Demo: run one arithmetic question through chain_of_thought and show the reply.
cot_demo_question = "What is 800 * 43? Answer with just the number."
print(chain_of_thought(cot_demo_question))

def self_consistency():
    """Placeholder for the self-consistency step; not implemented yet.

    Takes no arguments, does nothing, and returns None.
    """
    return None

To solve 800 * 43:

1. Multiply 800 by 40:  
   $ 800 \times 40 = 32,000 $

2. Multiply 800 by 3:  
   $ 800 \times 3 = 2,400 $

3. Add the two results together:  
   $ 32,000 + 2,400 = 34,400 $

**Answer: 34400**


### Agent Loop

In [None]:
def agent_loop():
    """Placeholder for the agent loop; not implemented yet.

    Takes no arguments, does nothing, and returns None.
    """
    return None