#### Explore Graph API Permission Knowledge Trained into CodeLlama LLM

In [3]:
OLLAMA_URL = "http://ollamaserver_nuc1:11434/api/generate"
MODEL_NAME = "codellama:34b"
#MODEL_NAME = "codellama:7b"


In [10]:
# Ask the CodeLlama model to identify itself, its expertise, and its training-data cutoff
user_content = (
    "Identify yourself briefly. Return a short JSON object with fields: "
    "'model' (string), 'expertise' (list of short strings), and 'training_data_cutoff' (ISO date or year). "
    "Keep the response concise."
)

identify_payload = {
    "model": MODEL_NAME,
    "messages": [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ],
    "temperature": temperature,
    "max_tokens": 200,
}

resp = requests.post(OLLAMA_URL, json=identify_payload, timeout=60)
resp.raise_for_status()
identify_result = resp.json()

identify_reply = None
if "choices" in identify_result and identify_result["choices"]:
    identify_reply = identify_result["choices"][0].get("message", {}).get("content")
elif "results" in identify_result and identify_result["results"]:
    identify_reply = identify_result["results"][0].get("content")

if identify_reply:
    print(identify_reply)
else:
    print(json.dumps(identify_result, indent=2))

{
  "model": "codellama:34b",
  "created_at": "2025-12-18T17:07:35.916503184Z",
  "response": "",
  "done": true,
  "done_reason": "load"
}


In [11]:
import requests
import json

# Example: set system prompt and temperature when calling the CodeLlama model
system_prompt = "You are a concise assistant that returns short, precise code examples."
temperature = 0.1  # lower = more deterministic, higher = more creative

payload = {
    "model": MODEL_NAME,
    "messages": [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Provide a one-line Python function that reverses a string."}
    ],
    "temperature": temperature,
    "max_tokens": 150,
}

resp = requests.post(OLLAMA_URL, json=payload, timeout=60)
resp.raise_for_status()
result = resp.json()

# Extract and print the model's reply (response structure may vary by server)
reply = None
if "choices" in result and result["choices"]:
    reply = result["choices"][0].get("message", {}).get("content")
elif "results" in result and result["results"]:
    # Ollama sometimes returns 'results'
    reply = result["results"][0].get("content")

if reply:
    print(reply)
else:
    print(json.dumps(result, indent=2))

{
  "model": "codellama:34b",
  "created_at": "2025-12-18T17:07:47.395186015Z",
  "response": "",
  "done": true,
  "done_reason": "load"
}


In [19]:
def ask_llama(question: str, echo: bool = True, raise_on_error: bool = False, **ask_kwargs):
    """
    Ask the CodeLlama model a question and return (reply, raw_result).

    Parameters:
      - question: the user question to send to the model
      - echo: if True, print the reply (or raw response if no text)
      - raise_on_error: if True, re-raise exceptions instead of printing them
      - **ask_kwargs: forwarded to ask_model (e.g., system_prompt, temperature, max_tokens, timeout)

    Returns:
      (reply: Optional[str], raw_result: dict or None)
    """
    if not question or not isinstance(question, str):
        raise ValueError("question must be a non-empty string")

    try:
        reply, raw = ask_model(question, **ask_kwargs)
    except Exception as e:
        if raise_on_error:
            raise
        print(f"Error calling model: {e}")
        return None, None

    if echo:
        if reply:
            print(reply)
        else:
            print("Model returned no text. Raw response:")
            print(json.dumps(raw, indent=2))

    return reply, raw

In [24]:
# Call ask_llama and print its reply (suppress echo inside the function so we print consistently)
question = "Provide a one-line Python function that reverses a string."
reply, raw = ask_llama(question, echo=False)

if reply:
    print("Model reply:\n", reply)
else:
    print("Model returned no textual reply. Raw response:")
    print(json.dumps(raw, indent=2))

Model returned no textual reply. Raw response:
{
  "model": "codellama:34b",
  "created_at": "2025-12-18T17:41:52.030943856Z",
  "response": "",
  "done": true,
  "done_reason": "load"
}


In [22]:
def ask_model(user_text, model=MODEL_NAME, system_prompt=None, temperature=0.1, max_tokens=512, timeout=60):
    """Send a request to the Ollama generate endpoint and return (reply_text, raw_json).

    Uses the chat-style "messages" payload when a system prompt is provided.
    """
    import requests, json

    if system_prompt is None:
        system_prompt = ""

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_text},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    resp = requests.post(OLLAMA_URL, json=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # Normalize reply extraction across possible response formats
    reply = None
    if "choices" in data and data["choices"]:
        reply = data["choices"][0].get("message", {}).get("content")
    elif "results" in data and data["results"]:
        # Ollama sometimes returns a results list with dicts
        first = data["results"][0]
        if isinstance(first, dict):
            reply = first.get("content")
        else:
            reply = first
    else:
        # older scripts used a top-level 'response' key
        reply = data.get("response")

    # Ensure reply is a stripped string if present
    if isinstance(reply, str):
        reply = reply.strip()

    return reply, data

In [23]:
# Examples showing how to call ask_model and handle responses

# 1) Basic call (uses MODEL_NAME and defaults)
prompt = "Provide a one-line Python function that reverses a string."
reply, raw = ask_model(prompt)
if reply:
    print("Reply:\n", reply)
else:
    print("No textual reply. Raw response:")
    print(json.dumps(raw, indent=2))

# 2) With a custom system prompt, temperature, and max_tokens
reply2, raw2 = ask_model(
    "Give 3 concise tips for debugging Python code.",
    system_prompt="You are concise and practical.",
    temperature=0.2,
    max_tokens=120,
)
print("\nReply2:", reply2 or "(no text)")
print("Raw2 done_reason:", raw2.get("done_reason"))

# 3) Override model, increase timeout, and catch exceptions
try:
    reply3, raw3 = ask_model(
        "Summarize the benefits of unit tests in 2 sentences.",
        model="codellama:34b",
        timeout=30,
    )
    print("\nReply3:", reply3 or "(no text)")
except Exception as e:
    print("ask_model raised an exception:", e)

No textual reply. Raw response:
{
  "model": "codellama:34b",
  "created_at": "2025-12-18T17:41:07.842901317Z",
  "response": "",
  "done": true,
  "done_reason": "load"
}

Reply2: (no text)
Raw2 done_reason: load

Reply3: (no text)
