## Log Probs

In [1]:
import requests
from tqdm import tqdm

In [2]:
url = "http://localhost:11434/api/generate"
payload = {
    "model": "dolphin-mistral",
    "prompt": "Why is the sky blue?",
    "stream": False,
}
ret = requests.post(url, json=payload)

In [3]:
import json
print(ret.content.decode('utf8'))
resp = json.loads(ret.content)

{"model":"dolphin-mistral","created_at":"2026-01-01T09:53:10.188414Z","response":"The sky appears blue due to a combination of factors related to light and our atmosphere. When sunlight passes through Earth's atmosphere, it scatters in all directions. The shorter wavelengths, such as blue, are scattered more efficiently than the longer wavelengths, like red or yellow. This is because blue light has smaller particles that can scatter it more easily than other colors. As a result, we perceive the sky as blue because our eyes are most sensitive to blue light and our brains interpret this scattered light as a blue color.","done":true,"done_reason":"stop","context":[32001,1587,13,1976,460,15052,721,262,28725,264,10865,16107,13892,28723,13,32000,28705,13,32001,2188,13,7638,349,272,7212,5045,28804,32000,28705,13,32001,13892,13,1014,7212,8045,5045,2940,298,264,9470,302,8612,5202,298,2061,304,813,13789,28723,1684,22950,15167,1059,8599,28742,28713,13789,28725,378,752,270,1532,297,544,14278,28723

In [None]:
# Check for classification results, not exactly as in training dataset

def prompt_ollama(prompt_text, model="dolphin-mistral"):
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt_text,
        "stream": False,
        "temperature": 0,
        "logprobs": True,
        "top_logprobs": 2
    }
    return requests.post(url, json=payload).json()


resp = prompt_ollama("Please classify the following sentence as either True or False and only return True or False and nothing else: Auckland is the capital of New Zealand")
pprint(resp)

{'context': [32001,
             1587,
             13,
             1976,
             460,
             15052,
             721,
             262,
             28725,
             264,
             10865,
             16107,
             13892,
             28723,
             13,
             32000,
             28705,
             13,
             32001,
             2188,
             13,
             12069,
             875,
             1575,
             272,
             2296,
             12271,
             390,
             2477,
             6110,
             442,
             8250,
             304,
             865,
             604,
             6110,
             442,
             8250,
             304,
             2511,
             1112,
             28747,
             330,
             1384,
             1207,
             349,
             272,
             5565,
             302,
             1450,
             12696,
             32000,
             28705,
  

In [6]:
resp['logprobs']

[{'token': 'False',
  'logprob': -0.2837532162666321,
  'bytes': [70, 97, 108, 115, 101],
  'top_logprobs': [{'token': 'False',
    'logprob': -0.2837532162666321,
    'bytes': [70, 97, 108, 115, 101]},
   {'token': 'True',
    'logprob': -1.440993070602417,
    'bytes': [84, 114, 117, 101]}]}]

In [7]:
import math
for logp in resp['logprobs'][0]['top_logprobs']:
    # print(logp, log[])
    print(logp['token'], logp['logprob'])

    p = math.e ** logp['logprob']
    print(p)


False -0.2837532162666321
0.7529524381935939
True -1.440993070602417
0.2366925894792158


In [15]:
url = "http://localhost:11434/api/tags"
payload = {
    # "model": "dolphin-mistral",
    # "prompt": "Why is the sky blue?",
    # "stream": False
}
ret = requests.get(url, json=payload)
model_ids = [x['name'] for x in ret.json()['models']]
model_ids

['llama3:8b',
 'nous-hermes:latest',
 'dolphin-mistral:latest',
 'deepseek-r1:14b']

In [23]:
def classify_prompt_with_confidence_scores(prompt_text, model="dolphin-mistral"):
    resp = prompt_ollama(prompt_text=prompt_text, model=model)

    result = {}
    for logp in resp['logprobs'][0]['top_logprobs']:
        # print(logp, log[])
        # print(logp['token'], logp['logprob'])

        p = math.e ** logp['logprob']
        result[logp['token']] = p

    return result

prompt = "Please answer the following question with True or False only. Amsterdam is the capital of Netherlands?"
# pred = classify_prompt_with_confidence_scores(prompt)
# pprint(pred)

In [26]:
def try_models(prompt, models=None):
    if not models:
        models = model_ids[:]
    for m in model_ids:
        pred = classify_prompt_with_confidence_scores(prompt, m)
        print(m, pred)


try_models("The quick brown fox jumped over the", ["llama3:8b"])

llama3:8b {'lazy': 0.5253462036219801, 'I': 0.147728060287421}
nous-hermes:latest {' The': 0.44549545274199737, ' No': 0.03655876495276468}
dolphin-mistral:latest {'l': 0.4884169001941493, 'The': 0.35448640103338597}
deepseek-r1:14b {'<think>': 0.9999999852942977, '</think>': 2.385354525698507e-09}


In [32]:
resp = prompt_ollama("Please finish this sentence in 3 words, responding only with the missing words, not repeating the input. Here is the sentence: The quick brown fox jumped over ")
pprint(resp)

{'context': [32001,
             1587,
             13,
             1976,
             460,
             15052,
             721,
             262,
             28725,
             264,
             10865,
             16107,
             13892,
             28723,
             13,
             32000,
             28705,
             13,
             32001,
             2188,
             13,
             12069,
             7446,
             456,
             12271,
             297,
             28705,
             28770,
             3085,
             28725,
             26167,
             865,
             395,
             272,
             6925,
             3085,
             28725,
             459,
             5683,
             1077,
             272,
             2787,
             28723,
             4003,
             349,
             272,
             12271,
             28747,
             415,
             2936,
             9060,
             285,
             11