In [1]:
from tqdm import tqdm
from openai import OpenAI
import os
from dotenv import load_dotenv
from huggingface_hub import login
import requests
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a .env file (if present) before the credentials are read.
load_dotenv()

# Hand the Hugging Face token found in the environment to `login`.
login(os.environ.get('HF_TOKEN'))

# OpenAI-SDK client aimed at the OpenRouter endpoint rather than the default one.
# An alternative credential that was tried here: os.getenv("GITHUB_TOKEN").
client = OpenAI(
    api_key=os.environ.get('OPENROUTER_API_KEY'),
    base_url="https://openrouter.ai/api/v1",
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [5]:
# The utterance whose tone the model is asked to label.
user_input = "Um — do you know where the world’s largest ice sheet is located today?"

# The prompt is assembled line by line so that the two trailing spaces that end
# most lines are visible in the source instead of hiding at the end of a line.
classifier_input = "\n".join([
    "Please assess what personality best fits the following text. The categories are:  ",
    "- Formal  ",
    "- Casual  ",
    "- Confident  ",
    "- Hesitant  ",
    "- Analytical  ",
    "- Emotional  ",
    "- Optimistic  ",
    "- Pessimistic  ",
    "",
    "<text>  ",
    user_input,
    "</text>  ",
    "",
    "Please respond with a single word.",
])

### Simple Classifier

In [21]:
# Baseline request: send the classifier prompt as a single user message and
# ask for nothing beyond the generated text.
completion = client.chat.completions.create(
    messages=[dict(role="user", content=classifier_input)],
    model='google/gemini-2.5-flash',
)

In [22]:
# Show the text of the first returned choice, i.e. the predicted label.
print(completion.choices[0].message.content)

Hesitant


### Classifier with logprobs

In [4]:
# Same single-message request, but additionally ask for per-token
# log-probabilities and the 5 most likely candidates at each position.
completion = client.chat.completions.create(
    top_logprobs=5,
    logprobs=True,
    messages=[dict(role="user", content=classifier_input)],
    model='google/gemini-2.5-flash',
)

In [5]:
# Show the text of the first returned choice, i.e. the predicted label.
print(completion.choices[0].message.content)

Hesitant


In [6]:
# Display the whole first choice so its `logprobs` field can be inspected.
completion.choices[0]

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hesitant', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='STOP')

In [43]:
# `logprobs` can come back as None even though logprobs=True was requested
# (the choice displayed earlier in this notebook shows logprobs=None), so fail
# with a clear message instead of an AttributeError on `.content`.
assert completion.choices[0].logprobs is not None, (
    "The response carries no logprobs; the selected model/provider may not support them."
)
# Per-token logprob entries of the generated answer.
content = completion.choices[0].logprobs.content

In [51]:
# Dump the attributes of the first generated token's logprob entry.
vars(content[0])

{'token': 'H',
 'bytes': [72],
 'logprob': -4.320199877838604e-07,
 'top_logprobs': [TopLogprob(token='H', bytes=[72], logprob=-4.320199877838604e-07),
  TopLogprob(token='Cur', bytes=[67, 117, 114], logprob=-15.25),
  TopLogprob(token='Cas', bytes=[67, 97, 115], logprob=-15.75),
  TopLogprob(token=' hesitant', bytes=[32, 104, 101, 115, 105, 116, 97, 110, 116], logprob=-17.375),
  TopLogprob(token='P', bytes=[80], logprob=-17.75)]}

In [49]:
# Candidate tokens (with log-probabilities) for the first generated token only.
content[0].top_logprobs

[TopLogprob(token='H', bytes=[72], logprob=-4.320199877838604e-07),
 TopLogprob(token='Cur', bytes=[67, 117, 114], logprob=-15.25),
 TopLogprob(token='Cas', bytes=[67, 97, 115], logprob=-15.75),
 TopLogprob(token=' hesitant', bytes=[32, 104, 101, 115, 105, 116, 97, 110, 116], logprob=-17.375),
 TopLogprob(token='P', bytes=[80], logprob=-17.75)]

### Classifier with logprobs – using GitHub Models

In [6]:
def get_model_completion(
    model_input: str,
    model: str = 'openai/gpt-4.1-mini',
    top_logprobs: int = 5,
):
    """Send a single-turn chat request to GitHub Models and return the parsed reply.

    Args:
        model_input: Text sent as the sole user message.
        model: Identifier of the model to query.
        top_logprobs: Number of candidate tokens (with log-probabilities) to
            request for each generated token.

    Returns:
        A ``(completion, resp)`` tuple: the decoded JSON body and the
        underlying HTTP response, kept so callers can read its headers.

    Raises:
        RuntimeError: If the ``GITHUB_TOKEN`` environment variable is unset or empty.
        requests.HTTPError: If the service answers with an error status; the
            failed response stays reachable through the exception's
            ``response`` attribute.
    """
    url = "https://models.github.ai/inference/chat/completions"
    github_token = os.getenv("GITHUB_TOKEN")
    if not github_token:
        # Otherwise the header below would literally read "Bearer None".
        raise RuntimeError(
            "GITHUB_TOKEN is not set; it is required to call the GitHub Models API."
        )

    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": model_input
            }
        ],
        "logprobs": True,
        "top_logprobs": top_logprobs,
    }

    resp = requests.post(url, json=payload, headers=headers, timeout=30)
    # Surface HTTP failures here rather than letting callers index into an
    # error payload as if it were a completion.
    resp.raise_for_status()
    completion = resp.json()

    return completion, resp


def print_rate_limits(response):
    """Print the request and token rate-limit figures found in a response's headers.

    Args:
        response: Any object exposing a mapping-like ``headers`` attribute,
            such as the response returned by ``get_model_completion``.

    A header that is absent is reported as ``unavailable`` instead of raising
    ``KeyError``.
    """
    # (label, header name) pairs, in the order they are printed. Looking the
    # value up outside the f-string avoids nesting the same quote character
    # inside a replacement field, which only parses on Python 3.12 and later.
    report = (
        ('total rate limit requests per hour', 'x-ratelimit-limit-requests'),
        ('rate limit requests remaining this hour', 'x-ratelimit-remaining-requests'),
        ('total rate limit tokens per hour', 'x-ratelimit-limit-tokens'),
        ('rate limit tokens remaining this hour', 'x-ratelimit-remaining-tokens'),
    )
    for label, header in report:
        value = response.headers.get(header, 'unavailable')
        print(f'{label}: {value}')


In [7]:
# Query GitHub Models with the classifier prompt, keeping both the parsed JSON
# body and the raw HTTP response. Note that `completion` is rebound here from
# the SDK object used in the cells above to the decoded JSON payload.
completion, response = get_model_completion(model_input=classifier_input)

In [13]:
# Assistant message of the first choice, read from the decoded JSON payload.
output = completion['choices'][0]['message']
output

{'annotations': [],
 'content': 'Hesitant',
 'refusal': None,
 'role': 'assistant'}

In [17]:
# Per-token logprob entries of the generated answer.
all_logprobs = completion['choices'][0]['logprobs']['content']
# Disabled check that the answer consists of exactly one token.
# NOTE(review): presumably switched off because the label is split into
# several tokens (the top candidate for the first position is just 'H'), so
# the figures below describe the first token only - TODO confirm.
# assert len(all_logprobs) == 1   # ie. the model should respond with a single token

# Candidate tokens (with log-probabilities) for the first generated token.
all_logprobs[0]['top_logprobs']

[{'bytes': [72], 'logprob': -9.088346359931165e-07, 'token': 'H'},
 {'bytes': [67, 97, 115], 'logprob': -14.500000953674316, 'token': 'Cas'},
 {'bytes': [104, 101, 115], 'logprob': -14.750000953674316, 'token': 'hes'},
 {'bytes': [32, 104, 101, 115, 105, 116, 97, 110, 116],
  'logprob': -20.375,
  'token': ' hesitant'},
 {'bytes': [32, 72, 101, 115], 'logprob': -20.5, 'token': ' Hes'}]