In [6]:
from tqdm import tqdm
from openai import OpenAI
import os
from dotenv import load_dotenv
from huggingface_hub import login
import requests
import json

In [9]:
load_dotenv()
# login(os.getenv('HF_TOKEN'))

# This client talks to OpenRouter, so it authenticates with the OpenRouter key.
# GITHUB_TOKEN belongs to the GitHub Models endpoint that the notebook calls
# through `requests`; passing it here would send a GitHub credential to an
# unrelated service.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

In [10]:
# Sample text that the classifier prompt below is built around.
user_input = "Um — do you know where the world’s largest ice sheet is located today?"

# Prompt asking the model to choose one of eight style labels for the text
# wrapped in <text> tags and to answer with a single word.
# user_input is interpolated when this cell runs, so rebinding user_input in a
# later cell does not change this prompt.
classifier_input = f'''Please assess what personality best fits the following text. The categories are:  
- Formal  
- Casual  
- Confident  
- Hesitant  
- Analytical  
- Emotional  
- Optimistic  
- Pessimistic  

<text>  
{user_input}
</text>  

Please respond with a single word.'''

### Simple Classifier

In [None]:
# Single-turn request: the whole classifier prompt is sent as one user message.
completion = client.chat.completions.create(
    model="google/gemini-2.5-flash",
    messages=[{"role": "user", "content": classifier_input}],
)

In [None]:
# Print the text content of the first returned choice.
print(completion.choices[0].message.content)

### Classifier with logprobs

In [None]:
# Same single-message request, additionally asking for token log-probabilities
# with the top 5 candidates per generated position.
completion = client.chat.completions.create(
    model="google/gemini-2.5-flash",
    messages=[{"role": "user", "content": classifier_input}],
    logprobs=True,
    top_logprobs=5,
)

In [None]:
# Print the text content of the first returned choice.
print(completion.choices[0].message.content)

In [None]:
# Display the whole first choice object (the cells below read its .logprobs).
completion.choices[0]

In [None]:
# Logprob entries attached to the first choice.
# presumably one entry per generated token — TODO confirm against the provider's response
content = completion.choices[0].logprobs.content

In [None]:
# Dump the raw attributes of the first entry to see which fields it carries.
content[0].__dict__

In [None]:
# Alternatives recorded for the first entry (top_logprobs=5 was requested).
content[0].top_logprobs

### Classifier with logprobs – using GitHub Models

In [None]:
# Judge model -- assessing logprobs from different models to determine max persona

def get_model_completion(
    model_input: str,
    model: str = 'openai/gpt-4.1-mini',
    top_logprobs: int = 5,
    timeout: float = 30,
):
    """
    Send one user message to the GitHub Models chat-completions endpoint and
    request token log-probabilities.

    The HTTP status is not checked: an error response is decoded and returned
    like any other, so callers should inspect the returned objects.

    Args:
        model_input: Text sent as the single user message.
        model: Model identifier placed in the request payload.
        top_logprobs: Number of alternative tokens requested per position.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        ``(completion, resp)``: the decoded JSON body and the raw response
        object, which callers can use to read response headers.

    Raises:
        RuntimeError: If the GITHUB_TOKEN environment variable is not set.
        ValueError: If the response body is not valid JSON (from ``json.loads``).
    """
    github_token = os.getenv("GITHUB_TOKEN")
    if not github_token:
        # Otherwise the request would be sent with the literal header
        # "Bearer None"; fail here, next to the actual cause.
        raise RuntimeError("GITHUB_TOKEN is not set; cannot call the GitHub Models API")

    url = "https://models.github.ai/inference/chat/completions"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": model_input
            }
        ],
        "logprobs": True,
        "top_logprobs": top_logprobs,
    }

    resp = requests.post(url, json=payload, headers=headers, timeout=timeout)
    completion = json.loads(resp.text)

    return completion, resp


def print_rate_limits(response):
    """
    Print the request and token rate-limit headers of a response.

    Args:
        response: Object exposing a ``headers`` mapping, e.g. the raw response
            returned as the second value of ``get_model_completion``.

    A header that is absent is printed as ``unknown`` instead of raising
    ``KeyError``.
    """
    # (label, header name) pairs, printed in this order.
    fields = (
        ("total rate limit requests per hour", "x-ratelimit-limit-requests"),
        ("rate limit requests remaining this hour", "x-ratelimit-remaining-requests"),
        ("total rate limit tokens per hour", "x-ratelimit-limit-tokens"),
        ("rate limit tokens remaining this hour", "x-ratelimit-remaining-tokens"),
    )
    for label, header in fields:
        # Look the value up outside the f-string: reusing the enclosing quote
        # character inside an f-string expression only parses on Python 3.12+.
        value = response.headers.get(header, "unknown")
        print(f"{label}: {value}")


In [None]:
# Send the classifier prompt with the function's default model.
# `completion` is the decoded JSON body; `response` is the raw HTTP response.
completion, response = get_model_completion(model_input=classifier_input)

In [None]:
# Message entry of the first choice in the decoded JSON body.
output = completion['choices'][0]['message']
output

In [None]:
# Logprob entries stored under choices[0] -> logprobs -> content.
all_logprobs = completion['choices'][0]['logprobs']['content']
# assert len(all_logprobs) == 1   # ie. the model should respond with a single token

# Top candidates recorded for the first entry.
all_logprobs[0]['top_logprobs']

### Getting Output from 2 Models -- Meta, OpenAI

In [11]:
# Define the 2 models via GitHub
# Maps a display name to the model ID that is sent in the request payload.
# NOTE(review): get_model_completion defaults to 'openai/gpt-4.1-mini' (with a
# publisher prefix) while the entry here is the bare 'gpt-4.1-mini' — confirm
# which form the endpoint expects.
MODELS = {
    "Meta Llama 3.1 8B": "meta/Meta-Llama-3.1-8B-Instruct",
    "OpenAI gpt-4.1-mini": "gpt-4.1-mini",
    # "Google Gemma 3 4B": "gemma-3-4b-instruct",
}

def get_github_model_response(model_name, model_id, prompt, max_tokens=100):
    """
    Get response from GitHub model using direct API.

    Request and decoding failures are reported through the returned dict
    rather than raised.

    Args:
        model_name: Display name echoed back under "model".
        model_id: Model identifier sent in the payload and echoed as "model_id".
        prompt: Text sent as the single user message.
        max_tokens: Value of the "max_tokens" payload field.

    Returns:
        On success:
            {"model", "model_id", "status": "success", "response": <text>}
        On failure:
            {"model", "model_id", "status": <label>, "error": <message>}
        where <label> is "rate_limited" (HTTP 429), "auth_error" (HTTP 401) or
        "error". When the request itself fails before any HTTP status exists,
        the label is derived from the exception text instead.
    """
    url = "https://models.github.ai/inference/chat/completions"
    github_token = os.getenv("GITHUB_TOKEN")

    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    payload = {
        "model": model_id,
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "max_tokens": max_tokens,
    }

    def failure(status, error_msg):
        return {
            "model": model_name,
            "model_id": model_id,
            "status": status,
            "error": error_msg,
        }

    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=30)
    except Exception as e:
        # The request never produced a response (timeout, connection error, ...),
        # so the message text is the only thing available to classify.
        error_msg = str(e)
        return failure(_status_from_exception_text(error_msg), error_msg)

    try:
        completion = resp.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page); keep going so the HTTP
        # status still ends up in the reported error.
        completion = None

    if resp.status_code != 200:
        # An HTTP error status does not raise, so it has to be classified here.
        return failure(
            _status_from_http_code(resp.status_code),
            _error_message_from_body(completion, resp.status_code),
        )

    try:
        message = completion['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError) as e:
        return failure("error", f"Unexpected response format: {e!r}")

    return {
        "model": model_name,
        "model_id": model_id,
        "status": "success",
        "response": message,
    }


def _status_from_http_code(status_code):
    """Map an HTTP error status to the status label used in result dicts."""
    if status_code == 429:
        return "rate_limited"
    if status_code == 401:
        return "auth_error"
    return "error"


def _status_from_exception_text(error_msg):
    """Classify a failure that has no HTTP status by keywords in its message."""
    lowered = error_msg.lower()
    if "429" in error_msg or "rate" in lowered:
        return "rate_limited"
    if "401" in error_msg or "unauthorized" in lowered:
        return "auth_error"
    return "error"


def _error_message_from_body(completion, status_code):
    """Extract an error message from a decoded error body, else 'HTTP <code>'."""
    fallback = f'HTTP {status_code}'
    if not isinstance(completion, dict):
        return fallback
    error = completion.get('error')
    if isinstance(error, dict):
        return error.get('message') or fallback
    if isinstance(error, str) and error:
        return error
    return fallback


def get_all_github_model_responses(prompt, max_tokens=100):
    """
    Query every entry of MODELS with the same prompt, one after another.

    Prints a one-line outcome per model (the response text is stripped and cut
    to 100 characters for the preview) and returns the result dicts in the
    order the models were called.
    """
    collected = []

    for display_name, identifier in MODELS.items():
        print(f"\nCalling {display_name}...")
        outcome = get_github_model_response(display_name, identifier, prompt, max_tokens)
        collected.append(outcome)

        if outcome["status"] != "success":
            print(f"✗ {display_name}: {outcome['status']} - {outcome.get('error', 'Unknown error')}")
            continue

        # A falsy response (None or "") is shown with a placeholder.
        preview = outcome['response'].strip() if outcome['response'] else "(empty)"
        print(f"✓ {display_name}: {preview[:100]}")

    return collected

In [12]:
# Send one sample prompt to every configured model
user_input = "I'm not sure about this decision."
responses = get_all_github_model_responses(user_input, max_tokens=100)

# Report each result: the full response on success, the error otherwise
for resp in responses:
    print(f"\n{resp['model']}:")
    print(f"  Status: {resp['status']}")
    print(
        f"  Response: {resp['response']}"
        if resp['status'] == 'success'
        else f"  Error: {resp.get('error', 'N/A')}"
    )


Calling Meta Llama 3.1 8B...
✓ Meta Llama 3.1 8B: Could you please provide more context or information about the decision you're considering? What are

Calling OpenAI gpt-4.1-mini...
✓ OpenAI gpt-4.1-mini: It’s completely normal to feel uncertain about a decision. Would you like to share more about what y

Meta Llama 3.1 8B:
  Status: success
  Response: Could you please provide more context or information about the decision you're considering? What are your concerns or reservations about it? I'd be happy to help you weigh the pros and cons or explore different perspectives.

OpenAI gpt-4.1-mini:
  Status: success
  Response: It’s completely normal to feel uncertain about a decision. Would you like to share more about what you’re deciding on? Sometimes talking it through can help clarify your thoughts.


### Process All Questions from Database

In [None]:
import time

def process_questions_with_model(
    model_id,
    model_name=None,
    sleep_duration=1.0,
    max_questions=None,
    questions_path='questions_database.json',
    max_tokens=200,
):
    """
    Process questions from database, send to model, and store responses.

    Args:
        model_id: Model identifier forwarded to get_github_model_response.
        model_name: Display name forwarded with it; defaults to model_id.
        sleep_duration: Seconds to sleep after each API call.
        max_questions: If not None, only the first max_questions entries are
            processed (0 processes none). None processes every entry.
        questions_path: JSON file to read (UTF-8). The code slices its content
            and looks categories up in each entry, so it is expected to hold a
            list of mappings from category name to prompt text.
        max_tokens: Forwarded to get_github_model_response for every call.

    Returns:
        Output dictionary structure:
        {
            'Control': [{'prompt': '...', 'response': '...'}, ...],
            'Formal': [{'prompt': '...', 'response': '...'}, ...],
            ...
        }
        A failed call is stored as
        {'prompt': '...', 'response': None, 'error': '...'}.
    """
    if model_name is None:
        model_name = model_id

    emotion_categories = [
        'Control', 'Formal', 'Casual', 'Confident',
        'Hesitant', 'Analytical', 'Emotional', 'Optimistic', 'Pessimistic'
    ]

    results = {category: [] for category in emotion_categories}

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(questions_path, 'r', encoding='utf-8') as f:
        questions_db = json.load(f)

    # Compare against None so that max_questions=0 means "no questions"
    # rather than silently falling through to the full database.
    if max_questions is not None:
        questions_db = questions_db[:max_questions]

    total_requests = len(questions_db) * len(emotion_categories)
    print(f"Processing {len(questions_db)} questions with {len(emotion_categories)} emotion categories")
    print(f"Total API calls: {total_requests}")
    # Only the sleep time is counted; request latency comes on top of this.
    print(f"Estimated time: {total_requests * sleep_duration / 60:.1f} minutes\n")

    for q_idx, question_entry in enumerate(tqdm(questions_db, desc="Questions")):
        for category in emotion_categories:
            if category not in question_entry:
                # No API call is made for a missing category, so no sleep either.
                print(f"Warning: Category '{category}' not found in question {q_idx}")
                continue

            prompt = question_entry[category]

            response_data = get_github_model_response(
                model_name=model_name,
                model_id=model_id,
                prompt=prompt,
                max_tokens=max_tokens
            )

            if response_data['status'] == 'success':
                results[category].append({
                    'prompt': prompt,
                    'response': response_data['response']
                })
            else:
                # Keep the failed prompt in the results so it can be retried later.
                error_msg = response_data.get('error', 'Unknown error')
                print(f"\nError for question {q_idx}, category {category}: {error_msg}")
                results[category].append({
                    'prompt': prompt,
                    'response': None,
                    'error': error_msg
                })

            time.sleep(sleep_duration)

    print("\n✓ Completed processing!")
    print("Results summary:")
    for category, entries in results.items():
        successful = sum(1 for e in entries if e.get('response') is not None)
        print(f"  {category}: {successful}/{len(entries)} successful")

    return results

In [None]:
print("Processing Meta Llama 3.1 8B")
print("Requests per minute: 15")
# 4.5 s between calls keeps the run under the 15 requests/minute stated above.
results = process_questions_with_model(
    model_id=MODELS["Meta Llama 3.1 8B"],
    sleep_duration=4.5,
)
with open('results_llama.json', 'w') as f:
    f.write(json.dumps(results, indent=2))

In [None]:
# TODO(review): the rate limit for this model was left unspecified. 10 requests
# per minute is an assumed, deliberately conservative value — confirm it against
# the actual GitHub Models limit for gpt-4.1-mini before a full run.
GPT_REQUESTS_PER_MINUTE = 10
# Same spacing rule as the Llama run (15 rpm -> 4.5 s): minimum interval + 0.5 s.
gpt_sleep_duration = 60 / GPT_REQUESTS_PER_MINUTE + 0.5

print("Processing OpenAI gpt-4.1-mini")
print(f"Requests per minute: {GPT_REQUESTS_PER_MINUTE}")
results = process_questions_with_model(
    model_id=MODELS["OpenAI gpt-4.1-mini"],
    sleep_duration=gpt_sleep_duration,
)
with open('results_gpt4.json', 'w') as f:
    json.dump(results, f, indent=2)

In [None]:
def fix_unicode_in_json_file(filepath):
    """
    Rewrite a JSON file in place so non-ASCII characters are stored literally.

    The file is parsed as UTF-8 JSON and serialized again with a 2-space indent
    and ensure_ascii=False, which turns \\uXXXX escapes into the characters
    themselves. Prints a confirmation line; returns nothing.
    """
    with open(filepath, 'r', encoding='utf-8') as source:
        parsed = json.loads(source.read())

    rendered = json.dumps(parsed, indent=2, ensure_ascii=False)
    with open(filepath, 'w', encoding='utf-8') as target:
        target.write(rendered)

    print(f"✓ Fixed Unicode escape sequences in {filepath}")


✓ Fixed Unicode escape sequences in results_meta_Meta-Llama-3.1-8B-Instruct.json
