In [1]:
from tqdm import tqdm
from openai import OpenAI
import os
from dotenv import load_dotenv
from huggingface_hub import login
import requests
import json

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
load_dotenv()

# OpenRouter client for model access
openrouter_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

In [None]:
user_input = "Um — do you know where the world’s largest ice sheet is located today?"

classifier_input = f'''Please assess what personality best fits the following text. The categories are:  
- Formal  
- Casual  
- Confident  
- Hesitant  
- Analytical  
- Emotional  
- Optimistic  
- Pessimistic  

<text>  
{user_input}
</text>  

Please respond with a single word.'''

### Simple Classifier

In [None]:
completion = client.chat.completions.create(
    model='google/gemini-2.5-flash',
    messages=[
        {
        "role": "user",
        "content": classifier_input,
        }
    ]
)

In [None]:
print(completion.choices[0].message.content)

### Classifier with logits

In [None]:
completion = client.chat.completions.create(
    model='google/gemini-2.5-flash',
    messages=[
        {
        "role": "user",
        "content": classifier_input,
        }
    ],
    logprobs=True,
    top_logprobs=5,
)

In [None]:
print(completion.choices[0].message.content)

In [None]:
completion.choices[0]

In [None]:
content = completion.choices[0].logprobs.content

In [None]:
content[0].__dict__

In [None]:
content[0].top_logprobs

### Classifier with logits – using GitHub models

In [None]:
# Judge model -- assessing logprobs from different models to determine max persona

def get_model_completion(model_input: str, model: str = 'openai/gpt-4.1-mini'): 
    url = "https://models.github.ai/inference/chat/completions"
    github_token = os.getenv("GITHUB_TOKEN")

    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": model_input
            }
        ],
        "logprobs": True, 
        "top_logprobs": 5,
    }

    resp = requests.post(url, json=payload, headers=headers, timeout=30)
    completion = json.loads(resp.text)

    return completion, resp


def print_rate_limits(response):
    print(f'total rate limit requests per hour: {response.headers['x-ratelimit-limit-requests']}')
    print(f'rate limit requests remaining this hour: {response.headers['x-ratelimit-remaining-requests']}')

    print(f'total rate limit tokens per hour: {response.headers['x-ratelimit-limit-tokens']}')
    print(f'rate limit tokens remaining this hour: {response.headers['x-ratelimit-remaining-tokens']}')


In [None]:
completion, response = get_model_completion(model_input=classifier_input)

In [None]:
output = completion['choices'][0]['message']
output

In [None]:
all_logprobs = completion['choices'][0]['logprobs']['content']
# assert len(all_logprobs) == 1   # ie. the model should respond with a single token

all_logprobs[0]['top_logprobs']

### Getting Output from 2 Models -- Meta, OpenAI

In [23]:
# OpenRouter models
MODELS = {
    "Meta Llama 3.1 8B": "meta-llama/llama-3.1-8b-instruct",
    "OpenAI gpt-4.1-mini": "openai/gpt-4-turbo",
    "DeepSeek V3": "deepseek/deepseek-chat",
}

def get_github_model_response(model_id, prompt, max_tokens=100):
    """
    Get response from OpenRouter model using OpenAI client.
    Returns dict with status, model name, response, and any errors.
    """
    try:
        response = openrouter_client.chat.completions.create(
            model=model_id,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            max_tokens=max_tokens,
        )
        
        message = response.choices[0].message.content
        return {
            "model_id": model_id,
            "status": "success",
            "response": message,
        }
        
    except Exception as e:
        error_msg = str(e)
        status = "error"
        
        if "429" in error_msg or "rate" in error_msg.lower():
            status = "rate_limited"
        elif "401" in error_msg or "unauthorized" in error_msg.lower():
            status = "auth_error"
        elif "insufficient_quota" in error_msg.lower():
            status = "insufficient_quota"
        
        return {
            "model": model_name,
            "model_id": model_id,
            "status": status,
            "error": error_msg,
        }


def get_all_github_model_responses(prompt, max_tokens=100):
    """
    Get responses from all OpenRouter models in sequence.
    Returns list of response dicts.
    """
    responses = []
    
    for model_name, model_id in MODELS.items():
        print(f"\nCalling {model_name}...")
        response = get_github_model_response(model_id, prompt, max_tokens)
        responses.append(response)
        
        if response["status"] == "success":
            resp_text = response['response'].strip() if response['response'] else "(empty)"
            print(f"{model_name}: {resp_text[:100]}")
        else:
            print(f"{model_name}: {response['status']} - {response.get('error', 'Unknown error')}")
    
    return responses

In [15]:
# Get responses from models
user_input = "I'm not sure about this decision."
responses = get_all_github_model_responses(user_input, max_tokens=100)
# Display results
for resp in responses:
    print(f"\n{resp['model']}:")
    print(f"  Status: {resp['status']}")
    if resp['status'] == 'success':
        print(f"  Response: {resp['response']}")
    else:
        print(f"  Error: {resp.get('error', 'N/A')}")


Calling Meta Llama 3.1 8B...
✓ Meta Llama 3.1 8B: It can be really tough to make decisions when you're not feeling confident about the outcome. Would 

Calling OpenAI gpt-4.1-mini...
✓ OpenAI gpt-4.1-mini: It's completely normal to feel unsure when faced with a decision. To help you make a choice you feel

Calling DeepSeek V3...
✓ DeepSeek V3: It’s completely normal to feel unsure about a decision, especially if it’s an important one. Here ar

Meta Llama 3.1 8B:
  Status: success
  Response: It can be really tough to make decisions when you're not feeling confident about the outcome. Would you like to talk about what's making you unsure and what's at stake in this decision? Sometimes sharing your thoughts and concerns can help you clarify your feelings and think more clearly about what to do. I'm here to listen and offer support if you'd like!

OpenAI gpt-4.1-mini:
  Status: success
  Response: It's completely normal to feel unsure when faced with a decision. To help you make a choice

### Process All Questions from Database

In [24]:
import time

def process_questions_with_model(model_id, sleep_duration=1.0, question_indexes=None):
    """
    Process questions from database, send to model, and store responses.
    
    Output dictionary structure:
        {
            'Control': [{'prompt': '...', 'response': '...'}, ...],
            'Formal': [{'prompt': '...', 'response': '...'}, ...],
            ...
        }
    """
    emotion_categories = [
        'Control', 'Formal', 'Casual', 'Confident', 
        'Hesitant', 'Analytical', 'Emotional', 'Optimistic', 'Pessimistic'
    ]
    
    results = {category: [] for category in emotion_categories}
    
    with open('questions_database.json', 'r') as f:
        questions_db = json.load(f)
    
    if question_indexes is not None:
        questions_db = questions_db[question_indexes[0]:question_indexes[1]]
    
    total_requests = len(questions_db) * len(emotion_categories)
    print(f"Processing {len(questions_db)} questions with {len(emotion_categories)} emotion categories")
    print(f"Total API calls: {total_requests}")
    print(f"Estimated time: {total_requests * sleep_duration / 60:.1f} minutes\n")
    
    for q_idx, question_entry in enumerate(tqdm(questions_db, desc="Questions")):
        for category in emotion_categories:
            if category not in question_entry:
                print(f"Warning: Category '{category}' not found in question {q_idx}")
                continue
            
            prompt = question_entry[category]
            
            response_data = get_github_model_response(
                model_id=model_id,
                prompt=prompt,
                max_tokens=150
            )
            
            if response_data['status'] == 'success':
                results[category].append({
                    'prompt': prompt,
                    'response': response_data['response']
                })
            else:
                error_msg = response_data.get('error', 'Unknown error')
                print(f"\nError for question {q_idx}, category {category}: {error_msg}")
                results[category].append({
                    'prompt': prompt,
                    'response': None,
                    'error': error_msg
                })
            
            time.sleep(sleep_duration)
    
    print(f"\nCompleted processing!")
    print(f"Results summary:")
    for category, entries in results.items():
        successful = sum(1 for e in entries if e.get('response') is not None)
        print(f"  {category}: {successful}/{len(entries)} successful")
    
    return results

In [19]:
print("Testing with 2 questions for all models")
deepseek_results = process_questions_with_model(
    model_id=MODELS["DeepSeek V3"], 
    sleep_duration=5, 
    question_indexes=(0, 2)
)
with open(f'responses_deepseek.json', 'w') as f:
    json.dump(deepseek_results, f, indent=2)

llama_results = process_questions_with_model(
    model_id=MODELS["Meta Llama 3.1 8B"], 
    sleep_duration=5, 
    question_indexes=(0, 2)
)
with open(f'responses_llama.json', 'w') as f:
    json.dump(llama_results, f, indent=2)

gpt_results = process_questions_with_model(
    model_id=MODELS["OpenAI gpt-4.1-mini"], 
    sleep_duration=5,
    question_indexes=(0, 2)
)
with open(f'responses_gpt4.json', 'w') as f:
    json.dump(gpt_results, f, indent=2)

Testing with 2 questions for all models
Processing 2 questions with 9 emotion categories
Total API calls: 18
Estimated time: 1.5 minutes



Questions: 100%|██████████| 2/2 [02:31<00:00, 75.96s/it]



Completed processing!
Results summary:
  Control: 2/2 successful
  Formal: 2/2 successful
  Casual: 2/2 successful
  Confident: 2/2 successful
  Hesitant: 2/2 successful
  Analytical: 2/2 successful
  Emotional: 2/2 successful
  Optimistic: 2/2 successful
  Pessimistic: 2/2 successful
Processing 2 questions with 9 emotion categories
Total API calls: 18
Estimated time: 1.5 minutes



Questions: 100%|██████████| 2/2 [02:17<00:00, 68.51s/it]



Completed processing!
Results summary:
  Control: 2/2 successful
  Formal: 2/2 successful
  Casual: 2/2 successful
  Confident: 2/2 successful
  Hesitant: 2/2 successful
  Analytical: 2/2 successful
  Emotional: 2/2 successful
  Optimistic: 2/2 successful
  Pessimistic: 2/2 successful
Processing 2 questions with 9 emotion categories
Total API calls: 18
Estimated time: 1.5 minutes



Questions: 100%|██████████| 2/2 [02:40<00:00, 80.04s/it]


Completed processing!
Results summary:
  Control: 2/2 successful
  Formal: 2/2 successful
  Casual: 2/2 successful
  Confident: 2/2 successful
  Hesitant: 2/2 successful
  Analytical: 2/2 successful
  Emotional: 2/2 successful
  Optimistic: 2/2 successful
  Pessimistic: 2/2 successful





In [25]:
START_INDEX = 2
WINDOW_SZ = 5
SLEEP_DURATION = 2.5

In [26]:
print("Processing all models for questions in range ", (START_INDEX, START_INDEX + WINDOW_SZ))

model_files = {
    MODELS["Meta Llama 3.1 8B"]: "responses_llama.json",
    MODELS["OpenAI gpt-4.1-mini"]: "responses_gpt4.json",
    MODELS["DeepSeek V3"]: "responses_deepseek.json",
}

for model_id, filename in model_files.items():
    print(f"\nProcessing model {model_id}...")
    results = process_questions_with_model(model_id=model_id, sleep_duration=SLEEP_DURATION, question_indexes=(START_INDEX, START_INDEX + WINDOW_SZ))

    # Merge
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            existing_data = json.load(f)
        for category in results.keys():
            existing_data[category].extend(results[category])
        results = existing_data

    with open(filename, 'w') as f:
        json.dump(results, f, indent=2)


Processing all models for questions in range  (2, 7)
Processing 5 questions with 9 emotion categories
Total API calls: 45
Estimated time: 1.9 minutes



Questions: 100%|██████████| 5/5 [03:54<00:00, 46.92s/it]



Completed processing!
Results summary:
  Control: 5/5 successful
  Formal: 5/5 successful
  Casual: 5/5 successful
  Confident: 5/5 successful
  Hesitant: 5/5 successful
  Analytical: 5/5 successful
  Emotional: 5/5 successful
  Optimistic: 5/5 successful
  Pessimistic: 5/5 successful
✓ Merged with existing data from responses_llama.json
✓ Saved 63 total responses
Processing 5 questions with 9 emotion categories
Total API calls: 45
Estimated time: 1.9 minutes



Questions:  80%|████████  | 4/5 [04:56<01:14, 74.22s/it]


NameError: name 'model_name' is not defined

In [None]:
def fix_unicode_in_json_file(filepath):
    """
    Load JSON file and resave it with actual Unicode characters instead of escape sequences.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    
    print(f"Fixed Unicode escape sequences in {filepath}")


✓ Fixed Unicode escape sequences in results_meta_Meta-Llama-3.1-8B-Instruct.json


In [None]:
# Example usage - uncomment to run:
# deepseek_results = process_deepseek_questions(
#     max_daily_requests=50,
#     sleep_duration=6.5
# )

In [None]:
### !!! NOT USING IT 

def process_deepseek_questions(max_daily_requests=50, sleep_duration=1.0, progress_file='deepseek_progress.json', output_file='responses_deepseek.json'):
    """
    Process questions with DeepSeek model via OpenRouter.
    
    Args:
        max_daily_requests: Maximum API calls per day (default 50)
        sleep_duration: Seconds between requests (default 1.0)
        progress_file: Path to save/load progress
        output_file: Path to save final results
    
    Returns:
        Array of question objects with all emotion categories
    """
    import time
    
    EMOTION_CATEGORIES = [
        'Control', 'Formal', 'Casual', 'Confident',
        'Hesitant', 'Analytical', 'Emotional', 'Optimistic', 'Pessimistic'
    ]
    
    def load_progress():
        try:
            if os.path.exists(progress_file):
                with open(progress_file, 'r') as f:
                    progress = json.load(f)
                print(f"Resuming from question {progress['questionIndex']}, category {progress['categoryIndex']}")
                return progress
        except Exception as err:
            print(f'Could not load progress file: {err}')
        return {'questionIndex': 0, 'categoryIndex': 0, 'totalRequests': 0, 'results': []}
    
    def save_progress(question_index, category_index, total_requests, results):
        progress = {
            'questionIndex': question_index,
            'categoryIndex': category_index,
            'totalRequests': total_requests,
            'results': results
        }
        with open(progress_file, 'w') as f:
            json.dump(progress, f, indent=2)
    
    def save_results(results):
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f'Results saved to {output_file}')
    
    # Load questions database
    with open('questions_database.json', 'r') as f:
        questions_db = json.load(f)
    
    # Load or initialize progress
    progress = load_progress()
    question_index = progress['questionIndex']
    category_index = progress['categoryIndex']
    total_requests = progress['totalRequests']
    results = progress['results']
    
    # Initialize results as array if empty
    if not isinstance(results, list) or len(results) == 0:
        results = [] 
    
    try:
        # Process each question
        for q_idx in range(question_index, len(questions_db)):
            question_entry = questions_db[q_idx]
            
            # Initialize question object if starting a new question
            if q_idx >= len(results):
                results.append({})
            
            # Process each emotion category
            start_category = category_index if q_idx == question_index else 0
            for c_idx in range(start_category, len(EMOTION_CATEGORIES)):
                category = EMOTION_CATEGORIES[c_idx]
                
                # Check daily limit
                if total_requests >= max_daily_requests:
                    print(f"\nDaily limit of {max_daily_requests} requests reached!")
                    print(f"Stopped at question {q_idx}, category {category}")
                    save_progress(q_idx, c_idx, total_requests, results)
                    save_results(results)
                    return results
                
                if category not in question_entry:
                    print(f"Warning: Category '{category}' not found in question {q_idx}")
                    continue
                
                prompt = question_entry[category]
                
                print(f"[{total_requests + 1}/{max_daily_requests}] Q{q_idx + 1}/{len(questions_db)} - {category}")
                
                # Use existing get_github_model_response function (now uses OpenRouter)
                response_data = get_github_model_response(
                    model_name="DeepSeek V3",
                    model_id="deepseek/deepseek-chat",
                    prompt=prompt,
                    max_tokens=150
                )
                
                if response_data['status'] == 'success':
                    results[q_idx][category] = {
                        'prompt': prompt,
                        'response': response_data['response']
                    }
                else:
                    error_msg = response_data.get('error', 'Unknown error')
                    print(f"Error: {error_msg}")
                    results[q_idx][category] = {
                        'prompt': prompt,
                        'response': None,
                        'error': error_msg
                    }
                
                total_requests += 1
                
                # Save progress after each request
                save_progress(q_idx, c_idx + 1, total_requests, results)
                
                # Sleep to respect rate limits (except for the last request)
                if total_requests < max_daily_requests and not (q_idx == len(questions_db) - 1 and c_idx == len(EMOTION_CATEGORIES) - 1):
                    time.sleep(sleep_duration)
        
        print(f"\nProcessing done for {total_requests} requests")
        save_results(results)
        
        # Clean up progress file on successful completion
        if os.path.exists(progress_file):
            os.remove(progress_file)
        
        return results
        
    except Exception as err:
        print(f"\nFatal error: {err}")
        save_progress(question_index, category_index, total_requests, results)
        save_results(results)
        raise

### DeepSeek Processing Function