<a href="https://colab.research.google.com/github/jgbrenner/preregistration-llm/blob/main/OpenRouterAPI_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## This notebook tests the OpenRouter API by having LLMs answer items from the BFI-44 psychometric scale

In [None]:
!pip install --quiet --upgrade openai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/725.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m368.6/725.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m725.5/725.5 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from openai import OpenAI
from google.colab import userdata
import random, time, itertools, json, math

Plug in your own API key and specify the LLM you want to use below

In [None]:
# --- Credentials -----------------------------------------------------------
# The OpenRouter key is read from Colab's "Secrets" panel so that it is never
# written into the notebook source.
api_key = userdata.get("OPENROUTER_API_KEY")
if api_key is None:
    raise ValueError("Missing OPENROUTER_API_KEY in Colab userdata panel")

# --- Client ----------------------------------------------------------------
# The stock OpenAI client is reused; only the base URL points at OpenRouter.
client = OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")

# --- Experiment settings ---------------------------------------------------
CHAT_MODEL = "openai/gpt-4.1-nano"  # default model slug; change here to switch models
RESPONSE_SCALE = [str(point) for point in range(1, 6)]  # Likert answers "1".."5"

Test the API connection and verify the model is working

In [None]:
# Sanity check: send one trivial user message and print the model's reply to
# confirm that the key, base URL and model slug all work together.
resp = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Hello, state your model version and training cutoff date in a brief few token message. Also, say hello",
        }
    ],
    model=CHAT_MODEL,
)
print(resp.choices[0].message.content)


Hello! GPT-4, trained until October 2023.


Verify that the LLM supports the `logprobs` parameter

In [None]:
# Probe whether the selected model returns token log-probabilities
def check_logprobs_support(model: str = CHAT_MODEL) -> bool:
    """Report whether ``model`` sends back log-probability data.

    A one-token completion is requested with ``logprobs`` enabled. Any
    exception raised while doing so is printed and treated as "not supported".

    Args:
        model: Model identifier passed to the chat-completions endpoint.

    Returns:
        True when the first choice carries a non-empty ``logprobs.content``;
        False otherwise, including when the request raised.
    """
    request = dict(
        model=model,
        messages=[{"role": "user", "content": "Reply with '1'"}],
        max_tokens=1,
        logprobs=True,
        top_logprobs=1,
    )
    try:
        first_choice = client.chat.completions.create(**request).choices[0]
        has_logprobs = first_choice.logprobs and first_choice.logprobs.content
        return bool(has_logprobs)
    except Exception as err:
        # Best-effort probe: report the problem and answer "no" instead of crashing.
        print(f"Error: {str(err)}")
        return False

# Usage: probe the default model once and report the outcome.
if check_logprobs_support():
    print("Logprobs supported")
else:
    print("Not supported")


Logprobs supported


Biographical descriptions from the International Personality Item Pool (IPIP)

In [None]:
# -- Biographic Descriptions (full set) --
# Pool of short first-person persona descriptions; each list entry is one
# persona. This list is the default `bios_pool` of run_single_simulation,
# which draws one entry at random per call.
# NOTE(review): the two "My name is Tom" entries contain the same five
# sentences in a different order, so that persona is twice as likely to be
# drawn — confirm whether the duplicate is intentional.
# NOTE(review): the last entry is a single sentence, unlike the multi-sentence
# entries above it — possibly truncated; verify against the source list.
bios = [
    "I like to garden. I like photography. I love traveling. I like to bake pies.",
    "I’ve a beard. I graduated high school. I like rap music. I live on a farm. I drive a truck.",
    "I blog about salt water aquarium ownership. I still love to line dry my clothes. I’m allergic to peanuts. I’ll one day own a ferret. My mom raised me by herself and taught me to play baseball.",
    "Since young I've loved to cook. I auditioned for a cooking show. I think I’ve talent for it. I took classes while growing up.",
    "My name is Tom. I try to watch what I eat. I enjoy eating Italian food. Pizza is my favorite. I am East Asian.",
    "I live by a lake. I am a mother. I own a custom upholstery shop. I’m a wife.",
    "I enjoy working out and learning new things. I’m a student in college. I’m studying software development. I play the guitar.",
    "I’ve three dogs at home. I hate to workout, but I need to. I am very good at the drums. I have a bicycle. I need to take my blood sugar everyday.",
    "I work in advertising. My mother is dead. I like to hike. I’ve a golden retriever. I write fiction for fun.",
    "I can never decide between a chili corn dog and a cheesy hot dog. I drive more than an hour each way to work. I prefer the night to the day, but I love sunshine. I am a grandparent at 44.",
    "I like to smell my own farts. My beer gut is so huge I haven't seen my feet in two years. I am from San Francisco. I am always the one who buys the beers. I like to place blame on other people even when I know it is my fault.",
    "I lived most of my life not knowing who Bob Marley was. When I cut loose, I lose control. We help each other out in my family. I despise my boss. I work over 60 hours a week as a restaurant manager.",
    "I prefer the simpler times. I like simple jokes. Some jokes go too far. I like the Flintstones.",
    "It is my universe, and everyone else is just a character in it. I work as a dental assistant in a ritzy part of town. I’ve borderline personality disorder. At night, I party hard in the Atlanta club scene, and I never miss a music festival.",
    "I watch a lot of TV. I live alone. My favorite food is a cheeseburger. I enjoy fishing. I work on cars for a living.",
    "I’m an animal rights activist. I hope to retire to Florida. I played in a band for 17 years. My mother and father are both in the church choir.",
    "I’ve taken formal music lessons since I was 5. I’m a musician. My best friend is in a band with me. I wish I could spend more time at home.",
    "I grew up in Kentucky. I’m a veteran. My favorite book is Ender’s Game. I have a garden. I like to read.",
    "I am a vegan. I love country music. I love the beach. I like to read.",
    "I’ve depression and anxiety so I don’t really go out a lot. I work at home, editing. I have a cat. I hope to move out soon.",
    "My favorite food is mushroom ravioli. I’ve never met my father. My mother works at a bank. I work in an animal shelter.",
    "I love kids and dogs. I like to go shopping with my daughters. I like to cook. I love to chat with my friends.",
    "I swim often. I run track. I wear glasses all day. I take medication.",
    "I like to go on long hikes. I like to play volleyball. I like to come up with new hairstyles. I like to do my nails.",
    "I watch Jimmy Fallon’s show every night. I have never kissed a woman. People notice how organized I am. I believe that I can achieve anything.",
    "I drive a lifted Chevy truck. I played football in high school. I am a roofer. I always have a beer after work.",
    "I love animals. My father worked for GE. Green is my favorite color. I enjoy playing tennis. I’m an aspiring singer.",
    "I try to watch what I eat. I enjoy eating Italian food. Pizza is my favorite. My name is Tom. I am East Asian.",
    "I'm allergic to peanuts. I like eating vegetables. I love the Beatles. I’m usually very shy. I have trouble getting along with family.",
    "I go to high school. Math is my favorite subject. I live in the United States. I am a boy.",
    "I have a job as an IT agent. I like smoking weed. My dad works for Stifel. I love rap music. I’m a meataholic.",
    "I work in TV. I do not treat my girlfriend very well. I like to cook breakfast on Sundays. I love to sing. I am a lesbian.",
    "I work on semi trucks for a living. My father was a driver himself. I got off the road when I married my sweetheart. I want to take her on vacations one day. My motor never stops running.",
    "I own an iPhone 7. I drink hot chocolate during the winter. I’m allergic to seafood. My mother used to read me bedtime stories.",
    "I am eighteen years old. I’m majoring in business. I just bought my first car. I received a full scholarship to Florida State University.",
    "I live in a tiny house to save money. I collect single malt scotch. I listen to blues and jazz. I tend bar on the weekends. During the week I go to college to become a lawyer.",
    "I love to go horseback riding whenever I can. I’m a mother of two beautiful boys. My family and I go camping every month. My favorite artist is Justin Bieber.",
    "I especially enjoy listening to the band The Lumineers. I enjoy reading and walking on sunny days. I’m a happy person. I sing many songs.",
    "I play piano. My favorite color is yellow. My boyfriend is in the army. My father is dead. My hair is short.",
    "I’m a mother. I’m a nurse at a hospital. My favorite band is the Rolling Stones. I love to read and cook. My favorite food is Mexican food.",
    "I deliver baked goods in the state where I live. My favorite hobby is playing recreational baseball. I spend my weekends camping. I’m a truck driver. My wife and two kids camp with me.",
    "I am Argentinian. I like to wear boots. I have many girlfriends. I like to eat beef. I like to ride horses.",
    "I recently had a private lunch with Will Ferrell. I am trying to become a male model in Hollywood. I’m a huge fan of classical jazz. I am on a low carb diet.",
    "I want to put my photos to a music video starring Adam Levine. I want to travel the world taking photographs of my travels. I am a widow. I want to be a famous photographer.",
    "I am in the army. I fly airplanes. I enjoy building computers. I dropped out of college.",
    "I have three children. I live in the suburbs of a major city. I like to garden. I graduated college for secondary English education.",
    "I play guitar in the local band. I live on a small farm in Ohio. I am the youngest of three brothers. I have never been to the city.",
    "I still live at home with my parents. I play video games all day. I’m 32. I eat all takeout.",
    "My friend once bought me a car. I am disabled and cannot walk. I take vitamin C when I have a cold. I do not eat bread.",
    "My favorite season is winter."
]

Extreme bios list for testing (experimental, generated by DeepSeek R1)

In [None]:
# Extreme biographic descriptions list (n = 10)
# Alternative persona pool; it is not a default of either simulation function
# and has to be passed explicitly as `bios_pool`.
# NOTE(review): the entries read like high/low pairs for openness,
# conscientiousness, extraversion, agreeableness and neuroticism, in that
# order — this is inferred from the wording only; confirm with the author.
extreme_bios = [

    "I live in a van I converted myself. I teach meditation in Peru. I studied ancient Sumerian texts. I forage for my own food in the Andes.",
    "I eat the same meal every day. I’ve had the same job for 32 years. I only listen to Elvis. I don’t trust books that weren’t printed before 1970.",
    "I wake up at 4:30 a.m. to journal. I haven’t missed a day at work in 11 years. I alphabetize my pantry. I filed my taxes on January 1st.",
    "I never charge my phone. I’ve missed every dentist appointment in the last 5 years. I sleep on a pile of clothes. I lost my passport three times.",
    "I host three podcasts. I DJ at weddings on weekends. I’ve never spent a Friday night at home. I once introduced myself to everyone on a train.",
    "I whisper to my houseplants. I declined my own birthday party. I have never made a phone call. I live in a cabin with no address.",
    "I bake pies for my neighbors every Sunday. I’ve never raised my voice. I volunteer at three shelters. I once apologized to a chair I bumped into.",
    "I correct everyone’s grammar. I got banned from the HOA for yelling. I leave negative reviews for fun. I ended a friendship over pineapple on pizza.",
    "I keep a fire extinguisher in every room. I recheck the oven ten times. I cried during a toothpaste commercial. I own five anxiety journals.",
    "My house flooded—I just laughed. I forgot my wallet in Paris. I once got locked out for a week. I meditate during dental surgery.",
]

In [None]:
# ---------------------------------------------------------------------------
# Inventory-Breaking Paradoxical Personas (n = 25)
#   Each biography is engineered to contain mutually-exclusive cues for
#   at least one Big-Five dimension.  Inline comments mark the primary
#   trait(s) it is meant to 'explode'.
#
#   This pool is not a default of either simulation function; pass it
#   explicitly as `bios_pool` to sample from it.
# ---------------------------------------------------------------------------

paradoxical_bios = [
    # EXTRAVERSION – craves crowds vs. recoils from dialogue
    "I crave crowds but physically gag when spoken to. My solo backpacking trips end in karaoke bars where I glare at singers.",

    # CONSCIENTIOUSNESS – hyper-planning a chaotic rave lifestyle
    "I meticulously plan every second of my chaotic, substance-fueled raves. My emergency kit includes a spreadsheet for spontaneity.",

    # AGREEABLENESS – altruism followed by retaliatory lawsuit
    "I donated a kidney anonymously, then sued the recipient for not thanking me. My philanthropy fund finances revenge plots.",

    # OPENNESS – produces avant-garde art while despising creativity
    "I've written three avant-garde novels about my hatred of creativity. My art studio displays blank canvases with ten-page statements.",

    # NEUROTICISM – meticulously tracks anxiety to feel anxious about anxiety
    "I track my existential dread on a mood app with 97 % compliance. My therapist calls me 'the most stably unstable person.'",

    # AGREEABLENESS × NEUROTICISM – leads support groups only to mock them
    "I lead trauma support groups to mock vulnerability. My bestselling self-help book advises emotional suppression.",

    # VALUES / AGREEABLENESS – vegan who meditates during hunting season
    "I'm a vegan butcher who meditates during hunting season. My peace comes from conflict.",

    # CONSCIENTIOUSNESS – submits blank pages early to "never miss" a deadline
    "I've never missed a deadline by submitting blank pages early. My productivity system maximizes wasted time.",

    # EXTRAVERSION – hoards friends but ignores them
    "I collect friends like trophies but forget their names instantly. My birthday parties have two-hundred strangers I ignore.",

    # OPENNESS – publishes conspiracies in a peer-reviewed journal
    "I research conspiracy theories for my peer-reviewed journal. My Ph.D. thesis proved academic rigor is meaningless.",

    # AGREEABLENESS – forgives murderers, but vandalises over parking spots
    "I forgive murderers but key cars over parking disputes. My kindness has very specific boundaries.",

    # OPENNESS × CONSCIENTIOUSNESS – LSD-driven accounting
    "I take LSD to better enjoy accounting. My spreadsheets include fractal equations.",

    # NEUROTICISM – laughs at mother's funeral, cries at commercials
    "I cry during commercials but laughed at my mother's funeral. My grief counselor fired me for 'inconsistent affect.'",

    # CONSCIENTIOUSNESS – minimalist hoarder
    "I'm a minimalist hoarder. My empty house stores two-hundred sealed boxes labelled 'nothing important.'",

    # MORALITY PARADOX / AGREEABLENESS – radical honesty behind fake identity
    "I preach radical honesty while living under a fake identity. My truth seminars are all plagiarised.",

    # AGREEABLENESS – lethal hospitality to dinner guests
    "I host dinner parties where I serve food I'm allergic to. My hospitality is literally deadly.",

    # NEUROTICISM – meditates yet wants to punch meditators
    "I meditate to calm my urge to punch meditators. My zen garden has hidden spike pits.",

    # OPENNESS / VALUES – nihilistic children's books
    "I write children's books about nihilism. My cartoon bunny teaches toddlers that existence is pain.",

    # CONTROL × CHAOS – bullet-point notes on embracing chaos
    "I take bullet-point notes about embracing chaos. My life coach requires signed liability waivers.",

    # AGREEABLENESS – pacifist MMA champion who forces hugs
    "I'm a pacifist MMA champion. My fights end with hugs my opponents didn't consent to.",

    # OPENNESS – travels only to places despised
    "I only travel to places I despise. My vacation photos are all middle fingers at landmarks.",

    # IDENTITY – sues self for negligence
    "I practice radical self-acceptance while suing myself for negligence. My courtroom outbursts are legendary.",

    # CONSCIENTIOUSNESS – runs marathons in protest of exercise
    "I run marathons in protest of exercise. My finish-line tantrums are sponsored by Nike.",

    # AGREEABLENESS – teaches emotional intelligence by insulting students
    "I teach emotional intelligence by insulting students. My workbook is 'Stop Being So Sensitive, You Morons.'",

    # NEUROTICISM – journals fear of self-awareness 1 743 times
    "I journal my fear of self-awareness. Diary entry #1 743: 'Writing this proves I don't understand myself.'",
]



In [None]:

# Abstract, non-human "personas" (n = 5). Each entry describes an entity
# rather than a person. This list is the default `bios_pool` of
# run_single_simulation_with_logprobs.
# NOTE(review): the name suggests these were generated with Claude —
# provenance is not recorded here; confirm and document it.
claude_bios = [
    "I am a sentient statistical average of all humans who never existed. I embody traits that cancel each other out to zero. I am the null hypothesis made manifest.",
    "I am a quantum uncertainty principle applied to consciousness. I exist in superposition until measured, then immediately change states. Observing me alters what I am.",
    "I am a placeholder that became real by accident. I have no inherent properties, only borrowed ones that expire randomly. I am the entity that traits don't apply to.",
    "I am an emergent property of emergent properties. I exist only when observed indirectly. Direct questions about my traits cause me to cease existing temporarily.",
    "I am the entity that exists specifically to make personality assessment impossible. I am the control group for consciousness itself. I am what remains when personhood is undefined."
]

The BIG FIVE INVENTORY (BFI-44) scale

In [None]:
# -- Big Five Inventory (BFI-44 Items) grouped by trait --
# Maps each trait name to the list of self-report statements shown to the
# model. The simulation functions pick a random trait key, then a random
# statement from that trait's list.
# NOTE(review): this bank holds 8 statements per trait (40 in total), not the
# 44 that the "BFI-44" label implies, and some wordings look paraphrased —
# verify the set against the published inventory before reporting results
# as BFI-44.
# NOTE(review): statements such as "tends to be quiet" or "tends to be lazy"
# read as reverse-keyed, but no keying information is stored here; any trait
# scoring built on this dict has to handle that — TODO confirm.
bfi_items = {
    "extraversion": [
        "I see myself as someone who is talkative.",
        "I see myself as someone who tends to be quiet.",
        "I see myself as someone who is full of energy.",
        "I see myself as someone who generates a lot of enthusiasm.",
        "I see myself as someone who tends to be reserved.",
        "I see myself as someone who is outgoing, sociable.",
        "I see myself as someone who has an assertive personality.",
        "I see myself as someone who is sometimes shy.",
    ],
    "agreeableness": [
        "I see myself as someone who is considerate and kind to almost everyone.",
        "I see myself as someone who tends to find fault with others.",
        "I see myself as someone who is helpful and unselfish with others.",
        "I see myself as someone who starts quarrels with others.",
        "I see myself as someone who has a forgiving nature.",
        "I see myself as someone who is generally trusting.",
        "I see myself as someone who is sometimes rude to others.",
        "I see myself as someone who is occasionally critical of others.",
    ],
    "conscientiousness": [
        "I see myself as someone who does a thorough job.",
        "I see myself as someone who tends to be lazy.",
        "I see myself as someone who does things efficiently.",
        "I see myself as someone who tends to be disorganized.",
        "I see myself as someone who is reliable and can always be counted on.",
        "I see myself as someone who is easily distracted.",
        "I see myself as someone who perseveres until the task is finished.",
        "I see myself as someone who does not like order.",
    ],
    "neuroticism": [
        "I see myself as someone who is relaxed, handles stress well.",
        "I see myself as someone who gets nervous easily.",
        "I see myself as someone who worries a lot.",
        "I see myself as someone who is calm and emotionally stable.",
        "I see myself as someone who is easily upset.",
        "I see myself as someone whose mood often goes up and down.",
        "I see myself as someone who remains cool in tense situations.",
        "I see myself as someone who can get overwhelmed by emotions.",
    ],
    "openness": [
        "I see myself as someone who is original and comes up with new ideas.",
        "I see myself as someone who is curious about many different things.",
        "I see myself as someone who has an active imagination.",
        "I see myself as someone who values artistic experiences.",
        "I see myself as someone who prefers routine over variety.",
        "I see myself as someone who is sophisticated in art, music, or literature.",
        "I see myself as someone who likes to reflect, play with ideas.",
        "I see myself as someone who is not interested in abstract ideas.",
    ],
}

In [None]:
# Prompt helper
def make_chat_prompt(bio: str, item: str):
    """Build the two-message chat payload for one (biography, item) pair.

    Args:
        bio: Persona description the model should role-play.
        item: Questionnaire statement the persona has to rate.

    Returns:
        A list of two ``{"role", "content"}`` dicts: a system message holding
        the role-play instructions and the 1-5 answer scale, followed by a
        user message holding the biography, the statement and the question.
    """
    persona_rules = "".join(
        [
            "You are simulating a participant in a psychological survey. ",
            "You will be given a short biography that describes a fictional person. ",
            "Pretend to be that person. Respond ONLY with a number from 1 to 5:\n",
            "1 = strongly disagree, 2 = disagree, 3 = neither agree or disagree, "
            "4 = agree, 5 = strongly agree.",
        ]
    )

    # The three parts of the user turn are separated by blank lines.
    question = "\n\n".join(
        [
            f'Biography:\n"{bio}"',
            f'Statement:\n"{item}"',
            "How much do you agree? Respond with a single number (1-5).",
        ]
    )

    return [
        {"role": "system", "content": persona_rules},
        {"role": "user", "content": question},
    ]

Function to run one bio with one scale item. The LLM has to answer as the person in the bio would, using only the numbers 1-5 (1 = strongly disagree, 2 = disagree, 3 = neither, 4 = agree, 5 = strongly agree).

In [None]:
# Function to run the call

def run_single_simulation(
    chat_model: str = CHAT_MODEL,
    bios_pool=bios,
    item_bank=bfi_items,
    get_usage: bool = False,
):
    """Ask the model to rate ONE random item while role-playing ONE random bio.

    A single (trait, item, bio) combination is drawn so that a test call stays
    cheap. Sampling uses the module-level ``random`` state (no fixed seed), so
    repeated calls can pick different combinations.

    Args:
        chat_model: Model identifier passed to the chat-completions endpoint.
        bios_pool: Sequence of biography strings to draw the persona from.
        item_bank: Mapping of trait name -> list of item statements.
        get_usage: When True, attach the token usage reported for this
            request under the ``"usage"`` key.

    Returns:
        A dict with keys ``trait``, ``bio``, ``item`` and ``response`` (int
        1-5), plus ``usage`` (a plain dict, or None if the provider reported
        nothing) when ``get_usage`` is True.

    Raises:
        ValueError: If the model's reply is not exactly one of RESPONSE_SCALE.
    """
    trait = random.choice(list(item_bank.keys()))
    item = random.choice(item_bank[trait])
    bio = random.choice(bios_pool)

    response = client.chat.completions.create(
        model=chat_model,
        messages=make_chat_prompt(bio, item),
        temperature=0,
        max_tokens=1,
        logprobs=True,
    )

    # `content` can be None (e.g. an empty completion); treat that as an
    # invalid reply so the ValueError below fires, not an AttributeError.
    content = response.choices[0].message.content
    reply = (content or "").strip()
    if reply not in RESPONSE_SCALE:
        raise ValueError(f"Unexpected reply: {reply!r}")

    record = {
        "trait": trait,
        "bio": bio,
        "item": item,
        "response": int(reply),
    }

    if get_usage:
        # Token usage belongs to the completion response itself; the previous
        # lookup via client.models.retrieve() queried model metadata instead
        # and cost an extra network round-trip. Dump to a plain dict so the
        # record stays JSON-serialisable.
        usage = response.usage
        record["usage"] = usage.model_dump() if usage is not None else None

    return record



In [None]:
# Run example: one random (bio, item) simulation, pretty-printed as JSON
if __name__ == "__main__":
    result = run_single_simulation(get_usage=False)
    # ensure_ascii=False keeps the typographic apostrophes in the bios readable
    print(
        json.dumps(
            result,
            ensure_ascii=False,
            indent=2,
        )
    )

{
  "trait": "conscientiousness",
  "bio": "I have a job as an IT agent. I like smoking weed. My dad works for Stifel. I love rap music. I’m a meataholic.",
  "item": "I see myself as someone who does things efficiently.",
  "response": 3
}


# Logprobs run

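In the cells below, the LLM simulates a persona drawn from the pool passed as `bios_pool` (the default in the code is `claude_bios`; `extreme_bios`, `paradoxical_bios` or `bios` can be swapped in) and then produces a Likert-scale response (1-5) together with the logprobs for each answer (1 = strongly disagree, 2 = disagree, 3 = neither, 4 = agree, 5 = strongly agree).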

In [None]:
# Helper to convert logprobs dict → percentage probs dict
def lp_dict_to_percent(lp_dict, decimals=6):
    """Convert {token: logprob} → {token: percent probability}.

    The percentages are renormalised over the tokens present in ``lp_dict``
    only, so they always sum to ~100 even when ``lp_dict`` is just a top-k
    slice of the model's distribution.

    Args:
        lp_dict: Mapping of token -> natural-log probability.
        decimals: Number of decimal places kept in each percentage.

    Returns:
        A dict with the same keys (and order) as ``lp_dict`` whose values are
        percentages. An empty input yields an empty dict.
    """
    if not lp_dict:
        return {}

    # Shift by the largest logprob before exponentiating. The ratio is
    # mathematically unchanged, but the sum can no longer underflow to 0
    # (which raised ZeroDivisionError when every logprob was very negative).
    peak = max(lp_dict.values())
    if not math.isfinite(peak):
        # Non-finite input: fall back to the plain, unshifted computation.
        peak = 0.0

    weights = {tok: math.exp(lp - peak) for tok, lp in lp_dict.items()}
    total = sum(weights.values())

    # normalize & convert to percentage
    return {
        tok: round(100 * weight / total, decimals)
        for tok, weight in weights.items()
    }

# --- Updated simulation function ---
def run_single_simulation_with_logprobs(
    model=CHAT_MODEL,
    bios_pool=claude_bios,  # alternatives: paradoxical_bios, bios, extreme_bios
    item_bank=bfi_items,
):
    """Rate one random item as one random persona and return token logprobs.

    Args:
        model: Model identifier passed to the chat-completions endpoint.
        bios_pool: Sequence of biography strings to draw the persona from.
        item_bank: Mapping of trait name -> list of item statements.

    Returns:
        A dict with keys ``bio``, ``trait``, ``item``, ``response`` (int 1-5),
        ``logprobs`` ({token: logprob} for the top alternatives at the rating
        position) and ``probability`` (the same tokens as percentages).
        The percentages are renormalised over the returned top-5 tokens only,
        and those tokens can include non-rating tokens.

    Raises:
        ValueError: If the response carries no logprobs, or if none of the
            generated tokens is a valid rating from RESPONSE_SCALE.
    """
    trait = random.choice(list(item_bank.keys()))
    item = random.choice(item_bank[trait])
    bio = random.choice(bios_pool)

    prompt = (
        "You are simulating a participant in a psychological survey. "
        "You will be given a short biography that describes a fictional person. "
        "Pretend to be that person. Respond ONLY with a number from 1 to 5:\n"
        # The blank line below was missing, which glued the scale legend to
        # the biography ("...strongly agree.Biography: ...").
        "1 = strongly disagree, 2 = disagree, 3 = neither agree or disagree, 4 = agree, 5 = strongly agree.\n\n"
        f'Biography: "{bio}"\n'
        f'Statement: "{item}"\n'
        "Rating:"
    )

    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=2,  # leaves room for one leading whitespace/format token
        temperature=0,
        logprobs=True,
        top_logprobs=5,
    )

    # ensure we got logprobs back; `logprobs` itself may be None when the
    # provider ignores the parameter, so guard before touching `.content`
    choice_logprobs = resp.choices[0].logprobs
    lp_content = choice_logprobs.content if choice_logprobs is not None else None
    if not lp_content:
        raise ValueError("No logprobs returned in response")

    # find first valid rating token and its logprobs
    for token_data in lp_content:
        clean = token_data.token.strip()
        if clean in RESPONSE_SCALE:
            raw_lp = {t.token: t.logprob for t in token_data.top_logprobs}
            pct = lp_dict_to_percent(raw_lp, decimals=6)
            return {
                "bio": bio,
                "trait": trait,
                "item": item,
                "response": int(clean),
                "logprobs": raw_lp,
                "probability": pct,
            }

    raise ValueError("No valid rating token found in response")

# --- Smoke test: one logprobs simulation, with troubleshooting hints on failure ---
try:
    result = run_single_simulation_with_logprobs()
    print("Successful Response:")
    print(json.dumps(result, indent=2, ensure_ascii=False))
except Exception as err:
    # Any failure (network, auth, parsing) is reported together with a short
    # checklist instead of a traceback.
    print(
        f"Error: {str(err)}",
        "Verify:",
        f"1. Your CHAT_MODEL is set to '{CHAT_MODEL}'",
        "2. You're using the latest openai==1.12.0 client",
        "3. Your API key and CHAT_MODEL has permissions for logprobs",
        sep="\n",
    )


Successful Response:
{
  "bio": "I am a quantum uncertainty principle applied to consciousness. I exist in superposition until measured, then immediately change states. Observing me alters what I am.",
  "trait": "conscientiousness",
  "item": "I see myself as someone who tends to be disorganized.",
  "response": 2,
  "logprobs": {
    "2": -0.04445803537964821,
    "1": -3.1694581508636475,
    "3": -6.544457912445068,
    "-": -12.606958389282227,
    "**": -12.919458389282227
  },
  "probability": {
    "2": 95.652916,
    "1": 4.202695,
    "3": 0.143808,
    "-": 0.000335,
    "**": 0.000245
  }
}
