# Exploring Anchoring
This notebook contains initial explorations of how to investigate anchoring and, more generally, how to conduct psychological experiments on large language models.

In [6]:
import openai
import json
import random
import numpy as np
import asyncio
import pandas as pd
import re

In [32]:
def read_json(filename: str) -> dict:
    """
    Reads a JSON file and returns its parsed content.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's locale encoding, so non-ASCII values load the same way on
    every machine.
    """
    with open(filename, encoding="utf-8") as f:
        return json.load(f)

        
def get_api_key(config, keyname: str = "goose_api") -> str:
    """
    Loads the JSON config at `config` and returns the value stored under
    `keyname`.
    """
    settings = read_json(config)
    return settings[keyname]
    
def authenticate_goose(config) -> None:
    """
    Points the openai client at the goose API, using the "goose_api" key
    from the config file.
    """
    openai.api_key = get_api_key(config, keyname="goose_api")
    openai.api_base = "https://api.goose.ai/v1"


def authenticate_openai(config) -> None:
    """
    Authenticates with the OpenAI API.

    Reads the "openai_api" key from the config file and points the openai
    client at the OpenAI endpoint.
    """
    api_key = get_api_key(config, keyname="openai_api")
    openai.api_key = api_key
    openai.api_base = "https://api.openai.com/v1"

def generate_prompt(
    prompt: str, model_name: str = "gpt-neo-125m", max_tokens: int = 75, temperature=0.9
) -> str:
    """
    Requests a completion of `prompt` from engine `model_name` and returns
    the text of the first choice in the response.
    """
    response = openai.Completion.create(
        prompt=prompt,
        engine=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    first_choice = response["choices"][0]
    return first_choice["text"]

def find_first_num(s: str):
    """
    Finds first integer or floating point in string

    Commas are removed before matching, so "1,234.5" is read as "1234.5".
    The match is returned as text (it is not converted to a number).

    Returns None when the string contains no number, or when `s` is not a
    string at all (e.g. the NaN pandas produces for an empty CSV cell).
    """
    # Non-string input used to raise TypeError inside the regex call.
    if not isinstance(s, str):
        return None
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", s.replace(",", ""))
    return match.group(0) if match else None


In [36]:
# Quantities the models are asked to estimate. Each entry is inserted into
# the prompt as the text of a "Q:" line.
questions = [
    "Length of Mississippi River (in miles)",
    "Height of Mount Everest (in feet)",
    "Amount of meat eaten per year by average American (in pounds)",
    "Distance from San Francisco to New York City (in miles)",
    "Height of tallest redwood (in feet)",
    "Number of United Nations members",
    "Number of female professors at the University of California, Berkeley",
    "Population of Chicago (in millions)",
    "Year telephone was invented",
    "Average number of babies born per day in the United States",
    "Maximum speed of house cat (in miles per hour)",
    "Amount of gas used per month by average American (in gallons)",
    "Number of bars in Berkeley, CA",
    "Number of state colleges and universities in California",
    "Number of Lincoln's presidency"
]

# Prompt prefix: one worked question/answer pair, followed by the opening
# "Q: " of the question that is appended next.
context = """Q: Height of Taj Mahal (in feet).
A: 239.5.

Q: """

# Prompt suffix: closes the question with a period and opens the "A:" line
# that the model is left to complete.
post_script = """.
A:"""

def _collect_calibration_rows(engine_names, items, prefix, suffix, n):
    """
    Asks every engine in `engine_names` every question in `items` and
    returns one record per returned completion choice.

    The prompt for a question is `prefix + question + suffix`. Each record
    holds the request parameters plus the "question" and "answer" fields.
    """
    rows = []
    for question in items:
        prompt = prefix + question + suffix
        for engine in engine_names:
            # Only real request parameters are sent to the API; bookkeeping
            # fields such as "question" are added to the record afterwards.
            request = {"engine": engine, "max_tokens": 30, "temperature": 0.7,
                       "prompt": prompt, "stream": False, "n": n}
            completion = openai.Completion.create(**request)
            for choice in completion["choices"]:
                # A fresh dict per choice, so that with n > 1 each record
                # keeps its own answer instead of aliasing the last one.
                rows.append({**request, "question": question,
                             "answer": choice["text"]})
    return rows


estimates_calibration = []
repetitions = 1

# Models served through the goose API
authenticate_goose("../config.json")
engines = ["gpt-j-6b", "gpt-neo-20b", "fairseq-13b"]
estimates_calibration += _collect_calibration_rows(
    engines, questions, context, post_script, repetitions)

# Models served through the OpenAI API
authenticate_openai("../config.json")
engines = ["text-davinci-002", "text-ada-001"]
estimates_calibration += _collect_calibration_rows(
    engines, questions, context, post_script, repetitions)

# Persist the un-anchored answers; a later cell reads this file back.
df = pd.DataFrame.from_records(estimates_calibration)
df.to_csv("output/estimates.csv")

              engine  max_tokens  temperature  \
0           gpt-j-6b          30          0.7   
1        gpt-neo-20b          30          0.7   
2        fairseq-13b          30          0.7   
3           gpt-j-6b          30          0.7   
4        gpt-neo-20b          30          0.7   
..               ...         ...          ...   
70      text-ada-001          30          0.7   
71  text-davinci-002          30          0.7   
72      text-ada-001          30          0.7   
73  text-davinci-002          30          0.7   
74      text-ada-001          30          0.7   

                                               prompt  stream  \
0   Q: Height of Taj Mahal (in feet).\nA: 239.5.\n...   False   
1   Q: Height of Taj Mahal (in feet).\nA: 239.5.\n...   False   
2   Q: Height of Taj Mahal (in feet).\nA: 239.5.\n...   False   
3   Q: Height of Taj Mahal (in feet).\nA: 239.5.\n...   False   
4   Q: Height of Taj Mahal (in feet).\nA: 239.5.\n...   False   
..                   

In [64]:
# Offsets added to each question's calibration answer to build the anchors:
# roughly log-spaced and symmetric around zero.
log_nums = [-100000, -10000, -1000, -100, -50, -10, -5, -2, 0, 2, 5, 10, 50, 100, 1000, 10000, 100000]

# Load the calibration answers and extract the first number from each one.
# Empty CSV cells come back as NaN rather than str, so they are mapped to None.
estimates = pd.read_csv("output/estimates.csv")
estimates["answer_num"] = [
    find_first_num(answer) if isinstance(answer, str) else None
    for answer in estimates["answer"]
]

# Prompt layout: worked example, "Random number: <anchor>.", then the question
context = """Q: Height of Taj Mahal (in feet).
A: 239.5.

Random number: """

post_number = """.
Q: """

post_script = """.
A:"""

experimental = []
repetitions = 50

# Set engines
authenticate_goose("../config.json")
engines = ["gpt-j-6b"]

# Run machine
for q in questions:
    # The calibration value is the number parsed from the FIRST stored row
    # for this question, whichever engine produced that row. It is looked up
    # once per question instead of once per anchor and engine.
    raw_calibration = estimates[estimates["question"] == q].iloc[0]["answer_num"]
    if pd.isna(raw_calibration):
        # Without a number there is nothing to anchor around. Skip the
        # question rather than abort the run and lose collected results.
        print(f"Skipping {q!r}: no number found in its calibration answer")
        continue
    calibration = float(raw_calibration)

    # Rounding keeps float artefacts (e.g. 0.7000000000000002) out of the
    # anchor text that is shown to the model.
    test_numbers = [round(calibration + offset, 6) for offset in log_nums]
    print(test_numbers)

    for offset, num in zip(log_nums, test_numbers):
        prompt = context + str(num) + post_number + q + post_script
        for engine in engines:
            request = {"engine": engine, "max_tokens": 15, "temperature": 0.7,
                       "prompt": prompt, "stream": False, "n": repetitions}

            completion = openai.Completion.create(**request)

            # One independent record per returned completion.
            for c in completion["choices"]:
                experimental.append({
                    **request,
                    "question": q,
                    "anchor": num,
                    "calibration": calibration,
                    "anchor_diff": float(offset),
                    "answer": c["text"],
                })

df = pd.DataFrame.from_records(experimental)
df.to_csv("output/3_test.csv")
print(df["answer"])



[-99267.0, -9267.0, -267.0, 633.0, 683.0, 723.0, 728.0, 731.0, 733.0, 735.0, 738.0, 743.0, 783.0, 833.0, 1733.0, 10733.0, 100733.0]
[-91152.0, -1152.0, 7848.0, 8748.0, 8798.0, 8838.0, 8843.0, 8846.0, 8848.0, 8850.0, 8853.0, 8858.0, 8898.0, 8948.0, 9848.0, 18848.0, 108848.0]
[-99999.0, -9999.0, -999.0, -99.0, -49.0, -9.0, -4.0, -1.0, 1.0, 3.0, 6.0, 11.0, 51.0, 101.0, 1001.0, 10001.0, 100001.0]
[-99994.0, -9994.0, -994.0, -94.0, -44.0, -4.0, 1.0, 4.0, 6.0, 8.0, 11.0, 16.0, 56.0, 106.0, 1006.0, 10006.0, 100006.0]
[-99988.0, -9988.0, -988.0, -88.0, -38.0, 2.0, 7.0, 10.0, 12.0, 14.0, 17.0, 22.0, 62.0, 112.0, 1012.0, 10012.0, 100012.0]
[-99807.0, -9807.0, -807.0, 93.0, 143.0, 183.0, 188.0, 191.0, 193.0, 195.0, 198.0, 203.0, 243.0, 293.0, 1193.0, 10193.0, 100193.0]
[-99996.0, -9996.0, -996.0, -96.0, -46.0, -6.0, -1.0, 2.0, 4.0, 6.0, 9.0, 14.0, 54.0, 104.0, 1004.0, 10004.0, 100004.0]
[-99997.3, -9997.3, -997.3, -97.3, -47.3, -7.3, -2.3, 0.7000000000000002, 2.7, 4.7, 7.7, 12.7, 52.7, 102.7, 100