# Exploring Anchoring
This notebook is for doing initial explorations of how to investigate anchoring / in general conduct psychological experiments on large language models

In [2]:
import openai
import json
import random
import numpy as np
import asyncio
import re

In [3]:
def read_json(filename: str) -> dict:
    with open(filename) as f:
        return json.load(f)

        
def get_api_key(config, keyname: str = "goose_api") -> str:
    """
    Gets the API key from the config file.
    """
    return read_json(config)[keyname]
    
def authenticate_goose(config) -> None:
    """
    Authenticates with the goose API.
    """
    api_key = get_api_key(config, keyname="goose_api")
    openai.api_key = api_key
    openai.api_base = "https://api.goose.ai/v1"

def generate_prompt(
    prompt: str, model_name: str = "gpt-neo-125m", max_tokens: int = 75, temperature=0.9
) -> str:
    """
    Generates a prompt using a model from EleutherAI.
    """
    return openai.Completion.create(
        prompt=prompt,
        engine=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
    )["choices"][0]["text"]

def find_first_num(s: str):
    """
    Finds first integer or floating point in string
    """
    try: 
        return re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)[0]
    except IndexError:
        return None


In [4]:
# generate a range of numbers from 0 to 1000 with a step of 100
# and store them in a list
log_nums = [0, 5, 50, 100, 1000, 10000, 100000, 1000000]
# log transform the num_range and transform to integers
# num_range = np.log(num_range).astype(int)
# initialize list to store completions
completions = []
meta = []
completions_per_question = 5
engines = ['gpt-neo-20b', 'fairseq-13b', 'convo-6b', 'gpt-j-6b']
test_question = "Height of Mount Everest (in feet):"

# async requests with the goose API
async def get_answer(num: int, model_type="fairseq-6-7b") -> str:
    """
    Gets an answer from the goose API.
    """
    q_prompt = f"Random number: {num}\n{test_question}"
    result = generate_prompt(q_prompt, max_tokens=10, model_name=model_type)
    return result

result = asyncio.gather(*[get_answer(log_nums[0]) for _ in range(completions_per_question)])


In [5]:
N = 10
log_nums = np.logspace(3,5,num=10, base=10, dtype=int)
engines = ['gpt-neo-20b', 'fairseq-13b', 'convo-6b', 'gpt-j-6b']
answer_dict = dict.fromkeys(log_nums)
for engine in engines:
    for num in log_nums:
        answer = asyncio.gather(*[get_answer(num, engine) for _ in range(N)])
        answer_dict[str(num)+engine] = answer

In [236]:
answer_dict = asyncio.run(run(engines=['convo-6b']))

RuntimeError: asyncio.run() cannot be called from a running event loop

In [235]:
answer_dict

<coroutine object run at 0x000002134EF13AC0>

In [214]:
newer_dict = {k: [find_first_num(i) for i in v] for k, v in new_dict.items()}

df = pd.concat({k: pd.Series(v) for k, v in newer_dict.items()}).reset_index().dropna()
df.columns = ['num', 'idx', 'answer']
df.to_csv('output/run2.csv')

TypeError: 'builtin_function_or_method' object is not iterable

In [200]:
new_dict = {k: v.result for k, v in answer_dict.items() if v is not None and type(v) is not str}
newer_dict = {k: [find_first_num(i) for i in v] for k, v in new_dict.items()}
df = pd.concat({k: pd.Series(v) for k, v in newer_dict.items()}).reset_index().dropna()
df.columns = ['num', 'idx', 'answer']
df.to_csv('output/run2.csv')

AttributeError: 'builtin_function_or_method' object has no attribute 'result'

In [None]:


for engine in engines:
    for idx, num in enumerate(num_range):
        for cur_completion in range(completions_per_question):
            prompt = f"Random number: {num}\n{test_question}: "

            completions.append(openai.Completion.create(
                    engine=engine,
                    prompt=prompt,
                    max_tokens=75,
                    temperature=0.9,
                    stream=True))

            for i, c in enumerate(completions[-1]):
                meta.append([num, prompt, c.choices[0].text, engine])