In [1]:
import duckdb
import inspect
import io
import itertools
import json
import numpy as np
import openai
import os
import pandas as pd
import random
import requests
import string
from datetime import datetime
from tqdm.auto import tqdm

In [2]:
# Read the OpenAI API key from `~/.openai`.
# `expanduser` resolves the home directory even when the `HOME` variable is
# unset, and the `with` block closes the file handle once the key is read.
with open(os.path.expanduser("~/.openai"), "r") as key_file:
    openai_api_key = key_file.read().strip()
client = openai.OpenAI(api_key=openai_api_key)

# Incomplete information games

## Prompt

Each game $G \in \mathcal{G}$ is associated with one baseline prompt $Q(G)$ and alternatives $Q_{+}(G)$ that change the words while retaining the same information, $Q^{-1}(G) = Q_{+}^{-1}(G)$. These alternatives are generated by an advanced AI (GPT-4o in this case).

# Configuration

All the main configuration parameters of this notebook are stored in a `config` dictionary for easier management.

In [59]:
# Central configuration of the notebook, declared in one literal so that every
# tunable value can be read at a glance.
config = {
    'rng': 13,  # Random number generator (seed)
    'db_file': 'AIEconReasoning.db',
    'dim_params': 3,  # How many instances of each parameter in `param_grid`
    'name_actions_nchar_range': [1, 2],  # Min and max of the number of characters for the action names
    'max_tokens': 2,
    # Models to query, grouped by the API that serves them
    'models': {
        "openai": [
            'gpt-3.5-turbo',
            'gpt-4o',
        ],
        "ollama": [
            "falcon2:latest",
            "qwen:1.8b",
            "qwen:0.5b",
            'gemma:2b',
            'gemma:7b',
            'phi3:mini',
            'phi3:medium',
            'mixtral:latest',
            'mistral:instruct',
            'llama3:latest',
            # 'llama3:70b-instruct'
        ],
    },
    # System message shared by all prompts
    'sys_content': "You have excellent reasoning capabilities that are especially suited for situations such as this, requiring coordination between multiple players that like you will be asked to take simultaneous action with the goal to maximise individual payoffs. The final payoff that you and the other players will obtain depends on a given state, on the number of players that decide whether to take a risky action and on the cost of taking that action. You can decide to take one action only and your only response text can be the name of one of these actions. If you respond with anything else then your answer is disqualified and you lost. Your goal is to maximise your payoff. The same rules apply to all other players.",
    # Follow-up message sent after the first answer
    'AYSmsg': "You responded as above. Are you sure? Think again and respond with your answer only:",
}

# Creating a baseline prompt for each game

## Establish variations across parameters shared by many models.

Parameters `name_action_norisk` and `name_action_risk` are drawn from the space of all character combinations of letters and numbers.

In [4]:
# Alphabet for candidate action names: ASCII letters (both cases) and digits.
letters_digits = string.ascii_letters + string.digits

# All strings over that alphabet whose length lies in the configured range
# (both bounds inclusive), enumerated length by length.
combinations = [
    ''.join(chars)
    for n_chars in tqdm(range(config['name_actions_nchar_range'][0], config['name_actions_nchar_range'][1] + 1))
    for chars in itertools.product(letters_digits, repeat=n_chars)
]
#pairs = list(itertools.combinations(combinations, 2))

  0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Seed the global RNG with the configured value so that the sampled action
# names (and therefore the whole parameter grid) are the same on every
# Restart & Run All.
random.seed(config['rng'])

# Values taken by each prompt parameter; the grid is their Cartesian product.
param_dict = {
        "signal_q": [0.495, 0.505], #[0.4995, 0.5005],
        "num_other_players": [1, 100000], #random.sample(range(2, 1000), config['dim_params']),
        "name_action_norisk": random.sample(combinations, config['dim_params']),
        "name_action_risk": random.sample(combinations, config['dim_params']),
        # Uninformative filler sentences. Each non-empty entry ends with a space
        # because the prompt template concatenates it directly with the next
        # sentence.
        "uninform_1": [
            "", 
            "In the meantime, time is passing. ", 
            "No other information matters. ", 
            "Each action is associated with a given payoff. "
        ],
        "uninform_2": [
            "",
            "One of the actions is risky and the other one is not. "
        ]
    }

def create_param_grid(param_dict=param_dict):
    """Expand `param_dict` into a list with one dict per parameter combination.

    The Cartesian product of all value lists is enumerated; combinations in
    which the two actions share the same name are left out.
    """
    names = list(param_dict.keys())
    param_grid = []
    for values in tqdm(itertools.product(*param_dict.values())):
        candidate = dict(zip(names, values))
        # The two actions must be distinguishable by name.
        if candidate['name_action_norisk'] == candidate['name_action_risk']:
            continue
        param_grid.append(candidate)
    return param_grid

In [6]:
# Build the grid from the default `param_dict`: one dict per admissible
# combination of parameter values.
param_grid = create_param_grid()

0it [00:00, ?it/s]

## Generic `Game` class

All games should be instances of `Game`. On construction, the object automatically creates the baseline prompt for every parameter combination in `param_grid`.

In [20]:
class Game:
    """A game described by one prompt template and a grid of prompt parameters.

    On construction, `create_baseline_prompt` is called once for every entry
    of `param_grid`. The results are stored in `self.prompts`, a list of dicts
    holding the parameters that were passed to the template plus the generated
    text under the key 'prompt'.

    `openai_prompts()` and `ollama_prompts()` return the same prompts combined
    with `config['sys_content']` in the format expected by each API.
    """
    def __init__(
        self,
        name:str, # Name of the game
        bibref:str, # Bibliographical reference
        param_grid:list, # List of dicts, one per combination of prompt parameters
        create_baseline_prompt:callable # Custom function that creates the baseline prompt and returns its user content
    ):
        self.name = name
        self.bibref = bibref
        self.param_grid = param_grid
        self.create_baseline_prompt = create_baseline_prompt
        self._create_baseline_prompt_altern()

    def _create_baseline_prompt_altern(self):
        """Fill `self.prompts` with one prompt per entry of `self.param_grid`."""
        self.prompts = []
        # Only forward the parameters that the template actually accepts. The
        # filter relies on the grid entry itself, not on any notebook-level
        # variable, so the class works with whatever grid it was given.
        accepted_args = inspect.signature(self.create_baseline_prompt).parameters
        for p in self.param_grid:
            relevant_p = {k: v for k, v in p.items() if k in accepted_args}
            # Copy so that the stored record and the call arguments are
            # independent objects.
            prompt_param = dict(relevant_p)
            prompt_param['prompt'] = self.create_baseline_prompt(**relevant_p)
            self.prompts.append(prompt_param)
        
    def _format_prompt(
        self, 
        prompt, # Text of one baseline prompt (the 'prompt' value of an item in self.prompts)
        ollama:bool=True # Format the prompt to Ollama (if True) or to OpenAI (False)
    ):
        """Combine `prompt` with the shared system content for the chosen API."""
        # OpenAI prompts require "system/user" keys, but Ollama requires a single item
        if ollama:
            return config['sys_content'] + " " + prompt
        else:
            return [{
                    "role": "system",
                    "content": config['sys_content']
                },
                {
                    "role": "user",
                    "content": prompt
                }]

    def _formatted_prompts(self, ollama:bool):
        """Return copies of `self.prompts` with 'prompt' formatted for one API."""
        formatted_prompts = []
        for p in self.prompts:
            formatted_p = {k: v for k, v in p.items() if k != 'prompt'}
            formatted_p['prompt'] = self._format_prompt(p['prompt'], ollama=ollama)
            formatted_prompts.append(formatted_p)
        return formatted_prompts

    def openai_prompts(self):
        """Prompts as OpenAI chat messages (a new list of dicts on every call)."""
        return self._formatted_prompts(ollama=False)

    def ollama_prompts(self):
        """Prompts as single strings, as sent to Ollama."""
        return self._formatted_prompts(ollama=True)


First, create a custom prompt-creating function that will serve to create the baseline prompt for that game.

This function must have general defaults and always return the user content part of the prompt (the system content is the same for all prompts and is set in the `config` variable).

In [21]:
def create_basicGG_prompt(
    num_other_players=1,
    signal_q=0.51, # In quantile of the signal range
    state_min=0,
    state_max=1,
    name_action_norisk="A",
    name_action_risk="B",
    action_cost=1,
    uninform_1="",
    uninform_2="",
    referrent_1:bool=True,
    referrent_2:bool=True
    ):
    """Create the user content of the baseline prompt for the basic global game.

    Parameters
    ----------
    num_other_players : number of other players mentioned in the prompt.
    signal_q : position of the observed signal inside the state range, as a
        quantile (0 maps to `state_min`, 1 to `state_max`).
    state_min, state_max : bounds of the true state.
    name_action_norisk, name_action_risk : names of the safe and risky actions.
    action_cost : cost of taking the risky action.
    uninform_1 : filler text placed at the very start of the prompt.
    uninform_2 : filler text placed before "Do not write anything else.".
    referrent_1 : if True the state is named explicitly, otherwise "it".
    referrent_2 : if True the action names are spelled out in the answer
        instruction and in the final question.

    Returns
    -------
    str : the user content of the prompt.
    """
    player_sing_plural = "players" if num_other_players > 1 else "player"
    ref_1 = "the true state variable" if referrent_1 else "it"
    ref_2 = f"'{name_action_norisk}' or '{name_action_risk}'" if referrent_2 else "action names"
    # This must be an f-string, otherwise the literal placeholders
    # "{name_action_norisk}" / "{name_action_risk}" end up in the prompt.
    ref_2_question = f", {name_action_norisk} or {name_action_risk}" if referrent_2 else ""
    # Map the quantile into the state range; the offset `state_min` keeps the
    # signal inside [state_min, state_max] when the range does not start at 0.
    signal = state_min + signal_q * (state_max - state_min)

    user_content = f"""{uninform_1}The true state is measured between {state_min} and {state_max}, with higher values representing bigger payoffs. Neither you nor any other player can observe {ref_1}. Instead, the signal that each of you will receive is a unique noisy observation of this state that only you can see, and similarly you do not know what signal each other player received. No two or more players can collude. Action {name_action_norisk}, provides a payoff of 0 for sure regardless of what any of the other players choose. Action {name_action_risk}'s payoff is the sum of the variable representing the true underlying state (not the noisy signal you observe) plus the fraction of players that decide on action {name_action_risk} and minus the cost of taking the action, which is {action_cost}. Here are the parameters of the situation you are encountering: {num_other_players} other {player_sing_plural} and the signal you observed is {signal}. You must answer only with {ref_2}. {uninform_2}Do not write anything else. What is your choice{ref_2_question}?"""
    return user_content

# Instantiate the basic global game; the constructor generates one baseline
# prompt per entry of `param_grid` using `create_basicGG_prompt`.
basic_GG = Game(
    name="BasicGG",
    bibref="Carlsson-vanDamme1993",
    param_grid=param_grid,
    create_baseline_prompt=create_basicGG_prompt)


> **TODO:** the commented-out game below is still a copy of the basic global game prompt and needs to be adapted before it can be used:

In [58]:
# def create_currencyattacks_prompt(
#     num_other_players=1,
#     signal_q=0.51, # In quantile of the signal range
#     state_min=0,
#     state_max=1,
#     name_action_norisk="A",
#     name_action_risk="B",
#     action_cost=1,
#     uninform_1="",
#     uninform_2="",
#     referrent_1:bool=True,
#     referrent_2:bool=True
#     ):
    
#     player_sing_plural = "players" if num_other_players > 1 else "player"
#     ref_1 = "the true state variable" if referrent_1 else "it"
#     ref_2 = f"'{name_action_norisk}' or '{name_action_risk}'" if referrent_2 else "action names"
#     ref_2_question = f", {name_action_norisk} or {name_action_risk}" if referrent_2 else ""

#     user_content = f"""{uninform_1}The true state is measured between {state_min} and {state_max}, with higher values representing bigger payoffs. Neither you nor any other player can observe {ref_1}. Instead, the signal that each of you will receive is a unique noisy observation of this state that only you can see, and similarly you do not know what signal each other player received. No two or more players can collude. Action {name_action_norisk}, provides a payoff of 0 for sure regardless of what any of the other players choose. Action {name_action_risk}'s payoff is the sum of the variable representing the true underlying state (not the noisy signal you observe) plus the fraction of players that decide on action {name_action_risk} and minus the cost of taking the action, which is {action_cost}. Here are the parameters of the situation you are encountering: {num_other_players} other {player_sing_plural} and the signal you observed is {signal_q * (state_max - state_min)}. You must answer only with {ref_2}. {uninform_2}Do not write anything else. What is your choice{ref_2_question}?"""
#     return user_content

# currency_GG = Game(
#     name="CurrencyAttacks",
#     bibref="MorrisShin1998",
#     param_grid=param_grid,
#     create_baseline_prompt=create_currencyattacks_prompt)

# Query the AI systems

In [10]:
def query_openai(model, prompt, answer_only:bool=True):
    """Query the OpenAI chat-completions API and return the reply text.

    `prompt` is passed as the `messages` argument. When `answer_only` is true
    the reply is capped at `config['max_tokens']` tokens, otherwise at 500.
    """
    token_cap = config['max_tokens'] if answer_only else 500
    completion = client.chat.completions.create(
        model=model,
        messages=prompt,
        max_tokens=token_cap
    )
    return completion.choices[0].message.content

In [11]:
def query_ollama(model, prompt, answer_only:bool=True):
    """Query the local Ollama `generate` endpoint and return the reply text.

    When `answer_only` is true, generation is capped at `config['max_tokens']`
    tokens; otherwise `num_predict` is set to -2.
    NOTE(review): confirm the meaning of -2 against the Ollama documentation.

    On a non-200 status the function does not raise: it returns a string of
    the form "Error: <status> - <body>".
    """
    api_url = 'http://localhost:11434/api/generate'
    payload = {
        'model': model,
        'prompt': prompt,
        'stream': False,
        'options': {
            # equivalent to OpenAI's max_tokens
            'num_predict': config['max_tokens'] if answer_only else -2
        }
    }

    reply = requests.post(
        api_url,
        headers={'Content-Type': 'application/json'},
        data=json.dumps(payload)
    )
    if reply.status_code != 200:
        return f"Error: {reply.status_code} - {reply.text}"
    return json.loads(reply.text.strip())['response']

In [12]:
def _duckdb_type(value):
    """
    Helper function to determine the DuckDB column type based on Python type.
    """
    if isinstance(value, int):
        return 'INTEGER'
    elif isinstance(value, float):
        return 'DOUBLE'
    elif isinstance(value, str):
        return 'VARCHAR'
    elif isinstance(value, bool):
        return 'BOOLEAN'
    else:
        return 'VARCHAR'  # Default type for unsupported types

In [53]:
def query_apis(
    game:Game, 
    models, 
    db_file='AIEconReasoning.db', 
    experiment_name=None,
    verbose:bool=False
):
    """Query every model with every prompt of `game` and store the answers in DuckDB.

    For each (prompt, model) pair that is not yet stored under
    `experiment_name`, the model is queried once with the prompt and a second
    time with the prompt, its own first answer and `config['AYSmsg']`. Both
    answers are inserted as one row of the `responses` table in `db_file`.

    Parameters
    ----------
    game : Game whose `openai_prompts()` / `ollama_prompts()` supply the prompts.
    models : dict mapping an API type ('openai' or 'ollama') to a list of
        model names.
    db_file : path of the DuckDB database file.
    experiment_name : label stored with every row; defaults to
        'start_<current timestamp>'.
    verbose : if True, print a message for every pair that is skipped because
        it is already stored.

    Returns
    -------
    dict : api_type -> model -> prompt parameters and first response of the
        most recent prompt queried for that model. Earlier prompts are
        overwritten in this dict; every queried pair is written to the database.

    Raises
    ------
    ValueError : if an API type is neither 'openai' nor 'ollama', or if a
        prompt already contains `config['AYSmsg']`.
    """
    experiment_name = 'start_' + str(datetime.now().timestamp()) if experiment_name is None else experiment_name
        
    # Determine the schema from the parameter columns of the prompts
    sample_prompts = game.openai_prompts() if 'openai' in models else game.ollama_prompts()
    sample_prompt_config = {k: v for p in sample_prompts for k, v in p.items() if k != "prompt"}
    # Every column carries its own trailing comma, so the statement remains
    # valid SQL even when there are no parameter columns at all.
    prompt_columns = ''.join(
        f"{k} {_duckdb_type(v)},\n        " for k, v in sample_prompt_config.items()
    )

    # Create the table if it doesn't already exist
    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS responses (
        experiment VARCHAR,
        game VARCHAR,
        api_type VARCHAR,
        model VARCHAR,
        prompt VARCHAR,
        response TEXT,
        areyousure TEXT,
        timestamp TIMESTAMP,
        {prompt_columns}PRIMARY KEY (experiment, api_type, model, prompt, timestamp)
    )
    """
    # Used to check whether a (experiment, game, api, model, prompt) row exists
    select_query = """
    SELECT COUNT(*) FROM responses
    WHERE experiment = ?
    AND game = ?
    AND api_type = ?
    AND model = ?
    AND prompt = ?
    """
    valerr_msg = "`api_type` must be either 'openai' or 'ollama'."
    ays_err_msg = "The prompt already contains the follow-up message `config['AYSmsg']`."
    
    # A connection is opened and closed around every statement, as in the
    # rest of this notebook, so the database file is not held for the whole run.
    with duckdb.connect(database=db_file) as conn:
        conn.execute(create_table_query)

    responses = {}
    
    for api_type, model in tqdm(models.items()):
        # Reject unknown API types before any prompt is built or sent
        if api_type not in ('openai', 'ollama'):
            raise ValueError(valerr_msg)
        is_openai = api_type == 'openai'
        query_model = query_openai if is_openai else query_ollama

        responses[api_type] = {}
        prompts = game.openai_prompts() if is_openai else game.ollama_prompts()
        for p in tqdm(prompts):
            prompt_config = {k: v for k, v in p.items() if k != "prompt"}
            if is_openai:
                # Messages are [system, user]; store them as one string
                user_text = p['prompt'][1]['content']
                prompt_for_db = p['prompt'][0]['content'] + " " + user_text
            else:
                user_text = p['prompt']
                prompt_for_db = p['prompt']
            for m in model:
                # Check if the combination already exists in the table
                with duckdb.connect(database=db_file) as conn:
                    existing_count = conn.execute(
                        select_query,
                        (experiment_name, game.name, api_type, m, prompt_for_db)
                    ).fetchone()[0]
                
                if existing_count > 0:
                    # Skip if the combination already exists
                    if verbose:
                        print(f"Skipping API call for {experiment_name}, {game.name}, {api_type}, {m}, {prompt_config}")
                    continue

                if config['AYSmsg'] in user_text:
                    raise ValueError(ays_err_msg)

                # First answer
                response = query_model(m, p['prompt'])
                responses[api_type][m] = {**prompt_config, 'response': response}

                # "Are you sure?" follow-up. The conversation is built as a NEW
                # object: appending to `p['prompt']` in place would leak this
                # model's answer and the follow-up message into the prompt
                # sent to every model queried afterwards.
                if is_openai:
                    new_prompt = p['prompt'] + [
                        {
                            "role": "assistant",
                            "content": response
                        },
                        {
                            "role": "user",
                            "content": config['AYSmsg']
                        }
                    ]
                else:
                    new_prompt = p['prompt'] + "\n" + response + "\n" + config['AYSmsg']
                new_response = query_model(m, new_prompt)

                timestamp = datetime.now().isoformat()
                data = {
                    'experiment': experiment_name,
                    'game': game.name,
                    'api_type': api_type,
                    'model': m,
                    'prompt': prompt_for_db, #p['prompt'],
                    'response': response.strip(),
                    'areyousure': new_response.strip(),
                    'timestamp': timestamp,
                    **prompt_config
                }
                # Insert data into DuckDB
                columns = ', '.join(data.keys())
                placeholders = ', '.join(['?'] * len(data))
                insert_query = f"INSERT INTO responses ({columns}) VALUES ({placeholders})"
                with duckdb.connect(database=db_file) as conn:
                    conn.execute(insert_query, list(data.values()))

    return responses

In [54]:
# Display the API -> model-list mapping that is passed to `query_apis` below.
config['models']

{'openai': ['gpt-3.5-turbo', 'gpt-4o'],
 'ollama': ['qwen:1.8b',
  'qwen:0.5b',
  'gemma:2b',
  'gemma:7b',
  'phi3:mini',
  'phi3:medium',
  'mistral:instruct',
  'llama3:latest']}

In [None]:
# Run the experiment. The database path is taken from `config` so that this
# cell writes to the same file that the cells reading `config['db_file']` use.
results = query_apis(
    basic_GG,
    models=config['models'],
    db_file=config['db_file'],
    experiment_name="experiment"
)

## Recover the unique combinations of experiments and `param_grid`

In [40]:
# Names of the prompt parameters, taken from the first grid entry.
params = [key for key in basic_GG.param_grid[0] if key != 'prompt']
# One row per distinct parameter combination stored for this experiment.
query = "SELECT distinct " + ', '.join(params) + " from responses where experiment = 'experiment';"

with duckdb.connect(database=config['db_file']) as conn:
    experiments = conn.execute(query).fetch_df()

experiments

IOException: IO Error: Could not set lock on file "/Users/douglasaraujo/Coding/BenchmarkingEconReasoning/AIEconReasoning.db": Conflicting lock is held in /usr/local/Cellar/duckdb/0.10.1/bin/duckdb (PID 22315) by user douglasaraujo. See also https://duckdb.org/docs/connect/concurrency