## Setup

In [1]:
import dotenv, os, openai, csv

# Directory scanned for the per-task-class `*.csv` example files.
EXAMPLES_DIR = 'examples'
# CSV file the trial responses are written to.
RESULTS_FILENAME = 'results.csv'

# Load environment variables (e.g. from a local .env file) first, so that
# OPENAI_API_KEY is available when it is read on the next line.
dotenv.load_dotenv()
# Shared OpenAI client used by prompt_gpt(); the key comes from the environment
# rather than being hard-coded in the notebook.
gpt = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def prompt_gpt(prompt: str, model: str = 'gpt-3.5-turbo', client=None) -> str:
    """Send a single-turn user prompt to the chat completions API and return the reply.

    Args:
        prompt: Text sent as the only message, with role 'user'.
        model: Chat model identifier. Defaults to 'gpt-3.5-turbo'; the bare
            name 'gpt-3.5' is not a model id offered by the API.
        client: Object exposing ``chat.completions.create``. When omitted, the
            module-level ``gpt`` client is used.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the message carries no content.
    """
    if client is None:
        client = gpt

    response = client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )

    content = response.choices[0].message.content
    # The API types `content` as optional; guard so a missing reply becomes an
    # empty response instead of an AttributeError on None.strip().
    return content.strip() if content else ""

def format_examples(examples: list[dict[str, str]]) -> str:
    """Render few-shot examples as consecutive ``Q: ...`` / ``A: ...`` lines.

    Args:
        examples: Dicts with a 'q' (question) and an 'a' (answer) entry.

    Returns:
        One ``Q:`` line followed by one ``A:`` line per example, each
        terminated by a newline; the empty string when there are no examples.
    """
    return "".join(
        f"Q: {pair['q']}\nA: {pair['a']}\n"
        for pair in examples
    )

## Load examples

Data is formatted in CSV files as follows:
```
q_1,a_1
q_2,a_2
...
q_n,a_n
```

In [2]:
examples_by_task_class = {}  # TODO add examples into CSVs (ideally 12 or 16 total)

# Each `<task class>.csv` in EXAMPLES_DIR supplies the examples for one task class.
# os.listdir() returns entries in arbitrary order, so sort them: the trial loop
# iterates this dict, and a stable order keeps the result rows reproducible.
for filename in sorted(os.listdir(EXAMPLES_DIR)):
    if not filename.endswith('.csv'):
        continue

    path = os.path.join(EXAMPLES_DIR, filename)
    task_class = filename.removesuffix('.csv')

    with open(path, newline='', encoding='utf-8') as f:
        # csv.reader yields [] for blank lines; skip them instead of failing
        # with IndexError on row[0].
        examples_by_task_class[task_class] = [
            {'q': row[0], 'a': row[1]} for row in csv.reader(f) if row
        ]

tasks_by_task_class = {  # TODO for each task class, (key, value) = (task class, answer)
    # NOTE(review): the trial loop inserts each value after "Q: " in the prompt,
    # so the values look like task questions rather than answers — confirm.
}

## Run trials

In [3]:
results = [["Task Class", "Case", "Response"]]

for task_class, examples in examples_by_task_class.items():
    # Raises KeyError if no task has been registered for this task class.
    task = tasks_by_task_class[task_class]

    # With an odd number of examples the second half holds the extra one.
    half_n = len(examples) // 2
    first_half = examples[:half_n]
    second_half = examples[half_n:]

    # (case label, few-shot examples shown before the task), in output order.
    cases = [
        # case 1: use n / 2 examples
        ("1a", first_half),                 # the first n / 2 examples
        ("1b", second_half),                # the other n / 2 examples
        # case 2: use all n examples
        ("2", examples),
        # case 3: use n / 2 examples, duplicated (for n total examples)
        ("3a", first_half + first_half),    # the first n / 2 examples, twice
        ("3b", second_half + second_half),  # the other n / 2 examples, twice
        # TODO other cases
    ]

    for case, case_examples in cases:
        case_prompt = format_examples(case_examples) + f"Q: {task}"
        results.append([task_class, case, prompt_gpt(case_prompt)])

# Write as UTF-8 (matching how the example files are read) so non-ASCII model
# output does not depend on the platform's default encoding.
with open(RESULTS_FILENAME, 'w', newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(results)