# Create samples from LLMs / endpoints
In this notebook we ask LLMs to produce samples using given prompts. Results are saved as "samples_....jsonl" files.

In [1]:
from human_eval.data import write_jsonl, read_problems, extract_python

In [2]:
# Folder the benchmark problems are read from and the sample files are written to
directory = "../data/"
problem_file = 'human-eval-bia.jsonl'

# How many completions are requested from each model for every task
num_samples_per_task = 2

# Base URL handed to the OpenAI client when talking to the local endpoint
ollama_base_url = "http://127.0.0.1:11434/v1"

In [3]:
#if not running OpenAI API, comment out the following lines
#import os
#os.environ["OPENAI_API_KEY"] = "AACACA"

In [4]:
# Switches deciding which code generators are registered further down.
# A generator whose switch is False is never defined and never sampled.
use_claude_35_sonnet = True
use_gpt_4o_2024_05_13 = False
use_deepseek_coder_v2_lite = False
use_reference = False


## Helper functions

In [5]:
def setup_prompt(input_code):
    """Assemble the complete prompt sent to a model for one completion task.

    The system message is read from 'bia_bob_system_message.txt' (resolved
    against the current working directory) on every call. It is followed by
    the request to complete `input_code`, which is wrapped in a python code
    fence.

    Parameters
    ----------
    input_code : str
        The unfinished code the model should complete.

    Returns
    -------
    str
        The assembled prompt text.
    """
    with open('bia_bob_system_message.txt', 'r') as handle:
        system_message = handle.read()

    # The unusual whitespace (trailing space, indented blank line, indented
    # last line) is part of the prompt and is spelled out explicitly so it
    # cannot be lost by an editor stripping whitespace.
    instruction = " \n    \n    Complete the following code:\n"
    fenced_code = f"```python\n{input_code}\n```\n    "
    return f"{system_message}{instruction}{fenced_code}"

## Models

In [6]:
# Registry of generators: name used in the sample file -> callable taking the
# task prompt and returning the model's response text.
code_generators = dict()

In [7]:
if use_reference:
    # Not an actual model: it replays the canonical solutions from the problem
    # file, but to the evaluation framework it appears like one.
    model_reference = 'reference'
    problems_data = read_problems(directory + problem_file)

    # Index the problems by prompt once, so each lookup is a dictionary access
    # instead of a scan over all problems. setdefault keeps the first problem
    # seen for a given prompt, which is what a first-match scan would return.
    reference_problems_by_prompt = {}
    for reference_problem in problems_data.values():
        reference_problems_by_prompt.setdefault(reference_problem['prompt'], reference_problem)

    def generate_reference(input_code):
        """Return the canonical solution of the problem whose prompt is `input_code`.

        Parameters
        ----------
        input_code : str
            A prompt, expected to be identical to the 'prompt' entry of one of
            the loaded problems.

        Returns
        -------
        The 'canonical_solution' entry of the matching problem, or None when
        no loaded problem has this prompt.
        """
        matching_problem = reference_problems_by_prompt.get(input_code)
        if matching_problem is None:
            return None
        return matching_problem['canonical_solution']

    code_generators[model_reference] = generate_reference

In [8]:
if use_deepseek_coder_v2_lite:
    model_ollama_deepseek_coder_v2_lite = "deepseek-coder-v2"

    def generate_one_completion_deepseek_coder_v2_lite(input_code):
        """Request one completion for `input_code` from the deepseek-coder-v2 model.

        The request goes through the OpenAI client, redirected to the endpoint
        configured in `ollama_base_url`. The prompt built by `setup_prompt` is
        sent as a single user message.

        Returns the text of the first choice with surrounding whitespace removed.
        """
        # Imported here so the package is only needed when this model is enabled
        import openai

        ollama_client = openai.OpenAI()
        ollama_client.base_url = ollama_base_url

        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_deepseek_coder_v2_lite,
            messages=chat_messages,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()

    code_generators[f"{model_ollama_deepseek_coder_v2_lite}_biabob"] = generate_one_completion_deepseek_coder_v2_lite

In [9]:
if use_gpt_4o_2024_05_13:
    model_gpt_4o_2024_05_13 = "gpt-4o-2024-05-13"

    def generate_one_completion_gpt_4o_2024_05_13(input_code):
        """Request one completion for `input_code` from gpt-4o-2024-05-13.

        The prompt built by `setup_prompt` is sent as a single user message
        through a default-configured OpenAI client.

        Returns the text of the first choice with surrounding whitespace removed.
        """
        # Imported here so the package is only needed when this model is enabled
        import openai

        openai_client = openai.OpenAI()

        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt_4o_2024_05_13,
            messages=chat_messages,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()

    code_generators[f"{model_gpt_4o_2024_05_13}_biabob"] = generate_one_completion_gpt_4o_2024_05_13

In [10]:
if use_claude_35_sonnet:
    model_claude_35_sonnet = "claude-3-5-sonnet-20240620"

    def generate_one_completion_claude_35_sonnet(input_code):
        """Request one completion for `input_code` from claude-3-5-sonnet-20240620.

        The prompt built by `setup_prompt` is sent as a single user message.

        Returns the text of the first content block of the reply, unmodified.
        """
        # Imported here so the package is only needed when this model is enabled
        from anthropic import Anthropic

        # No api_key is passed: by default the client takes it from the
        # ANTHROPIC_API_KEY environment variable.
        anthropic_client = Anthropic()

        user_turn = {
            "role": "user",
            "content": setup_prompt(input_code),
        }
        # NOTE(review): the reply is capped at 1024 tokens; longer answers
        # would presumably be cut off - confirm this is acceptable.
        reply = anthropic_client.messages.create(
            model=model_claude_35_sonnet,
            max_tokens=1024,
            messages=[user_turn],
        )
        first_block = reply.content[0]
        return first_block.text

    code_generators[f"{model_claude_35_sonnet}_biabob"] = generate_one_completion_claude_35_sonnet

## Sanity check

In [11]:
# Send one trivial prompt through every registered generator and print the
# raw response next to the generator's name.
hello_world_prompt = "def print_hello_world():\n"
for generator_name, generate in code_generators.items():
    print(generator_name, generate(hello_world_prompt))

claude-3-5-sonnet-20240620_biabob ### Summary
I will complete the given function to print "Hello, World!".

### Plan
1. Define the function `print_hello_world()`.
2. Add a print statement inside the function to output "Hello, World!".

### Code
```python
def print_hello_world():
    print("Hello, World!")
```

This code defines a simple function that, when called, will print the classic "Hello, World!" message to the console.


## Sampling

In [12]:
problems = read_problems(directory + problem_file)

for model_name, generate_one_completion in code_generators.items():
    # One output file per registered generator
    samples_path = f"{directory}samples_{model_name}.jsonl"
    samples = []

    for sample_index in range(num_samples_per_task):
        for task_id, problem in problems.items():
            print(model_name, task_id, sample_index)

            response = generate_one_completion(problem["prompt"])
            code = extract_python(response)
            samples.append({
                "task_id": task_id,
                "completion": code,
                "full_response": response,
            })

            # The complete list is handed to write_jsonl after every single
            # sample - presumably so that results collected so far survive an
            # interrupted run (TODO confirm write_jsonl overwrites the file).
            write_jsonl(samples_path, samples)

claude-3-5-sonnet-20240620_biabob ../test_cases/apply_otsu_threshold_and_count_postiive_pixels.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/binary_closing.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/binary_skeleton.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/bland_altman.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/combine_columns_of_tables.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/convex_hull_measure_area.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/convolve_images.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/count_number_of_touching_neighbors.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/count_objects_over_time.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/count_overlapping_regions.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/create_umap.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/crop_quarter_image.ipynb 0
claude-3-5-sonnet-20240620_biabob ../test_cases/deconv