# Create samples from LLMs / endpoints
In this notebook we ask LLMs to produce code samples for the given prompts. Results are saved as `samples_<model_name>.jsonl` files.

In [1]:
from human_eval.data import write_jsonl, read_problems, extract_python

In [2]:
# Folder containing the problem file; the generated sample files are written here as well.
directory = "../data/"
# Name of the problem file inside `directory`, loaded with read_problems().
problem_file = 'human-eval-bia.jsonl'
# How many completions are requested per task for every enabled model.
num_samples_per_task = 10
# Base URL handed to the OpenAI client for all ollama-served models.
ollama_base_url = "http://127.0.0.1:11434/v1"

In [3]:
#if not running OpenAI API, comment out the following line
#import os
#os.environ["OPENAI_API_KEY"] = "AACACA"

In [4]:
# Switches selecting the models to sample from. Each flag is tested by one
# `if use_...:` cell further down, which registers the matching generator
# function in `code_generators`; only registered generators are sampled.
# `use_reference` does not enable a model: it replays the canonical solutions
# stored in the problem file.
use_reference = False
use_gpt35 = False
use_gpt4 = False
use_gpt4_20240409 = False
use_gpt_4o_2024_05_13 = False
use_gpt_4o_2024_08_06 = False
use_gpt_4omini_2024_07_18 = False
use_blablador_mistral = False
use_gemini_pro = False
use_gemini_15_pro = False
use_gemini_15_flash = False
use_gemini_ultra = False
use_claude = False
use_claude_35_sonnet = False
use_ollama_mixtral8x7b = False
use_ollama_mixtral8x22b = False
use_ollama_llama3 = False
use_ollama_codegemma_instruct7b = False
use_ollama_codegemma_code7b = False
use_ollama_codegemma_code2b = False
use_ollama_codellama_instruct70b = False
use_ollama_codellama_code70b = False
use_ollama_codellama_python70b = False
use_ollama_command_r_plus = False
use_ollama_phi3 = False
use_ollama_wizardlm2 = False
use_ollama_mistral_nemo = False
# Also served through ollama (its cell uses `ollama_base_url`).
use_deepseek_coder_v2_lite = True


## Helper functions

In [5]:
def setup_prompt(input_code):
    """Build the instruction text that is sent to every model.

    The fixed instructions come first, followed by `input_code` wrapped in a
    python-tagged markdown code fence. `input_code` is inserted verbatim.
    """
    # Fixed part of the prompt; the indentation inside the text is part of it.
    instructions = """Complete the following code. 
    First, write down a plan as comments how to solve the problem step-by-step.
    Then, import the python libraries you think you will use.
    Then, write the function you were asked for.
    Write python code only.
    Do NOT write any code for testing the function.
    Return the complete code including my code.
"""
    # Blank line, fenced code, and the trailing indented line of the template.
    return f"{instructions}\n```python\n{input_code}\n```\n    "

## Models

In [6]:
# Registry of enabled generators: model name -> function taking the task's
# prompt code and returning the model's response text. The `if use_...:` cells
# below fill it; the sanity check and the sampling loop iterate over it.
code_generators = {}

In [7]:
if use_reference:
    # Not an actual model: it replays the canonical solutions from the problem
    # file so that the evaluation framework can treat them like model output.
    model_reference = 'reference'
    problems_data = read_problems(directory + problem_file)

    # Index the problems by prompt once, instead of scanning all of them on
    # every call. setdefault keeps the first problem for a repeated prompt,
    # which is what a front-to-back search would find.
    reference_problems_by_prompt = {}
    for reference_problem in problems_data.values():
        reference_problems_by_prompt.setdefault(reference_problem['prompt'], reference_problem)

    def generate_reference(input_code):
        """Return the canonical solution of the problem whose prompt equals `input_code`.

        Returns None when no problem has exactly this prompt (for example the
        ad-hoc prompt used by the sanity check).
        """
        matching_problem = reference_problems_by_prompt.get(input_code)
        if matching_problem is None:
            return None
        return matching_problem['canonical_solution']

    code_generators[model_reference] = generate_reference

In [8]:
if use_ollama_mistral_nemo:
    model_ollama_mistral = "mistral-nemo"

    def generate_one_completion_mistral_nemo(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_mistral`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_mistral,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_mistral] = generate_one_completion_mistral_nemo

In [9]:
if use_deepseek_coder_v2_lite:
    model_ollama_deepseek_coder_v2_lite = "deepseek-coder-v2"

    def generate_one_completion_deepseek_coder_v2_lite(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_deepseek_coder_v2_lite`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_deepseek_coder_v2_lite,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_deepseek_coder_v2_lite] = generate_one_completion_deepseek_coder_v2_lite

In [10]:
if use_ollama_mixtral8x7b:
    # Dedicated variable for this model tag. The 8x22b cell assigns
    # `model_ollama_mixtral` too, and a generator reading that shared global at
    # call time would query the 8x22b model (and file the results under the
    # 8x7b name) whenever both flags are enabled.
    model_ollama_mixtral8x7b = "mixtral:8x7b-instruct-v0.1-q5_0"
    # Old name kept as an alias only; nothing in this cell reads it.
    model_ollama_mixtral = model_ollama_mixtral8x7b

    def generate_one_completion_mixtral8x7b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_mixtral8x7b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        client = openai.OpenAI()
        client.base_url = ollama_base_url
        response = client.chat.completions.create(
            model=model_ollama_mixtral8x7b,
            messages=[{"role": "user", "content": setup_prompt(input_code)}],
        )
        return response.choices[0].message.content.strip()

    code_generators[model_ollama_mixtral8x7b] = generate_one_completion_mixtral8x7b

In [11]:
if use_ollama_mixtral8x22b:
    # Dedicated variable for this model tag. The 8x7b cell assigns
    # `model_ollama_mixtral` too; sharing one global between both generators
    # makes them query the same model whenever both flags are enabled, because
    # the name is looked up at call time.
    model_ollama_mixtral8x22b = "mixtral:8x22b-instruct-v0.1-q4_0"
    # Old name kept as an alias only; nothing in this cell reads it.
    model_ollama_mixtral = model_ollama_mixtral8x22b

    def generate_one_completion_mixtral8x22b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_mixtral8x22b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        client = openai.OpenAI()
        client.base_url = ollama_base_url
        response = client.chat.completions.create(
            model=model_ollama_mixtral8x22b,
            messages=[{"role": "user", "content": setup_prompt(input_code)}],
        )
        return response.choices[0].message.content.strip()

    code_generators[model_ollama_mixtral8x22b] = generate_one_completion_mixtral8x22b

In [12]:
if use_ollama_llama3:
    # Alternative tags that were used before:
    #   "llama3:70b-instruct-q8_0"
    #   "llama3:70b-instruct-q4_0"
    model_ollama_llama3 = "llama3:8b-instruct-fp16"

    def generate_one_completion_llama3(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_llama3`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_llama3,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_llama3] = generate_one_completion_llama3

In [13]:
if use_ollama_codegemma_instruct7b:
    model_ollama_codegemma_instruct7b = "codegemma:7b-instruct-fp16"

    def generate_one_completion_codegemma_instruct7b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_codegemma_instruct7b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_codegemma_instruct7b,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_codegemma_instruct7b] = generate_one_completion_codegemma_instruct7b

In [14]:
if use_ollama_codegemma_code7b:
    model_ollama_codegemma_code7b = "codegemma:7b-code-fp16"

    def generate_one_completion_codegemma_code7b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_codegemma_code7b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_codegemma_code7b,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_codegemma_code7b] = generate_one_completion_codegemma_code7b

In [15]:
if use_ollama_codegemma_code2b:
    model_ollama_codegemma_code2b = "codegemma:2b-code-fp16"

    def generate_one_completion_codegemma_code2b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_codegemma_code2b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_codegemma_code2b,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_codegemma_code2b] = generate_one_completion_codegemma_code2b

In [16]:
if use_ollama_codellama_instruct70b:
    # Alternative tag that was used before: "codellama:70b-instruct-q4_0"
    model_ollama_codellama_instruct70b = "codellama:70b-instruct-q8_0"

    def generate_one_completion_codellama_instruct70b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_codellama_instruct70b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_codellama_instruct70b,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_codellama_instruct70b] = generate_one_completion_codellama_instruct70b

In [17]:
if use_ollama_codellama_code70b:
    model_ollama_codellama_code70b = "codellama:70b-code-q4_0"

    def generate_one_completion_codellama_code70b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_codellama_code70b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_codellama_code70b,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_codellama_code70b] = generate_one_completion_codellama_code70b

In [18]:
if use_ollama_codellama_python70b:
    model_ollama_codellama_python70b = "codellama:70b-python-q4_0"

    def generate_one_completion_codellama_python70b(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_codellama_python70b`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_codellama_python70b,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_codellama_python70b] = generate_one_completion_codellama_python70b

In [19]:
if use_ollama_command_r_plus:
    model_ollama_command_r_plus = "command-r-plus:104b-q4_0"

    def generate_one_completion_command_r_plus(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_command_r_plus`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_command_r_plus,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_command_r_plus] = generate_one_completion_command_r_plus

In [20]:
if use_ollama_phi3:
    model_ollama_phi3 = "phi3:3.8b-mini-instruct-4k-fp16"

    def generate_one_completion_phi3(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_phi3`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_phi3,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_phi3] = generate_one_completion_phi3

In [21]:
if use_ollama_wizardlm2:
    model_ollama_wizardlm2 = "wizardlm2:8x22b-q4_0"

    def generate_one_completion_wizardlm2(input_code):
        """Complete `input_code` with the ollama-served model `model_ollama_wizardlm2`.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.
        """
        import openai

        ollama_client = openai.OpenAI(base_url=ollama_base_url)
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = ollama_client.chat.completions.create(
            model=model_ollama_wizardlm2,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model tag; the tag also ends up in the samples file name.
    code_generators[model_ollama_wizardlm2] = generate_one_completion_wizardlm2

In [22]:
if use_gpt35:
    model_gpt35 = "gpt-3.5-turbo-1106"

    def generate_one_completion_gpt35(input_code):
        """Complete `input_code` with the model named by `model_gpt35`.

        Uses a default-configured OpenAI client, sends the prompt from
        setup_prompt() as a single user message and returns the first
        choice's text with surrounding whitespace stripped.
        """
        import openai

        openai_client = openai.OpenAI()
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt35,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gpt35] = generate_one_completion_gpt35

In [23]:
if use_gpt4_20240409:
    model_gpt4_20240409 = "gpt-4-turbo-2024-04-09"

    def generate_one_completion_gpt4_20240409(input_code):
        """Complete `input_code` with the model named by `model_gpt4_20240409`.

        Uses a default-configured OpenAI client, sends the prompt from
        setup_prompt() as a single user message and returns the first
        choice's text with surrounding whitespace stripped.
        """
        import openai

        openai_client = openai.OpenAI()
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt4_20240409,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gpt4_20240409] = generate_one_completion_gpt4_20240409

In [24]:
if use_gpt_4o_2024_05_13:
    model_gpt_4o_2024_05_13 = "gpt-4o-2024-05-13"

    def generate_one_completion_gpt_4o_2024_05_13(input_code):
        """Complete `input_code` with the model named by `model_gpt_4o_2024_05_13`.

        Uses a default-configured OpenAI client, sends the prompt from
        setup_prompt() as a single user message and returns the first
        choice's text with surrounding whitespace stripped.
        """
        import openai

        openai_client = openai.OpenAI()
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt_4o_2024_05_13,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gpt_4o_2024_05_13] = generate_one_completion_gpt_4o_2024_05_13

In [25]:
if use_gpt_4o_2024_08_06:
    model_gpt_4o_2024_08_06 = "gpt-4o-2024-08-06"

    def generate_one_completion_gpt_4o_2024_08_06(input_code):
        """Complete `input_code` with the model named by `model_gpt_4o_2024_08_06`.

        Uses a default-configured OpenAI client, sends the prompt from
        setup_prompt() as a single user message and returns the first
        choice's text with surrounding whitespace stripped.
        """
        import openai

        openai_client = openai.OpenAI()
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt_4o_2024_08_06,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gpt_4o_2024_08_06] = generate_one_completion_gpt_4o_2024_08_06

In [26]:
if use_gpt_4omini_2024_07_18:
    model_gpt_4omini_2024_07_18 = "gpt-4o-mini-2024-07-18"

    def generate_one_completion_gpt_4omini_2024_07_18(input_code):
        """Complete `input_code` with the model named by `model_gpt_4omini_2024_07_18`.

        Uses a default-configured OpenAI client, sends the prompt from
        setup_prompt() as a single user message and returns the first
        choice's text with surrounding whitespace stripped.
        """
        import openai

        openai_client = openai.OpenAI()
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt_4omini_2024_07_18,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gpt_4omini_2024_07_18] = generate_one_completion_gpt_4omini_2024_07_18

In [27]:
if use_gpt4:
    model_gpt4 = "gpt-4-1106-preview"

    def generate_one_completion_gpt4(input_code):
        """Complete `input_code` with the model named by `model_gpt4`.

        Uses a default-configured OpenAI client, sends the prompt from
        setup_prompt() as a single user message and returns the first
        choice's text with surrounding whitespace stripped.
        """
        import openai

        openai_client = openai.OpenAI()
        chat_messages = [{"role": "user", "content": setup_prompt(input_code)}]
        completion = openai_client.chat.completions.create(
            model=model_gpt4,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gpt4] = generate_one_completion_gpt4

In [28]:
if use_blablador_mistral:
    model_blablador_mistral = "Mistral-7B-Instruct-v0.2"

    def generate_one_completion_blablador_mistral(input_code):
        """Complete `input_code` with `model_blablador_mistral` on the Blablador endpoint.

        The prompt from setup_prompt() is sent as a single user message; the
        text of the first returned choice is handed back, whitespace-stripped.

        Raises:
            KeyError: if the BLABLADOR_API_KEY environment variable is not set.
        """
        import openai
        import os

        # Hand endpoint and key to the constructor: a bare openai.OpenAI()
        # refuses to start without OPENAI_API_KEY, which this endpoint does not
        # use. Indexing os.environ (as the Gemini cells do) fails right away
        # when the key is missing instead of sending requests without one.
        client = openai.OpenAI(
            base_url='https://helmholtz-blablador.fz-juelich.de:8000/v1',
            api_key=os.environ['BLABLADOR_API_KEY'],
        )
        response = client.chat.completions.create(
            model=model_blablador_mistral,
            messages=[{"role": "user", "content": setup_prompt(input_code)}],
        )
        return response.choices[0].message.content.strip()

    code_generators[model_blablador_mistral] = generate_one_completion_blablador_mistral

In [29]:
if use_gemini_pro:
    model_gemini_pro = 'gemini-pro'

    def generate_one_completion_gemini_pro(input_code):
        """Complete `input_code` with the Vertex AI model named by `model_gemini_pro`.

        Opens a fresh chat session, sends the prompt from setup_prompt() as
        its only message and returns the reply text as received (it is not
        stripped).
        """
        # GenerativeModel is the only name needed from this module; the other
        # names imported here previously were never used.
        from vertexai.preview.generative_models import GenerativeModel

        gemini_model = GenerativeModel(model_gemini_pro)
        chat_session = gemini_model.start_chat()
        return chat_session.send_message(setup_prompt(input_code)).text

    code_generators[model_gemini_pro] = generate_one_completion_gemini_pro

In [30]:
if use_gemini_15_pro:
    model_gemini_15_pro = 'gemini-1.5-pro-001'

    def generate_one_completion_gemini_15_pro(input_code):
        """Complete `input_code` with the model named by `model_gemini_15_pro`.

        Configures the google.generativeai package with the key from the
        GOOGLE_API_KEY environment variable (KeyError if it is missing),
        then returns the text of the generated content for the prompt built
        by setup_prompt(). The text is not stripped.
        """
        import os
        import google.generativeai as genai

        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        gemini = genai.GenerativeModel(model_gemini_15_pro)
        generated = gemini.generate_content(setup_prompt(input_code))
        return generated.text

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gemini_15_pro] = generate_one_completion_gemini_15_pro

In [31]:
if use_gemini_15_flash:
    model_gemini_15_flash = 'gemini-1.5-flash-001'

    def generate_one_completion_gemini_15_flash(input_code):
        """Complete `input_code` with the model named by `model_gemini_15_flash`.

        Configures the google.generativeai package with the key from the
        GOOGLE_API_KEY environment variable (KeyError if it is missing),
        waits ten seconds, then returns the text of the generated content for
        the prompt built by setup_prompt(). The text is not stripped.
        """
        import os
        import time
        import google.generativeai as genai

        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

        # Fixed pause before every request; presumably a throttle to stay
        # below a rate limit - TODO confirm.
        time.sleep(10)

        gemini = genai.GenerativeModel(model_gemini_15_flash)
        generated = gemini.generate_content(setup_prompt(input_code))
        return generated.text

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gemini_15_flash] = generate_one_completion_gemini_15_flash

In [32]:
if use_gemini_ultra:
    model_gemini_ultra = 'gemini-ultra'

    def generate_one_completion_gemini_ultra(input_code):
        """Complete `input_code` with the model named by `model_gemini_ultra`.

        Configures the google.generativeai package with the key from the
        GOOGLE_API_KEY environment variable (KeyError if it is missing),
        then returns the text of the generated content for the prompt built
        by setup_prompt(). The text is not stripped.
        """
        import os
        import google.generativeai as genai

        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        gemini = genai.GenerativeModel(model_gemini_ultra)
        generated = gemini.generate_content(setup_prompt(input_code))
        return generated.text

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_gemini_ultra] = generate_one_completion_gemini_ultra

In [33]:
if use_claude:
    model_claude = "claude-3-opus-20240229"

    def generate_one_completion_claude(input_code):
        """Complete `input_code` with the model named by `model_claude`.

        Sends the prompt from setup_prompt() as a single user message with a
        response limit of 1024 tokens and returns the text of the first
        content item of the reply. The text is not stripped.
        """
        from anthropic import Anthropic

        # No api_key argument: the client is left on its default, which the
        # original note gives as os.environ.get("ANTHROPIC_API_KEY").
        anthropic_client = Anthropic()

        user_turn = {"role": "user", "content": setup_prompt(input_code)}
        reply = anthropic_client.messages.create(
            model=model_claude,
            max_tokens=1024,
            messages=[user_turn],
        )
        return reply.content[0].text

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_claude] = generate_one_completion_claude

In [34]:
if use_claude_35_sonnet:
    model_claude_35_sonnet = "claude-3-5-sonnet-20240620"

    def generate_one_completion_claude_35_sonnet(input_code):
        """Complete `input_code` with the model named by `model_claude_35_sonnet`.

        Waits ten seconds, then sends the prompt from setup_prompt() as a
        single user message with a response limit of 1024 tokens and returns
        the text of the first content item of the reply. The text is not
        stripped.
        """
        from anthropic import Anthropic
        import time

        # Fixed pause before every request; presumably a throttle to stay
        # below a rate limit - TODO confirm.
        time.sleep(10)

        # No api_key argument: the client is left on its default, which the
        # original note gives as os.environ.get("ANTHROPIC_API_KEY").
        anthropic_client = Anthropic()

        user_turn = {"role": "user", "content": setup_prompt(input_code)}
        reply = anthropic_client.messages.create(
            model=model_claude_35_sonnet,
            max_tokens=1024,
            messages=[user_turn],
        )
        return reply.content[0].text

    # Register under the model name; the name also ends up in the samples file name.
    code_generators[model_claude_35_sonnet] = generate_one_completion_claude_35_sonnet

## Sanity check

In [35]:
# Call every registered generator once with a tiny prompt and print the model
# name next to its response, so a broken endpoint shows up before sampling.
sanity_prompt = "def print_hello_world():\n"
for registered_name, generator in code_generators.items():
    sanity_response = generator(sanity_prompt)
    print(registered_name, sanity_response)

deepseek-coder-v2 Here's the complete Python code with comments explaining the plan and the function implementation:

```python
# Plan:
# 1. Define a function named `print_hello_world` that does not take any arguments.
# 2. Inside the function, write the string "Hello World" to stdout (by default, this will print to the console).
# 3. End the function with a return statement.

def print_hello_world():
    # Print "Hello World" to the console
    print("Hello World")
```


## Sampling

In [36]:
problems = read_problems(directory + problem_file)

# One samples file per registered model. The file is written only after all
# repetitions of all tasks have been collected for that model.
for model_name, generate_one_completion in code_generators.items():
    samples = []

    # Repetitions form the outer loop, so every task is visited once before
    # any task gets its next sample.
    for repetition in range(num_samples_per_task):
        for task_id, problem in problems.items():
            print(model_name, task_id, repetition)

            response = generate_one_completion(problem["prompt"])
            samples.append({
                "task_id": task_id,
                # Code extracted from the response by extract_python().
                "completion": extract_python(response),
                # Unmodified model answer, stored next to the extracted code.
                "full_response": response,
            })

    samples_path = f"{directory}samples_{model_name}.jsonl"
    write_jsonl(samples_path, samples)

deepseek-coder-v2 ../test_cases/apply_otsu_threshold_and_count_postiive_pixels.ipynb 0
deepseek-coder-v2 ../test_cases/binary_closing.ipynb 0
deepseek-coder-v2 ../test_cases/binary_skeleton.ipynb 0
deepseek-coder-v2 ../test_cases/bland_altman.ipynb 0
deepseek-coder-v2 ../test_cases/combine_columns_of_tables.ipynb 0
deepseek-coder-v2 ../test_cases/convex_hull_measure_area.ipynb 0
deepseek-coder-v2 ../test_cases/convolve_images.ipynb 0
deepseek-coder-v2 ../test_cases/count_number_of_touching_neighbors.ipynb 0
deepseek-coder-v2 ../test_cases/count_objects_over_time.ipynb 0
deepseek-coder-v2 ../test_cases/count_overlapping_regions.ipynb 0
deepseek-coder-v2 ../test_cases/create_umap.ipynb 0
deepseek-coder-v2 ../test_cases/crop_quarter_image.ipynb 0
deepseek-coder-v2 ../test_cases/deconvolve_image.ipynb 0
deepseek-coder-v2 ../test_cases/detect_edges.ipynb 0
deepseek-coder-v2 ../test_cases/expand_labels_without_overlap.ipynb 0
deepseek-coder-v2 ../test_cases/extract_surface_measure_area.ipynb