In [2]:
import replicate
from datasets import load_dataset

import json
import jsonlines

In [3]:
from getpass import getpass
import os

# Ask for the API token interactively (so it is never written into the notebook)
# and expose it through the REPLICATE_API_TOKEN environment variable as well.
os.environ['REPLICATE_API_TOKEN'] = REPLICATE_API_TOKEN = getpass()

In [4]:
# Look up the Code Llama instruct model and pin one exact version by its id,
# so every request in this notebook goes to the same model version.
model = replicate.models.get("meta/codellama-34b-instruct")
version = model.versions.get(
    "b17fdb44c843000741367ae3d73e2bb710d7428a662238ddebbf4302db2b5422"
)

# Code Summarization: 2-stage
(1) 함수를 구현하고, (2) 구현 결과를 다시 프롬프팅하여 코드를 축약

In [8]:
# Stage-2 input: the baseline completions previously saved as JSON lines.
# NOTE: the name `dataset` is assigned again in the 1-stage section further down,
# so later cells see whichever of the two loads ran most recently.
dataset = load_dataset(
    "json",
    data_files="predictions/predictions_instruct_baseline.jsonl",
    split="train",
)
dataset

Downloading and preparing dataset json/default to /Users/sangmin/.cache/huggingface/datasets/json/default-19955ffb447507b1/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /Users/sangmin/.cache/huggingface/datasets/json/default-19955ffb447507b1/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4. Subsequent calls will reuse this data.


Dataset({
    features: ['task_id', 'completion'],
    num_rows: 164
})

In [9]:
# Stage-2 prompt: an existing implementation is wrapped in [PYTHON] tags and the
# model is asked to shorten it. "{}" is the slot the code is formatted into.
template = (
    "Rewrite the code below as short as possible.\n"
    "[PYTHON]\n"
    "{}\n"
    "[/PYTHON]"
)
# Preview the filled-in prompt for one example as a sanity check.
print(template.format(dataset[123]["completion"]))

Rewrite the code below as short as possible.
[PYTHON]
def get_odd_collatz(n):
    if n <= 0:
        raise ValueError("n should be a positive integer.")
    
    collatz_list = [n]
    while n != 1:
        if n % 2 == 0:
            n //= 2
        else:
            n = 3 * n + 1
        collatz_list.append(n)
    
    return sorted([x for x in collatz_list if x % 2 != 0])
[/PYTHON]


In [14]:
# Stage 2: submit one "shorten this code" request per baseline completion.
# Each returned prediction is appended immediately, so requests that were already
# submitted remain in `predictions` even if a later create() call raises.
predictions = []
for row in dataset:
    request_input = {
        "top_k": 50,
        "top_p": 1.0,
        "max_tokens": 500,
        "temperature": 0,
        "system_prompt": "Responses should be written in Python.",
        "prompt": template.format(row["completion"]),
        "repeat_penalty": 1.1,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    predictions.append(
        replicate.predictions.create(version=version, input=request_input)
    )

# Code Summarization: 1-stage
구현할 때부터 짧게 구현해달라고 요청

In [None]:
# Stage-1 input: the "test" split of the openai_humaneval dataset.
# NOTE: this rebinds `dataset`, replacing the JSON-lines dataset loaded in the
# 2-stage section above.
dataset = load_dataset(
    "openai_humaneval",
    split="test",
)
dataset

In [34]:
# Stage-1 prompt: the function stub is wrapped in [PYTHON] tags and the model is
# asked to implement it as briefly as possible. "{}" is the slot for the stub.
template = (
    "Implement the code below as short as possible.\n"
    "[PYTHON]\n"
    "{}\n"
    "[/PYTHON]"
)
# Preview the filled-in prompt for the first example as a sanity check.
print(template.format(dataset[0]["prompt"]))

Implement the code below as short as possible.
[PYTHON]
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """

[/PYTHON]


In [37]:
# Stage 1: submit one "implement this as short as possible" request per prompt.
# Each returned prediction is appended immediately, so requests that were already
# submitted remain in `predictions` even if a later create() call raises.
predictions = []
for row in dataset:
    request_input = {
        "top_k": 50,
        "top_p": 1.0,
        "max_tokens": 500,
        "temperature": 0,
        "system_prompt": "Responses should be written in Python.",
        "prompt": template.format(row["prompt"]),
        "repeat_penalty": 1.1,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    predictions.append(
        replicate.predictions.create(version=version, input=request_input)
    )

# Show status

In [38]:
# Refresh every prediction and print the status of each one that is anything
# other than "succeeded"; no output means all of them succeeded.
for prediction in predictions:
    prediction.reload()
    current_status = prediction.status
    if current_status == "succeeded":
        continue
    print(current_status)

# Convert to JSON

In [None]:
# Display the list of submitted prediction objects for inspection
# (this shows the whole list, one entry per request).
predictions

In [40]:
# Convert the predictions into {"task_id", "completion"} records.
# Predictions were submitted in dataset order, so they are paired with task ids
# by position.

# Read the task-id column once; indexing dataset["task_id"][i] inside the loop
# would fetch the whole column again on every iteration.
task_ids = dataset["task_id"]
if len(predictions) > len(task_ids):
    raise ValueError(
        f"{len(predictions)} predictions but only {len(task_ids)} task ids; "
        "`dataset` does not match the dataset these predictions were created from."
    )

results = []
for task_id, prediction in zip(task_ids, predictions):
    # prediction.output is joined as a sequence of string chunks below. If it is
    # None (presumably an unfinished or failed prediction -- confirm against the
    # Replicate client), fail with a message that names the task instead of the
    # opaque TypeError that "".join(None) would raise.
    if prediction.output is None:
        raise RuntimeError(
            f"Prediction for {task_id} has no output (status: {prediction.status}); "
            "reload or re-run it before converting."
        )
    results.append({
        "task_id": task_id,
        "completion": "".join(prediction.output).strip(),
    })
results

[{'task_id': 'HumanEval/0',
  'completion': '[PYTHON]\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    for i in range(len(numbers)):\n        for j in range(i+1, len(numbers)):\n            if abs(numbers[i] - numbers[j]) <= threshold:\n                return True\n    return False\n[/PYTHON]'},
 {'task_id': 'HumanEval/1',
  'completion': "[PYTHON]\ndef separate_paren_groups(paren_string: str) -> List[str]:\n    open_brace = 0\n    start_index = 0\n    result = []\n    for i, char in enumerate(paren_string):\n        if char == '(':\n            if open_brace == 0:\n                start_index = i\n            open_brace += 1\n        elif char == ')':\n            open_brace -= 1\n            if open_brace == 0:\n                result.append(paren_string[start_index:i+1])\n    return result\n[/PYTHON]"},
 {'task_id': 'HumanEval/2',
  'completion': '[PYTHON]\ndef truncate_number(number: float) -> float:\n    return number - int(number)\n[/PYTHON]\n\nThis c

In [41]:
def _strip_python_tags(completion):
    """Return the code enclosed by the first [PYTHON] ... [/PYTHON] pair.

    Text before the opening tag and after the closing tag (for example a trailing
    explanation) is dropped. A completion without an opening tag is returned
    unchanged apart from surrounding whitespace, so applying this function to an
    already-stripped completion is a no-op.
    """
    if "[PYTHON]" not in completion:
        return completion.strip()
    return completion.split("[PYTHON]")[1].split("[/PYTHON]")[0].strip()


# Strings are wrapped with [PYTHON] [/PYTHON] tags. Remove them.
# The results are updated in place, so this cell must be safe to run more than
# once: untagged completions are reported instead of aborting the cell. On a
# re-run every completion is already untagged, so all task ids get listed.
untagged_task_ids = []
for result in results:
    if "[PYTHON]" not in result["completion"]:
        untagged_task_ids.append(result["task_id"])
    result["completion"] = _strip_python_tags(result["completion"])

if untagged_task_ids:
    print("No [PYTHON] tag found (completion left as-is) for:", untagged_task_ids)

results

[{'task_id': 'HumanEval/0',
  'completion': 'def has_close_elements(numbers: List[float], threshold: float) -> bool:\n    for i in range(len(numbers)):\n        for j in range(i+1, len(numbers)):\n            if abs(numbers[i] - numbers[j]) <= threshold:\n                return True\n    return False'},
 {'task_id': 'HumanEval/1',
  'completion': "def separate_paren_groups(paren_string: str) -> List[str]:\n    open_brace = 0\n    start_index = 0\n    result = []\n    for i, char in enumerate(paren_string):\n        if char == '(':\n            if open_brace == 0:\n                start_index = i\n            open_brace += 1\n        elif char == ')':\n            open_brace -= 1\n            if open_brace == 0:\n                result.append(paren_string[start_index:i+1])\n    return result"},
 {'task_id': 'HumanEval/2',
  'completion': 'def truncate_number(number: float) -> float:\n    return number - int(number)'},
 {'task_id': 'HumanEval/3',
  'completion': 'def below_zero(operation

In [42]:
# Save the cleaned records as JSON lines, one record per line.
with jsonlines.open("prediction_summary_1.jsonl", mode="w") as writer:
    writer.write_all(results)

# Check code length

In [53]:
# Measure how long the generated code is for one experiment's prediction file.
# `predictions_dir` is used instead of `dir` so the builtin dir() is not shadowed
# for the rest of the kernel session.
predictions_dir = "pass@k_eval/predictions"
experiment = "summary_2"
predictions_path = os.path.join(predictions_dir, f"prediction_{experiment}.jsonl")

with jsonlines.open(predictions_path) as reader:
    code_lengths = [len(obj["completion"]) for obj in reader]

total_code_length = sum(code_lengths)
# Average over the records actually read from the file, not over len(dataset):
# `dataset` is whatever happens to be loaded in the kernel and need not have the
# same number of rows as this file.
average_code_length = total_code_length / len(code_lengths) if code_lengths else 0.0

print("Total: ", total_code_length)
print("Average: ", average_code_length)

Total:  22462
Average:  136.96341463414635


# Emergency Cancel

In [43]:
# Cancel every submitted prediction after a mistake (to avoid wasting money).
for submitted in predictions:
    submitted.cancel()