Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 71 additions & 26 deletions codegen/generate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
from os import PathLike
from typing import List
Expand Down Expand Up @@ -36,7 +37,7 @@ def construct_contract_prompt(prompt: str, contract_type: str, contract: str) ->


def codegen(
workdir: PathLike,
target_path: PathLike,
model: DecoderBase,
dataset: str,
greedy=False,
Expand All @@ -45,6 +46,15 @@ def codegen(
version="default",
resume=True,
):
task2nexist = {}
if resume and target_path.endswith(".jsonl") and os.path.isfile(target_path):
with open(target_path, "r") as f:
for line in f:
if not line.strip():
continue
task_id = json.loads(line)["task_id"]
task2nexist[task_id] = task2nexist.get(task_id, 0) + 1

with Progress(
TextColumn(f"{dataset} •" + "[progress.percentage]{task.percentage:>3.0f}%"),
BarColumn(),
Expand All @@ -69,26 +79,26 @@ def codegen(
p.console.print(f"Skipping {task_id} as it is not in {id_range}")
continue

p_name = task_id.replace("/", "_")
os.makedirs(os.path.join(workdir, p_name), exist_ok=True)
log = f"Codegen: {p_name} @ {model}"
n_existing = 0
if resume:
# count existing .py files
n_existing = len(
if not target_path.endswith(".jsonl"):
p_name = task_id.replace("/", "_")
os.makedirs(os.path.join(target_path, p_name), exist_ok=True)
task2nexist[task_id] = len(
[
f
for f in os.listdir(os.path.join(workdir, p_name))
for f in os.listdir(os.path.join(target_path, p_name))
if f.endswith(".py")
]
)
if n_existing > 0:
log += f" (resuming from {n_existing})"

nsamples = n_samples - n_existing
n_more_samples = n_samples
log = f"Codegen: {task_id} @ {model}"
if resume and task2nexist.get(task_id, 0) > 0:
log += f" (resuming from {task2nexist[task_id]})"
n_more_samples -= task2nexist[task_id]

p.console.print(log)

sidx = n_samples - nsamples
sidx = n_samples - n_more_samples
while sidx < n_samples:
outputs = model.codegen(
task["prompt"],
Expand All @@ -97,18 +107,22 @@ def codegen(
)
assert outputs, "No outputs from model!"
for impl in outputs:
try:
solution = (
task["prompt"] + impl if model.is_direct_completion() else impl
)
if target_path.endswith(".jsonl"):
with open(target_path, "a") as f:
f.write(
json.dumps({"task_id": task_id, "solution": solution})
+ "\n"
)
else:
with open(
os.path.join(workdir, p_name, f"{sidx}.py"),
os.path.join(target_path, p_name, f"{sidx}.py"),
"w",
encoding="utf-8",
) as f:
if model.is_direct_completion():
f.write(task["prompt"] + impl)
else:
f.write(impl)
except UnicodeEncodeError:
continue
f.write(solution)
sidx += 1


Expand All @@ -126,12 +140,19 @@ def main(
backend: str = "vllm",
base_url: str = None,
tp: int = 1,
evalperf_type: str = None, # This is for EvalPerf
jsonl_fmt: bool = False,
):
assert dataset in ["humaneval", "mbpp"], f"Invalid dataset {dataset}"
assert backend in ["vllm", "hf", "openai"]
assert evalperf_type is None or evalperf_type in [
"instruct",
"perf-instruct",
"perf-CoT",
]

if greedy and (temperature != 0 or bs != 1 or n_samples != 1):
temperature = 0
temperature = 0.0
bs = 1
n_samples = 1
print("Greedy decoding ON (--greedy): setting bs=1, n_samples=1, temperature=0")
Expand All @@ -145,7 +166,21 @@ def main(
os.makedirs(root, exist_ok=True)
# Make dataset dir
os.makedirs(os.path.join(root, dataset), exist_ok=True)
# Make dir for codes generated by each model

# Model instructions
instruction_prefix = "Please provide a self-contained Python script that solves the following problem in a markdown code block:"
response_prefix = "Below is a Python script with a self-contained function that solves the problem and passes corresponding tests:"

if evalperf_type == "perf-instruct":
instruction_prefix = "Please provide an efficient and self-contained Python script that solves the following problem in a markdown code block:"
response_prefix = "Below is a Python script with a self-contained function that efficiently solves the problem and passes corresponding tests:"
elif evalperf_type == "perf-CoT":
instruction_prefix = "Think step by step: please provide an efficient and self-contained Python script that solves the following problem in a markdown code block:"
response_prefix = "Below is a Python script with a self-contained function that efficiently solves the problem and passes corresponding tests:"
elif evalperf_type is not None and evalperf_type != "instruct":
raise ValueError(f"Invalid evalperf_type: {evalperf_type}")

# Model creation
model_runner = make_model(
model=model,
backend=backend,
Expand All @@ -154,12 +189,22 @@ def main(
dataset=dataset,
base_url=base_url,
tp=tp,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)

# Make dir for codes generated by each model
identifier = model.replace("/", "--") + f"_{backend}_temp_{temperature}"
workdir = os.path.join(root, dataset, identifier)
os.makedirs(workdir, exist_ok=True)
if evalperf_type:
identifier += f"-{evalperf_type}"

target_path = os.path.join(root, dataset, identifier)
if jsonl_fmt:
target_path += ".jsonl"
else:
os.makedirs(target_path, exist_ok=True)
codegen(
workdir=workdir,
target_path=target_path,
dataset=dataset,
greedy=greedy,
model=model_runner,
Expand Down
65 changes: 47 additions & 18 deletions codegen/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,31 +53,39 @@ def extra_eos_for_direct_completion(dataset) -> List[str]:
_MAGIC_SPLITTER_ = "-[[]]-this-is-really-our-highest-priority-[[]]-"


def make_chat_prompt(
    task_prompt: str,
    instruction_prefix: str,
    response_prefix: str,
    tokenizer: AutoTokenizer,
) -> str:
    """Render a task prompt through the tokenizer's chat template.

    The user turn is ``instruction_prefix`` followed by the stripped task
    prompt inside a plain markdown fence.  The assistant turn is
    ``response_prefix`` followed by an opened ``python`` fence whose only
    content is ``_MAGIC_SPLITTER_``.  The rendered conversation is cut at
    the splitter, so the returned text ends right after the assistant's
    opening code fence.

    Args:
        task_prompt: Raw task text to wrap.
        instruction_prefix: Sentence placed before the task in the user turn.
        response_prefix: Sentence that opens the assistant turn.
        tokenizer: Object exposing ``chat_template`` and
            ``apply_chat_template``.

    Returns:
        ``task_prompt`` unchanged when the tokenizer has no chat template;
        otherwise the rendered chat text up to (not including) the splitter.

    Raises:
        AssertionError: If a chat template exists and either prefix is None.
    """
    # directly return prompt if it does not have a tokenizer.chat_template
    if tokenizer.chat_template is None:
        return task_prompt

    assert instruction_prefix is not None, "Instruction prefix is required!"
    assert response_prefix is not None, "Response prefix is required!"

    # NOTE(review): written as single-line f-strings on the assumption that the
    # original triple-quoted template lines carry no leading indentation — confirm.
    user_message = f"{instruction_prefix}\n```\n{task_prompt.strip()}\n```\n"
    response = f"{response_prefix}\n```python\n{_MAGIC_SPLITTER_}\n```\n"

    rendered = tokenizer.apply_chat_template(
        [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": response},
        ],
        tokenize=False,
    )
    # Keep only what precedes the splitter placed inside the assistant's fence.
    return rendered.split(_MAGIC_SPLITTER_)[0]


class DecoderBase(ABC):
Expand All @@ -89,6 +97,8 @@ def __init__(
max_new_tokens: int = 512,
dtype: str = "bfloat16", # default
trust_remote_code: bool = False,
instruction_prefix: str = None,
response_prefix: str = None,
) -> None:
print("Initializing a decoder model: {} ...".format(name))
self.name = name
Expand All @@ -99,6 +109,8 @@ def __init__(
self.max_new_tokens = max_new_tokens
self.dtype = dtype
self.trust_remote_code = trust_remote_code
self.instruction_prefix = instruction_prefix
self.response_prefix = response_prefix

@abstractmethod
def codegen(
Expand Down Expand Up @@ -166,7 +178,9 @@ def __init__(self, name: str, **kwargs) -> None:
def codegen(
    self, prompt: str, do_sample: bool = True, num_samples: int = 200
) -> List[str]:
    """Wrap ``prompt`` as a chat prompt, then delegate to ``VllmDecoder.codegen``.

    The instruction and response prefixes are read from the instance
    (``self.instruction_prefix`` / ``self.response_prefix``) and passed to
    ``make_chat_prompt`` together with ``self.tokenizer``.
    """
    # make_chat_prompt takes four positional arguments; the older
    # two-argument call form no longer matches its signature.
    prompt = make_chat_prompt(
        prompt, self.instruction_prefix, self.response_prefix, self.tokenizer
    )
    return VllmDecoder.codegen(self, prompt, do_sample, num_samples)


Expand Down Expand Up @@ -255,7 +269,9 @@ def __init__(self, name: str, **kwargs):
def codegen(
    self, prompt: str, do_sample: bool = True, num_samples: int = 200
) -> List[str]:
    """Wrap ``prompt`` as a chat prompt, then delegate to ``HfTorchDecoder.codegen``.

    The instruction and response prefixes are read from the instance
    (``self.instruction_prefix`` / ``self.response_prefix``) and passed to
    ``make_chat_prompt`` together with ``self.tokenizer``.
    """
    # make_chat_prompt takes four positional arguments; the older
    # two-argument call form no longer matches its signature.
    prompt = make_chat_prompt(
        prompt, self.instruction_prefix, self.response_prefix, self.tokenizer
    )
    return HfTorchDecoder.codegen(self, prompt, do_sample, num_samples)


Expand All @@ -271,14 +287,15 @@ def codegen(
assert self.temperature > 0, "Temperature must be positive for sampling"
batch_size = min(self.batch_size, num_samples)

message = self.instruction_prefix
# construct prompt
message += f"\n```python\n{prompt.strip()}\n```"

fmt = "json_object" if self.name == "gpt-4-1106-preview" else "text"
if fmt == "json_object":
message = r'Please complete the following code snippet by generating JSON like {"code": ""}'
else:
message = r"Please generate code to complete the following problem:"

message += f"\n```python\n{prompt.strip()}\n```"
message += (
r'Note: the output code should follow a JSON schema of {"code": ""}'
)

ret = openai_request.make_auto_request(
self.client,
Expand Down Expand Up @@ -337,7 +354,7 @@ def codegen(
messages=[
ChatMessage(
role="user",
content="Please generate code to solve the following problem in a Python markdown block:"
content=self.instruction_prefix
+ f"\n```python\n{prompt.strip()}\n```",
)
],
Expand Down Expand Up @@ -381,7 +398,7 @@ def codegen(
messages=[
{
"role": "user",
"content": "Please generate code to complete the following problem wrapped in a Python markdown block:"
"content": self.instruction_prefix
+ f"\n```python\n{prompt.strip()}\n```\n",
}
],
Expand All @@ -402,6 +419,8 @@ def make_model(
temperature: float = 0.0,
tp=1,
base_url=None,
instruction_prefix=None,
response_prefix=None,
):
if backend == "vllm":
return GeneralVllmDecoder(
Expand All @@ -410,30 +429,40 @@ def make_model(
temperature=temperature,
dataset=dataset,
tp=tp,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)
elif backend == "hf":
return GenenralHfTorchDecoder(
name=model,
batch_size=batch_size,
temperature=temperature,
dataset=dataset,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)
elif backend == "openai":
return OpenAIChatDecoder(
name=model,
batch_size=batch_size,
temperature=temperature,
base_url=base_url,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)
elif backend == "mistral":
return MistralChatDecoder(
name=model,
batch_size=batch_size,
temperature=temperature,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)
elif backend == "anthropic":
return AnthropicMessageDecoder(
name=model,
batch_size=batch_size,
temperature=temperature,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)