# Patch summary: append one extra stop word to the stop-word lists of the
# HumanEval task (GeneralHumanEval.__init__) and the MultiPL-E task
# (stop_words taken from the dataset's "stop_tokens" field).
#
# NOTE(review): in the collapsed copy of this patch the added stop word was an
# empty string (""). An empty string is a substring of every string, so it
# would match at offset 0 of any generation. It is presumed to be an
# angle-bracket token stripped during extraction and is restored here as
# "<file_sep>" -- confirm against the upstream commit before applying.
# NOTE(review): leading indentation inside the hunks was reconstructed from
# the Python structure; the blank first context line of the first hunk is
# inferred from the 7-line count in its hunk header.
diff --git a/bigcode_eval/tasks/humaneval.py b/bigcode_eval/tasks/humaneval.py
index e99f602cb..0676ef3a1 100644
--- a/bigcode_eval/tasks/humaneval.py
+++ b/bigcode_eval/tasks/humaneval.py
@@ -49,7 +49,7 @@ class GeneralHumanEval(Task):
 
     def __init__(self, strip_prompt, k=[1, 10, 100], num_workers=16, timeout=3.0):
         super().__init__(
-            stop_words=["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```"],
+            stop_words=["\nclass", "\ndef", "\n#", "\n@", "\nprint", "\nif", "\n```", "<file_sep>"],
             requires_execution=True,
         )
         self.strip_prompt = strip_prompt
diff --git a/bigcode_eval/tasks/multiple.py b/bigcode_eval/tasks/multiple.py
index ec031c64e..7f514426d 100644
--- a/bigcode_eval/tasks/multiple.py
+++ b/bigcode_eval/tasks/multiple.py
@@ -90,7 +90,7 @@ def __init__(self, language):
             GeneralMultiPLE.DATASET_PATH,
             self.DATASET_NAME,
             revision=self.DATASET_REVISION)
-        stop_words = self.dataset["test"][0]["stop_tokens"]
+        stop_words = self.dataset["test"][0]["stop_tokens"] + ["<file_sep>"]
         super().__init__(
             stop_words=stop_words,
             requires_execution=True,