forked from open-compass/opencompass
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add humaneval postprocessor for GPT models & eval config for GPT4, enhance the original humaneval postprocessor (open-compass#129)
* [Enhancement] Enhance humaneval postprocessor
* add human-eval testcase
* update
* update
---------
Co-authored-by: Leymore <zfz-960727@163.com>
- Loading branch information
1 parent
5a8ef3a
commit 768388c
Showing
4 changed files
with
190 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from mmengine.config import read_base | ||
from opencompass.models import OpenAI | ||
from opencompass.partitioners import NaivePartitioner | ||
from opencompass.runners import LocalRunner | ||
from opencompass.tasks import OpenICLInferTask | ||
|
||
with read_base(): | ||
from .datasets.collections.chat_medium import datasets | ||
from .summarizers.medium import summarizer | ||
|
||
# GPT4 needs a special humaneval postprocessor: for every dataset entry whose
# path is 'openai_humaneval', point the prediction postprocessor at the GPT
# variant and leave all other entries untouched.
from opencompass.datasets.humaneval import humaneval_gpt_postprocess

for _dataset in datasets:
    if _dataset['path'] != 'openai_humaneval':
        continue
    _dataset['eval_cfg']['pred_postprocessor']['type'] = humaneval_gpt_postprocess
|
||
|
||
# Meta template handed to the API model below: a single round consisting of a
# HUMAN entry followed by a BOT entry. Only the BOT entry carries
# generate=True (presumably marking the turn the model produces - confirm
# against the OpenCompass meta-template documentation).
api_meta_template = {
    'round': [
        {'role': 'HUMAN', 'api_role': 'HUMAN'},
        {'role': 'BOT', 'api_role': 'BOT', 'generate': True},
    ],
}
|
||
# Models to evaluate: a single entry, abbreviated 'GPT4', built on the OpenAI
# model wrapper with path 'gpt-4-0613' and the meta template defined above.
models = [
    {
        'abbr': 'GPT4',
        'type': OpenAI,
        'path': 'gpt-4-0613',
        # The key will be obtained from $OPENAI_API_KEY, but you can write
        # down your key here as well
        'key': 'ENV',
        'meta_template': api_meta_template,
        'query_per_second': 1,
        'max_out_len': 2048,
        'max_seq_len': 2048,
        'batch_size': 8,
    },
]
|
||
# Inference stage settings: NaivePartitioner as the partitioner, and a
# LocalRunner (max_num_workers=4) running OpenICLInferTask tasks.
infer = {
    'partitioner': {'type': NaivePartitioner},
    'runner': {
        'type': LocalRunner,
        'max_num_workers': 4,
        'task': {'type': OpenICLInferTask},
    },
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import unittest | ||
|
||
from opencompass.datasets.humaneval import humaneval_postprocess | ||
|
||
|
||
def run_humaneval_check(completion):
    """Post-process ``completion`` and execute it as the body of a function.

    The completion is passed through ``humaneval_postprocess``, placed under a
    fixed ``get_fraction(x: float) -> float`` signature, followed by two
    assertions on the function's result, and the assembled program is run.

    Args:
        completion: Raw completion text to be post-processed.

    Raises:
        AssertionError: If ``get_fraction`` returns an unexpected value.
        SyntaxError: If the assembled program is not valid Python.
    """
    source = '\n'.join([
        'def get_fraction(x: float) -> float:',
        humaneval_postprocess(completion),
        '',
        'assert get_fraction(1.28) == 0.28',
        'assert get_fraction(1.0) == 0.0',
    ])
    # Run the program in one fresh namespace that serves as both globals and
    # locals. A bare exec() inside a function executes with separate globals
    # and locals (as if in a class body), so names the program defines at its
    # top level - imports, helper functions, get_fraction itself - would not
    # be resolvable from inside get_fraction's body.
    exec(source, {})
|
||
|
||
class TestHumaneval(unittest.TestCase):
    """Feed differently shaped completions to ``run_humaneval_check``.

    Every test builds one raw completion string and passes it to
    ``run_humaneval_check``; a test passes when that call does not raise.
    The string literals are kept exactly as found, including their leading
    whitespace.
    """

    # Completion that is just the indented return statement.
    def test_vanilla(self):
        run_humaneval_check(' return x - int(x)')

    # Completion wrapped in a ```python fenced block.
    def test_python_quote(self):
        completion = '\n'.join((
            '```python',
            ' return x - int(x)',
            '```',
        ))
        run_humaneval_check(completion)

    # Completion wrapped in a fenced block without a language tag.
    def test_bare_quote(self):
        completion = '\n'.join((
            '```',
            ' return x - int(x)',
            '```',
        ))
        run_humaneval_check(completion)

    # NOTE(review): as written, this input is identical to the one in
    # test_bare_quote; the test name suggests the indentation inside the
    # fence was meant to differ - confirm against the upstream file.
    def test_error_space_quote(self):
        completion = '\n'.join((
            '```',
            ' return x - int(x)',
            '```',
        ))
        run_humaneval_check(completion)

    # Three imports, a blank line, then a full function definition.
    def test_import_1(self):
        completion = '\n'.join((
            'import numpy as np',
            'import math',
            'from typing import List',
            '',
            'def func(x):',
            ' return x - int(x)',
        ))
        run_humaneval_check(completion)

    # Three imports directly followed by a full function definition.
    def test_import_2(self):
        completion = '\n'.join((
            'from typing import List',
            'import numpy as np',
            'import math',
            'def func(x):',
            ' return x - int(x)',
        ))
        run_humaneval_check(completion)

    # One import, two blank lines, then a full function definition.
    def test_import_3(self):
        completion = '\n'.join((
            'import math',
            '',
            '',
            'def func(x):',
            ' return x - int(x)',
        ))
        run_humaneval_check(completion)

    # Full function definition whose body opens with a triple-quoted string.
    def test_comment(self):
        completion = '\n'.join((
            'def func(x: float) -> float:',
            " '''",
            ' blah blah blah',
            ' blah blah blah',
            " '''",
            ' return x - int(x)',
        ))
        run_humaneval_check(completion)

    # The return statement, two blank lines, then an extra function
    # definition after it.
    def test_additional(self):
        completion = '\n'.join((
            ' return x - int(x)',
            '',
            '',
            'def func(x: float) -> float:',
            " '''",
            ' blah blah blah',
            ' blah blah blah',
            " '''",
            ' return x - int(x)',
        ))
        run_humaneval_check(completion)