Skip to content

Commit

Permalink
Merge pull request #139 from allenai/hamishivi-aeval-06
Browse files: browse the repository at this point in the history
Bump AlpacaEval to 0.6, add AlpacaEval 2
  • Loading branch information
hamishivi committed May 25, 2024
2 parents aa29913 + 6f43970 commit c857718
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
3 changes: 0 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ RUN pip install ai2-olmo
# NLTK download
RUN python -m nltk.downloader punkt

-# Use v1 of alpaca eval.
-ENV IS_ALPACA_EVAL_2=False

COPY open_instruct open_instruct
COPY eval eval
COPY ds_configs ds_configs
Expand Down
2 changes: 0 additions & 2 deletions eval/alpaca_farm/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def main(args):
df_leaderboard, annotations = alpaca_farm_evaluate(
model_outputs=model_results,
reference_outputs=args.reference_path,
-annotators_config="alpaca_eval_gpt4",
output_path=args.save_dir,
is_return_instead_of_print=True,
caching_path=os.path.join(args.save_dir, "alpaca_eval_annotator_cache.json"),
Expand All @@ -119,7 +118,6 @@ def main(args):
else:
df_leaderboard, annotations = alpaca_farm_evaluate(
model_outputs=model_results,
-annotators_config="alpaca_eval_gpt4",
output_path=args.save_dir,
is_return_instead_of_print=True,
caching_path=os.path.join(args.save_dir, "alpaca_eval_annotator_cache.json"),
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ einops
flash-attn==2.2.2
auto-gptq
fire
-alpaca-eval==0.5.3
+alpaca-eval==0.6
# for human eval web app
flask
vllm
Expand Down
13 changes: 12 additions & 1 deletion scripts/submit_eval_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def adjust_gpus(task_spec, experiment_group, model_name, gpu_multiplier):
"toxigen",
"xstest",
"alpaca_eval",
+"alpaca_eval_2",
]
experiment_groups = args.experiments or experiment_groups_default

Expand Down Expand Up @@ -348,7 +349,17 @@ def adjust_gpus(task_spec, experiment_group, model_name, gpu_multiplier):
'''
elif experiment_group == "alpaca_eval":
task_spec['arguments'][0] = '''
-python -m eval.alpaca_farm.run_eval \
+IS_ALPACA_EVAL_2=False python -m eval.alpaca_farm.run_eval \
+--use_vllm \
+--model_name_or_path /model \
+--tokenizer_name_or_path /model \
+--save_dir /output/ \
+--use_chat_format \
+--chat_formatting_function eval.templates.create_prompt_with_tulu_chat_format
+'''
+elif experiment_group == "alpaca_eval_2":
+task_spec['arguments'][0] = '''
+IS_ALPACA_EVAL_2=True python -m eval.alpaca_farm.run_eval \
--use_vllm \
--model_name_or_path /model \
--tokenizer_name_or_path /model \
Expand Down

0 comments on commit c857718

Please sign in to comment.