Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Add support for running preemptible workloads on beaker (#3143)
Browse files Browse the repository at this point in the history
* Add support for running preemptible workloads on beaker

* Fix boolean

* Move resumable_train script to scripts/ai2-internal
  • Loading branch information
Vivek Lakshmanan committed Aug 13, 2019
1 parent 0bd3319 commit fa1ff67
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ RUN ./scripts/cache_models.py
ARG SOURCE_COMMIT
ENV ALLENNLP_SOURCE_COMMIT $SOURCE_COMMIT

# Copy wrapper script to allow beaker to run resumable training workloads.
COPY scripts/ai2-internal/resumable_train.sh /stage/allennlp

LABEL maintainer="allennlp-contact@allenai.org"

EXPOSE 8000
Expand Down
3 changes: 3 additions & 0 deletions Dockerfile.pip
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ RUN if [ ! -z "$VERSION" ]; \
else echo "Installing the latest pip release of allennlp"; pip install allennlp; \
fi

# Copy wrapper script to allow beaker to run resumable training workloads.
COPY scripts/ai2-internal/resumable_train.sh /stage/allennlp

LABEL maintainer="allennlp-contact@allenai.org"

ENV ALLENNLP_VERSION=$VERSION
Expand Down
20 changes: 20 additions & 0 deletions scripts/ai2-internal/resumable_train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

# Dispatches to allennlp train. Recovers if the serialization directory is
# found and is non-empty, trains from scratch otherwise.
#
# Usage:
#   resumable_train.sh serialization_dir [train_arg ...]

#######################################
# Run `allennlp train`, adding -r (recover) when the serialization directory
# already holds state.
# Arguments:
#   $1    - serialization directory (required, must be non-empty string)
#   $2... - extra arguments forwarded verbatim to `allennlp train`
# Outputs:
#   One status line on stdout, followed by whatever allennlp prints.
#   Usage message on stderr when $1 is missing.
# Returns:
#   2 when the serialization directory argument is missing; otherwise the
#   exit status of `allennlp train`.
#######################################
resumable_train() {
  # Without this guard an empty/missing directory argument would make the
  # emptiness probe below inspect the current directory instead.
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s serialization_dir [train_arg ...]\n' "${0##*/}" >&2
    return 2
  fi

  local serialization_dir=$1
  shift

  # If $serialization_dir exists and is non-empty we are resuming.
  # `ls -A` includes dotfiles, so a directory holding only hidden files
  # still counts as non-empty. All expansions are quoted so paths with
  # spaces or glob characters are passed through intact.
  if [[ -d "$serialization_dir" && -n "$(ls -A -- "$serialization_dir")" ]]; then
    echo "Recovering state from $serialization_dir"
    allennlp train -r -s "$serialization_dir" "$@"
  else
    echo "No recovery state found. Starting from scratch."
    allennlp train -s "$serialization_dir" "$@"
  fi
}

# Last command: the script's exit status is that of resumable_train.
resumable_train "$@"

6 changes: 6 additions & 0 deletions scripts/ai2-internal/run_with_beaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def main(param_file: str, args: argparse.Namespace):
"--file-friendly-logging"
]

if args.preemptible:
allennlp_command = [ "/stage/allennlp/resumable_train.sh", "/output", "/config.json", "--file-friendly-logging" ]

dataset_mounts = []
for source in args.source + [f"{config_dataset_id}:/config.json"]:
datasetId, containerPath = source.split(":")
Expand All @@ -88,6 +91,8 @@ def main(param_file: str, args: argparse.Namespace):
requirements["memory"] = args.memory
if args.gpu_count:
requirements["gpuCount"] = int(args.gpu_count)
if args.preemptible:
requirements["preemptible"] = True
config_spec = {
"description": args.desc,
"image": image,
Expand Down Expand Up @@ -136,6 +141,7 @@ def main(param_file: str, args: argparse.Namespace):
parser.add_argument('--cpu', help='CPUs to reserve for this experiment (e.g., 0.5)')
parser.add_argument('--gpu-count', default=1, help='GPUs to use for this experiment (e.g., 1 (default))')
parser.add_argument('--memory', help='Memory to reserve for this experiment (e.g., 1GB)')
parser.add_argument('--preemptible', action='store_true', help='Allow task to run on preemptible hardware')

args = parser.parse_args()

Expand Down

0 comments on commit fa1ff67

Please sign in to comment.