Increase aws s3 cp chunksize default
brietaylor committed Apr 8, 2020
1 parent 3949c2b commit ea2df8f
Showing 2 changed files with 17 additions and 1 deletion.
docker/Dockerfile (9 additions, 0 deletions)
@@ -88,6 +88,15 @@ RUN echo 'alias serratus="sudo -u serratus"' >> /root/.bashrc

 WORKDIR /home/serratus
 
+# Increase the default chunksize for `aws s3 cp`. By default it is 8MB,
+# which results in a very high number of PUT and POST requests. These
+# numbers have NOT been experimentally tuned; they were chosen to be just
+# below the max size for a single-part upload (5GB). I haven't pushed it
+# higher because I don't want to test the edge cases where a file's size
+# is near the part limit.
+RUN aws configure set default.s3.multipart_threshold 4GB \
+    && aws configure set default.s3.multipart_chunksize 4GB
+
 #==========================================================
 # ENTRYPOINT ==============================================
 #==========================================================
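For a sense of scale: a multipart upload issues one UploadPart PUT per chunk, so the request count is roughly ceil(file size / chunksize). A quick shell sketch of the difference (the 50GB file size is an illustrative assumption, not a number from this commit):

# Back-of-the-envelope PUT-request counts for one multipart upload.
# The 50GB file size below is a made-up example.
size=$((50 * 1024 * 1024 * 1024))   # 50GB example file
old=$((8 * 1024 * 1024))            # old default chunksize: 8MB
new=$((4 * 1024 * 1024 * 1024))     # this commit's chunksize: 4GB

# ceil(size / chunk) via integer arithmetic
echo "8MB chunks: $(( (size + old - 1) / old )) parts"   # -> 6400
echo "4GB chunks: $(( (size + new - 1) / new )) parts"   # -> 13

A larger chunksize also keeps big files comfortably under S3's 10,000-part limit for a single multipart upload.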
scripts/serratus-dl/s3_cp_formatted.sh (8 additions, 1 deletion)
@@ -25,4 +25,11 @@ N=$(expr "$2" - 1 || true) # Parallel is one based. :/

 DEST=$(printf "$FMT" "$N")
 shift 2
-aws s3 cp - "$DEST" "$@" <&0
+
+# Buffer stdin in a temporary file; set the cleanup trap first so a partial
+# file is removed on early exit. We used to stream this through `aws s3 cp`,
+# but it crashes when a large chunksize is combined with streaming input.
+TEMPFILE="$(basename "$DEST")"
+trap 'rm -f "$TEMPFILE"' EXIT
+cat - > "$TEMPFILE"
+aws s3 cp "$TEMPFILE" "$DEST" "$@"
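For context, a minimal usage sketch of the script after this change. The producer command, bucket, and key prefix are hypothetical; any arguments after the first two are passed through to `aws s3 cp`:

# Hypothetical example: upload the 3rd parallel block of a stream.
some_producer | ./s3_cp_formatted.sh "s3://example-bucket/run1/block-%d" 3 --only-show-errors
# N is 1-based, so this buffers stdin to ./block-2 and then runs:
#   aws s3 cp block-2 s3://example-bucket/run1/block-2 --only-show-errors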
