From 73a2d1675b2e328c9bf47c00fcbab0db4ec84250 Mon Sep 17 00:00:00 2001 From: Yanko Bolanos Date: Mon, 12 Feb 2024 11:09:36 -0800 Subject: [PATCH] improved part algo --- mem_concat.go | 4 ++-- s3tar.go | 12 +----------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/mem_concat.go b/mem_concat.go index cbbc0ea..2de20bf 100644 --- a/mem_concat.go +++ b/mem_concat.go @@ -31,7 +31,7 @@ func buildInMemoryConcat(ctx context.Context, client *s3.Client, objectList []*S sizeLimit := findMinimumPartSize(estimatedSize, opts.UserMaxPartSize) - Infof(ctx, "mpu partsize: %s, estimated ram usage: %s\nlargestObject: %d\n", formatBytes(sizeLimit), formatBytes(sizeLimit*int64(threads)*3), largestObjectSize) + Infof(ctx, "mpu partsize: %s, largestObject: %d\n", formatBytes(sizeLimit), largestObjectSize) // TODO: fix TOC to be pre-appended // tocObj, _, err := buildToc(ctx, objectList) @@ -42,7 +42,7 @@ func buildInMemoryConcat(ctx context.Context, client *s3.Client, objectList []*S groups := splitSliceBySizeLimit(sizeLimit, objectList) if len(groups) > maxPartNumLimit { - return nil, fmt.Errorf("number of parts exceeded the number of mpu parts allowed\n") + return nil, fmt.Errorf("number of parts (%d) exceeded the number of mpu parts allowed (10k)\n", len(groups)) } Infof(ctx, "number of parts: %d\n", len(groups)) diff --git a/s3tar.go b/s3tar.go index a0ade90..805953f 100644 --- a/s3tar.go +++ b/s3tar.go @@ -599,16 +599,10 @@ func _processSmallFiles(ctx context.Context, objectList []*S3Obj, start, end int // findMinimumPartSize is for the case when we want to optimize as many parts // as possible. This is helpful to parallelize the workload even more. -// findMinimumPartSize will start at 10MB and increment by 5MB until we're +// findMinimumPartSize will start at 5MB and increment by 5MB until we're // within the 10,000 MPU part limit func findMinimumPartSize(finalSizeBytes, userMaxSize int64) int64 { - if userMaxSize == 0 { - userMaxSize = partSizeMax - } else { - userMaxSize = userMaxSize * 1024 * 1024 - } - const fiveMB = beginningPad partSize := int64(fiveMB) @@ -618,10 +612,6 @@ func findMinimumPartSize(finalSizeBytes, userMaxSize int64) int64 { } } - if partSize > userMaxSize { - partSize = userMaxSize - } - if partSize > partSizeMax { log.Fatal("part size maximum cannot exceed 5GiB") }