Skip to content

Commit

Permalink
improved part algo
Browse files Browse the repository at this point in the history
  • Loading branch information
rem7 committed Feb 12, 2024
1 parent 84df3b5 commit 73a2d16
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 13 deletions.
4 changes: 2 additions & 2 deletions mem_concat.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func buildInMemoryConcat(ctx context.Context, client *s3.Client, objectList []*S

sizeLimit := findMinimumPartSize(estimatedSize, opts.UserMaxPartSize)

Infof(ctx, "mpu partsize: %s, estimated ram usage: %s\nlargestObject: %d\n", formatBytes(sizeLimit), formatBytes(sizeLimit*int64(threads)*3), largestObjectSize)
Infof(ctx, "mpu partsize: %s, largestObject: %d\n", formatBytes(sizeLimit), largestObjectSize)

// TODO: fix TOC to be prepended
// tocObj, _, err := buildToc(ctx, objectList)
Expand All @@ -42,7 +42,7 @@ func buildInMemoryConcat(ctx context.Context, client *s3.Client, objectList []*S

groups := splitSliceBySizeLimit(sizeLimit, objectList)
if len(groups) > maxPartNumLimit {
return nil, fmt.Errorf("number of parts exceeded the number of mpu parts allowed\n")
return nil, fmt.Errorf("number of parts (%d) exceeded the number of mpu parts allowed (10k)\n", len(groups))
}

Infof(ctx, "number of parts: %d\n", len(groups))
Expand Down
12 changes: 1 addition & 11 deletions s3tar.go
Original file line number Diff line number Diff line change
Expand Up @@ -599,16 +599,10 @@ func _processSmallFiles(ctx context.Context, objectList []*S3Obj, start, end int

// findMinimumPartSize is for the case when we want to produce as many parts
// as possible, which helps parallelize the workload even more.
// findMinimumPartSize will start at 10MB and increment by 5MB until we're
// findMinimumPartSize will start at 5MB and increment by 5MB until we're
// within the 10,000 MPU part limit
func findMinimumPartSize(finalSizeBytes, userMaxSize int64) int64 {

if userMaxSize == 0 {
userMaxSize = partSizeMax
} else {
userMaxSize = userMaxSize * 1024 * 1024
}

const fiveMB = beginningPad
partSize := int64(fiveMB)

Expand All @@ -618,10 +612,6 @@ func findMinimumPartSize(finalSizeBytes, userMaxSize int64) int64 {
}
}

if partSize > userMaxSize {
partSize = userMaxSize
}

if partSize > partSizeMax {
log.Fatal("part size maximum cannot exceed 5GiB")
}
Expand Down

0 comments on commit 73a2d16

Please sign in to comment.