generated from ipfs/ipfs-repository-template
-
Notifications
You must be signed in to change notification settings - Fork 95
/
parse.go
114 lines (101 loc) · 3.2 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package chunk
import (
"errors"
"fmt"
"io"
"strconv"
"strings"
)
const (
// DefaultBlockSize is the chunk size that splitters produce (or aim to).
DefaultBlockSize int64 = 1024 * 256
// ChunkSizeLimit is the largest chunk size (1 MiB) accepted by the chunker
// string parsers in this file.
//
// No leaf block should contain more than 1MiB of payload data (wrapping
// overhead aside), which effectively mandates the maximum chunk size.
// See discussion at https://github.com/ipfs/boxo/chunker/pull/21#discussion_r369124879 for background.
// NOTE(review): the URL above does not have the shape of a GitHub pull-request
// link (owner/repo/pull/N); it was presumably rewritten together with the
// import path when this package moved repositories - confirm and restore the
// original link.
ChunkSizeLimit int = 1048576
)
// Sentinel errors returned while parsing chunker specification strings.
var (
// ErrRabinMin is returned by parseRabinString when the min value of a
// "rabin-{min}-{avg}-{max}" specification is below 16.
// NOTE(review): the message says "greater than 16", but the check in
// parseRabinString accepts a min of exactly 16 - confirm which is intended.
ErrRabinMin = errors.New("rabin min must be greater than 16")
// ErrSize is returned by FromString when the size of a "size-{size}"
// specification is zero or negative.
ErrSize = errors.New("chunker size must be greater than 0")
// ErrSizeMax is returned when a requested chunk size exceeds ChunkSizeLimit.
ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit)
)
// FromString returns a Splitter reading from r, selected by the chunker
// specification string.
//
// Supported specifications:
//   - "" or "default": the splitter returned by DefaultSplitter.
//   - "size-{size}": NewSizeSplitter with the given size, which must satisfy
//     0 < size <= ChunkSizeLimit (ErrSize / ErrSizeMax otherwise).
//   - "rabin", "rabin-{avg}" and "rabin-{min}-{avg}-{max}": delegated to
//     parseRabinString.
//   - "buzhash": NewBuzhash.
//
// Any other string, including ones that merely start with "rabin" such as
// "rabinfoo", yields an "unrecognized chunker option" error. A size that is
// not a valid integer yields the error reported by strconv.Atoi.
func FromString(r io.Reader, chunker string) (Splitter, error) {
	switch {
	case chunker == "" || chunker == "default":
		return DefaultSplitter(r), nil

	case strings.HasPrefix(chunker, "size-"):
		// Parse everything after the prefix rather than only the first
		// "-"-separated field, so that trailing garbage ("size-100-junk")
		// is rejected instead of being silently ignored.
		size, err := strconv.Atoi(strings.TrimPrefix(chunker, "size-"))
		if err != nil {
			return nil, err
		}
		if size <= 0 {
			return nil, ErrSize
		}
		if size > ChunkSizeLimit {
			return nil, ErrSizeMax
		}
		return NewSizeSplitter(r, int64(size)), nil

	// Require the exact name or the "rabin-" prefix; a bare HasPrefix("rabin")
	// would treat unrelated names such as "rabinfoo" as plain "rabin".
	case chunker == "rabin" || strings.HasPrefix(chunker, "rabin-"):
		return parseRabinString(r, chunker)

	case chunker == "buzhash":
		return NewBuzhash(r), nil

	default:
		return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
	}
}
// parseRabinString builds a Rabin splitter reading from r out of a chunker
// specification that has already been identified as a rabin one.
//
// The specification is split on "-" and interpreted by its number of fields:
//   - "rabin": NewRabin with DefaultBlockSize.
//   - "rabin-{avg}": NewRabin with the given size; ErrSizeMax is returned
//     when 1.5 times the size exceeds ChunkSizeLimit.
//   - "rabin-{min}-{avg}-{max}": NewRabinMinMax. Each field may optionally be
//     labelled ("min:16", "avg:32", "max:64"). min must be at least 16
//     (ErrRabinMin), min < avg < max must hold, and max may not exceed
//     ChunkSizeLimit (ErrSizeMax).
//
// Any other number of fields is a format error. Values that are not valid
// integers yield the error reported by strconv.Atoi.
func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
	parts := strings.Split(chunker, "-")
	switch len(parts) {
	case 1:
		return NewRabin(r, uint64(DefaultBlockSize)), nil

	case 2:
		size, err := strconv.Atoi(parts[1])
		if err != nil {
			return nil, err
		}
		// Compare 1.5*size against the limit using integer arithmetic
		// (size + size/2 equals the truncated 1.5*size). A float conversion
		// here is implementation-dependent once the product no longer fits in
		// an int, which could let enormous sizes slip past the limit. The
		// first comparison also keeps size+size/2 from overflowing.
		// FIXME - this will be addressed in a subsequent PR
		if size > ChunkSizeLimit || size+size/2 > ChunkSizeLimit {
			return nil, ErrSizeMax
		}
		return NewRabin(r, uint64(size)), nil

	case 4:
		minSize, err := parseRabinLabel(parts[1], "min", "first")
		if err != nil {
			return nil, err
		}
		if minSize < 16 {
			return nil, ErrRabinMin
		}

		avgSize, err := parseRabinLabel(parts[2], "avg", "second")
		if err != nil {
			return nil, err
		}

		maxSize, err := parseRabinLabel(parts[3], "max", "final")
		if err != nil {
			return nil, err
		}

		switch {
		case minSize >= avgSize:
			return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg")
		case avgSize >= maxSize:
			return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max")
		case maxSize > ChunkSizeLimit:
			return nil, ErrSizeMax
		}
		return NewRabinMinMax(r, uint64(minSize), uint64(avgSize), uint64(maxSize)), nil

	default:
		return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
	}
}

// parseRabinLabel parses one "{value}" or "{label}:{value}" field of a
// "rabin-{min}-{avg}-{max}" specification. When the field contains a ":", the
// text before the first ":" must equal label; the value is the text after the
// last ":". position ("first", "second", "final") is used only to build the
// label-mismatch error message.
func parseRabinLabel(field, label, position string) (int, error) {
	sub := strings.Split(field, ":")
	if len(sub) > 1 && sub[0] != label {
		return 0, errors.New(position + " label must be " + label)
	}
	return strconv.Atoi(sub[len(sub)-1])
}