-
Notifications
You must be signed in to change notification settings - Fork 1
/
object_streams.go
104 lines (88 loc) · 3.47 KB
/
object_streams.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package file
import (
"bytes"
"fmt"
"strconv"
"github.com/benoitkugler/pdf/model"
"github.com/benoitkugler/pdf/reader/parser"
)
// objectStream is the parsed version of an object stream: it stores one
// parsed object per (object number, offset) pair found in the stream prolog,
// in prolog order.
type objectStream []parser.Object // with length N
// processObjectStream returns the parsed content of the object stream whose
// object number is on.
//
// The cache in ctx.xrefTable.objectStreams is consulted first. On a miss, the
// stream is located through the xref table, its content is decoded with the
// declared filters, every compressed object listed in the prolog is parsed,
// and the result is stored in the cache before being returned.
//
// An error is returned when the stream is not referenced by the xref table,
// when its dictionary, filters, Length or First entries are invalid, when the
// content cannot be decoded, when the prolog is malformed (odd number of
// fields, non numeric, negative, out of bounds or decreasing offsets), or when
// one of the compressed objects cannot be parsed.
func (ctx *context) processObjectStream(on int) (objectStream, error) {
	if os, ok := ctx.xrefTable.objectStreams[on]; ok {
		return os, nil
	}

	// process the object stream
	entry, ok := ctx.xrefTable.objects[model.ObjIndirectRef{ObjectNumber: on}]
	if !ok {
		return nil, fmt.Errorf("missing object stream for reference %d", on)
	}

	streamHeader, err := ctx.parseStreamDictAt(entry.offset)
	if err != nil {
		return nil, fmt.Errorf("invalid stream at %d; %w", entry.offset, err)
	}

	filters, err := parser.ParseFilters(streamHeader.dict["Filter"], streamHeader.dict["DecodeParms"], ctx.resolve)
	if err != nil {
		return nil, fmt.Errorf("invalid object stream: %w", err)
	}

	lengthO, err := ctx.resolve(streamHeader.dict["Length"])
	if err != nil {
		return nil, fmt.Errorf("invalid object stream Length: %w", err)
	}
	length, ok := lengthO.(parser.Integer)
	if !ok {
		return nil, fmt.Errorf("invalid object stream Length: expected integer, got %T", lengthO)
	}

	// The generation number of an object stream and of any compressed object shall be zero.
	decoded, err := ctx.decodeStreamContent(model.ObjIndirectRef{ObjectNumber: on}, filters, streamHeader.contentOffset, int(length))
	if err != nil {
		return nil, fmt.Errorf("invalid object stream: %w", err)
	}

	firstObjectOffset, ok := streamHeader.dict["First"].(parser.Integer)
	if !ok {
		return nil, fmt.Errorf("invalid object stream First: expected integer, got %T", streamHeader.dict["First"])
	}
	first := int(firstObjectOffset)
	// a negative value would make the slicing below panic
	if first < 0 {
		return nil, fmt.Errorf("invalid object stream First: negative value %d", first)
	}
	if first > len(decoded) {
		return nil, fmt.Errorf("out of bounds object stream First: %d > %d", first, len(decoded))
	}

	prolog := decoded[:first]

	// N pairs of integers separated by white space, where the first integer in each pair shall represent the object
	// number of a compressed object and the second integer shall represent the byte offset in the decoded
	// stream of that object, relative to the first object stored in the object stream, the value of the stream's first
	// entry. The offsets shall be in increasing order.
	// Note: The separator used in the prolog shall be white space but some PDF writers use 0x00.
	prolog = bytes.ReplaceAll(prolog, []byte{0x00}, []byte{0x20})
	fields := bytes.Fields(prolog)
	if len(fields)%2 != 0 {
		return nil, fmt.Errorf("odd number of fields (%d) in object stream prolog", len(fields))
	}

	// only the second integer of each pair (the offset) is used here
	offsets := make([]int, len(fields)/2)
	for i := range offsets {
		field := fields[2*i+1]
		offsets[i], err = strconv.Atoi(string(field))
		if err != nil {
			return nil, fmt.Errorf("invalid object offset in object stream: %q", field)
		}
		// The relative offset is compared against the remaining room, before
		// adding First, so that the sum can neither overflow nor be negative.
		if offsets[i] < 0 || offsets[i] > len(decoded)-first {
			return nil, fmt.Errorf("invalid object offset in object stream: %d", offsets[i])
		}
		offsets[i] += first
		// decreasing offsets would yield an invalid slice range (start > end) below
		if i > 0 && offsets[i] < offsets[i-1] {
			return nil, fmt.Errorf("invalid object offset in object stream: %d is before previous offset %d", offsets[i], offsets[i-1])
		}
	}

	objects := make(objectStream, len(offsets))
	for i := range objects {
		// each object extends up to the start of the next one,
		// or up to the end of the decoded content for the last one
		start, end := offsets[i], len(decoded)
		if i+1 < len(offsets) {
			end = offsets[i+1]
		}
		objects[i], err = parser.ParseObject(decoded[start:end])
		if err != nil {
			return nil, fmt.Errorf("invalid object in object stream: %w", err)
		}
	}

	// NOTE(review): the PDF specification names the entry referencing another
	// object stream "Extends"; this check looks up "Extents" and so presumably
	// never triggers on conforming files - confirm which behaviour is intended.
	if _, has := streamHeader.dict["Extents"]; has {
		return nil, fmt.Errorf("unsupported Extents in object stream")
	}

	// cache it
	ctx.xrefTable.objectStreams[on] = objects

	return objects, nil
}