forked from actgardner/gogen-avro
/
reader.go
123 lines (105 loc) · 2.89 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package container
import (
"bytes"
"compress/flate"
"fmt"
"io"
"github.com/golang/snappy"
"github.com/fortelabsinc/gogen-avro/v10/container/avro"
"github.com/fortelabsinc/gogen-avro/v10/schema"
)
// Reader is a low-level primitive for reading the OCF framing of a file.
// Generally you can create a Reader using the `New<RecordType>Reader` function generated for every record type.
type Reader struct {
// codec is the value of the header's "avro.codec" metadata entry; NewReader
// substitutes "null" when the entry is absent.
codec Codec
// reader is the underlying stream passed to NewReader; blocks are read from it.
reader io.Reader
// compressedReader yields the decoded bytes of the current block. It is nil
// until the first block has been opened.
compressedReader io.Reader
// schemaBytes is the raw value of the header's "avro.schema" metadata entry.
schemaBytes []byte
// schema is initialised to nil by NewReader.
schema schema.AvroType
// sync is the sync marker taken from the header; every block's marker is
// compared against it.
sync avro.Sync
}
// NewReader deserializes the OCF header from r and returns a Reader that will
// read the blocks following it from the same stream.
//
// An error is returned when the header cannot be deserialized, when the magic
// bytes are not the Avro object container magic ('O', 'b', 'j', 1), or when the
// metadata map has no "avro.schema" entry. A missing "avro.codec" entry is not
// an error: the codec defaults to "null". The codec value itself is not
// validated here; an unsupported codec is reported when a block is opened.
func NewReader(r io.Reader) (*Reader, error) {
	header, err := avro.DeserializeAvroContainerHeader(r)
	if err != nil {
		return nil, err
	}

	// Reject any header that does not carry the Avro container magic.
	if !bytes.Equal(header.Magic[:], []byte{'O', 'b', 'j', 1}) {
		return nil, fmt.Errorf("Unexpected magic in header - %v", header.Magic)
	}

	schemaBytes, ok := header.Meta["avro.schema"]
	if !ok {
		return nil, fmt.Errorf("Expected avro.schema in header, not specified in metadata map - %v", header.Meta)
	}
	log("Got OCF schema from header: %v", string(schemaBytes))

	codec, ok := header.Meta["avro.codec"]
	if !ok {
		// The codec entry is optional; fall back to the uncompressed codec.
		log("Expected avro.codec in header, not specified in metadata map. assuming 'null' for Codec - %v", header.Meta)
		codec = []byte("null")
	}
	log("Got OCF codec from header: %v", string(codec))

	return &Reader{
		codec:            Codec(codec),
		reader:           r,
		schemaBytes:      schemaBytes,
		compressedReader: nil,
		schema:           nil,
		sync:             header.Sync,
	}, nil
}
// AvroContainerSchema returns the raw bytes of the "avro.schema" metadata entry
// captured from the container header. The returned slice is the Reader's own
// field, not a copy.
func (r *Reader) AvroContainerSchema() []byte {
return r.schemaBytes
}
// Read copies decoded record bytes from the current block into b and returns
// the number of bytes copied.
//
// The first block is opened lazily on the first call. When the current block
// reports io.EOF without yielding any bytes, the next block is opened and the
// read is retried. Any error from opening a block is returned as-is with a
// zero count. When a read yields bytes, the count is returned with a nil error
// regardless of the error reported alongside it.
func (r *Reader) Read(b []byte) (n int, err error) {
	if r.compressedReader == nil {
		log("OCF reader opening new block")
		if openErr := r.openBlock(); openErr != nil {
			return 0, openErr
		}
	}

	for {
		count, readErr := r.compressedReader.Read(b)
		log("OCF container read: %v %v", count, readErr)

		switch {
		case count > 0:
			return count, nil
		case readErr != io.EOF:
			return count, readErr
		}

		// The current block is exhausted; move on to the next one and retry.
		log("OCF EOF, opening new block")
		if openErr := r.openBlock(); openErr != nil {
			return 0, openErr
		}
	}
}
// openBlock deserializes the next container block from the underlying reader
// and points compressedReader at its decoded contents.
//
// It returns an error when the block cannot be deserialized, when the block's
// sync marker differs from the one in the file header, when a Snappy block is
// too short to hold its 4-byte checksum trailer or fails to decode, or when
// the codec is not one of Null, Deflate or Snappy. On error compressedReader
// is left unchanged.
func (r *Reader) openBlock() error {
	header, err := avro.DeserializeAvroContainerBlock(r.reader)
	if err != nil {
		return err
	}
	log("OCF block size: %v", len(header.RecordBytes))

	if header.Sync != r.sync {
		return fmt.Errorf("Unexpected sync marker %q, expected %q", header.Sync, r.sync)
	}

	switch r.codec {
	case Null:
		r.compressedReader = bytes.NewBuffer(header.RecordBytes)
	case Deflate:
		r.compressedReader = flate.NewReader(bytes.NewBuffer(header.RecordBytes))
	case Snappy:
		// Each Snappy block ends with a 4-byte checksum that is not part of the
		// compressed payload; guard the slice so a truncated block is reported
		// as an error instead of panicking.
		const checksumLen = 4
		if len(header.RecordBytes) < checksumLen {
			return fmt.Errorf("Snappy block is %d bytes, too short to contain the %d-byte checksum", len(header.RecordBytes), checksumLen)
		}
		// TODO: Verify the trailing 4 bytes, which per the Avro specification are
		// the big-endian CRC32 of the uncompressed block data.
		payload := header.RecordBytes[:len(header.RecordBytes)-checksumLen]
		decoded, err := snappy.Decode(nil, payload)
		if err != nil {
			return err
		}
		r.compressedReader = bytes.NewBuffer(decoded)
	default:
		return fmt.Errorf("Unexpected codec %q", r.codec)
	}
	return nil
}