-
Notifications
You must be signed in to change notification settings - Fork 3.4k
/
writer.go
101 lines (86 loc) · 2.73 KB
/
writer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package arrjson
import (
"fmt"
"io"
"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/array"
"github.com/apache/arrow/go/v13/arrow/arrio"
"github.com/apache/arrow/go/v13/arrow/internal/dictutils"
"github.com/apache/arrow/go/v13/internal/json"
)
// Whitespace strings used when formatting the JSON output.
const (
// jsonIndent is the per-level indentation handed to the encoder in
// Writer.Close.
jsonIndent = " "
// jsonPrefix and jsonRecPrefix are not referenced in the visible part of
// this file; presumably they are line prefixes used by other code in the
// package — TODO confirm.
jsonPrefix = " "
jsonRecPrefix = " "
)
// rawJSON is the top-level document that Writer.Close hands to the JSON
// encoder. The struct tags give the key under which each section is emitted.
type rawJSON struct {
// Schema is set by NewWriter from the Arrow schema it is given.
Schema Schema `json:"schema"`
// Records gains one entry per Writer.Write call. Note that the JSON key is
// "batches", not "records".
Records []Record `json:"batches"`
// Dictionaries is populated during the first Writer.Write call when
// dictionaries are collected from the record; the key is omitted from the
// output when the slice is empty.
Dictionaries []Dictionary `json:"dictionaries,omitempty"`
}
// Writer buffers a schema and the records passed to Write in memory, and
// encodes them as one JSON document when Close is called.
type Writer struct {
// w is the destination of the encoded document; Close does nothing when it
// is nil.
w io.Writer
// nrecs counts the records accepted by Write so far. Write uses it to
// detect the first record.
nrecs int64
// raw is the document being accumulated.
raw rawJSON
// mapper is initialised from the schema in NewWriter and passed to the
// schema conversion and to dictionary collection.
mapper dictutils.Mapper
}
// NewWriter returns a Writer that will emit its JSON document to w.
//
// The schema is imported into the writer's dictionary mapper and converted to
// its JSON representation up front. The record list starts out as an empty,
// non-nil slice. The returned error is always nil in the current
// implementation.
func NewWriter(w io.Writer, schema *arrow.Schema) (*Writer, error) {
	out := new(Writer)
	out.w = w

	// The mapper must see the schema before schemaToJSON consults it.
	out.mapper.ImportSchema(schema)
	out.raw.Schema = schemaToJSON(schema, &out.mapper)
	out.raw.Records = []Record{}

	return out, nil
}
// Write converts rec to its JSON form and appends it to the buffered
// document; nothing is written to the underlying io.Writer until Close.
//
// On the first call only, the dictionaries of rec are collected and stored in
// the document, each one wrapped in a single-column record whose field is
// named "DICT<id>". An error from dictionary collection is returned unchanged
// and the record is not appended in that case.
func (w *Writer) Write(rec arrow.Record) error {
	if w.nrecs == 0 {
		dicts, err := dictutils.CollectDictionaries(rec, &w.mapper)
		if err != nil {
			return err
		}
		if n := len(dicts); n > 0 {
			w.raw.Dictionaries = make([]Dictionary, 0, n)
		}
		for _, pair := range dicts {
			id, dict := pair.ID, pair.Dict
			// The releases are intentionally deferred to the end of Write
			// rather than the end of each iteration.
			defer dict.Release()

			field := arrow.Field{
				Name:     fmt.Sprintf("DICT%d", id),
				Type:     dict.DataType(),
				Nullable: true,
			}
			schema := arrow.NewSchema([]arrow.Field{field}, nil)
			wrapped := array.NewRecord(schema, []arrow.Array{dict}, int64(dict.Len()))
			defer wrapped.Release()

			entry := Dictionary{ID: id, Data: recordToJSON(wrapped)}
			w.raw.Dictionaries = append(w.raw.Dictionaries, entry)
		}
	}

	w.raw.Records = append(w.raw.Records, recordToJSON(rec))
	w.nrecs++
	return nil
}
// Close encodes the buffered document to the underlying writer and returns
// any encoding error. It does nothing and returns nil when the Writer has no
// underlying writer.
func (w *Writer) Close() error {
	dst := w.w
	if dst == nil {
		return nil
	}

	encoder := json.NewEncoder(dst)
	// This output is not destined for an HTML context, so turn off HTML
	// escaping: characters such as < and > are then not rewritten as unicode
	// escapes, which lets the produced JSON files be matched against each
	// other.
	encoder.SetEscapeHTML(false)
	encoder.SetIndent("", jsonIndent)

	if err := encoder.Encode(w.raw); err != nil {
		return err
	}
	return nil
}
// Compile-time assertion that *Writer satisfies arrio.Writer.
var _ arrio.Writer = (*Writer)(nil)