-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
file.go
152 lines (134 loc) · 5.14 KB
/
file.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
"io"
bq "google.golang.org/api/bigquery/v2"
)
// A ReaderSource is a source for a load operation that gets
// data from an io.Reader.
//
// When a ReaderSource is part of a LoadConfig obtained via Job.Config,
// its internal io.Reader will be nil, so it cannot be used for a
// subsequent load operation.
type ReaderSource struct {
r io.Reader
FileConfig
}
// NewReaderSource creates a ReaderSource from an io.Reader. You may
// optionally configure properties on the ReaderSource that describe the
// data being read, before passing it to Table.LoaderFrom.
func NewReaderSource(r io.Reader) *ReaderSource {
return &ReaderSource{r: r}
}
func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader {
r.FileConfig.populateLoadConfig(lc)
return r.r
}
// FileConfig contains configuration options that pertain to files, typically
// text files that require interpretation to be used as a BigQuery table. A
// file may live in Google Cloud Storage (see GCSReference), or it may be
// loaded into a table via the Table.LoaderFromReader.
type FileConfig struct {
// SourceFormat is the format of the data to be read.
// Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet. The default is CSV.
SourceFormat DataFormat
// Indicates if we should automatically infer the options and
// schema for CSV and JSON sources.
AutoDetect bool
// MaxBadRecords is the maximum number of bad records that will be ignored
// when reading data.
MaxBadRecords int64
// IgnoreUnknownValues causes values not matching the schema to be
// tolerated. Unknown values are ignored. For CSV this ignores extra values
// at the end of a line. For JSON this ignores named values that do not
// match any column name. If this field is not set, records containing
// unknown values are treated as bad records. The MaxBadRecords field can
// be used to customize how bad records are handled.
IgnoreUnknownValues bool
// Schema describes the data. It is required when reading CSV or JSON data,
// unless the data is being loaded into a table that already exists.
Schema Schema
// Additional options for CSV files.
CSVOptions
// Additional options for Parquet files.
ParquetOptions *ParquetOptions
}
func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
conf.SkipLeadingRows = fc.SkipLeadingRows
conf.SourceFormat = string(fc.SourceFormat)
conf.Autodetect = fc.AutoDetect
conf.AllowJaggedRows = fc.AllowJaggedRows
conf.AllowQuotedNewlines = fc.AllowQuotedNewlines
conf.Encoding = string(fc.Encoding)
conf.FieldDelimiter = fc.FieldDelimiter
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
conf.MaxBadRecords = fc.MaxBadRecords
if fc.Schema != nil {
conf.Schema = fc.Schema.toBQ()
}
if fc.ParquetOptions != nil {
conf.ParquetOptions = &bq.ParquetOptions{
EnumAsString: fc.ParquetOptions.EnumAsString,
EnableListInference: fc.ParquetOptions.EnableListInference,
}
}
conf.Quote = fc.quote()
}
func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
fc.SourceFormat = DataFormat(conf.SourceFormat)
fc.AutoDetect = conf.Autodetect
fc.MaxBadRecords = conf.MaxBadRecords
fc.IgnoreUnknownValues = conf.IgnoreUnknownValues
fc.Schema = bqToSchema(conf.Schema)
fc.SkipLeadingRows = conf.SkipLeadingRows
fc.AllowJaggedRows = conf.AllowJaggedRows
fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
fc.Encoding = Encoding(conf.Encoding)
fc.FieldDelimiter = conf.FieldDelimiter
fc.CSVOptions.setQuote(conf.Quote)
}
func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) {
format := fc.SourceFormat
if format == "" {
// Format must be explicitly set for external data sources.
format = CSV
}
conf.Autodetect = fc.AutoDetect
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
conf.MaxBadRecords = fc.MaxBadRecords
conf.SourceFormat = string(format)
if fc.Schema != nil {
conf.Schema = fc.Schema.toBQ()
}
if format == CSV {
fc.CSVOptions.populateExternalDataConfig(conf)
}
if fc.ParquetOptions != nil {
conf.ParquetOptions = &bq.ParquetOptions{
EnumAsString: fc.ParquetOptions.EnumAsString,
EnableListInference: fc.ParquetOptions.EnableListInference,
}
}
}
// Encoding specifies the character encoding of data to be loaded into BigQuery.
// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
// for more details about how this is used.
type Encoding string
const (
// UTF_8 specifies the UTF-8 encoding type.
UTF_8 Encoding = "UTF-8"
// ISO_8859_1 specifies the ISO-8859-1 encoding type.
ISO_8859_1 Encoding = "ISO-8859-1"
)