-
Notifications
You must be signed in to change notification settings - Fork 4
/
sync.go
311 lines (299 loc) · 8.57 KB
/
sync.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
package dataset
import (
"fmt"
"log"
"strings"
// Caltech Library Packages
"github.com/caltechlibrary/dataset/tbl"
"github.com/caltechlibrary/dotpath"
)
// findLabel scans labels for the first entry equal to label. It returns
// that entry's index and true when a match exists, otherwise -1 and false.
func findLabel(labels []string, label string) (int, bool) {
	for i := 0; i < len(labels); i++ {
		if labels[i] == label {
			return i, true
		}
	}
	return -1, false
}
// strInArray reports whether val occurs anywhere in the string slice a.
func strInArray(a []string, val string) bool {
	for i := range a {
		if a[i] == val {
			return true
		}
	}
	return false
}
// mergeKeys appends to sorted every key of unsorted that sorted does not
// already contain and returns the result. The existing entries of sorted
// keep their order; new keys follow in the order they first appear in
// unsorted. A key that occurs more than once in unsorted is appended only
// once, so the merge never introduces duplicates.
func mergeKeys(sorted []string, unsorted []string) []string {
	// Track every known key in a set so each membership test is O(1)
	// instead of a linear scan of sorted.
	seen := make(map[string]bool, len(sorted)+len(unsorted))
	for _, key := range sorted {
		seen[key] = true
	}
	// Collect the missing keys first and append them in one step, so
	// sorted is not modified while unsorted is still being read.
	var missing []string
	for _, key := range unsorted {
		if seen[key] {
			continue
		}
		seen[key] = true
		missing = append(missing, key)
	}
	return append(sorted, missing...)
}
// labelsToHeaderRow builds the header row a table needs in order to carry
// every label of frame f. It starts from the table's first row: each cell
// is converted to a string, and a cell that cannot be converted is given a
// generated name (fmtColumnName formatted with the 1-based column number).
// Any frame label not yet present is then appended as a new column.
//
// It returns the resulting header and true when the header differs from
// the table's current one (a cell was renamed or a label was appended).
// table[0] is indexed unconditionally, so table must have at least one row.
func labelsToHeaderRow(f *DataFrame, table [][]interface{}) ([]string, bool) {
	var (
		header  = []string{}
		changed = false
	)
	// Start from the header cells already in the table.
	for colNo, cell := range table[0] {
		name, err := tbl.ValueInterfaceToString(cell)
		if err != nil {
			// Unconvertible cell: substitute a generated column name.
			name = fmt.Sprintf(fmtColumnName, colNo+1)
			changed = true
		}
		header = append(header, name)
	}
	// Add a column for every frame label the header lacks.
	for _, label := range f.Labels {
		if strInArray(header, label) {
			continue
		}
		header = append(header, label)
		changed = true
	}
	return header, changed
}
// dotPathToColumnMap maps the dot paths of frame f to column numbers of
// table by matching the cells of the table's header row (table[0])
// against the frame's labels.
//
// An error and a nil map are returned when the frame's labels and dot
// paths differ in length, when the table has fewer than two rows (a
// header plus at least one data row), or when the frame has no "._Key"
// dot path.
//
// Header cells that are blank, cannot be converted to a string, or match
// no frame label are ignored. The returned map can therefore lack an
// entry for a dot path, including "._Key", when its label is absent from
// the header; callers must check for the columns they rely on.
func dotPathToColumnMap(f *DataFrame, table [][]interface{}) (map[string]int, error) {
	if len(f.Labels) != len(f.DotPaths) {
		return nil, fmt.Errorf("corrupted frame, labels don't map to dot paths")
	}
	if len(table) < 2 {
		return nil, fmt.Errorf("table is empty")
	}
	// The frame itself must carry the key dot path.
	hasKeyPath := false
	for _, p := range f.DotPaths {
		if p == "._Key" {
			hasKeyPath = true
			break
		}
	}
	if !hasKeyPath {
		return nil, fmt.Errorf("Can't identify key column")
	}
	// Work from the header row of table.
	colMap := make(map[string]int, len(f.DotPaths))
	for i, col := range table[0] {
		label, err := tbl.ValueInterfaceToString(col)
		if err != nil || strings.TrimSpace(label) == "" {
			continue
		}
		// Labels and dot paths are parallel slices, so the label's
		// position selects the dot path mapped to column i.
		if pos, found := findLabel(f.Labels, label); found {
			colMap[f.DotPaths[pos]] = i
		}
	}
	return colMap, nil
}
// rowToObj builds an object (attribute name -> value) from one table row.
// For every dot path whose column number lies inside row, the cell value
// is stored under the dot path with its leading "." removed. The "_Key"
// attribute is always set to key, replacing any value taken from the row.
//
// BUG: the mapping is naive, every dot path is treated as a root level
// property of the object; nested paths are not expanded.
func rowToObj(key string, dotPathToCols map[string]int, row []interface{}) map[string]interface{} {
	width := len(row)
	obj := make(map[string]interface{})
	for dotPath, colNo := range dotPathToCols {
		if colNo >= width {
			// Row is too short to hold this column.
			continue
		}
		obj[strings.TrimPrefix(dotPath, ".")] = row[colNo]
	}
	obj["_Key"] = key
	return obj
}
// hasKey reports whether key is one of the entries of keys.
func hasKey(keys []string, key string) bool {
	found := false
	// Stop scanning as soon as a match has been seen.
	for i := 0; i < len(keys) && !found; i++ {
		found = keys[i] == key
	}
	return found
}
// MergeIntoTable uses the DataFrame named frameName in the collection to
// merge collection content into table, and returns the updated table
// (i.e. [][]interface{}) or an error.
//
// The first row of table is the header row. It is extended (in place)
// with any frame labels it lacks, then used to map the frame's dot paths
// to column numbers. For each data row whose key column holds a key that
// exists in the collection, the mapped cells are replaced with the values
// read from the collection; rows are padded with "" up to the header
// width first. Finally a new row is appended for every key of the frame
// that was not found among the table's keys.
//
// An error is returned when the frame cannot be loaded, the table has no
// rows, the dot path to column mapping fails (which includes a table with
// a header but no data rows), the key column is not present in the
// header, or a record cannot be read. On error the table is returned as
// it stands at that point. With verbose set, skipped rows are logged.
//
// NOTE(review): overwrite is accepted but never consulted by this method;
// rows with a matching key are always refreshed from the collection.
func (c *Collection) MergeIntoTable(frameName string, table [][]interface{}, overwrite bool, verbose bool) ([][]interface{}, error) {
	f, err := c.getFrame(frameName)
	if err != nil {
		return table, err
	}
	// A header row is required; without this guard the table[0] accesses
	// below would panic on a table with no rows.
	if len(table) == 0 {
		return table, fmt.Errorf("table is empty")
	}
	// Make sure we have a header that supports all the frame's
	// dot paths and labels.
	headerRow, changed := labelsToHeaderRow(f, table)
	if changed {
		table[0] = tbl.RowStringToInterface(headerRow)
	}
	// Based on the table's new header, calc the map of dot path to
	// column no.
	colMap, err := dotPathToColumnMap(f, table)
	if err != nil {
		return table, err
	}
	// Without a mapped key column rows cannot be matched to records;
	// do not fall back to column 0.
	keyCol, ok := colMap["._Key"]
	if !ok {
		return table, fmt.Errorf("Missing key column in table")
	}

	// Set of valid keys seen in the table's data rows.
	tableKeys := make(map[string]bool, len(table))
	for i, row := range table {
		// NOTE: we skip the header row
		if i == 0 {
			continue
		}
		// Get the key from the row
		if keyCol >= len(row) {
			if verbose {
				log.Printf("skipping row %d, no key found in column %d", i, keyCol)
			}
			continue
		}
		key, err := tbl.ValueInterfaceToString(row[keyCol])
		if err != nil || key == "" {
			if verbose {
				log.Printf("skipping row %d, invalid key found in column %d, %+v, %T", i, keyCol, row[keyCol], row[keyCol])
			}
			continue
		}
		tableKeys[key] = true
		if !c.KeyExists(key) {
			if verbose {
				log.Printf("skipping row %d, key %s not found in collection %s", i, key, c.Name)
			}
			continue
		}
		// Pad cells in row if necessary
		for len(row) < len(headerRow) {
			row = append(row, "")
		}
		obj := map[string]interface{}{}
		if err := c.Read(key, obj, false); err != nil {
			return table, fmt.Errorf("Can't read %s from row %d in collection, %s", key, i, err)
		}
		// NOTE: cells are replaced in frame order, so iterate over
		// f.DotPaths and look the column number up in colMap.
		for _, p := range f.DotPaths {
			j, mapped := colMap[p]
			if !mapped {
				continue
			}
			// Paths that do not evaluate leave the cell untouched.
			if val, err := dotpath.Eval(p, obj); err == nil {
				row[j] = val
			}
		}
		// update row in table (padding may have reallocated it)
		table[i] = row
	}

	// Append a row for each frame key missing from the table
	for _, key := range f.Keys {
		if tableKeys[key] {
			continue
		}
		// One cell per header column. Column numbers in colMap index
		// the header row, so they always fit; cells whose dot path
		// does not evaluate stay nil.
		row := make([]interface{}, len(headerRow))
		obj := map[string]interface{}{}
		if err := c.Read(key, obj, false); err != nil {
			return table, fmt.Errorf("failed to read %q in %s, %s", key, c.Name, err)
		}
		for p, j := range colMap {
			if val, err := dotpath.Eval(p, obj); err == nil {
				row[j] = val
			}
		}
		table = append(table, row)
	}
	return table, nil
}
// MergeFromTable uses the DataFrame named frameName in the collection to
// map the columns of table into JSON object attributes, saving each
// object in the collection under the key found in the row's key column.
//
// For a key that already exists in the collection the row object is
// joined with the stored one (overwrite is passed through to Join) and
// the merged record is read back; otherwise a new record is created.
// Each processed object is stored in the frame's ObjectMap, the processed
// keys are merged into the frame's Keys, and the frame is saved.
//
// Rows that are too short to hold the key column, or whose key is empty
// or cannot be converted to a string, are skipped; with verbose set each
// skipped row is logged. An error is returned when the frame cannot be
// loaded, the column mapping fails, the key column is missing from the
// table header, or any collection operation fails.
func (c *Collection) MergeFromTable(frameName string, table [][]interface{}, overwrite bool, verbose bool) error {
	f, err := c.getFrame(frameName)
	if err != nil {
		return err
	}
	// Build map of dot path to column position
	colMap, err := dotPathToColumnMap(f, table)
	if err != nil {
		return err
	}
	keyCol, ok := colMap["._Key"]
	if !ok || keyCol < 0 {
		return fmt.Errorf("Missing key column in table")
	}

	keys := []string{}
	// For each data row of table (i.e. row 1 through last row)
	for i, row := range table[1:] {
		// 1-based row number, counting the header as row 1.
		rowNo := i + 2
		if keyCol >= len(row) {
			if verbose {
				log.Printf("skipping row %d, no key found in column %d", rowNo, keyCol)
			}
			continue
		}
		key, err := tbl.ValueInterfaceToString(row[keyCol])
		if err != nil || key == "" {
			if verbose {
				// %v, not %s: err is nil when the key is merely empty.
				log.Printf("skipping row %d, invalid key found in column %d, %+v, %T, %v", rowNo, keyCol, row[keyCol], row[keyCol], err)
			}
			continue
		}
		obj := rowToObj(key, colMap, row)
		if c.KeyExists(key) {
			// Update collection, and get merged object.
			if err := c.Join(key, obj, overwrite); err != nil {
				return err
			}
			err = c.Read(key, obj, false)
		} else {
			err = c.Create(key, obj)
		}
		if err != nil {
			return err
		}
		// Update f.ObjectMap
		// NOTE(review): assumes getFrame returns a frame with a non-nil
		// ObjectMap; writing to a nil map would panic. Confirm.
		f.ObjectMap[key] = obj
		keys = append(keys, key)
	}
	// Update the frame's keys
	f.Keys = mergeKeys(f.Keys, keys)
	// Save frame so it can be regenerated later
	err = c.setFrame(frameName, f)
	return err
}