-
Notifications
You must be signed in to change notification settings - Fork 4
/
rename-columns.go
144 lines (122 loc) · 3.91 KB
/
rename-columns.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
package row
import (
"context"
"github.com/go-go-golems/glazed/pkg/middlewares"
"github.com/go-go-golems/glazed/pkg/types"
"github.com/pkg/errors"
"gopkg.in/yaml.v3"
"regexp"
)
// RenameColumnMiddleware renames the columns (field names) of each row it
// processes, using exact-name renames and regular-expression renames.
type RenameColumnMiddleware struct {
// Renames maps an exact column name to its replacement name. Exact renames
// are consulted before RegexpRenames.
Renames map[types.FieldName]types.FieldName
// RegexpRenames is an ordered list of (regexp, replacement) pairs. They are
// tried in slice order and the first one whose replacement changes the
// column name is used.
RegexpRenames RegexpReplacements
// renamedColumns caches the computed name for every column seen so far
// (including columns that end up unchanged), so Renames and RegexpRenames
// are not re-evaluated for the same column on every row. It is initialized
// by the New* constructors.
renamedColumns map[types.FieldName]types.FieldName
}
// Compile-time check that *RenameColumnMiddleware satisfies middlewares.RowMiddleware.
var _ middlewares.RowMiddleware = (*RenameColumnMiddleware)(nil)
// Close is a no-op: it ignores ctx, releases nothing, and always returns nil.
// NOTE(review): presumably required by middlewares.RowMiddleware — confirm
// against the interface definition.
func (r *RenameColumnMiddleware) Close(ctx context.Context) error {
return nil
}
// NewFieldRenameColumnMiddleware returns a middleware that only applies the
// given exact-name renames. The regexp rename list is empty and the rename
// cache starts out empty.
func NewFieldRenameColumnMiddleware(renames map[types.FieldName]types.FieldName) *RenameColumnMiddleware {
	m := new(RenameColumnMiddleware)
	m.Renames = renames
	m.RegexpRenames = RegexpReplacements{}
	m.renamedColumns = make(map[types.FieldName]types.FieldName)
	return m
}
// NewRegexpRenameColumnMiddleware returns a middleware that only applies the
// given regexp renames. The exact-name rename map is empty and the rename
// cache starts out empty.
func NewRegexpRenameColumnMiddleware(renames RegexpReplacements) *RenameColumnMiddleware {
	var m RenameColumnMiddleware
	m.Renames = make(map[types.FieldName]types.FieldName)
	m.RegexpRenames = renames
	m.renamedColumns = make(map[types.FieldName]types.FieldName)
	return &m
}
// NewRenameColumnMiddleware returns a middleware that applies both the given
// exact-name renames and the given regexp renames. Both arguments are stored
// as passed (not copied); the rename cache starts out empty.
func NewRenameColumnMiddleware(
	renames map[types.FieldName]types.FieldName,
	regexpRenames RegexpReplacements,
) *RenameColumnMiddleware {
	cache := make(map[types.FieldName]types.FieldName)

	m := &RenameColumnMiddleware{renamedColumns: cache}
	m.Renames = renames
	m.RegexpRenames = regexpRenames
	return m
}
// RegexpReplacement pairs a compiled regular expression with the replacement
// string that is substituted for its matches in a column name.
type RegexpReplacement struct {
// Regexp is the compiled pattern matched against column names.
Regexp *regexp.Regexp
// Replacement is passed as the replacement argument to
// Regexp.ReplaceAllString.
Replacement string
}
// RegexpReplacements is an ordered list of regexp replacements. It has a
// custom UnmarshalYAML so it can be decoded from a YAML mapping.
type RegexpReplacements []*RegexpReplacement
// UnmarshalYAML decodes a YAML mapping of `regexp: replacement` pairs into rr.
//
// Each key is compiled with regexp.Compile and each value is stored verbatim
// as the replacement string. Entries are appended in the order they occur in
// value.Content, so the resulting slice preserves that order.
//
// It returns an error when value is not a mapping node, when the node has an
// odd number of children, when a key or value is not a scalar node, or when a
// key is not a valid regular expression. On error rr is left unmodified; on
// success its previous contents are replaced (never appended to).
func (rr *RegexpReplacements) UnmarshalYAML(value *yaml.Node) error {
	if value.Kind != yaml.MappingNode {
		return errors.Errorf("expected a mapping node, got %v", value.Kind)
	}
	// The loop below reads children pairwise (key, value). Reject a malformed
	// (e.g. hand-built) node up front so Content[i+1] can never index out of range.
	if len(value.Content)%2 != 0 {
		return errors.Errorf("expected an even number of mapping children, got %d", len(value.Content))
	}

	// Build into a local slice so a failure part-way through does not leave
	// the receiver half-populated.
	replacements := make(RegexpReplacements, 0, len(value.Content)/2)
	for i := 0; i+1 < len(value.Content); i += 2 {
		key, val := value.Content[i], value.Content[i+1]
		if key.Kind != yaml.ScalarNode {
			return errors.Errorf("expected a scalar node, got %v", key.Kind)
		}
		if val.Kind != yaml.ScalarNode {
			return errors.Errorf("expected a scalar node, got %v", val.Kind)
		}

		re, err := regexp.Compile(key.Value)
		if err != nil {
			// Add the offending key and its position so the bad entry can be
			// located in the configuration file.
			return errors.Wrapf(err, "invalid regexp rename %q at line %d", key.Value, key.Line)
		}
		replacements = append(replacements, &RegexpReplacement{
			Regexp:      re,
			Replacement: val.Value,
		})
	}

	*rr = replacements
	return nil
}
// ColumnMiddlewareConfig is the YAML configuration shape decoded by
// NewRenameColumnMiddlewareFromYAML.
type ColumnMiddlewareConfig struct {
// FieldRenames holds exact column-name renames, read from the `renames` key.
FieldRenames map[types.FieldName]types.FieldName `yaml:"renames"`
// RegexpRenames holds regexp renames, read from the `regexpRenames` key.
// FIXME regex renames actually need to be ordered.
// NOTE(review): RegexpReplacements.UnmarshalYAML appends entries to a slice
// in mapping-node Content order, so ordering appears to be handled already —
// confirm and drop the FIXME.
RegexpRenames RegexpReplacements `yaml:"regexpRenames"`
}
// NewRenameColumnMiddlewareFromYAML decodes one ColumnMiddlewareConfig from
// decoder and builds a RenameColumnMiddleware from its field and regexp
// renames. A decoding error is returned unchanged, with a nil middleware.
func NewRenameColumnMiddlewareFromYAML(decoder *yaml.Decoder) (*RenameColumnMiddleware, error) {
	config := ColumnMiddlewareConfig{}
	if err := decoder.Decode(&config); err != nil {
		return nil, err
	}

	middleware := NewRenameColumnMiddleware(config.FieldRenames, config.RegexpRenames)
	return middleware, nil
}
// renameColumn returns the output name for a single column.
//
// The result is looked up in the renamedColumns cache first. On a cache miss
// the name is computed as follows and then cached (unchanged columns are
// cached too, so each distinct column is evaluated only once):
//
//  1. If column is a key of Renames, the mapped name is used. Exact renames
//     win over regexp renames because they are the more specific match.
//  2. Otherwise the RegexpRenames are tried in order, and the first one whose
//     replacement yields a name different from column is used.
//  3. Otherwise column is returned unchanged.
func (r *RenameColumnMiddleware) renameColumn(
	column types.FieldName,
) types.FieldName {
	if cached, ok := r.renamedColumns[column]; ok {
		return cached
	}
	// The struct's rename fields are exported, so it may have been built as a
	// literal without a constructor; writing to a nil map would panic.
	if r.renamedColumns == nil {
		r.renamedColumns = map[types.FieldName]types.FieldName{}
	}

	renamed := column
	if exact, ok := r.Renames[column]; ok {
		// Direct map lookup; map keys are unique, so this is equivalent to
		// scanning every entry for an equal key.
		renamed = exact
	} else {
		for _, rr := range r.RegexpRenames {
			candidate := rr.Regexp.ReplaceAllString(column, rr.Replacement)
			if candidate != column {
				renamed = candidate
				break
			}
		}
	}

	r.renamedColumns[column] = renamed
	return renamed
}
// Process builds a new row containing every field of row, in iteration order
// from oldest to newest, with each key passed through renameColumn and each
// value carried over as is. It returns a one-element slice holding that new
// row and never returns an error; ctx is not used.
func (r *RenameColumnMiddleware) Process(ctx context.Context, row types.Row) ([]types.Row, error) {
	renamed := types.NewRow()

	pair := row.Oldest()
	for pair != nil {
		renamed.Set(r.renameColumn(pair.Key), pair.Value)
		pair = pair.Next()
	}

	return []types.Row{renamed}, nil
}