-
Notifications
You must be signed in to change notification settings - Fork 519
/
api_extractor.go
141 lines (123 loc) · 4.15 KB
/
api_extractor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
"github.com/apache/incubator-devlake/errors"
"reflect"
"github.com/apache/incubator-devlake/models/common"
"github.com/apache/incubator-devlake/plugins/core"
"github.com/apache/incubator-devlake/plugins/core/dal"
)
// ApiExtractorArgs bundles everything an ApiExtractor needs to run.
type ApiExtractorArgs struct {
	// RawDataSubTaskArgs selects the raw table and params scope this extractor reads from.
	RawDataSubTaskArgs
	// Params narrows the raw rows to this connection/scope (compared against the raw table's `params` column).
	Params interface{}
	// Extract converts one raw row into zero or more tool-layer entities; returned
	// values are pointers to structs that (typically) embed common.RawDataOrigin.
	Extract func(row *RawData) ([]interface{}, errors.Error)
	// BatchSize is how many records accumulate before a batch save; 0 means the default (500).
	BatchSize int
}
// ApiExtractor helps you extract Raw Data from api responses to Tool Layer Data
// It reads rows from specified raw data table, and feed it into `Extract` handler
// you can return arbitrary tool layer entities in this handler, ApiExtractor would
// first delete old data by their RawDataOrigin information, and then perform a
// batch save for you.
type ApiExtractor struct {
	*RawDataSubTask
	// args keeps the creation arguments so Execute can reach Ctx, Extract and BatchSize.
	args *ApiExtractorArgs
}
// NewApiExtractor creates a new ApiExtractor.
//
// It wires up the underlying RawDataSubTask (which resolves the raw table
// and params scope from args.RawDataSubTaskArgs) and normalizes BatchSize.
// Returns an error if the sub-task arguments are invalid.
func NewApiExtractor(args ApiExtractorArgs) (*ApiExtractor, errors.Error) {
	// process args
	rawDataSubTask, err := NewRawDataSubTask(args.RawDataSubTaskArgs)
	if err != nil {
		return nil, err
	}
	// fall back to a sane default for zero OR negative batch sizes; a negative
	// value would otherwise be handed to the batch-save divider unchecked
	if args.BatchSize <= 0 {
		args.BatchSize = 500
	}
	return &ApiExtractor{
		RawDataSubTask: rawDataSubTask,
		args:           &args,
	}, nil
}
// Execute sub-task
//
// Streams every raw row within this sub-task's scope out of the raw table,
// feeds each row to the plugin-provided Extract handler, stamps each returned
// entity with its RawDataOrigin (when the field is present and still zero),
// and persists everything through the batch-save divider.
func (extractor *ApiExtractor) Execute() errors.Error {
	taskCtx := extractor.args.Ctx
	db := taskCtx.GetDal()
	logger := taskCtx.GetLogger()

	// load data from database
	clauses := []dal.Clause{
		dal.From(extractor.table),
		dal.Where("params = ?", extractor.params),
		dal.Orderby("id ASC"),
	}
	count, err := db.Count(clauses...)
	if err != nil {
		return errors.Default.Wrap(err, "error getting count of clauses")
	}
	cursor, err := db.Cursor(clauses...)
	if err != nil {
		return errors.Default.Wrap(err, "error running DB query")
	}
	defer cursor.Close()
	logger.Info("get data from %s where params=%s and got %d", extractor.table, extractor.params, count)

	// name of the embedded field the batch save divider keys deletion/origin on
	originFieldName := "RawDataOrigin"
	divider := NewBatchSaveDivider(taskCtx, extractor.args.BatchSize, extractor.table, extractor.params)

	// progress (total unknown up-front)
	taskCtx.SetProgress(0, -1)
	ctx := taskCtx.GetContext()

	row := &RawData{}
	// iterate all rows
	for cursor.Next() {
		// bail out promptly if the task got cancelled
		select {
		case <-ctx.Done():
			return errors.Convert(ctx.Err())
		default:
		}
		if err = db.Fetch(cursor, row); err != nil {
			return errors.Default.Wrap(err, "error fetching row")
		}
		results, err := extractor.args.Extract(row)
		if err != nil {
			return errors.Default.Wrap(err, "error calling plugin Extract implementation")
		}
		for _, result := range results {
			// get the batch operator for the specific type
			batch, err := divider.ForType(reflect.TypeOf(result))
			if err != nil {
				return errors.Default.Wrap(err, "error getting batch from result")
			}
			// set raw data origin field so stale records can be located later
			originField := reflect.ValueOf(result).Elem().FieldByName(originFieldName)
			if originField.IsValid() && originField.IsZero() {
				originField.Set(reflect.ValueOf(common.RawDataOrigin{
					RawDataTable:  extractor.table,
					RawDataId:     row.ID,
					RawDataParams: row.Params,
				}))
			}
			// records get saved into db when slots were max outed
			if err = batch.Add(result); err != nil {
				return errors.Default.Wrap(err, "error adding result to batch")
			}
			taskCtx.IncProgress(1)
		}
		taskCtx.IncProgress(1)
	}
	// save the last batches
	return divider.Close()
}
var _ core.SubTask = (*ApiExtractor)(nil)