-
Notifications
You must be signed in to change notification settings - Fork 0
/
Sculptor.go
161 lines (132 loc) · 4.94 KB
/
Sculptor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package sculptor
import (
"errors"
"github.com/esonhugh/sculptor/parser"
"github.com/esonhugh/sculptor/parser/query"
"sync"
"time"
)
// Func is the signature shared by the hooks stored in DataSculptor's
// customFunc and fallbackFunc lists (see SetCustomFunc and SetFallbackFunc).
// A hook receives the DataSculptor it is registered on and returns an error.
type Func func(*DataSculptor) error
// DataSculptor is the core struct of the package.
// It contains all the information required to extract data from a document during runtime.
type DataSculptor struct {
// DocType is the type of the document to be extracted. It is set by SetDocType.
DocType DocumentType
// Filename is the filename or path of the document being processed.
Filename string
// docQueries is the list of DocumentQuery to be executed. SetQuery appends to it.
docQueries []parser.DocumentQuery
// scanner is the parser that uses docQueries to extract data from the document.
scanner parser.RawDataParser
// targetStruct is the sample struct that is filled with the extracted data.
targetStruct any
// Wg is the WaitGroup handed to the constructor.
// NOTE(review): presumably used to wait for processing goroutines — confirm
// against the code that runs the extraction.
Wg *sync.WaitGroup
// count is the number of records processed.
count uint64
// ConstructedOutput is the channel on which the extracted data is sent out.
ConstructedOutput chan any
// lastErr is the last error that occurred while processing a record.
// Processing continues while it is nil.
lastErr error
// fallbackFunc holds the hooks that run when a record is bad.
fallbackFunc []Func
// customFunc holds the hooks that run before a record is sent to the channel.
customFunc []Func
// options holds the Options passed to the constructor.
options Options
}
// NewDataSculptor builds a DataSculptor for file using DefaultOptions and a
// freshly allocated sync.WaitGroup.
func NewDataSculptor(file string) *DataSculptor {
	wg := new(sync.WaitGroup)
	return NewDataSculptorWithOptionsAndWg(file, DefaultOptions, wg)
}
// NewDataSculptorWithWg builds a DataSculptor for file using DefaultOptions
// and the caller-supplied WaitGroup wg.
func NewDataSculptorWithWg(file string, wg *sync.WaitGroup) *DataSculptor {
	opts := DefaultOptions
	return NewDataSculptorWithOptionsAndWg(file, opts, wg)
}
// NewDataSculptorWithOptionsAndWg builds a DataSculptor for file with the
// options o and the WaitGroup wg. The ConstructedOutput channel is created
// with a buffer of o.BufSize; every other field keeps its zero value.
func NewDataSculptorWithOptionsAndWg(file string, o Options, wg *sync.WaitGroup) *DataSculptor {
	out := make(chan any, o.BufSize)

	sculptor := new(DataSculptor)
	sculptor.Filename = file
	sculptor.Wg = wg
	sculptor.options = o
	sculptor.ConstructedOutput = out
	return sculptor
}
// SetQuery registers Query as the query for the tag tagName by appending a
// parser.DocumentQuery to docQueries. It returns the receiver for chaining.
func (d *DataSculptor) SetQuery(tagName string, Query string) *DataSculptor {
	entry := parser.DocumentQuery{TagName: tagName, Query: Query}
	d.docQueries = append(d.docQueries, entry)
	return d
}
// SetDocType records docType and installs the matching built-in scanner for
// d.Filename: a CSV reader for CSV_DOCUMENT, a JSON reader for JSON_DOCUMENT.
// Any other document type panics.
// To plug in your own scanner, use SetScanner() with a type that follows the
// parser.RawDataParser interface.
func (d *DataSculptor) SetDocType(docType DocumentType) *DataSculptor {
	// The type is stored first, so it is recorded even when the panic below fires.
	d.DocType = docType

	if docType == CSV_DOCUMENT {
		return d.SetScanner(query.NewCsvReader(d.Filename))
	}
	if docType == JSON_DOCUMENT {
		return d.SetScanner(query.NewJsonReader(d.Filename))
	}
	panic("Document Type Not Supported")
}
// SetCSVDelimiter sets the field delimiter r on the CSV scanner.
//
// It only takes effect when the document type is CSV_DOCUMENT and the current
// scanner is a *query.CSV. Otherwise the delimiter is left untouched and the
// problem is recorded so it can be read back through Error(). The receiver is
// always returned, so call chains keep working.
func (d *DataSculptor) SetCSVDelimiter(r rune) *DataSculptor {
	if d.DocType != CSV_DOCUMENT {
		d.lastErr = errors.New("your Document Type is not CSV. Please check your document type")
		// Stop here: falling through would attempt the type assertion below
		// on a scanner that is not expected to be a CSV reader.
		return d
	}

	// Comma-ok form: the scanner may be nil, or may have been replaced through
	// SetScanner with a non-CSV parser. An unchecked assertion would panic.
	csvScanner, ok := d.scanner.(*query.CSV)
	if !ok || csvScanner == nil {
		d.lastErr = errors.New("scanner is not a CSV reader, cannot set the CSV delimiter")
		return d
	}

	csvScanner.SetDelimiter(r)
	return d
}
// SetScanner sets the scanner (parser.RawDataParser) used to extract data from the document.
// It is helpful if you want to use your own scanner to process your file.
// It overwrites any scanner set earlier and returns the receiver for chaining.
func (d *DataSculptor) SetScanner(dataParser parser.RawDataParser) *DataSculptor {
d.scanner = dataParser
return d
}
// SetCustomFunc registers one or more customFunc hooks, which are called
// after targetStruct has been constructed and before the extracted data is sent to the channel.
// The hooks are appended to those already registered; the receiver is returned for chaining.
func (d *DataSculptor) SetCustomFunc(f ...Func) *DataSculptor {
d.customFunc = append(d.customFunc, f...)
return d
}
// SetFallbackFunc registers one or more fallbackFunc hooks, which are called when the framework can't handle a record.
// The hooks are appended to those already registered; the receiver is returned for chaining.
func (d *DataSculptor) SetFallbackFunc(f ...Func) *DataSculptor {
d.fallbackFunc = append(d.fallbackFunc, f...)
return d
}
// SetTargetStruct sets the target struct to the given struct pointer.
// Call it during setup, before the Do() func.
// The stored value is what CurrentTarget returns, which is helpful inside
// hooks registered with SetFallbackFunc and SetCustomFunc.
// It returns the receiver for chaining.
func (d *DataSculptor) SetTargetStruct(targetStruct any) *DataSculptor {
d.targetStruct = targetStruct
return d
}
// CurrentTarget returns the current target struct (targetStruct) during processing.
// It is helpful inside hooks registered with SetFallbackFunc and SetCustomFunc.
func (d *DataSculptor) CurrentTarget() any {
return d.targetStruct
}
// Error returns the last error recorded while processing a record (lastErr),
// or nil if none has been recorded.
// It is helpful inside hooks registered with SetFallbackFunc and SetCustomFunc.
func (d *DataSculptor) Error() error {
return d.lastErr
}
// send pushes the current targetStruct onto the ConstructedOutput channel and
// then sleeps for the Latency configured in options.
func (d *DataSculptor) send() {
	record := d.targetStruct
	d.ConstructedOutput <- record

	// Latency is read only after the send has completed, as a pause between records.
	pause := d.options.Latency
	time.Sleep(pause)
}
// Close closes the ConstructedOutput channel.
// As with any Go channel, closing it a second time, or closing it while it is
// nil, panics.
func (d *DataSculptor) Close() {
close(d.ConstructedOutput)
}