/
DataSourceExtractor.java
240 lines (193 loc) · 6.83 KB
/
DataSourceExtractor.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
/**
* Copyright 2017 Confluent Inc.
**/
package io.confluent.kql.util;
import io.confluent.kql.metastore.MetaStore;
import io.confluent.kql.metastore.StructuredDataSource;
import io.confluent.kql.parser.SqlBaseBaseVisitor;
import io.confluent.kql.parser.SqlBaseParser;
import io.confluent.kql.parser.tree.AliasedRelation;
import io.confluent.kql.parser.tree.Node;
import io.confluent.kql.parser.tree.NodeLocation;
import io.confluent.kql.parser.tree.QualifiedName;
import io.confluent.kql.parser.tree.Relation;
import io.confluent.kql.parser.tree.Table;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
public class DataSourceExtractor
extends SqlBaseBaseVisitor<Node> {
final MetaStore metaStore;
Schema fromSchema;
Schema joinLeftSchema;
Schema joinRightSchema;
String fromAlias;
String leftAlias;
String rightAlias;
Set<String> commonFieldNames = new HashSet<>();
Set<String> leftFieldNames = new HashSet<>();
Set<String> rightFieldNames = new HashSet<>();
boolean isJoin = false;
public DataSourceExtractor(final MetaStore metaStore) {
this.metaStore = metaStore;
}
@Override
public Node visitQuerySpecification(final SqlBaseParser.QuerySpecificationContext ctx) {
Relation from = (Relation) visit(ctx.from);
return visitChildren(ctx);
}
@Override
public Node visitTableName(final SqlBaseParser.TableNameContext context) {
return new Table(getLocation(context), getQualifiedName(context.qualifiedName()));
}
@Override
public Node visitAliasedRelation(final SqlBaseParser.AliasedRelationContext context) {
Table table = (Table) visit(context.relationPrimary());
String alias = null;
if (context.children.size() == 1) {
alias = table.getName().getSuffix();
} else if (context.children.size() == 2) {
alias = context.children.get(1).getText();
}
if (!isJoin) {
this.fromAlias = alias.toUpperCase();
StructuredDataSource
fromDataSource =
metaStore.getSource(table.getName().getSuffix().toUpperCase());
if (fromDataSource == null) {
throw new KQLException(table.getName().getSuffix().toUpperCase() + " does not exist.");
}
this.fromSchema = fromDataSource.getSchema();
return null;
}
return new AliasedRelation(getLocation(context), table, alias,
getColumnAliases(context.columnAliases()));
}
@Override
public Node visitJoinRelation(final SqlBaseParser.JoinRelationContext context) {
this.isJoin = true;
AliasedRelation left = (AliasedRelation) visit(context.left);
AliasedRelation right;
if (context.CROSS() != null) {
right = (AliasedRelation) visit(context.right);
} else {
if (context.NATURAL() != null) {
right = (AliasedRelation) visit(context.right);
} else {
right = (AliasedRelation) visit(context.rightRelation);
}
}
this.leftAlias = left.getAlias().toUpperCase();
StructuredDataSource
leftDataSource =
metaStore.getSource(((Table) left.getRelation()).getName().getSuffix().toUpperCase());
if (leftDataSource == null) {
throw new KQLException(((Table) left.getRelation()).getName().getSuffix() + " does not "
+ "exist.");
}
this.joinLeftSchema = leftDataSource.getSchema();
this.rightAlias = right.getAlias().toUpperCase();
StructuredDataSource
rightDataSource =
metaStore.getSource(((Table) right.getRelation()).getName().getSuffix().toUpperCase());
if (rightDataSource == null) {
throw new KQLException(((Table) right.getRelation()).getName().getSuffix() + " does not "
+ "exist.");
}
this.joinRightSchema = rightDataSource.getSchema();
return null;
}
public void extractDataSources(final ParseTree node) {
visit(node);
if (joinLeftSchema != null) {
for (Field field : joinLeftSchema.fields()) {
leftFieldNames.add(field.name().toUpperCase());
}
for (Field field : joinRightSchema.fields()) {
rightFieldNames.add(field.name().toUpperCase());
if (leftFieldNames.contains(field.name().toUpperCase())) {
commonFieldNames.add(field.name().toUpperCase());
}
}
}
}
public MetaStore getMetaStore() {
return metaStore;
}
public Schema getFromSchema() {
return fromSchema;
}
public Schema getJoinLeftSchema() {
return joinLeftSchema;
}
public Schema getJoinRightSchema() {
return joinRightSchema;
}
public String getFromAlias() {
return fromAlias;
}
public String getLeftAlias() {
return leftAlias;
}
public String getRightAlias() {
return rightAlias;
}
public Set<String> getCommonFieldNames() {
return commonFieldNames;
}
public Set<String> getLeftFieldNames() {
return leftFieldNames;
}
public Set<String> getRightFieldNames() {
return rightFieldNames;
}
private static QualifiedName getQualifiedName(SqlBaseParser.QualifiedNameContext context) {
List<String> parts = context
.identifier().stream()
.map(ParseTree::getText)
.collect(toList());
return QualifiedName.of(parts);
}
private static boolean isDistinct(SqlBaseParser.SetQuantifierContext setQuantifier) {
return setQuantifier != null && setQuantifier.DISTINCT() != null;
}
private static Optional<String> getTextIfPresent(ParserRuleContext context) {
return Optional.ofNullable(context)
.map(ParseTree::getText);
}
private static Optional<String> getTextIfPresent(Token token) {
return Optional.ofNullable(token)
.map(Token::getText);
}
private static List<String> getColumnAliases(
SqlBaseParser.ColumnAliasesContext columnAliasesContext) {
if (columnAliasesContext == null) {
return null;
}
return columnAliasesContext
.identifier().stream()
.map(ParseTree::getText)
.collect(toList());
}
public static NodeLocation getLocation(TerminalNode terminalNode) {
requireNonNull(terminalNode, "terminalNode is null");
return getLocation(terminalNode.getSymbol());
}
public static NodeLocation getLocation(ParserRuleContext parserRuleContext) {
requireNonNull(parserRuleContext, "parserRuleContext is null");
return getLocation(parserRuleContext.getStart());
}
public static NodeLocation getLocation(Token token) {
requireNonNull(token, "token is null");
return new NodeLocation(token.getLine(), token.getCharPositionInLine());
}
}