-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Support extensibility for table datasources #10030
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.druid.query; | ||
|
|
||
| import org.apache.druid.timeline.Overshadowable; | ||
| import org.apache.druid.timeline.TimelineLookup; | ||
| import org.apache.druid.timeline.TimelineObjectHolder; | ||
| import org.joda.time.Interval; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.function.BiFunction; | ||
|
|
||
| /** | ||
| * Represents a source of data for a query obtained from multiple base tables. Implementations of this interface | ||
| * must handle more than one table dataSource. | ||
| */ | ||
| public interface MultiTableDataSource extends DataSource | ||
| { | ||
| /** | ||
| * Retrieves the timeline objects which need to be queried for the given intervals. | ||
| * | ||
| * @param intervals The intervals to find the timeline objects for | ||
| * @param timelineMap Table dataSource names mapped to their corresponding timelines | ||
| * @param biFunction Function that takes an interval and a timeline and returns a list of timeline objects; | ||
| * presumably the lookup applied to each table's timeline for each interval (TODO confirm) | ||
| * @param <ObjectType> Type of the overshadowable object handled by the timeline | ||
| * @return List of timeline-object lists which need to be queried; presumably one inner list per table | ||
| * dataSource (TODO confirm against implementations) | ||
| */ | ||
| <ObjectType extends Overshadowable<ObjectType>> List<List<TimelineObjectHolder<String, ObjectType>>> retrieveSegmentsForIntervals( | ||
| List<Interval> intervals, | ||
| Map<String, TimelineLookup<String, ObjectType>> timelineMap, | ||
| BiFunction<Interval, TimelineLookup<String, ObjectType>, List<TimelineObjectHolder<String, ObjectType>>> biFunction | ||
| ); | ||
|
|
||
| /** | ||
| * Returns the base table dataSources from which the data for a query is retrieved. | ||
| */ | ||
| List<TableDataSource> getDataSources(); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,12 +26,19 @@ | |
| import com.google.common.collect.ImmutableList; | ||
| import com.google.common.collect.Iterables; | ||
| import org.apache.druid.java.util.common.IAE; | ||
| import org.apache.druid.timeline.Overshadowable; | ||
| import org.apache.druid.timeline.TimelineLookup; | ||
| import org.apache.druid.timeline.TimelineObjectHolder; | ||
| import org.joda.time.Interval; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.function.BiFunction; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| public class UnionDataSource implements DataSource | ||
| public class UnionDataSource implements MultiTableDataSource | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the future, if we wanted to extend UnionDataSource to support unioning non-tables (like, perhaps, unioning the results of queries), then do you see a good migration path to get there?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In that case, I would expect us to have a |
||
| { | ||
| @JsonProperty | ||
| private final List<TableDataSource> dataSources; | ||
|
|
@@ -51,6 +58,7 @@ public Set<String> getTableNames() | |
| .collect(Collectors.toSet()); | ||
| } | ||
|
|
||
| @Override | ||
| @JsonProperty | ||
| public List<TableDataSource> getDataSources() | ||
| { | ||
|
|
@@ -79,6 +87,27 @@ public DataSource withChildren(List<DataSource> children) | |
| ); | ||
| } | ||
|
|
||
| @Override | ||
| public <ObjectType extends Overshadowable<ObjectType>> List<List<TimelineObjectHolder<String, ObjectType>>> retrieveSegmentsForIntervals( | ||
| List<Interval> intervals, | ||
| Map<String, TimelineLookup<String, ObjectType>> timelineMap, | ||
| BiFunction<Interval, TimelineLookup<String, ObjectType>, List<TimelineObjectHolder<String, ObjectType>>> biLookupFn | ||
| ) | ||
| { | ||
| List<List<TimelineObjectHolder<String, ObjectType>>> segmentsList = new ArrayList<>(); | ||
| for (String datasource : timelineMap.keySet()) { | ||
| List<TimelineObjectHolder<String, ObjectType>> dataSourceSegments = intervals.stream() | ||
| .flatMap(itvl -> biLookupFn.apply( | ||
| itvl, | ||
| timelineMap.get(datasource)) | ||
| .stream()) | ||
| .collect(Collectors.toList()); | ||
| segmentsList.add(dataSourceSegments); | ||
| } | ||
| return segmentsList; | ||
|
|
||
| } | ||
|
|
||
| @Override | ||
| public boolean isCacheable() | ||
| { | ||
|
|
||
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.druid.query; | ||
|
|
||
| import com.fasterxml.jackson.databind.ObjectMapper; | ||
| import com.google.common.collect.ImmutableList; | ||
| import com.google.common.collect.ImmutableSet; | ||
| import com.google.common.collect.Lists; | ||
| import org.apache.druid.segment.TestHelper; | ||
| import org.junit.Assert; | ||
| import org.junit.Test; | ||
|
|
||
| import java.io.IOException; | ||
|
|
||
| /** | ||
| * Tests JSON serialization and deserialization of {@link MultiTableDataSource}, using {@link UnionDataSource} | ||
| * as the implementation under test. | ||
| */ | ||
| public class MultiTableDataSourceTest | ||
| { | ||
| private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); | ||
|
|
||
| /** | ||
| * Writes a union of two tables to JSON and checks that reading it back yields an equal dataSource. | ||
| */ | ||
| @Test | ||
| public void testSerialization() throws IOException | ||
| { | ||
| MultiTableDataSource dataSource = new UnionDataSource(ImmutableList.of( | ||
| new TableDataSource("datasource1"), | ||
| new TableDataSource("datasource2") | ||
| )); | ||
| String json = JSON_MAPPER.writeValueAsString(dataSource); | ||
| MultiTableDataSource serdeDataSource = JSON_MAPPER.readValue(json, MultiTableDataSource.class); | ||
| Assert.assertEquals(dataSource, serdeDataSource); | ||
| } | ||
|
|
||
| /** | ||
| * Reads a hand-written "union" JSON document and checks the resulting type, base tables, table names, | ||
| * and that a further serialization round trip preserves equality. | ||
| */ | ||
| @Test | ||
| public void testUnionDataSource() throws IOException | ||
| { | ||
| MultiTableDataSource dataSource = JSON_MAPPER.readValue( | ||
| "{\"type\":\"union\", \"dataSources\":[\"datasource1\", \"datasource2\"]}", | ||
| MultiTableDataSource.class | ||
| ); | ||
| // The declared type already guarantees MultiTableDataSource, so assert the concrete type instead. | ||
| Assert.assertTrue(dataSource instanceof UnionDataSource); | ||
| Assert.assertEquals( | ||
| Lists.newArrayList(new TableDataSource("datasource1"), new TableDataSource("datasource2")), | ||
| Lists.newArrayList(dataSource.getDataSources()) | ||
| ); | ||
| Assert.assertEquals( | ||
| ImmutableSet.of("datasource1", "datasource2"), | ||
| dataSource.getTableNames() | ||
| ); | ||
|
|
||
| final MultiTableDataSource serde = JSON_MAPPER.readValue(JSON_MAPPER.writeValueAsString(dataSource), MultiTableDataSource.class); | ||
| Assert.assertEquals(dataSource, serde); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, why not make TableDataSource an implementation of this interface, and give it a pretty basic
retrieveSegmentsForIntervals method? Then, we know that anything that accesses tables directly is an implementation of this interface. It might make some of the logic in DataSourceAnalysis simpler. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
makes sense. Thanks