diff --git a/CHANGES.md b/CHANGES.md index a468ab17f..2a5aa5156 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,7 @@ * [METAMODEL-1139] - Employed Java 8 functional types (java.util.function) in favor of (now deprecated) Ref, Action, Func. * [METAMODEL-1140] - Allowed SalesforceDataContext without a security token. * [METAMODEL-1141] - Added RFC 4180 compliant CSV parsing. + * [METAMODEL-1144] - Optimized evaluation of conditional client-side JOIN statements. ### Apache MetaModel 4.6.0 diff --git a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java index f30a08aca..a2681da58 100644 --- a/core/src/main/java/org/apache/metamodel/MetaModelHelper.java +++ b/core/src/main/java/org/apache/metamodel/MetaModelHelper.java @@ -175,7 +175,7 @@ public static DataSet getCarthesianProduct(DataSet[] fromDataSets, FilterItem... } public static DataSet getCarthesianProduct(DataSet[] fromDataSets, Iterable whereItems) { - assert(fromDataSets.length>0); + assert (fromDataSets.length > 0); // First check if carthesian product is even nescesary if (fromDataSets.length == 1) { return getFiltered(fromDataSets[0], whereItems); @@ -185,76 +185,75 @@ public static DataSet getCarthesianProduct(DataSet[] fromDataSets, Iterable filtersIterable){ + public static InMemoryDataSet nestedLoopJoin(DataSet innerLoopDs, DataSet outerLoopDs, + Iterable filtersIterable) { List filters = new ArrayList<>(); - for(FilterItem fi : filtersIterable){ + for (FilterItem fi : filtersIterable) { filters.add(fi); } List innerRows = innerLoopDs.toRows(); - - List allItems = new ArrayList<>(Arrays.asList(outerLoopDs.getSelectItems())) ; + List allItems = new ArrayList<>(Arrays.asList(outerLoopDs.getSelectItems())); allItems.addAll(Arrays.asList(innerLoopDs.getSelectItems())); - Set applicableFilters = applicableFilters(filters,allItems); + Set applicableFilters = applicableFilters(filters, allItems); DataSetHeader jointHeader = new CachingDataSetHeader(allItems); List resultRows = new ArrayList<>(); - for(Row outerRow: outerLoopDs){ - for(Row innerRow: innerRows){ + for (Row outerRow : outerLoopDs) { + for (Row innerRow : innerRows) { Object[] joinedRowObjects = new Object[outerRow.getValues().length + innerRow.getValues().length]; - System.arraycopy(outerRow.getValues(),0,joinedRowObjects,0,outerRow.getValues().length); - System.arraycopy(innerRow.getValues(),0,joinedRowObjects,outerRow.getValues().length,innerRow.getValues().length); - - Row joinedRow = new DefaultRow(jointHeader,joinedRowObjects); + System.arraycopy(outerRow.getValues(), 0, joinedRowObjects, 0, outerRow.getValues().length); + System.arraycopy(innerRow.getValues(), 0, joinedRowObjects, outerRow.getValues().length, innerRow + .getValues().length); + Row joinedRow = new DefaultRow(jointHeader, joinedRowObjects); - if(applicableFilters.isEmpty()|| applicableFilters.stream().allMatch(fi -> fi.accept(joinedRow))){ + if (applicableFilters.isEmpty() || applicableFilters.stream().allMatch(fi -> fi.accept(joinedRow))) { resultRows.add(joinedRow); } } } - return new InMemoryDataSet(jointHeader,resultRows); + return new InMemoryDataSet(jointHeader, resultRows); } /** * Filters the FilterItems such that only the FilterItems are returned, * which contain SelectItems that are contained in selectItemList + * * @param filters * @param selectItemList * @return */ - private static Set applicableFilters(Collection filters, Collection selectItemList) { + private static Set applicableFilters(Collection filters, + Collection selectItemList) { Set items = new HashSet(selectItemList); - return filters.stream().filter( fi -> { + return filters.stream().filter(fi -> { Collection fiSelectItems = new ArrayList<>(); fiSelectItems.add(fi.getSelectItem()); Object operand = fi.getOperand(); - if(operand instanceof SelectItem){ + if (operand instanceof SelectItem) { fiSelectItems.add((SelectItem) operand); } @@ -263,7 +262,6 @@ private static Set applicableFilters(Collection filters }).collect(Collectors.toSet()); } - public static DataSet getFiltered(DataSet dataSet, Iterable filterItems) { List filters = CollectionUtils.map(filterItems, filterItem -> { return filterItem; diff --git a/core/src/test/java/org/apache/metamodel/MetaModelHelperTest.java b/core/src/test/java/org/apache/metamodel/MetaModelHelperTest.java index 705c6f167..a84cef1c7 100644 --- a/core/src/test/java/org/apache/metamodel/MetaModelHelperTest.java +++ b/core/src/test/java/org/apache/metamodel/MetaModelHelperTest.java @@ -116,14 +116,17 @@ public void testRightJoin() throws Exception { public void testSimpleCarthesianProduct() throws Exception { DataSet dataSet = MetaModelHelper.getCarthesianProduct(createDataSet1(), createDataSet2()); List results = new ArrayList(); - - while(dataSet.next()){ - results.add(dataSet.getRow().toString()); + + while (dataSet.next()) { + results.add(dataSet.getRow().toString()); } assertEquals(2, dataSet.getSelectItems().length); assertEquals(9, results.size()); assertTrue(results.contains("Row[values=[f, b]]")); assertTrue(results.contains("Row[values=[f, a]]")); + assertTrue(results.contains("Row[values=[f, r]]")); + assertTrue(results.contains("Row[values=[o, b]]")); + assertTrue(results.contains("Row[values=[o, a]]")); assertTrue(results.contains("Row[values=[o, r]]")); } @@ -182,8 +185,8 @@ private DataSet createDataSet1() { data1.add(new Object[] { "f" }); data1.add(new Object[] { "o" }); data1.add(new Object[] { "o" }); - DataSet dataSet1 = createDataSet( - new SelectItem[] { new SelectItem(new MutableColumn("foo", ColumnType.VARCHAR)) }, data1); + DataSet dataSet1 = createDataSet(new SelectItem[] { new SelectItem(new MutableColumn("foo", + ColumnType.VARCHAR)) }, data1); return dataSet1; } @@ -200,8 +203,8 @@ private DataSet createDataSet3() { List data3 = new ArrayList(); data3.add(new Object[] { "w00p", true }); data3.add(new Object[] { "yippie", false }); - DataSet dataSet3 = createDataSet(new SelectItem[] { new SelectItem("expression", "e"), - new SelectItem("webish?", "w") }, data3); + DataSet dataSet3 = createDataSet(new SelectItem[] { new SelectItem("expression", "e"), new SelectItem("webish?", + "w") }, data3); return dataSet3; } @@ -210,9 +213,7 @@ private DataSet createDataSet4() { DataSet dataSet4 = createDataSet(new SelectItem[] { new SelectItem("abc", "abc") }, data4); return dataSet4; } - - - + private int bigDataSetSize = 3000; /** @@ -220,42 +221,33 @@ private DataSet createDataSet4() { * @return a big dataset, mocking an employee table */ private DataSet createDataSet5() { - List data5 = new ArrayList(); - - - for(int i = 0; i data5 = new ArrayList(); + + for (int i = 0; i < bigDataSetSize; i++) { + data5.add(new Object[] { i, "Person_" + i, bigDataSetSize - (i + 1) }); + } + + DataSet dataSet5 = createDataSet(new SelectItem[] { new SelectItem(new MutableColumn("nr", ColumnType.BIGINT)), + new SelectItem(new MutableColumn("name", ColumnType.STRING)), new SelectItem(new MutableColumn("dnr", + ColumnType.BIGINT)) }, data5); + return dataSet5; + } + /** * * @return a big dataset, mocking an department table */ private DataSet createDataSet6() { - List data6 = new ArrayList(); - - for(int i = 0; i data6 = new ArrayList(); + + for (int i = 0; i < bigDataSetSize; i++) { + data6.add(new Object[] { i, "Department_" + i }); + } + + DataSet dataSet6 = createDataSet(new SelectItem[] { new SelectItem(new MutableColumn("nr", ColumnType.BIGINT)), + new SelectItem(new MutableColumn("name", ColumnType.STRING)), }, data6); + return dataSet6; + } public void testGetTables() throws Exception { MutableTable table1 = new MutableTable("table1"); @@ -365,23 +357,22 @@ public void testLeftJoinNoRowsOrSingleRow() throws Exception { assertEquals("Row[values=[1, 2, null]]", joinedDs.getRow().toString()); assertFalse(joinedDs.next()); } - - - public void testCarthesianProductScalability(){ - - DataSet employees = createDataSet5(); - DataSet departmens = createDataSet6(); - - FilterItem fi = new FilterItem(employees.getSelectItems()[2], OperatorType.EQUALS_TO,departmens.getSelectItems()[0]); - - DataSet joined = MetaModelHelper.getCarthesianProduct(new DataSet[]{employees,departmens}, fi); - int count = 0; - while(joined.next()){ - count++; - } - - assertTrue(count == bigDataSetSize); - - + + public void testCarthesianProductScalability() { + + DataSet employees = createDataSet5(); + DataSet departmens = createDataSet6(); + + FilterItem fi = new FilterItem(employees.getSelectItems()[2], OperatorType.EQUALS_TO, departmens + .getSelectItems()[0]); + + DataSet joined = MetaModelHelper.getCarthesianProduct(new DataSet[] { employees, departmens }, fi); + int count = 0; + while (joined.next()) { + count++; + } + + assertTrue(count == bigDataSetSize); + } } diff --git a/jdbc/src/test/java/org/apache/metamodel/jdbc/MultiJDBCDataSetTest.java b/jdbc/src/test/java/org/apache/metamodel/jdbc/MultiJDBCDataSetTest.java index d91036211..0b60f9536 100644 --- a/jdbc/src/test/java/org/apache/metamodel/jdbc/MultiJDBCDataSetTest.java +++ b/jdbc/src/test/java/org/apache/metamodel/jdbc/MultiJDBCDataSetTest.java @@ -36,11 +36,11 @@ import java.util.concurrent.TimeUnit; /** - * A test case using two simple h2 in memory databases for executing single query over both databases. + * A test case using two simple h2 in memory databases for executing single + * query over both databases. */ public class MultiJDBCDataSetTest { - public static final String DRIVER_CLASS = "org.h2.Driver"; public static final String EMP_URL_MEMORY_DATABASE = "jdbc:h2:mem:emp"; public static final String DEP_URL_MEMORY_DATABASE = "jdbc:h2:mem:dep"; @@ -53,109 +53,80 @@ public class MultiJDBCDataSetTest { private int employeeSize = 10000; private int departmentSize = 1000; - int employeesPerDepartment = employeeSize/ departmentSize; - + int employeesPerDepartment = employeeSize / departmentSize; private static final Logger logger = LoggerFactory.getLogger(MultiJDBCDataSetTest.class); - @Before public void setup() throws Exception { Class.forName(DRIVER_CLASS); emp_conn = DriverManager.getConnection(EMP_URL_MEMORY_DATABASE); - dep_conn = DriverManager.getConnection(DEP_URL_MEMORY_DATABASE); - + dep_conn = DriverManager.getConnection(DEP_URL_MEMORY_DATABASE); emp_dcon = new JdbcDataContext(emp_conn); dep_dcon = new JdbcDataContext(dep_conn); + emp_dcon.executeUpdate(new CreateTable(emp_dcon.getDefaultSchema(), "employee").withColumn("id").ofType( + ColumnType.INTEGER).asPrimaryKey().withColumn("name").ofType(ColumnType.VARCHAR).ofSize(200).withColumn( + "dep_id").ofType(ColumnType.INTEGER)); - - - emp_dcon.executeUpdate(new CreateTable(emp_dcon.getDefaultSchema(),"employee") - .withColumn("id").ofType(ColumnType.INTEGER).asPrimaryKey() - .withColumn("name").ofType(ColumnType.VARCHAR).ofSize(200) - .withColumn("dep_id").ofType(ColumnType.INTEGER)); - - - for(int i = 0;i