From 053c34684f6f46948bc3ad3a71a5a366ac9526f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dennis=20Du=20Kr=C3=B8ger?= Date: Thu, 3 Sep 2015 15:41:44 +0200 Subject: [PATCH 1/8] Use missing and exists filters for null comparisons. This will make isNull() and isNotNull() work. They failed before, as a term query, which does not support null values, was used. A test was also un-ignored; It was ignored as it used to fail, but the fix for METAMODEL-172 fixed the test as well. --- .../ElasticSearchDataContext.java | 31 +++-- .../ElasticSearchDataContextTest.java | 114 ++++++++++-------- 2 files changed, 85 insertions(+), 60 deletions(-) diff --git a/elasticsearch/src/main/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContext.java b/elasticsearch/src/main/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContext.java index ba221d1bb..7bdd1abe0 100644 --- a/elasticsearch/src/main/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContext.java +++ b/elasticsearch/src/main/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContext.java @@ -61,6 +61,7 @@ import org.elasticsearch.common.hppc.cursors.ObjectCursor; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.FilterBuilders; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.slf4j.Logger; @@ -68,7 +69,7 @@ /** * DataContext implementation for ElasticSearch analytics engine. - * + * * ElasticSearch has a data storage structure hierarchy that briefly goes like * this: * - * + * * When instantiating this DataContext, an index name is provided. Within this * index, each document type is represented as a table. - * + * * This implementation supports either automatic discovery of a schema or manual * specification of a schema, through the {@link SimpleTableDef} class. */ @@ -123,7 +124,7 @@ public ElasticSearchDataContext(Client client, String indexName, SimpleTableDef. /** * Constructs a {@link ElasticSearchDataContext} and automatically detects * the schema structure/view on all indexes (see - * {@link #detectSchema(Client)}). + * {@link #detectSchema(Client, String)}). * * @param client * the ElasticSearch client @@ -143,7 +144,7 @@ public ElasticSearchDataContext(Client client, String indexName) { * * @param client * the client to inspect - * @param indexName2 + * @param indexName * @return a mutable schema instance, useful for further fine tuning by the * user. */ @@ -303,7 +304,7 @@ private SearchRequestBuilder createSearchRequest(Table table, int firstRow, int /** * Creates, if possible, a {@link QueryBuilder} object which can be used to * push down one or more {@link FilterItem}s to ElasticSearch's backend. - * + * * @param table * @param whereItems * @param logicalOperator @@ -341,10 +342,20 @@ protected QueryBuilder createQueryBuilderForSimpleWhere(Table table, List operands = CollectionUtils.toList(operand); @@ -432,7 +443,7 @@ public void executeUpdate(UpdateScript update) { /** * Gets the {@link Client} that this {@link DataContext} is wrapping. - * + * * @return */ public Client getElasticSearchClient() { @@ -441,7 +452,7 @@ public Client getElasticSearchClient() { /** * Gets the name of the index that this {@link DataContext} is working on. - * + * * @return */ public String getIndexName() { diff --git a/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java b/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java index d421b9965..f7d9093be 100644 --- a/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java +++ b/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java @@ -18,9 +18,6 @@ */ package org.apache.metamodel.elasticsearch; -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.junit.Assert.*; - import java.util.Arrays; import java.util.Date; import java.util.LinkedHashMap; @@ -49,7 +46,6 @@ import org.apache.metamodel.schema.Schema; import org.apache.metamodel.schema.Table; import org.apache.metamodel.update.Update; -import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; @@ -59,9 +55,11 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.junit.Assert.*; + public class ElasticSearchDataContextTest { private static final String indexName = "twitter"; @@ -80,10 +78,11 @@ public class ElasticSearchDataContextTest { public static void beforeTests() throws Exception { embeddedElasticsearchServer = new EmbeddedElasticsearchServer(); client = embeddedElasticsearchServer.getClient(); - indexOneTweeterDocumentPerIndex(indexType1, 1); - indexOneTweeterDocumentPerIndex(indexType2, 1); + indexTweeterDocument(indexType1, 1); + indexTweeterDocument(indexType2, 1); + indexTweeterDocument(indexType2, 2, null); insertPeopleDocuments(); - indexOneTweeterDocumentPerIndex(indexType2, 1); + indexTweeterDocument(indexType2, 1); indexBulkDocuments(indexName, bulkIndexType, 10); // The refresh API allows to explicitly refresh one or more index, @@ -143,12 +142,9 @@ public void testDocumentIdAsPrimaryKey() throws Exception { assertEquals(1, pks.length); assertEquals("_id", pks[0].getName()); - DataSet ds = dataContext.query().from(table).select("user", "_id").orderBy("_id").asc().execute(); - try { + try (DataSet ds = dataContext.query().from(table).select("user", "_id").orderBy("_id").asc().execute()) { assertTrue(ds.next()); assertEquals("Row[values=[user1, tweet_tweet2_1]]", ds.getRow().toString()); - } finally { - ds.close(); } } @@ -157,8 +153,7 @@ public void testExecutePrimaryKeyLookupQuery() throws Exception { Table table = dataContext.getDefaultSchema().getTableByName("tweet2"); Column[] pks = table.getPrimaryKeys(); - DataSet ds = dataContext.query().from(table).selectAll().where(pks[0]).eq("tweet_tweet2_1").execute(); - try { + try (DataSet ds = dataContext.query().from(table).selectAll().where(pks[0]).eq("tweet_tweet2_1").execute()) { assertTrue(ds.next()); Object dateValue = ds.getRow().getValue(2); assertEquals("Row[values=[tweet_tweet2_1, 1, " + dateValue + ", user1]]", ds.getRow().toString()); @@ -166,14 +161,10 @@ public void testExecutePrimaryKeyLookupQuery() throws Exception { assertFalse(ds.next()); assertEquals(InMemoryDataSet.class, ds.getClass()); - } finally { - ds.close(); } } - // TODO: Un-ignore this test, and it wil fail - needs fixin' @Test - @Ignore public void testDateIsHandledAsDate() throws Exception { Table table = dataContext.getDefaultSchema().getTableByName("tweet1"); Column column = table.getColumnByName("postDate"); @@ -225,8 +216,7 @@ public void run(UpdateCallback callback) { } }); - final DataSet ds = dataContext.query().from(table).selectAll().orderBy("bar").execute(); - try { + try (DataSet ds = dataContext.query().from(table).selectAll().orderBy("bar").execute()) { assertTrue(ds.next()); assertEquals("hello", ds.getRow().getValue(fooColumn).toString()); assertNotNull(ds.getRow().getValue(idColumn)); @@ -234,8 +224,6 @@ public void run(UpdateCallback callback) { assertEquals("world", ds.getRow().getValue(fooColumn).toString()); assertNotNull(ds.getRow().getValue(idColumn)); assertFalse(ds.next()); - } finally { - ds.close(); } dataContext.executeUpdate(new DropTable(table)); @@ -402,6 +390,36 @@ public void testWhereColumnEqualsValues() throws Exception { } } + @Test + public void testWhereColumnIsNullValues() throws Exception { + DataSet ds = dataContext.query().from(indexType2).select("message").where("postDate") + .isNull().execute(); + assertEquals(ElasticSearchDataSet.class, ds.getClass()); + + try { + assertTrue(ds.next()); + assertEquals("Row[values=[2]]", ds.getRow().toString()); + assertFalse(ds.next()); + } finally { + ds.close(); + } + } + + @Test + public void testWhereColumnIsNotNullValues() throws Exception { + DataSet ds = dataContext.query().from(indexType2).select("message").where("postDate") + .isNotNull().execute(); + assertEquals(ElasticSearchDataSet.class, ds.getClass()); + + try { + assertTrue(ds.next()); + assertEquals("Row[values=[1]]", ds.getRow().toString()); + assertFalse(ds.next()); + } finally { + ds.close(); + } + } + @Test public void testWhereMultiColumnsEqualValues() throws Exception { DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") @@ -419,10 +437,9 @@ public void testWhereMultiColumnsEqualValues() throws Exception { @Test public void testWhereColumnInValues() throws Exception { - DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") - .in("user4", "user5").orderBy("message").execute(); - try { + try (DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") + .in("user4", "user5").orderBy("message").execute()) { assertTrue(ds.next()); String row1 = ds.getRow().toString(); @@ -433,8 +450,6 @@ public void testWhereColumnInValues() throws Exception { assertEquals("Row[values=[user5, 5]]", row2); assertFalse(ds.next()); - } finally { - ds.close(); } } @@ -498,29 +513,15 @@ public void testCountQuery() throws Exception { assertEquals("[10]", Arrays.toString(row)); } - @Test + @Test(expected = IllegalArgumentException.class) public void testQueryForANonExistingTable() throws Exception { - boolean thrown = false; - try { - dataContext.query().from("nonExistingTable").select("user").and("message").execute(); - } catch (IllegalArgumentException IAex) { - thrown = true; - } - assertTrue(thrown); + dataContext.query().from("nonExistingTable").select("user").and("message").execute(); } - @Test + @Test(expected = IllegalArgumentException.class) public void testQueryForAnExistingTableAndNonExistingField() throws Exception { - indexOneTweeterDocumentPerIndex(indexType1, 1); - boolean thrown = false; - try { - dataContext.query().from(indexType1).select("nonExistingField").execute(); - } catch (IllegalArgumentException IAex) { - thrown = true; - } finally { - // ds.close(); - } - assertTrue(thrown); + indexTweeterDocument(indexType1, 1); + dataContext.query().from(indexType1).select("nonExistingField").execute(); } @Test @@ -549,7 +550,7 @@ private static void indexBulkDocuments(String indexName, String indexType, int n try { for (int i = 0; i < numberOfDocuments; i++) { - bulkRequest.add(client.prepareIndex(indexName, indexType, new Integer(i).toString()).setSource( + bulkRequest.add(client.prepareIndex(indexName, indexType, Integer.toString(i)).setSource( buildTweeterJson(i))); } bulkRequest.execute().actionGet(); @@ -559,7 +560,16 @@ private static void indexBulkDocuments(String indexName, String indexType, int n } - private static void indexOneTweeterDocumentPerIndex(String indexType, int id) { + private static void indexTweeterDocument(String indexType, int id, Date date) { + try { + client.prepareIndex(indexName, indexType).setSource(buildTweeterJson(id, date)) + .setId("tweet_" + indexType + "_" + id).execute().actionGet(); + } catch (Exception ex) { + System.out.println("Exception indexing documents!!!!!"); + } + } + + private static void indexTweeterDocument(String indexType, int id) { try { client.prepareIndex(indexName, indexType).setSource(buildTweeterJson(id)) .setId("tweet_" + indexType + "_" + id).execute().actionGet(); @@ -578,9 +588,13 @@ private static void indexOnePeopleDocument(String gender, int age, int id) { } private static Map buildTweeterJson(int elementId) throws Exception { - Map map = new LinkedHashMap(); + return buildTweeterJson(elementId, new Date()); + } + + private static Map buildTweeterJson(int elementId, Date date) throws Exception { + Map map = new LinkedHashMap<>(); map.put("user", "user" + elementId); - map.put("postDate", new Date()); + map.put("postDate", date); map.put("message", elementId); return map; } From d22f299956bc8ef9752b0cee920b1d5594e4b693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dennis=20Du=20Kr=C3=B8ger?= Date: Thu, 3 Sep 2015 16:07:59 +0200 Subject: [PATCH 2/8] Improves exception handling --- .../ElasticSearchDataContextTest.java | 92 ++++++------------- 1 file changed, 30 insertions(+), 62 deletions(-) diff --git a/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java b/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java index f7d9093be..6390dea90 100644 --- a/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java +++ b/elasticsearch/src/test/java/org/apache/metamodel/elasticsearch/ElasticSearchDataContextTest.java @@ -18,6 +18,7 @@ */ package org.apache.metamodel.elasticsearch; +import java.io.IOException; import java.util.Arrays; import java.util.Date; import java.util.LinkedHashMap; @@ -93,7 +94,7 @@ public static void beforeTests() throws Exception { System.out.println("Embedded ElasticSearch server created!"); } - private static void insertPeopleDocuments() { + private static void insertPeopleDocuments() throws IOException { indexOnePeopleDocument("female", 20, 5); indexOnePeopleDocument("female", 17, 8); indexOnePeopleDocument("female", 18, 9); @@ -124,14 +125,11 @@ public void testSimpleQuery() throws Exception { assertEquals(ColumnType.DATE, table.getColumnByName("postDate").getType()); assertEquals(ColumnType.BIGINT, table.getColumnByName("message").getType()); - DataSet ds = dataContext.query().from(indexType1).select("user").and("message").execute(); - assertEquals(ElasticSearchDataSet.class, ds.getClass()); + try(DataSet ds = dataContext.query().from(indexType1).select("user").and("message").execute()) { + assertEquals(ElasticSearchDataSet.class, ds.getClass()); - try { assertTrue(ds.next()); assertEquals("Row[values=[user1, 1]]", ds.getRow().toString()); - } finally { - ds.close(); } } @@ -377,67 +375,54 @@ public void testDropTable() throws Exception { @Test public void testWhereColumnEqualsValues() throws Exception { - DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") - .isEquals("user4").execute(); - assertEquals(ElasticSearchDataSet.class, ds.getClass()); + try (DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") + .isEquals("user4").execute()) { + assertEquals(ElasticSearchDataSet.class, ds.getClass()); - try { assertTrue(ds.next()); assertEquals("Row[values=[user4, 4]]", ds.getRow().toString()); assertFalse(ds.next()); - } finally { - ds.close(); } } @Test public void testWhereColumnIsNullValues() throws Exception { - DataSet ds = dataContext.query().from(indexType2).select("message").where("postDate") - .isNull().execute(); - assertEquals(ElasticSearchDataSet.class, ds.getClass()); + try(DataSet ds = dataContext.query().from(indexType2).select("message").where("postDate") + .isNull().execute()){ + assertEquals(ElasticSearchDataSet.class, ds.getClass()); - try { assertTrue(ds.next()); assertEquals("Row[values=[2]]", ds.getRow().toString()); assertFalse(ds.next()); - } finally { - ds.close(); } } @Test public void testWhereColumnIsNotNullValues() throws Exception { - DataSet ds = dataContext.query().from(indexType2).select("message").where("postDate") - .isNotNull().execute(); - assertEquals(ElasticSearchDataSet.class, ds.getClass()); + try(DataSet ds = dataContext.query().from(indexType2).select("message").where("postDate") + .isNotNull().execute()){ + assertEquals(ElasticSearchDataSet.class, ds.getClass()); - try { assertTrue(ds.next()); assertEquals("Row[values=[1]]", ds.getRow().toString()); assertFalse(ds.next()); - } finally { - ds.close(); } } @Test public void testWhereMultiColumnsEqualValues() throws Exception { - DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") - .isEquals("user4").and("message").ne(5).execute(); - assertEquals(ElasticSearchDataSet.class, ds.getClass()); + try(DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") + .isEquals("user4").and("message").ne(5).execute()){ + assertEquals(ElasticSearchDataSet.class, ds.getClass()); - try { assertTrue(ds.next()); assertEquals("Row[values=[user4, 4]]", ds.getRow().toString()); assertFalse(ds.next()); - } finally { - ds.close(); } } @Test public void testWhereColumnInValues() throws Exception { - try (DataSet ds = dataContext.query().from(bulkIndexType).select("user").and("message").where("user") .in("user4", "user5").orderBy("message").execute()) { assertTrue(ds.next()); @@ -548,50 +533,33 @@ private static void createIndex() { private static void indexBulkDocuments(String indexName, String indexType, int numberOfDocuments) { BulkRequestBuilder bulkRequest = client.prepareBulk(); - try { - for (int i = 0; i < numberOfDocuments; i++) { - bulkRequest.add(client.prepareIndex(indexName, indexType, Integer.toString(i)).setSource( - buildTweeterJson(i))); - } - bulkRequest.execute().actionGet(); - } catch (Exception ex) { - System.out.println("Exception indexing documents!!!!!"); + for (int i = 0; i < numberOfDocuments; i++) { + bulkRequest.add(client.prepareIndex(indexName, indexType, Integer.toString(i)).setSource( + buildTweeterJson(i))); } - + bulkRequest.execute().actionGet(); } private static void indexTweeterDocument(String indexType, int id, Date date) { - try { - client.prepareIndex(indexName, indexType).setSource(buildTweeterJson(id, date)) - .setId("tweet_" + indexType + "_" + id).execute().actionGet(); - } catch (Exception ex) { - System.out.println("Exception indexing documents!!!!!"); - } + client.prepareIndex(indexName, indexType).setSource(buildTweeterJson(id, date)) + .setId("tweet_" + indexType + "_" + id).execute().actionGet(); } private static void indexTweeterDocument(String indexType, int id) { - try { - client.prepareIndex(indexName, indexType).setSource(buildTweeterJson(id)) - .setId("tweet_" + indexType + "_" + id).execute().actionGet(); - } catch (Exception ex) { - System.out.println("Exception indexing documents!!!!!"); - } + client.prepareIndex(indexName, indexType).setSource(buildTweeterJson(id)) + .setId("tweet_" + indexType + "_" + id).execute().actionGet(); } - private static void indexOnePeopleDocument(String gender, int age, int id) { - try { - client.prepareIndex(indexName, peopleIndexType).setSource(buildPeopleJson(gender, age, id)).execute() - .actionGet(); - } catch (Exception ex) { - System.out.println("Exception indexing documents!!!!!"); - } + private static void indexOnePeopleDocument(String gender, int age, int id) throws IOException { + client.prepareIndex(indexName, peopleIndexType).setSource(buildPeopleJson(gender, age, id)).execute() + .actionGet(); } - private static Map buildTweeterJson(int elementId) throws Exception { + private static Map buildTweeterJson(int elementId) { return buildTweeterJson(elementId, new Date()); } - private static Map buildTweeterJson(int elementId, Date date) throws Exception { + private static Map buildTweeterJson(int elementId, Date date) { Map map = new LinkedHashMap<>(); map.put("user", "user" + elementId); map.put("postDate", date); @@ -599,7 +567,7 @@ private static Map buildTweeterJson(int elementId, Date date) th return map; } - private static XContentBuilder buildPeopleJson(String gender, int age, int elementId) throws Exception { + private static XContentBuilder buildPeopleJson(String gender, int age, int elementId) throws IOException { return jsonBuilder().startObject().field("gender", gender).field("age", age).field("id", elementId).endObject(); } From 9221755629d258c069f45ecda98f53399df08781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dennis=20Du=20Kr=C3=B8ger?= Date: Tue, 8 Sep 2015 14:33:10 +0200 Subject: [PATCH 3/8] An attempt to lower memory usage of Excel This is currently not properly functioning (creating fails) --- .../DefaultSpreadsheetReaderDelegate.java | 17 ++-- .../metamodel/excel/ExcelDataContext.java | 20 ++-- .../apache/metamodel/excel/ExcelUtils.java | 33 +++++-- .../excel/SpreadsheetReaderDelegate.java | 7 +- .../excel/XlsxSpreadsheetReaderDelegate.java | 98 +++++++++++++------ .../metamodel/excel/ExcelDataContextTest.java | 21 ++++ 6 files changed, 135 insertions(+), 61 deletions(-) diff --git a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java index 80d91d6bf..ad59a8799 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java @@ -18,9 +18,9 @@ */ package org.apache.metamodel.excel; -import java.io.InputStream; import java.util.Iterator; +import org.apache.metamodel.util.Resource; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; @@ -36,7 +36,6 @@ import org.apache.metamodel.schema.Schema; import org.apache.metamodel.schema.Table; import org.apache.metamodel.util.AlphabeticSequence; -import org.apache.metamodel.util.Ref; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,9 +54,9 @@ public DefaultSpreadsheetReaderDelegate(ExcelConfiguration configuration) { } @Override - public Schema createSchema(InputStream inputStream, String schemaName) { + public Schema createSchema(final Resource resource, String schemaName) { final MutableSchema schema = new MutableSchema(schemaName); - final Workbook wb = ExcelUtils.readWorkbook(inputStream); + final Workbook wb = ExcelUtils.readWorkbook(resource); for (int i = 0; i < wb.getNumberOfSheets(); i++) { final Sheet currentSheet = wb.getSheetAt(i); @@ -70,8 +69,8 @@ public Schema createSchema(InputStream inputStream, String schemaName) { } @Override - public DataSet executeQuery(InputStream inputStream, Table table, Column[] columns, int maxRows) { - final Workbook wb = ExcelUtils.readWorkbook(inputStream); + public DataSet executeQuery(final Resource resource, Table table, Column[] columns, int maxRows) { + final Workbook wb = ExcelUtils.readWorkbook(resource); final Sheet sheet = wb.getSheet(table.getName()); if (sheet == null || sheet.getPhysicalNumberOfRows() == 0) { @@ -87,7 +86,7 @@ public DataSet executeQuery(InputStream inputStream, Table table, Column[] colum } @Override - public void notifyTablesModified(Ref inputStreamRef) { + public void notifyTablesModified(final Resource resource) { // do nothing } @@ -164,7 +163,7 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) { /** * Builds columns based on row/cell values. - * + * * @param table * @param wb * @param row @@ -194,7 +193,7 @@ private void createColumns(MutableTable table, Workbook wb, Row row) { * Gets the column offset (first column to include). This is dependent on * the row used for column processing and whether the skip empty columns * property is set. - * + * * @param row * @return */ diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java index 5963e69f1..9f9f943ee 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java @@ -145,25 +145,19 @@ protected String getMainSchemaName() throws MetaModelException { @Override public DataSet materializeMainSchemaTable(Table table, Column[] columns, int maxRows) { - - Ref inputStreamRef = getInputStreamRef(); - InputStream inputStream = null; try { - SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate(inputStreamRef); - inputStream = inputStreamRef.get(); + SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate(); // METAMODEL-47: Ensure that we have loaded the schema at this point getDefaultSchema(); - DataSet dataSet = delegate.executeQuery(inputStream, table, columns, maxRows); + DataSet dataSet = delegate.executeQuery(_resource, table, columns, maxRows); return dataSet; } catch (Exception e) { if (e instanceof RuntimeException) { throw (RuntimeException) e; } throw new MetaModelException("Unexpected exception while materializing main schema table", e); - } finally { - FileHelper.safeClose(inputStream); } } @@ -176,9 +170,9 @@ protected Schema getMainSchema() throws MetaModelException { Ref inputStreamRef = getInputStreamRef(); InputStream inputStream = null; try { - SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate(inputStreamRef); + SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate(); inputStream = inputStreamRef.get(); - Schema schema = delegate.createSchema(inputStream, getMainSchemaName()); + Schema schema = delegate.createSchema(_resource, getMainSchemaName()); assert getMainSchemaName().equals(schema.getName()); return schema; } catch (Exception e) { @@ -209,13 +203,13 @@ protected Class getSpreadsheetReaderDelegat return null; } - private SpreadsheetReaderDelegate getSpreadsheetReaderDelegate(Ref inputStream) + private SpreadsheetReaderDelegate getSpreadsheetReaderDelegate() throws MetaModelException { if (_spreadsheetReaderDelegate == null) { synchronized (this) { if (_spreadsheetReaderDelegate == null) { try { - if (POIXMLDocument.hasOOXMLHeader(inputStream.get())) { + if (POIXMLDocument.hasOOXMLHeader(getInputStream())) { _spreadsheetReaderDelegate = new XlsxSpreadsheetReaderDelegate(_configuration); } else { _spreadsheetReaderDelegate = new DefaultSpreadsheetReaderDelegate(_configuration); @@ -252,7 +246,7 @@ public InputStream fetch() { protected void notifyTablesModified() { LazyRef inputStreamRef = getInputStreamRef(); try { - getSpreadsheetReaderDelegate(inputStreamRef).notifyTablesModified(inputStreamRef); + getSpreadsheetReaderDelegate().notifyTablesModified(_resource); } finally { if (inputStreamRef.isFetched()) { FileHelper.safeClose(inputStreamRef.get()); diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java index d73721f71..a75ee63c1 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java @@ -18,6 +18,8 @@ */ package org.apache.metamodel.excel; +import java.io.File; +import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.text.NumberFormat; @@ -27,10 +29,12 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import org.apache.metamodel.util.FileResource; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFFont; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.util.HSSFColor; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.formula.FormulaParseException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellStyle; @@ -91,6 +95,19 @@ public static XMLReader createXmlReader() { } } + /** + * Initializes a workbook instance based on a {@link File}. + * + * @return a workbook instance based on the {@link File}.. + */ + public static Workbook readWorkbook(final File file) { + try { + return WorkbookFactory.create(file); + } catch (Exception e) { + throw new IllegalStateException("Could not open workbook", e); + } + } + /** * Initializes a workbook instance based on a inputstream. * @@ -106,12 +123,16 @@ public static Workbook readWorkbook(InputStream inputStream) { } public static Workbook readWorkbook(Resource resource) { - return resource.read(new Func() { - @Override - public Workbook eval(InputStream inputStream) { - return readWorkbook(inputStream); - } - }); + if(resource instanceof FileResource){ + return readWorkbook(((FileResource) resource).getFile()); + } else { + return resource.read(new Func() { + @Override + public Workbook eval(InputStream inputStream) { + return readWorkbook(inputStream); + } + }); + } } public static boolean isXlsxFile(Resource resource) { diff --git a/excel/src/main/java/org/apache/metamodel/excel/SpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/SpreadsheetReaderDelegate.java index e18553e32..716807ebf 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/SpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/SpreadsheetReaderDelegate.java @@ -25,6 +25,7 @@ import org.apache.metamodel.schema.Schema; import org.apache.metamodel.schema.Table; import org.apache.metamodel.util.Ref; +import org.apache.metamodel.util.Resource; /** * Delegate for spreadsheet-implementation specific operations in an @@ -32,12 +33,12 @@ */ interface SpreadsheetReaderDelegate { - public void notifyTablesModified(Ref inputStreamRef); + void notifyTablesModified(final Resource resource); - public Schema createSchema(InputStream inputStream, String schemaName) + Schema createSchema(final Resource resource, String schemaName) throws Exception; - public DataSet executeQuery(InputStream inputStream, Table table, + DataSet executeQuery(final Resource resource, Table table, Column[] columns, int maxRows) throws Exception; } diff --git a/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java index 85dbdb120..97eaf9433 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java @@ -38,7 +38,8 @@ import org.apache.metamodel.schema.Table; import org.apache.metamodel.util.AlphabeticSequence; import org.apache.metamodel.util.FileHelper; -import org.apache.metamodel.util.Ref; +import org.apache.metamodel.util.FileResource; +import org.apache.metamodel.util.Resource; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.slf4j.Logger; @@ -60,64 +61,101 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { public XlsxSpreadsheetReaderDelegate(ExcelConfiguration configuration) { _configuration = configuration; - _tableNamesToInternalIds = new ConcurrentHashMap(); + _tableNamesToInternalIds = new ConcurrentHashMap<>(); } @Override - public DataSet executeQuery(InputStream inputStream, Table table, Column[] columns, int maxRows) throws Exception { - final OPCPackage pkg = OPCPackage.open(inputStream); - final XSSFReader xssfReader = new XSSFReader(pkg); - final String relationshipId = _tableNamesToInternalIds.get(table.getName()); - - if (relationshipId == null) { - throw new IllegalStateException("No internal relationshipId found for table: " + table); - } + public DataSet executeQuery(final Resource resource, Table table, Column[] columns, int maxRows) throws Exception { + InputStream inputStream = null; + try { + final OPCPackage pkg; + if (resource instanceof FileResource) { + pkg = OPCPackage.open(((FileResource) resource).getFile()); + } else { + inputStream = resource.read(); + pkg = OPCPackage.open(inputStream); + } - return buildDataSet(columns, maxRows, relationshipId, xssfReader); + final XSSFReader xssfReader = new XSSFReader(pkg); + final String relationshipId = _tableNamesToInternalIds.get(table.getName()); + + if (relationshipId == null) { + throw new IllegalStateException("No internal relationshipId found for table: " + table); + } + + return buildDataSet(columns, maxRows, relationshipId, xssfReader); + } finally { + if (inputStream != null) { + FileHelper.safeClose(inputStream); + } + } } @Override - public Schema createSchema(InputStream inputStream, String schemaName) throws Exception { - final MutableSchema schema = new MutableSchema(schemaName); - final OPCPackage pkg = OPCPackage.open(inputStream); - final XSSFReader xssfReader = new XSSFReader(pkg); + public Schema createSchema(final Resource resource, String schemaName) throws Exception { + InputStream inputStream = null; + try { + final OPCPackage pkg; + if (resource instanceof FileResource) { + pkg = OPCPackage.open(((FileResource) resource).getFile()); + } else { + inputStream = resource.read(); + pkg = OPCPackage.open(inputStream); + } - final XlsxWorkbookToTablesHandler workbookToTables = new XlsxWorkbookToTablesHandler(schema, - _tableNamesToInternalIds); - buildTables(xssfReader, workbookToTables); + final MutableSchema schema = new MutableSchema(schemaName); + final XSSFReader xssfReader = new XSSFReader(pkg); - for (Entry entry : _tableNamesToInternalIds.entrySet()) { + final XlsxWorkbookToTablesHandler workbookToTables = new XlsxWorkbookToTablesHandler(schema, + _tableNamesToInternalIds); + buildTables(xssfReader, workbookToTables); + + for (Entry entry : _tableNamesToInternalIds.entrySet()) { - final String tableName = entry.getKey(); - final String relationshipId = entry.getValue(); + final String tableName = entry.getKey(); + final String relationshipId = entry.getValue(); - final MutableTable table = (MutableTable) schema.getTableByName(tableName); + final MutableTable table = (MutableTable) schema.getTableByName(tableName); - buildColumns(table, relationshipId, xssfReader); + buildColumns(table, relationshipId, xssfReader); + } + return schema; + } finally { + if (inputStream != null) { + FileHelper.safeClose(inputStream); + } } - return schema; + } @Override - public void notifyTablesModified(Ref inputStreamRef) { - InputStream inputStream = inputStreamRef.get(); + public void notifyTablesModified(final Resource resource) { + InputStream inputStream = null; final XlsxWorkbookToTablesHandler workbookToTables = new XlsxWorkbookToTablesHandler(null, _tableNamesToInternalIds); try { - final OPCPackage pkg = OPCPackage.open(inputStream); + final OPCPackage pkg; + if (resource instanceof FileResource) { + pkg = OPCPackage.open(((FileResource) resource).getFile()); + } else { + inputStream = resource.read(); + pkg = OPCPackage.open(inputStream); + } final XSSFReader xssfReader = new XSSFReader(pkg); buildTables(xssfReader, workbookToTables); } catch (Exception e) { throw new IllegalStateException(e); } finally { - FileHelper.safeClose(inputStream); + if (inputStream != null) { + FileHelper.safeClose(inputStream); + } } } private DataSet buildDataSet(final Column[] columns, int maxRows, final String relationshipId, final XSSFReader xssfReader) throws Exception { - List selectItems = new ArrayList(columns.length); + List selectItems = new ArrayList<>(columns.length); for (Column column : columns) { selectItems.add(new SelectItem(column)); } @@ -137,7 +175,7 @@ public boolean row(int rowNumber, List values, List