diff --git a/oracle-plugin/docs/Oracle-batchsink.md b/oracle-plugin/docs/Oracle-batchsink.md index 6b113dc4c..ea4491a6e 100644 --- a/oracle-plugin/docs/Oracle-batchsink.md +++ b/oracle-plugin/docs/Oracle-batchsink.md @@ -60,37 +60,38 @@ will be passed to the JDBC driver as connection arguments for JDBC drivers that Data Types Mapping ---------- - | Oracle Data Type | CDAP Schema Data Type | Comment | - | ------------------------------ | --------------------- | ------------------------------------------------------ | - | VARCHAR2 | string | | - | NVARCHAR2 | string | | - | VARCHAR | string | | - | NUMBER | string | For NUMBER types defined without a precision and scale | - | NUMBER | decimal | For NUMBER types with a defined precision and scale | - | FLOAT | double | | - | LONG | string | | - | DATE | timestamp | | - | BINARY_FLOAT | float | | - | BINARY_DOUBLE | double | | - | TIMESTAMP | timestamp | | - | TIMESTAMP WITH TIME ZONE | string | Timestamp string in the following format: | - | | | "2019-07-15 15:57:46.65 GMT" | - | TIMESTAMP WITH LOCAL TIME ZONE | timestamp | | - | INTERVAL YEAR TO MONTH | string | Oracle's 'INTERVAL YEAR TO MONTH' literal in the | - | | | standard format: "year[-month]" | - | INTERVAL DAY TO SECOND | string | Oracle's 'INTERVAL DAY TO SECOND' literal in the | - | | | standard format: | - | | | "[day] [hour][:minutes][:seconds[.milliseconds]" | - | RAW | bytes | | - | LONG RAW | bytes | | - | ROWID | string | | - | UROWID | string | | - | CHAR | string | | - | NCHAR | string | | - | CLOB | string | | - | NCLOB | string | | - | BLOB | bytes | | - | BFILE | | BFILE data type is not supported for the sink | + | Oracle Data Type | CDAP Schema Data Type | Comment | + | ------------------------------ | --------------------- | -----------------------------------------------------------| + | VARCHAR2 | string | | + | NVARCHAR2 | string | | + | VARCHAR | string | | + | NUMBER | string | For NUMBER types defined without a precision and scale. | + | | | Users can manually set output schema to map it to Decimal. | + | NUMBER | decimal | For NUMBER types defined with a precision and scale. | + | FLOAT | double | | + | LONG | string | | + | DATE | timestamp | | + | BINARY_FLOAT | float | | + | BINARY_DOUBLE | double | | + | TIMESTAMP | timestamp | | + | TIMESTAMP WITH TIME ZONE | string | Timestamp string in the following format: | + | | | "2019-07-15 15:57:46.65 GMT" | + | TIMESTAMP WITH LOCAL TIME ZONE | timestamp | | + | INTERVAL YEAR TO MONTH | string | Oracle's 'INTERVAL YEAR TO MONTH' literal in the | + | | | standard format: "year[-month]" | + | INTERVAL DAY TO SECOND | string | Oracle's 'INTERVAL DAY TO SECOND' literal in the | + | | | standard format: | + | | | "[day] [hour][:minutes][:seconds[.milliseconds]" | + | RAW | bytes | | + | LONG RAW | bytes | | + | ROWID | string | | + | UROWID | string | | + | CHAR | string | | + | NCHAR | string | | + | CLOB | string | | + | NCLOB | string | | + | BLOB | bytes | | + | BFILE | | BFILE data type is not supported for the sink | Example diff --git a/oracle-plugin/docs/Oracle-batchsource.md b/oracle-plugin/docs/Oracle-batchsource.md index 9e3dae94e..33a381645 100644 --- a/oracle-plugin/docs/Oracle-batchsource.md +++ b/oracle-plugin/docs/Oracle-batchsource.md @@ -74,35 +74,37 @@ with the tradeoff of higher memory usage. Data Types Mapping ---------- - | Oracle Data Type | CDAP Schema Data Type | Comment | - | ------------------------------ | --------------------- | ------------------------------------------------------ | - | VARCHAR2 | string | | - | NVARCHAR2 | string | | - | VARCHAR | string | | - | NUMBER | decimal | | - | FLOAT | double | | - | LONG | string | | - | DATE | timestamp | | - | BINARY_FLOAT | float | | - | BINARY_DOUBLE | double | | - | TIMESTAMP | timestamp | | - | TIMESTAMP WITH TIME ZONE | string | | - | TIMESTAMP WITH LOCAL TIME ZONE | timestamp | | - | INTERVAL YEAR TO MONTH | string | | - | INTERVAL DAY TO SECOND | string | | - | RAW | bytes | | - | LONG RAW | bytes | | - | ROWID | string | | - | UROWID | string | | - | CHAR | string | | - | NCHAR | string | | - | CLOB | string | | - | NCLOB | string | | - | BLOB | bytes | | - | BFILE | bytes | BFILE is a data type used to store a locator (link) | - | | | to an external file, which is stored outside of the | - | | | database. Only the locator will be read from an | - | | | Oracle table and not the content of the external file. | + | Oracle Data Type | CDAP Schema Data Type | Comment | + | ------------------------------ | --------------------- | -----------------------------------------------------------| + | VARCHAR2 | string | | + | NVARCHAR2 | string | | + | VARCHAR | string | | + | NUMBER | string | For NUMBER types defined without a precision and scale. | + | | | Users can manually set output schema to map it to Decimal. | + | NUMBER | decimal | For NUMBER types defined with a precision and scale. | + | FLOAT | double | | + | LONG | string | | + | DATE | timestamp | | + | BINARY_FLOAT | float | | + | BINARY_DOUBLE | double | | + | TIMESTAMP | timestamp | | + | TIMESTAMP WITH TIME ZONE | string | | + | TIMESTAMP WITH LOCAL TIME ZONE | timestamp | | + | INTERVAL YEAR TO MONTH | string | | + | INTERVAL DAY TO SECOND | string | | + | RAW | bytes | | + | LONG RAW | bytes | | + | ROWID | string | | + | UROWID | string | | + | CHAR | string | | + | NCHAR | string | | + | CLOB | string | | + | NCLOB | string | | + | BLOB | bytes | | + | BFILE | bytes | BFILE is a data type used to store a locator (link) | + | | | to an external file, which is stored outside of the | + | | | database. Only the locator will be read from an | + | | | Oracle table and not the content of the external file. | Example diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java index f2e3af53a..13ccd5f38 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java @@ -23,6 +23,7 @@ import io.cdap.cdap.api.annotation.MetadataProperty; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.api.data.schema.Schema; import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.cdap.etl.api.batch.BatchSourceContext; import io.cdap.cdap.etl.api.connector.Connector; @@ -153,6 +154,21 @@ public void validate(FailureCollector collector) { public String getTransactionIsolationLevel() { return connection.getTransactionIsolationLevel(); } + + @Override + protected void validateField(FailureCollector collector, + Schema.Field field, Schema actualFieldSchema, Schema expectedFieldSchema) { + // This change is needed to make sure that the pipeline upgrade continues to work post upgrade. + // Since the older handling of the precision less used to convert to the decimal type, + // and the new version would try to convert to the String type. In that case the output schema would + // contain Decimal(38, 0) (or something similar), and the code internally would try to identify + // the schema of the field(without precision and scale) as String. + if (Schema.LogicalType.DECIMAL.equals(expectedFieldSchema.getLogicalType()) + && actualFieldSchema.getType().equals(Schema.Type.STRING)) { + return; + } + super.validateField(collector, field, actualFieldSchema, expectedFieldSchema); + } } /** diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java index 89807e21e..df30f077f 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java @@ -223,8 +223,16 @@ private void handleOracleSpecificType(ResultSet resultSet, StructuredRecord.Buil recordBuilder.set(field.getName(), resultSet.getDouble(columnIndex)); } else { if (precision == 0) { - // In case of precision less decimal convert the field to String type - recordBuilder.set(field.getName(), resultSet.getString(columnIndex)); + Schema nonNullableSchema = field.getSchema().isNullable() ? + field.getSchema().getNonNullable() : field.getSchema(); + if (Schema.LogicalType.DECIMAL.equals(nonNullableSchema.getLogicalType())) { + // Handle the field using the schema set in the output schema + BigDecimal decimal = resultSet.getBigDecimal(columnIndex, getScale(field.getSchema())); + recordBuilder.setDecimal(field.getName(), decimal); + } else { + // In case of Number defined without precision and scale convert to String type + recordBuilder.set(field.getName(), resultSet.getString(columnIndex)); + } } else { // It's required to pass 'scale' parameter since in the case of Oracle, scale of 'BigDecimal' depends on the // scale set in the logical schema. For example for value '77.12' if the scale set in the logical schema is diff --git a/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleSourceDBRecordUnitTest.java b/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleSourceDBRecordUnitTest.java index 16ab399dd..6aca0466e 100644 --- a/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleSourceDBRecordUnitTest.java +++ b/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleSourceDBRecordUnitTest.java @@ -48,20 +48,18 @@ public class OracleSourceDBRecordUnitTest { /** * Validate the precision less Numbers handling against following use cases. - * 1. Ensure that for Number(0,-127) non nullable type a String is returned by default. - * 2. Ensure that for Number(0,-127) non nullable type a String is returned, - * if schema defined this as Number(38,4). - * 3. Ensure that for Number(0,-127) nullable type a String type is returned by default. - * 4. Ensure that for Number(0,-127) nullable type a String is returned, - * if schema defined this as Number(38,4). + * 1. Ensure that for Number(0,-127) non nullable type a String type is returned if output schema is String. + * 2. Ensure that for Number(0,-127) non nullable type a String type is returned if output schema is String. + * 3. Ensure that for Number(0,-127) nullable type a String type is returned if output schema is String. + * 4. Ensure that for Number(0,-127) nullable type a String type is returned if output schema is String. * @throws Exception */ @Test - public void validatePrecisionLessDecimalParsing() throws Exception { - Schema.Field field1 = Schema.Field.of("ID1", Schema.decimalOf(DEFAULT_PRECISION)); - Schema.Field field2 = Schema.Field.of("ID2", Schema.decimalOf(DEFAULT_PRECISION, 4)); - Schema.Field field3 = Schema.Field.of("ID3", Schema.nullableOf(Schema.decimalOf(DEFAULT_PRECISION))); - Schema.Field field4 = Schema.Field.of("ID4", Schema.nullableOf(Schema.decimalOf(DEFAULT_PRECISION, 4))); + public void validatePrecisionLessNumberParsingForOutputSchemaAsString() throws Exception { + Schema.Field field1 = Schema.Field.of("ID1", Schema.of(Schema.Type.STRING)); + Schema.Field field2 = Schema.Field.of("ID2", Schema.of(Schema.Type.STRING)); + Schema.Field field3 = Schema.Field.of("ID3", Schema.nullableOf(Schema.of(Schema.Type.STRING))); + Schema.Field field4 = Schema.Field.of("ID4", Schema.nullableOf(Schema.of(Schema.Type.STRING))); Schema schema = Schema.recordOf( "dbRecord", @@ -95,14 +93,59 @@ public void validatePrecisionLessDecimalParsing() throws Exception { Assert.assertEquals(record.get("ID4"), "123.4568"); } + /** + * Validate the precision less Numbers handling against following use cases. + * 1. Ensure that for Number(0,-127) non nullable type a Decimal type is returned if output schema is Decimal. + * 2. Ensure that for Number(0,-127) non nullable type a String is returned if output schema is Decimal. + * 3. Ensure that for Number(0,-127) nullable type a String type is returned if output schema is Decimal. + * 4. Ensure that for Number(0,-127) nullable type a String is returned if output schema is Decimal. + * @throws Exception + */ + @Test + public void validatePrecisionLessNumberParsingForOutputSchemaAsDecimal() throws Exception { + Schema.Field field1 = Schema.Field.of("ID1", Schema.decimalOf(DEFAULT_PRECISION)); + Schema.Field field2 = Schema.Field.of("ID2", Schema.decimalOf(DEFAULT_PRECISION, 4)); + Schema.Field field3 = Schema.Field.of("ID3", Schema.nullableOf(Schema.decimalOf(DEFAULT_PRECISION))); + Schema.Field field4 = Schema.Field.of("ID4", Schema.decimalOf(DEFAULT_PRECISION, 4)); + + Schema schema = Schema.recordOf( + "dbRecord", + field1, + field2, + field3, + field4 + ); + + when(resultSet.getMetaData()).thenReturn(resultSetMetaData); + when(resultSet.getBigDecimal(eq(1), eq(0))).thenReturn(new BigDecimal("123")); + when(resultSet.getBigDecimal(eq(2), eq(4))).thenReturn(new BigDecimal("123.4568")); + when(resultSet.getBigDecimal(eq(3), eq(0))).thenReturn(new BigDecimal("123")); + when(resultSet.getBigDecimal(eq(4), eq(4))).thenReturn(new BigDecimal("123.4568")); + + StructuredRecord.Builder builder = StructuredRecord.builder(schema); + OracleSourceDBRecord dbRecord = new OracleSourceDBRecord(null, null); + dbRecord.handleField(resultSet, builder, field1, 1, Types.NUMERIC, 0, -127); + dbRecord.handleField(resultSet, builder, field2, 2, Types.NUMERIC, 0, -127); + dbRecord.handleField(resultSet, builder, field3, 3, Types.NUMERIC, 0, -127); + dbRecord.handleField(resultSet, builder, field4, 4, Types.NUMERIC, 0, -127); + + StructuredRecord record = builder.build(); + Assert.assertTrue(record.getDecimal("ID1") instanceof BigDecimal); + Assert.assertEquals(record.getDecimal("ID1").toPlainString(), "123"); + Assert.assertTrue(record.getDecimal("ID2") instanceof BigDecimal); + Assert.assertEquals(record.getDecimal("ID2").toPlainString(), "123.4568"); + Assert.assertTrue(record.getDecimal("ID3") instanceof BigDecimal); + Assert.assertEquals(record.getDecimal("ID3").toPlainString(), "123"); + Assert.assertTrue(record.getDecimal("ID4") instanceof BigDecimal); + Assert.assertEquals(record.getDecimal("ID4").toPlainString(), "123.4568"); + } + /** * Validate the default precision Numbers handling against following use cases. - * 1. Ensure that for Number(38, 0) non nullable type a Number(38,0) is returned. - * 2. Ensure that for Number(38, 4) non nullable type a Number(38,6) is returned, - * if schema defined this as Number(38,6). - * 3. Ensure that for Number(38, 0) nullable type a Number(38,0) is returned by default. - * 4. Ensure that for Number(38, 4) nullable type a Number(38,6) is returned, - * if schema defined this as Number(38,6). + * 1. Ensure that for Number(38, 0) non nullable type a Number(38,0) is returned if output schema is Decimal. + * 2. Ensure that for Number(38, 4) non nullable type a Number(38,6) is returned if output schema is Decimal. + * 3. Ensure that for Number(38, 0) nullable type a Number(38,0) is returned if output schema is Decimal. + * 4. Ensure that for Number(38, 4) nullable type a Number(38,6) is returned if output schema is Decimal. * @throws Exception */ @Test