Skip to content

Commit

Permalink
fix postgres data handling from WAL logs in CDC mode (#15481)
Browse files Browse the repository at this point in the history
* fix postgres data handling from WAL logs in CDC mode

* format

* use formatter for dates also (#15485)

* format

* change test structure

* change log to debug

Co-authored-by: Edward Gao <edward.gao@airbyte.io>
  • Loading branch information
subodh1810 and edgao committed Aug 10, 2022
1 parent fdb5eb9 commit 0092712
Show file tree
Hide file tree
Showing 13 changed files with 387 additions and 169 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
import java.time.format.DateTimeFormatter;
import java.util.function.Function;

/**
* TODO : Replace all the DateTime related logic of this class with
* {@link io.airbyte.db.jdbc.DateTimeConverter}
*/
public class DataTypeUtils {

public static final String DATE_FORMAT_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";
Expand All @@ -27,6 +31,7 @@ public class DataTypeUtils {
public static final DateTimeFormatter TIMETZ_FORMATTER = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSSXXX");
public static final DateTimeFormatter TIMESTAMPTZ_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX");
public static final DateTimeFormatter OFFSETDATETIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSSS XXX");
public static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");

// wrap SimpleDateFormat in a function because SimpleDateFormat is not threadsafe as a static final.
public static DateFormat getDateFormat() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,18 +258,19 @@ public String getFullyQualifiedTableNameWithQuoting(final Connection connection,
return schemaName != null ? enquoteIdentifier(connection, schemaName) + "." + quotedTableName : quotedTableName;
}

protected <ObjectType> ObjectType getObject(ResultSet resultSet, int index, Class<ObjectType> clazz) throws SQLException {
protected <ObjectType> ObjectType getObject(final ResultSet resultSet, final int index, final Class<ObjectType> clazz) throws SQLException {
return resultSet.getObject(index, clazz);
}

protected void putTimeWithTimezone(ObjectNode node, String columnName, ResultSet resultSet, int index) throws SQLException {
OffsetTime timetz = getObject(resultSet, index, OffsetTime.class);
protected void putTimeWithTimezone(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException {
final OffsetTime timetz = getObject(resultSet, index, OffsetTime.class);
node.put(columnName, timetz.format(TIMETZ_FORMATTER));
}

protected void putTimestampWithTimezone(ObjectNode node, String columnName, ResultSet resultSet, int index) throws SQLException {
OffsetDateTime timestamptz = getObject(resultSet, index, OffsetDateTime.class);
LocalDate localDate = timestamptz.toLocalDate();
protected void putTimestampWithTimezone(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index)
throws SQLException {
final OffsetDateTime timestamptz = getObject(resultSet, index, OffsetDateTime.class);
final LocalDate localDate = timestamptz.toLocalDate();
node.put(columnName, resolveEra(localDate, timestamptz.format(TIMESTAMPTZ_FORMATTER)));
}

Expand All @@ -283,7 +284,7 @@ protected void putTimestampWithTimezone(ObjectNode node, String columnName, Resu
*
* You most likely would prefer to call one of the overloaded methods, which accept temporal types.
*/
public static String resolveEra(boolean isBce, String value) {
public static String resolveEra(final boolean isBce, final String value) {
String mangledValue = value;
if (isBce) {
if (mangledValue.startsWith("-")) {
Expand All @@ -296,11 +297,11 @@ public static String resolveEra(boolean isBce, String value) {
return mangledValue;
}

public static boolean isBce(LocalDate date) {
public static boolean isBce(final LocalDate date) {
return date.getEra().equals(IsoEra.BCE);
}

public static String resolveEra(LocalDate date, String value) {
public static String resolveEra(final LocalDate date, final String value) {
return resolveEra(isBce(date), value);
}

Expand All @@ -311,14 +312,14 @@ public static String resolveEra(LocalDate date, String value) {
* This is technically kind of sketchy due to ancient timestamps being weird (leap years, etc.), but
* my understanding is that {@link #ONE_CE} has the same weirdness, so it cancels out.
*/
public static String resolveEra(Date date, String value) {
public static String resolveEra(final Date date, final String value) {
return resolveEra(date.before(ONE_CE), value);
}

/**
* See {@link #resolveEra(Date, String)} for explanation.
*/
public static String resolveEra(Timestamp timestamp, String value) {
public static String resolveEra(final Timestamp timestamp, final String value) {
return resolveEra(timestamp.before(ONE_CE), value);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.db.jdbc;

import static io.airbyte.db.DataTypeUtils.DATE_FORMATTER;
import static io.airbyte.db.DataTypeUtils.TIMESTAMPTZ_FORMATTER;
import static io.airbyte.db.DataTypeUtils.TIMESTAMP_FORMATTER;
import static io.airbyte.db.DataTypeUtils.TIMETZ_FORMATTER;
import static io.airbyte.db.DataTypeUtils.TIME_FORMATTER;
import static io.airbyte.db.jdbc.AbstractJdbcCompatibleSourceOperations.resolveEra;
import static java.time.ZoneOffset.UTC;

import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.OffsetTime;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DateTimeConverter {

private static final Logger LOGGER = LoggerFactory.getLogger(DateTimeConverter.class);
public static final DateTimeFormatter TIME_WITH_TIMEZONE_FORMATTER = DateTimeFormatter.ofPattern(
"HH:mm:ss[.][SSSSSSSSS][SSSSSSS][SSSSSS][SSSSS][SSSS][SSS][SS][S][''][XXX][XX][X]");

public static String convertToTimeWithTimezone(final Object time) {
if (time instanceof final java.time.OffsetTime timetz) {
return timetz.format(TIMETZ_FORMATTER);
}
final OffsetTime timetz = OffsetTime.parse(time.toString(), TIME_WITH_TIMEZONE_FORMATTER);
return timetz.format(TIMETZ_FORMATTER);
}

public static String convertToTimestampWithTimezone(final Object timestamp) {
if (timestamp instanceof final Timestamp t) {
// In snapshot mode, debezium produces a java.sql.Timestamp object for the TIMESTAMPTZ type.
// Conceptually, a timestamp with timezone is an Instant. But t.toInstant() actually mangles the
// value for ancient dates, because leap years weren't applied consistently in ye olden days.
// Additionally, toInstant() (and toLocalDateTime()) actually lose the era indicator, so we can't
// rely on their getEra() methods.
// So we have special handling for this case, which sidesteps the toInstant conversion.
final ZonedDateTime timestamptz = t.toLocalDateTime().atZone(UTC);
final String value = timestamptz.format(TIMESTAMPTZ_FORMATTER);
return resolveEra(t, value);
} else if (timestamp instanceof final OffsetDateTime t) {
return resolveEra(t.toLocalDate(), t.format(TIMESTAMPTZ_FORMATTER));
} else if (timestamp instanceof final ZonedDateTime timestamptz) {
return resolveEra(timestamptz.toLocalDate(), timestamptz.format(TIMESTAMPTZ_FORMATTER));
} else {
// This case probably isn't strictly necessary, but I'm leaving it just in case there's some weird
// situation that I'm not aware of.
final Instant instant = Instant.parse(timestamp.toString());
final OffsetDateTime offsetDateTime = OffsetDateTime.ofInstant(instant, UTC);
final ZonedDateTime timestamptz = ZonedDateTime.from(offsetDateTime);
final LocalDate localDate = timestamptz.toLocalDate();
final String value = timestamptz.format(TIMESTAMPTZ_FORMATTER);
return resolveEra(localDate, value);
}
}

/**
* See {@link #convertToTimestampWithTimezone(Object)} for explanation of the weird things happening
* here.
*/
public static String convertToTimestamp(final Object timestamp) {
if (timestamp instanceof final Timestamp t) {
// Snapshot mode
final LocalDateTime localDateTime = t.toLocalDateTime();
final String value = localDateTime.format(TIMESTAMP_FORMATTER);
return resolveEra(t, value);
} else if (timestamp instanceof final Instant i) {
// Incremental mode
return resolveEra(i.atZone(UTC).toLocalDate(), i.atOffset(UTC).toLocalDateTime().format(TIMESTAMP_FORMATTER));
} else {
final LocalDateTime localDateTime = LocalDateTime.parse(timestamp.toString());
final LocalDate date = localDateTime.toLocalDate();
final String value = localDateTime.format(TIMESTAMP_FORMATTER);
return resolveEra(date, value);
}
}

/**
* See {@link #convertToTimestampWithTimezone(Object)} for explanation of the weird things happening
* here.
*/
public static String convertToDate(final Object date) {
if (date instanceof final Date d) {
// Snapshot mode
final LocalDate localDate = ((Date) date).toLocalDate();
return resolveEra(d, localDate.format(DATE_FORMATTER));
} else if (date instanceof LocalDate d) {
// Incremental mode
return resolveEra(d, d.format(DATE_FORMATTER));
} else {
final LocalDate localDate = LocalDate.parse(date.toString());
return resolveEra(localDate, localDate.format(DATE_FORMATTER));
}
}

public static String convertToTime(final Object time) {
if (time instanceof final Time sqlTime) {
return sqlTime.toLocalTime().format(TIME_FORMATTER);
} else if (time instanceof final LocalTime localTime) {
return localTime.format(TIME_FORMATTER);
} else if (time instanceof java.time.Duration) {
long value = ((Duration) time).toNanos();
if (value >= 0 && value <= TimeUnit.DAYS.toNanos(1)) {
return LocalTime.ofNanoOfDay(value).format(TIME_FORMATTER);
} else {
final long updatedValue = 0 > value ? Math.abs(value) : TimeUnit.DAYS.toNanos(1);
LOGGER.debug("Time values must use number of milliseconds greater than 0 and less than 86400000000000 but its {}, converting to {} ", value,
updatedValue);
return LocalTime.ofNanoOfDay(updatedValue).format(TIME_FORMATTER);
}
} else {
return LocalTime.parse(time.toString()).format(TIME_FORMATTER);
}
}

}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ private DebeziumConverterUtils() {
throw new UnsupportedOperationException();
}

/**
* TODO : Replace usage of this method with {@link io.airbyte.db.jdbc.DateTimeConverter}
*/
public static String convertDate(final Object input) {
/**
* While building this custom converter we were not sure what type debezium could return cause there
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

package io.airbyte.integrations.debezium.internals;

import io.airbyte.db.jdbc.DateTimeConverter;
import io.debezium.spi.converter.CustomConverter;
import io.debezium.spi.converter.RelationalColumn;
import java.math.BigDecimal;
Expand All @@ -21,7 +22,7 @@ public class PostgresConverter implements CustomConverter<SchemaBuilder, Relatio

private static final Logger LOGGER = LoggerFactory.getLogger(PostgresConverter.class);

private final String[] DATE_TYPES = {"DATE", "DATETIME", "TIME", "TIMETZ", "INTERVAL", "TIMESTAMP", "TIMESTAMPTZ"};
private final String[] DATE_TYPES = {"DATE", "TIME", "TIMETZ", "INTERVAL", "TIMESTAMP", "TIMESTAMPTZ"};
private final String[] BIT_TYPES = {"BIT", "VARBIT"};
private final String[] MONEY_ITEM_TYPE = {"MONEY"};
private final String[] GEOMETRICS_TYPES = {"BOX", "CIRCLE", "LINE", "LSEG", "POINT", "POLYGON", "PATH"};
Expand Down Expand Up @@ -115,7 +116,7 @@ private void registerDate(final RelationalColumn field, final ConverterRegistrat
case "DATE" -> DateTimeConverter.convertToDate(x);
case "TIME" -> DateTimeConverter.convertToTime(x);
case "INTERVAL" -> convertInterval((PGInterval) x);
default -> DebeziumConverterUtils.convertDate(x);
default -> throw new IllegalArgumentException("Unknown field type " + fieldType.toUpperCase(Locale.ROOT));
};
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,14 @@ public void setUpInternal() throws Exception {
localRoot = Files.createTempDirectory(testDir, "output");
environment = new TestDestinationEnv(localRoot);
workerConfigs = new WorkerConfigs(new EnvConfigs());

setupEnvironment(environment);

processFactory = new DockerProcessFactory(
workerConfigs,
workspaceRoot,
workspaceRoot.toString(),
localRoot.toString(),
"host");

setupEnvironment(environment);
}

@AfterEach
Expand Down
Loading

0 comments on commit 0092712

Please sign in to comment.