
Destination Redshift: Implement refreshes #40567

Merged 1 commit on Jul 12, 2024
airbyte-cdk/java/airbyte-cdk/README.md (1 addition & 0 deletions)
@@ -174,6 +174,7 @@ corresponds to that version.

| Version | Date | Pull Request | Subject |
|:-----------|:-----------| :--------------------------------------------------------- |:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.41.2 | 2024-07-12 | [\#40567](https://github.com/airbytehq/airbyte/pull/40567) | Fix BaseSqlGenerator test case (generation_id support); update minimum platform version for refreshes support. |
| 0.41.1 | 2024-07-11 | [\#41212](https://github.com/airbytehq/airbyte/pull/41212) | Improve debezium logging. |
| 0.41.0 | 2024-07-11 | [\#38240](https://github.com/airbytehq/airbyte/pull/38240) | Sources : Changes in CDC interfaces to support WASS algorithm |
| 0.40.11 | 2024-07-08 | [\#41041](https://github.com/airbytehq/airbyte/pull/41041) | Destinations: Fix truncate refreshes incorrectly discarding data if successful attempt had 0 records |
@@ -1 +1 @@
-version=0.41.1
+version=0.41.2
@@ -137,7 +137,7 @@ constructor(
fun toStreamConfig(stream: ConfiguredAirbyteStream): StreamConfig {
if (stream.generationId == null || stream.minimumGenerationId == null) {
throw ConfigErrorException(
"You must upgrade your platform version to use this connector version. Either downgrade your connector or upgrade platform to 0.63.0"
"You must upgrade your platform version to use this connector version. Either downgrade your connector or upgrade platform to 0.63.7"
)
}
if (
@@ -1814,6 +1814,9 @@ abstract class BaseSqlGeneratorIntegrationTest<DestinationState : MinimumDestina
)
)
)
.withSyncId(42)
Comment from @edgao (Contributor, Author), Jul 1, 2024:
> bigquery and snowflake both override+disable this test case >.>

.withGenerationId(43)
.withMinimumGenerationId(0)
.withSyncMode(SyncMode.INCREMENTAL)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
)
@@ -4,7 +4,7 @@ plugins {
}

airbyteJavaConnector {
-cdkVersionRequired = '0.38.3'
+cdkVersionRequired = '0.41.2'
features = ['db-destinations', 's3-destinations', 'typing-deduping']
useLocalCdk = false
}
@@ -5,7 +5,7 @@ data:
connectorSubtype: database
connectorType: destination
definitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc
-dockerImageTag: 3.2.0
+dockerImageTag: 3.3.0
dockerRepository: airbyte/destination-redshift
documentationUrl: https://docs.airbyte.com/integrations/destinations/redshift
githubIssueLabel: destination-redshift
@@ -37,6 +37,7 @@ data:
releaseStage: generally_available
supportLevel: certified
supportsDbt: true
supportsRefreshes: true
tags:
- language:java
connectorTestSuitesOptions:
@@ -184,6 +184,12 @@ class RedshiftDestination : BaseConnector(), Destination {
hasUnprocessedRecords = true,
maxProcessedTimestamp = Optional.empty(),
),
initialTempRawTableStatus =
InitialRawTableStatus(
rawTableExists = false,
hasUnprocessedRecords = true,
maxProcessedTimestamp = Optional.empty(),
),
isSchemaMismatch = true,
isFinalTableEmpty = true,
destinationState =
@@ -284,7 +290,8 @@ class RedshiftDestination : BaseConnector(), Destination {
)
}

-private fun getDatabase(dataSource: DataSource): JdbcDatabase {
+@VisibleForTesting
+fun getDatabase(dataSource: DataSource): JdbcDatabase {
return DefaultJdbcDatabase(dataSource)
}
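Dropping `private` in favor of `@VisibleForTesting` lets a test build the JdbcDatabase against a stubbed DataSource. A minimal sketch of such a test, assuming mockk is on the test classpath and RedshiftDestination has a no-arg constructor (both assumptions, not shown in this diff):

```kotlin
// Hypothetical test sketch -- not part of this PR.
import io.mockk.mockk
import javax.sql.DataSource
import org.junit.jupiter.api.Assertions.assertNotNull
import org.junit.jupiter.api.Test

class RedshiftDestinationGetDatabaseTest {
    @Test
    fun getDatabaseWrapsDataSource() {
        // A stubbed DataSource is enough: getDatabase only wraps it, it doesn't connect.
        val dataSource = mockk<DataSource>()
        assertNotNull(RedshiftDestination().getDatabase(dataSource))
    }
}
```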

@@ -19,7 +19,6 @@ import io.airbyte.integrations.destination.redshift.manifest.Entry
import io.airbyte.integrations.destination.redshift.manifest.Manifest
import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftDestinationHandler
import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlGenerator
-import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.Instant
import java.time.ZoneOffset
@@ -41,17 +40,70 @@ class RedshiftStagingStorageOperation(
private val writeDatetime: ZonedDateTime = Instant.now().atZone(ZoneOffset.UTC)
private val objectMapper = ObjectMapper()

-override fun prepareStage(streamId: StreamId, destinationSyncMode: DestinationSyncMode) {
+override fun prepareStage(streamId: StreamId, suffix: String, replace: Boolean) {
// create raw table
-destinationHandler.execute(Sql.of(createRawTableQuery(streamId)))
-if (destinationSyncMode == DestinationSyncMode.OVERWRITE) {
-destinationHandler.execute(Sql.of(truncateRawTableQuery(streamId)))
+destinationHandler.execute(Sql.of(createRawTableQuery(streamId, suffix)))
+if (replace) {
+destinationHandler.execute(Sql.of(truncateRawTableQuery(streamId, suffix)))
}
// create bucket for staging files
s3StorageOperations.createBucketIfNotExists()
}

-override fun writeToStage(streamConfig: StreamConfig, data: SerializableBuffer) {
override fun overwriteStage(streamId: StreamId, suffix: String) {
destinationHandler.execute(
Sql.transactionally(
"""DROP TABLE IF EXISTS "${streamId.rawNamespace}"."${streamId.rawName}" """,
"""ALTER TABLE "${streamId.rawNamespace}"."${streamId.rawName}$suffix" RENAME TO "${streamId.rawName}" """
)
)
}

override fun transferFromTempStage(streamId: StreamId, suffix: String) {
destinationHandler.execute(
// ALTER TABLE ... APPEND is an efficient way to move records from one table to another.
// Instead of naively duplicating the data, it actually moves the underlying data
// blocks.
// (https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE_APPEND.html)
// But it can't run inside transactions, so run these statements separately.
Sql.separately(
// Note for future developers:
// ALTER TABLE ... APPEND has some interesting restrictions where both tables need
// the exact same structure (clustering, columns, etc.), so if we want to change
// those in the future, this might be tricky/annoying?
// If we have issues at that point, we can always switch to a simple
// `INSERT INTO ... SELECT * FROM ...` query.
"""
ALTER TABLE "${streamId.rawNamespace}"."${streamId.rawName}"
APPEND FROM "${streamId.rawNamespace}"."${streamId.rawName}$suffix"
""".trimIndent(),
"""DROP TABLE IF EXISTS "${streamId.rawNamespace}"."${streamId.rawName}$suffix" """,
),
// Skip the case-sensitivity thing - ALTER TABLE ... APPEND can't be run in a
// transaction, so we can't run the SET statement.
// We're only working with schema/table names, so it's fine to just quote the
// identifiers instead of relying on this option.
forceCaseSensitiveIdentifier = false
)
}
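For future reference, the simpler fallback mentioned in the comment above could look roughly like the following. This is a hypothetical sketch, not code from this PR, and `insertIntoFallback` is an invented name:

```kotlin
// Hypothetical fallback -- not part of this PR. Copies rows instead of moving
// data blocks, so it works even if the two tables' structures diverge, at the
// cost of physically duplicating the data. Unlike ALTER TABLE ... APPEND, it
// can run inside a transaction.
private fun insertIntoFallback(streamId: StreamId, suffix: String): Sql =
    Sql.transactionally(
        """
        INSERT INTO "${streamId.rawNamespace}"."${streamId.rawName}"
        SELECT * FROM "${streamId.rawNamespace}"."${streamId.rawName}$suffix"
        """.trimIndent(),
        """DROP TABLE IF EXISTS "${streamId.rawNamespace}"."${streamId.rawName}$suffix" """,
    )
```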

override fun getStageGeneration(streamId: StreamId, suffix: String): Long? {
val generation =
destinationHandler.query(
"""SELECT ${JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID} FROM "${streamId.rawNamespace}"."${streamId.rawName}$suffix" LIMIT 1"""
)
if (generation.isEmpty()) {
return null
}

return generation.first()[JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID].asLong()
}
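getStageGeneration gives the framework the information it needs to decide whether a leftover temp stage from a failed attempt can be reused. A hedged sketch of that caller-side decision (the real orchestration lives in the CDK's stream operations, and the `generationId` field on StreamConfig is assumed here):

```kotlin
// Illustrative caller-side logic -- assumed, not from this PR.
fun shouldReplaceTempStage(
    storageOperation: RedshiftStagingStorageOperation,
    streamConfig: StreamConfig,
    suffix: String,
): Boolean {
    val stageGeneration = storageOperation.getStageGeneration(streamConfig.id, suffix)
    // null: the temp raw table is missing or empty, so there is nothing to preserve.
    // A different generation: the records belong to an older refresh attempt.
    // Either way, the stage should be (re)created from scratch.
    return stageGeneration == null || stageGeneration != streamConfig.generationId
}
```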

override fun writeToStage(
streamConfig: StreamConfig,
suffix: String,
data: SerializableBuffer
) {
val streamId = streamConfig.id
val objectPath: String = getStagingPath(streamId)
log.info {
@@ -61,13 +113,19 @@
s3StorageOperations.uploadRecordsToBucket(data, streamId.rawNamespace, objectPath)

log.info {
"Starting copy to target table from stage: ${streamId.rawName} in destination from stage: $objectPath/$filename."
"Starting copy to target table from stage: ${streamId.rawName}$suffix in destination from stage: $objectPath/$filename."
}
val manifestContents = createManifest(listOf(filename), objectPath)
val manifestPath = putManifest(manifestContents, objectPath)
-executeCopy(manifestPath, destinationHandler, streamId.rawNamespace, streamId.rawName)
+executeCopy(
+manifestPath,
+destinationHandler,
+streamId.rawNamespace,
+streamId.rawName,
+suffix
+)
log.info {
"Copy to target table ${streamId.rawNamespace}.${streamId.rawName} in destination complete."
"Copy to target table ${streamId.rawNamespace}.${streamId.rawName}$suffix in destination complete."
}
}
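(Context for the manifest steps above: a Redshift COPY manifest is a small JSON document, roughly `{"entries": [{"url": "s3://<bucket>/<staging-path>/<file>.csv.gz", "mandatory": true}]}`; the bucket and path here are placeholders, and the exact shape produced by `createManifest` is defined by the `Manifest`/`Entry` classes imported above.)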

@@ -172,6 +230,7 @@
destinationHandler: RedshiftDestinationHandler,
schemaName: String,
tableName: String,
suffix: String,
) {
val accessKeyId =
s3Config.s3CredentialConfig!!.s3CredentialsProvider.credentials.awsAccessKeyId
@@ -180,7 +239,7 @@

val copyQuery =
"""
-COPY $schemaName.$tableName FROM '${getFullS3Path(s3Config.bucketName!!, manifestPath)}'
+COPY $schemaName.$tableName$suffix FROM '${getFullS3Path(s3Config.bucketName!!, manifestPath)}'
CREDENTIALS 'aws_access_key_id=$accessKeyId;aws_secret_access_key=$secretAccessKey'
CSV GZIP
REGION '${s3Config.bucketRegion}' TIMEFORMAT 'auto'
@@ -195,9 +254,9 @@
companion object {
private val nameTransformer = RedshiftSQLNameTransformer()

-private fun createRawTableQuery(streamId: StreamId): String {
+private fun createRawTableQuery(streamId: StreamId, suffix: String): String {
return """
-CREATE TABLE IF NOT EXISTS "${streamId.rawNamespace}"."${streamId.rawName}" (
+CREATE TABLE IF NOT EXISTS "${streamId.rawNamespace}"."${streamId.rawName}$suffix" (
${JavaBaseConstants.COLUMN_NAME_AB_RAW_ID} VARCHAR(36),
${JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT} TIMESTAMPTZ DEFAULT GETDATE(),
${JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT} TIMESTAMPTZ,
@@ -208,12 +267,8 @@
""".trimIndent()
}

-private fun truncateRawTableQuery(streamId: StreamId): String {
-return String.format(
-"""TRUNCATE TABLE "%s"."%s";""",
-streamId.rawNamespace,
-streamId.rawName
-)
+private fun truncateRawTableQuery(streamId: StreamId, suffix: String): String {
+return """TRUNCATE TABLE "${streamId.rawNamespace}"."${streamId.rawName}$suffix" """
}

private fun getFullS3Path(s3BucketName: String, s3StagingFile: String): String {
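Taken together, the new storage-operation methods give the CDK a complete refresh lifecycle for the raw stage. A condensed sketch of how they might be sequenced, with the suffix value, the `generationId` field on StreamConfig, and the orchestration all assumed for illustration (in reality the CDK's stream operations drive these calls):

```kotlin
// Illustrative orchestration -- assumed, not from this PR.
fun sketchTruncateRefresh(
    storageOperation: RedshiftStagingStorageOperation,
    streamConfig: StreamConfig,
    buffer: SerializableBuffer,
) {
    val tmpSuffix = "_airbyte_tmp" // assumed suffix; the CDK supplies the real value
    val streamId = streamConfig.id

    // Recreate the temp stage unless it already holds records from this generation.
    val replace =
        storageOperation.getStageGeneration(streamId, tmpSuffix) != streamConfig.generationId
    storageOperation.prepareStage(streamId, tmpSuffix, replace)

    // Load each batch of the sync into the temp raw table.
    storageOperation.writeToStage(streamConfig, tmpSuffix, buffer)

    // On success, either swap the temp table into place (truncate semantics) ...
    storageOperation.overwriteStage(streamId, tmpSuffix)
    // ... or append its records to the existing raw table instead:
    // storageOperation.transferFromTempStage(streamId, tmpSuffix)
}
```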
@@ -84,7 +84,17 @@ class RedshiftDestinationHandler(
execute(sql, logStatements = true)
}

-fun execute(sql: Sql, logStatements: Boolean) {
/**
* @param forceCaseSensitiveIdentifier Whether to enable `forceCaseSensitiveIdentifier` on all
* transactions. This option is most useful for accessing fields within a `SUPER` value; for
* accessing schemas/tables/columns, quoting the identifier is sufficient to force
* case-sensitivity, so this option is not necessary.
*/
fun execute(
sql: Sql,
logStatements: Boolean = true,
forceCaseSensitiveIdentifier: Boolean = true
) {
val transactions = sql.transactions
val queryId = UUID.randomUUID()
for (transaction in transactions) {
@@ -103,12 +113,20 @@
// characters, even after
// specifying quotes.
// see https://github.com/airbytehq/airbyte/issues/33900
-modifiedStatements.add("SET enable_case_sensitive_identifier to TRUE;\n")
+if (forceCaseSensitiveIdentifier) {
+modifiedStatements.add("SET enable_case_sensitive_identifier to TRUE;\n")
+}
modifiedStatements.addAll(transaction)
-jdbcDatabase.executeWithinTransaction(
-modifiedStatements,
-logStatements = logStatements
-)
+if (modifiedStatements.size != 1) {
+jdbcDatabase.executeWithinTransaction(
+modifiedStatements,
+logStatements = logStatements
+)
+} else {
+// Redshift doesn't allow some statements to run in a transaction at all,
+// so handle the single-statement case specially.
+jdbcDatabase.execute(modifiedStatements.first())
+}
} catch (e: SQLException) {
log.error(e) { "Sql $queryId-$transactionId failed" }
// This is a big hammer for something that should be much more targeted, only when
@@ -155,6 +173,8 @@
)
}

fun query(sql: String): List<JsonNode> = jdbcDatabase.queryJsons(sql)
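Both new parameters default to the old behavior, so existing call sites compile unchanged; only the storage operation's ALTER TABLE ... APPEND path opts out of the SET prefix. A small usage sketch (the statements and the `handler` parameter are illustrative, not from this PR):

```kotlin
// Illustrative call sites -- not from this PR.
fun exampleUsage(handler: RedshiftDestinationHandler) {
    // Multi-statement path: the SET enable_case_sensitive_identifier prefix is added,
    // and everything runs inside a single transaction.
    handler.execute(Sql.transactionally("""TRUNCATE TABLE "some_schema"."some_table" """))

    // A statement that refuses to run in a transaction: skip the SET prefix so each
    // statement list has size 1 and is executed outside a transaction.
    handler.execute(
        Sql.separately("""ALTER TABLE "s"."a" APPEND FROM "s"."b" """),
        forceCaseSensitiveIdentifier = false,
    )

    // Plain query helper, as used by getStageGeneration:
    val rows = handler.query("SELECT 1")
}
```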

private fun toJdbcTypeName(airbyteProtocolType: AirbyteProtocolType): String {
return when (airbyteProtocolType) {
AirbyteProtocolType.STRING -> "varchar"