Skip to content

Commit

Permalink
[source-mongodb] record count in state & initial iterator refactor (#35669)
Browse files Browse the repository at this point in the history

Signed-off-by: Artem Inzhyyants <artem.inzhyyants@gmail.com>
Signed-off-by: Gireesh Sreepathi <gisripa@gmail.com>
Co-authored-by: Augustin <augustin@airbyte.io>
Co-authored-by: Subodh Kant Chaturvedi <subodh1810@gmail.com>
Co-authored-by: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com>
Co-authored-by: Baz <oleksandr.bazarnov@globallogic.com>
Co-authored-by: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com>
Co-authored-by: Aaron ("AJ") Steers <aj@airbyte.io>
Co-authored-by: Tim Roes <tim@airbyte.io>
Co-authored-by: benmoriceau <benmoriceau@users.noreply.github.com>
Co-authored-by: Gireesh Sreepathi <gisripa@gmail.com>
Co-authored-by: Marius Posta <marius@airbyte.io>
Co-authored-by: Evan Tahler <evan@airbyte.io>
Co-authored-by: Edward Gao <edward.gao@airbyte.io>
Co-authored-by: Anton Karpets <anton.karpets@globallogic.com>
Co-authored-by: Patrick Nilan <nilan.patrick@gmail.com>
Co-authored-by: Akash Kulkarni <113392464+akashkulk@users.noreply.github.com>
Co-authored-by: Tyler B <104733644+tybernstein@users.noreply.github.com>
Co-authored-by: bgroff <bgroff@users.noreply.github.com>
Co-authored-by: mjgatz <86885812+mjgatz@users.noreply.github.com>
Co-authored-by: mgreene <michael.greene@gravie.com>
Co-authored-by: Serhii Lazebnyi <serhii.lazebnyi@globallogic.com>
Co-authored-by: Serhii Lazebnyi <53845333+lazebnyi@users.noreply.github.com>
Co-authored-by: Rodi Reich Zilberman <867491+rodireich@users.noreply.github.com>
Co-authored-by: Daryna Ishchenko <80129833+darynaishchenko@users.noreply.github.com>
Co-authored-by: Stephane Geneix <147216312+stephane-airbyte@users.noreply.github.com>
Co-authored-by: Joe Reuter <joe@airbyte.io>
Co-authored-by: Marcos Marx <marcosmarxm@users.noreply.github.com>
Co-authored-by: Maxime Carbonneau-Leclerc <3360483+maxi297@users.noreply.github.com>
Co-authored-by: Akash Kulkarni <akash@airbyte.io>
Co-authored-by: Roman Yermilov [GL] <86300758+roman-yermilov-gl@users.noreply.github.com>
Co-authored-by: Alexandre Girard <alexandre@airbyte.io>
Co-authored-by: girarda <girarda@users.noreply.github.com>
Co-authored-by: Brian Lai <51336873+brianjlai@users.noreply.github.com>
Co-authored-by: brianjlai <brianjlai@users.noreply.github.com>
Co-authored-by: Catherine Noll <clnoll@users.noreply.github.com>
Co-authored-by: midavadim <midavadim@yahoo.com>
Co-authored-by: Julien COUTAND <julien.coutand@gmail.com>
Co-authored-by: Christo Grabowski <108154848+ChristoGrab@users.noreply.github.com>
Co-authored-by: maxi297 <maxime@airbyte.io>
Co-authored-by: Bindi Pankhudi <bindi@airbyte.io>
Co-authored-by: Bindi Pankhudi <bindi@airbyte.com>
Co-authored-by: Ben Drucker <bvdrucker@gmail.com>
Co-authored-by: TornadoContre <37258495+TornadoContre@users.noreply.github.com>
Co-authored-by: Natik Gadzhi <natik@respawn.io>
Co-authored-by: Thomas Dippel <dipth@users.noreply.github.com>
Co-authored-by: marcosmarxm <marcosmarxm@gmail.com>
Co-authored-by: Alex Birdsall <ambirdsall@gmail.com>
Co-authored-by: ambirdsall <ambirdsall@users.noreply.github.com>
Co-authored-by: Jose Gerardo Pineda <jose.pineda@airbyte.io>
Co-authored-by: alafanechere <augustin.lafanechere@gmail.com>
Co-authored-by: Pedro S. Lopez <pedroslopez@me.com>
Co-authored-by: Ella Rohm-Ensing <erohmensing@gmail.com>
Co-authored-by: Siarhei Ivanou <sinusu@gmail.com>
Co-authored-by: Anatolii Yatsuk <tolikyatsuk@gmail.com>
Co-authored-by: Ryan Waskewich <156025126+rwask@users.noreply.github.com>
Co-authored-by: Sajarin <sajarindider@gmail.com>
Co-authored-by: artem1205 <artem1205@users.noreply.github.com>
Co-authored-by: perangel <perangel@gmail.com>
Co-authored-by: Joe Bell <joseph.bell@airbyte.io>
Co-authored-by: Obioma Anomnachi <onanomnachi@gmail.com>
Co-authored-by: maxi297 <maxi297@users.noreply.github.com>
Co-authored-by: SatishChGit <satishchinthanippu@gmail.com>
Co-authored-by: Brian Leonard <brian@bleonard.com>
Co-authored-by: David Wallace <dwallace0723@gmail.com>
Co-authored-by: pmossman <pmossman@users.noreply.github.com>
Co-authored-by: Stephane Geneix <stephane@airbyte.io>
Co-authored-by: Alexandre Cuoci <Hesperide@users.noreply.github.com>
Co-authored-by: Danny Tiesling <tiesling@gmail.com>
  • Loading branch information
Show file tree
Hide file tree
Showing 14 changed files with 259 additions and 317 deletions.
Expand Up @@ -4,7 +4,7 @@ plugins {
}

airbyteJavaConnector {
cdkVersionRequired = '0.20.6'
cdkVersionRequired = '0.23.8'
features = ['db-sources', 'datastore-mongo']
useLocalCdk = false
}
Expand Down
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: database
connectorType: source
definitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e
dockerImageTag: 1.2.15
dockerImageTag: 1.2.16
dockerRepository: airbyte/source-mongodb-v2
documentationUrl: https://docs.airbyte.com/integrations/sources/mongodb-v2
githubIssueLabel: source-mongodb-v2
Expand Down
Expand Up @@ -11,18 +11,18 @@
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;
import com.mongodb.client.model.Sorts;
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateIterator;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateEmitFrequency;
import io.airbyte.commons.exceptions.ConfigErrorException;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mongodb.cdc.MongoDbCdcConnectorMetadataInjector;
import io.airbyte.integrations.source.mongodb.state.IdType;
import io.airbyte.integrations.source.mongodb.state.MongoDbStateManager;
import io.airbyte.integrations.source.mongodb.state.MongoDbStreamState;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.SyncMode;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -53,8 +53,6 @@ public List<AutoCloseableIterator<AirbyteMessage>> getIterators(
final List<ConfiguredAirbyteStream> streams,
final MongoDbStateManager stateManager,
final MongoDatabase database,
final MongoDbCdcConnectorMetadataInjector cdcConnectorMetadataInjector,
final Instant emittedAt,
final int checkpointInterval,
final boolean isEnforceSchema) {
return streams
Expand Down Expand Up @@ -113,10 +111,9 @@ public List<AutoCloseableIterator<AirbyteMessage>> getIterators(
.sort(Sorts.ascending(MongoConstants.ID_FIELD))
.allowDiskUse(true)
.cursor();

final var stateIterator =
new MongoDbStateIterator(cursor, stateManager, Optional.ofNullable(cdcConnectorMetadataInjector),
airbyteStream, emittedAt, checkpointInterval, MongoConstants.CHECKPOINT_DURATION, isEnforceSchema);
new SourceStateIterator<>(cursor, airbyteStream, stateManager, new StateEmitFrequency(checkpointInterval,
MongoConstants.CHECKPOINT_DURATION));
return AutoCloseableIterators.fromIterator(stateIterator, cursor::close, null);
})
.toList();
Expand Down
Expand Up @@ -122,8 +122,9 @@ public AutoCloseableIterator<AirbyteMessage> read(final JsonNode config,
final JsonNode state) {
final var emittedAt = Instant.now();
final var cdcMetadataInjector = MongoDbCdcConnectorMetadataInjector.getInstance(emittedAt);
final var stateManager = MongoDbStateManager.createStateManager(state);
final MongoDbSourceConfig sourceConfig = new MongoDbSourceConfig(config);
final var stateManager = MongoDbStateManager.createStateManager(state, sourceConfig);

if (catalog != null) {
MongoUtil.checkSchemaModeMismatch(sourceConfig.getEnforceSchema(),
stateManager.getCdcState() != null ? stateManager.getCdcState().schema_enforced() : sourceConfig.getEnforceSchema(), catalog);
Expand Down

This file was deleted.

Expand Up @@ -139,8 +139,8 @@ public List<AutoCloseableIterator<AirbyteMessage>> createCdcIterators(
MongoDbCdcInitialSnapshotUtils.getStreamsForInitialSnapshot(mongoClient, stateManager, catalog, savedOffsetIsValid);
final InitialSnapshotHandler initialSnapshotHandler = new InitialSnapshotHandler();
final List<AutoCloseableIterator<AirbyteMessage>> initialSnapshotIterators =
initialSnapshotHandler.getIterators(initialSnapshotStreams, stateManager, mongoClient.getDatabase(databaseName), cdcMetadataInjector,
emittedAt, config.getCheckpointInterval(), isEnforceSchema);
initialSnapshotHandler.getIterators(initialSnapshotStreams, stateManager, mongoClient.getDatabase(databaseName),
config.getCheckpointInterval(), isEnforceSchema);

final AirbyteDebeziumHandler<BsonTimestamp> handler = new AirbyteDebeziumHandler<>(config.getDatabaseConfig(),
new MongoDbCdcTargetPosition(initialResumeToken), false, firstRecordWaitTime, subsequentRecordWaitTime, queueSize, false);
Expand Down

0 comments on commit d5e91ae

Please sign in to comment.