From ad55ab01d12e5cea5453abdd861b32c65cd81da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=ABl=20Luijmes?= Date: Tue, 7 Dec 2021 20:27:40 +0100 Subject: [PATCH] MongoDB limit 10k records during Discovery(#8491) Co-authored-by: Marcos Marx --- .../b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../lib/src/main/java/io/airbyte/db/mongodb/MongoUtils.java | 3 +++ airbyte-integrations/connectors/source-mongodb-v2/Dockerfile | 2 +- docs/integrations/sources/mongodb-v2.md | 1 + 6 files changed, 8 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json index f060b0e3bd8c..9ea997bd86f2 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e", "name": "MongoDb", "dockerRepository": "airbyte/source-mongodb-v2", - "dockerImageTag": "0.1.8", + "dockerImageTag": "0.1.9", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mongodb-v2", "icon": "mongodb.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 934e40841a77..c2721c08f642 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -414,7 +414,7 @@ - name: MongoDb sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e dockerRepository: airbyte/source-mongodb-v2 - dockerImageTag: 0.1.8 + dockerImageTag: 0.1.9 documentationUrl: https://docs.airbyte.io/integrations/sources/mongodb-v2 icon: mongodb.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 070e55fb7bcd..ffea1be8e945 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3876,7 +3876,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mongodb-v2:0.1.8" +- dockerImage: "airbyte/source-mongodb-v2:0.1.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/mongodb/MongoUtils.java b/airbyte-db/lib/src/main/java/io/airbyte/db/mongodb/MongoUtils.java index 20beb834522d..9597f0308df6 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/mongodb/MongoUtils.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/mongodb/MongoUtils.java @@ -49,6 +49,7 @@ public class MongoUtils { private static final String MISSING_TYPE = "missing"; private static final String NULL_TYPE = "null"; private static final String AIRBYTE_SUFFIX = "_aibyte_transform"; + private static final int DISCOVER_LIMIT = 10000; public static JsonSchemaPrimitive getType(final BsonType dataType) { return switch (dataType) { @@ -194,6 +195,7 @@ public static Map getUniqueFields(final MongoCollection getFieldsName(MongoCollection collection) { AggregateIterable output = collection.aggregate(Arrays.asList( + new Document("$limit", DISCOVER_LIMIT), new Document("$project", new Document("arrayofkeyvalue", new Document("$objectToArray", "$$ROOT"))), new Document("$unwind", "$arrayofkeyvalue"), new Document("$group", new Document("_id", null).append("allkeys", new Document("$addToSet", "$arrayofkeyvalue.k"))))); @@ -207,6 +209,7 @@ private static List getFieldsName(MongoCollection collection) private static ArrayList getTypes(MongoCollection collection, String name) { var fieldName = "$" + name; AggregateIterable output = collection.aggregate(Arrays.asList( + new Document("$limit", DISCOVER_LIMIT), new Document("$project", new Document("_id", 0).append("fieldType", new Document("$type", fieldName))), new Document("$group", new Document("_id", new Document("fieldType", "$fieldType")) .append("count", new Document("$sum", 1))))); diff --git a/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile b/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile index caa82e4ab49a..a76b9b06e95e 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile +++ b/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.8 +LABEL io.airbyte.version=0.1.9 LABEL io.airbyte.name=airbyte/source-mongodb-v2 diff --git a/docs/integrations/sources/mongodb-v2.md b/docs/integrations/sources/mongodb-v2.md index b7c2b658054f..9345e4475256 100644 --- a/docs/integrations/sources/mongodb-v2.md +++ b/docs/integrations/sources/mongodb-v2.md @@ -102,6 +102,7 @@ For more information regarding configuration parameters, please see [MongoDb Doc | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.9 | 2021-12-07 | [8491](https://github.com/airbytehq/airbyte/pull/8491) | Configure 10000 limit doc reading during Discovery step | | 0.1.8 | 2021-11-29 | [8306](https://github.com/airbytehq/airbyte/pull/8306) | Added milliseconds for date format for cursor | | 0.1.7 | 2021-11-22 | [8161](https://github.com/airbytehq/airbyte/pull/8161) | Updated Performance and updated cursor for timestamp type | | 0.1.5 | 2021-11-17 | [8046](https://github.com/airbytehq/airbyte/pull/8046) | Added milliseconds to convert timestamp to datetime format |