Skip to content

Commit

Permalink
MongoDB: limit to 10k records during Discovery (#8491)
Browse files Browse the repository at this point in the history
Co-authored-by: Marcos Marx <marcosmarxm@gmail.com>
  • Loading branch information
joelluijmes and marcosmarxm committed Dec 7, 2021
1 parent 5ab5cfc commit ad55ab0
Show file tree
Hide file tree
Showing 6 changed files with 8 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e",
"name": "MongoDb",
"dockerRepository": "airbyte/source-mongodb-v2",
"dockerImageTag": "0.1.8",
"dockerImageTag": "0.1.9",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/mongodb-v2",
"icon": "mongodb.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@
- name: MongoDb
sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e
dockerRepository: airbyte/source-mongodb-v2
dockerImageTag: 0.1.8
dockerImageTag: 0.1.9
documentationUrl: https://docs.airbyte.io/integrations/sources/mongodb-v2
icon: mongodb.svg
sourceType: database
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3876,7 +3876,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-mongodb-v2:0.1.8"
- dockerImage: "airbyte/source-mongodb-v2:0.1.9"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public class MongoUtils {
private static final String MISSING_TYPE = "missing";
private static final String NULL_TYPE = "null";
private static final String AIRBYTE_SUFFIX = "_aibyte_transform";
private static final int DISCOVER_LIMIT = 10000;

public static JsonSchemaPrimitive getType(final BsonType dataType) {
return switch (dataType) {
Expand Down Expand Up @@ -194,6 +195,7 @@ public static Map<String, BsonType> getUniqueFields(final MongoCollection<Docume

private static List<String> getFieldsName(MongoCollection<Document> collection) {
AggregateIterable<Document> output = collection.aggregate(Arrays.asList(
new Document("$limit", DISCOVER_LIMIT),
new Document("$project", new Document("arrayofkeyvalue", new Document("$objectToArray", "$$ROOT"))),
new Document("$unwind", "$arrayofkeyvalue"),
new Document("$group", new Document("_id", null).append("allkeys", new Document("$addToSet", "$arrayofkeyvalue.k")))));
Expand All @@ -207,6 +209,7 @@ private static List<String> getFieldsName(MongoCollection<Document> collection)
private static ArrayList<String> getTypes(MongoCollection<Document> collection, String name) {
var fieldName = "$" + name;
AggregateIterable<Document> output = collection.aggregate(Arrays.asList(
new Document("$limit", DISCOVER_LIMIT),
new Document("$project", new Document("_id", 0).append("fieldType", new Document("$type", fieldName))),
new Document("$group", new Document("_id", new Document("fieldType", "$fieldType"))
.append("count", new Document("$sum", 1)))));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

RUN tar xf ${APPLICATION}.tar --strip-components=1

LABEL io.airbyte.version=0.1.8
LABEL io.airbyte.version=0.1.9
LABEL io.airbyte.name=airbyte/source-mongodb-v2
1 change: 1 addition & 0 deletions docs/integrations/sources/mongodb-v2.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ For more information regarding configuration parameters, please see [MongoDb Doc

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.1.9 | 2021-12-07 | [8491](https://github.com/airbytehq/airbyte/pull/8491) | Limit document reading to 10000 documents during the Discovery step |
| 0.1.8 | 2021-11-29 | [8306](https://github.com/airbytehq/airbyte/pull/8306) | Added milliseconds for date format for cursor |
| 0.1.7 | 2021-11-22 | [8161](https://github.com/airbytehq/airbyte/pull/8161) | Updated Performance and updated cursor for timestamp type |
| 0.1.5 | 2021-11-17 | [8046](https://github.com/airbytehq/airbyte/pull/8046) | Added milliseconds to convert timestamp to datetime format |
Expand Down

0 comments on commit ad55ab0

Please sign in to comment.