diff --git a/README.md b/README.md
index 7d2dc34..ce5afc1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Dataform Assertions
 
-Unlock advanced data testing capabilities with this Dataform package, offering a comprehensive and common suite of assertions designed for testing various facets of your warehouse data, including data freshness, unique keys, row conditions, and data completeness.
+Unlock advanced data testing capabilities with this Dataform package, offering a comprehensive and common suite of assertions designed for testing various facets of your warehouse data, including data freshness, unique keys, row conditions, data completeness, and referential integrity.
 
 Your contributions are highly encouraged – whether you have an innovative assertion idea or wish to enhance the existing ones, feel free to open an issue or submit a pull request to enrich the Dataform community.
 
@@ -72,6 +72,7 @@ This package includes the following types of assertions:
 - **Unique key conditions**: Check if a given primary key (can be a set of columns) is not duplicated in a table.
 - **Data freshness conditions**: Check if the data in a table is fresh enough given some conditions.
 - **Data completeness conditions**: Check if the data in a column have less than a given percentage of null values.
+- **Referential integrity conditions**: Check if foreign key relationships are maintained between tables.
 
 ## Warning
 
diff --git a/definitions/example.js b/definitions/example.js
index 5f87063..b682cd6 100644
--- a/definitions/example.js
+++ b/definitions/example.js
@@ -8,6 +8,7 @@ const commonAssertionsResult = commonAssertions({
     "tags": ["assertions"],
     // Sometimes data quality is not good in some environments,
     // assertions can be disabled in those environments.
+    // This relies on 'dataform.projectConfig.vars.env': set the 'env' var in 'dataform.json' for it to work.
     // "disabledInEnvs": ["dv", "qa"]
   },
   rowConditions: {
@@ -44,6 +45,19 @@ const commonAssertionsResult = commonAssertions({
     "second_table": {
       "id": 30
     }
+  },
+  referentialIntegrityConditions: {
+    "first_table": [{
+        "parentKey": "id",
+        "childTable": "second_table",
+        "childKey": "id"
+      },
+      {
+        "parentKey": "id",
+        "childTable": "third_table",
+        "childKey": "parent_id"
+      }
+    ]
   }
 });
 
diff --git a/definitions/first_table.sqlx b/definitions/first_table.sqlx
index 79da128..295a510 100644
--- a/definitions/first_table.sqlx
+++ b/definitions/first_table.sqlx
@@ -2,6 +2,10 @@ config {
   type: "table"
 }
 
+SELECT
+  1 AS id,
+  CURRENT_DATE() AS updated_date
+UNION ALL
 SELECT
   1 AS id,
   CURRENT_DATE() AS updated_date
diff --git a/definitions/third_table.sqlx b/definitions/third_table.sqlx
new file mode 100644
index 0000000..222b13d
--- /dev/null
+++ b/definitions/third_table.sqlx
@@ -0,0 +1,11 @@
+config {
+  type: "table"
+}
+
+SELECT
+  1 AS parent_id,
+  CURRENT_DATE() AS updated_date
+UNION ALL
+SELECT
+  2 AS parent_id,
+  CURRENT_DATE() AS updated_date
diff --git a/includes/referential_integrity_assertions.js b/includes/referential_integrity_assertions.js
new file mode 100644
index 0000000..4659039
--- /dev/null
+++ b/includes/referential_integrity_assertions.js
@@ -0,0 +1,74 @@
+/**
+ * referential_integrity_assertions.js
+ *
+ * This file contains a function to create referential integrity assertions for specific tables in a database.
+ * The assertions are used to check if the foreign key relationships are maintained between tables.
+ * The conditions for referential integrity checks are defined in an object format:
+ * { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
+ *
+ * The exported function takes in global parameters and the referential integrity conditions.
+ */
+
+/**
+ * Creates the referential integrity assertion for one parent/child relationship.
+ *
+ * The query selects the `parentKey` values of `parentTable` that have no matching `childKey` in `childTable`.
+ * NOTE(review): a foreign key check usually looks for child rows without a parent, which is the
+ * opposite direction - confirm that the current direction is the intended one.
+ * NOTE(review): the assertion name only contains the two table names, so two relationships between
+ * the same pair of tables get the same name - confirm whether the keys should be part of it.
+ *
+ * @param {Object} globalParams - See index.js for details.
+ * @param {string} parentTable - The name of the parent table in the foreign key relationship.
+ * @param {string} parentKey - The name of the column in the parent table that is the primary key.
+ * @param {string} childTable - The name of the child table in the foreign key relationship.
+ * @param {string} childKey - The name of the column in the child table that is the foreign key.
+ * @returns {Object} The created assertion.
+ */
+const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKey, childTable, childKey) => {
+  const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`)
+    .database(globalParams.database)
+    .schema(globalParams.schema)
+    .description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`)
+    .tags("assert-referential-integrity")
+    .query(ctx => `
+      SELECT pt.${parentKey}
+      FROM ${ctx.ref(parentTable)} AS pt
+      LEFT JOIN ${ctx.ref(childTable)} AS t ON t.${childKey} = pt.${parentKey}
+      WHERE t.${childKey} IS NULL
+    `);
+
+  if (globalParams.tags) {
+    globalParams.tags.forEach((tag) => assertion.tags(tag));
+  }
+
+  if (globalParams.disabledInEnvs) {
+    // Tolerate a project configuration without `vars`: the environment is then undefined.
+    const env = (dataform.projectConfig.vars || {}).env;
+    if (globalParams.disabledInEnvs.includes(env)) {
+      assertion.disabled();
+    }
+  }
+
+  return assertion;
+};
+
+/**
+ * Creates one assertion per relationship listed in `referentialIntegrityConditions`.
+ *
+ * @param {Object} globalParams - See index.js for details.
+ * @param {Object} referentialIntegrityConditions - Format: { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
+ * @returns {Array} The assertions created by this call.
+ */
+module.exports = (globalParams, referentialIntegrityConditions) => {
+  // Collected per call: a module-level array would also return the assertions of earlier calls.
+  const assertions = [];
+
+  Object.keys(referentialIntegrityConditions).forEach((parentTable) => {
+    referentialIntegrityConditions[parentTable].forEach(({ parentKey, childTable, childKey }) => {
+      assertions.push(createReferentialIntegrityAssertion(globalParams, parentTable, parentKey, childTable, childKey));
+    });
+  });
+
+  return assertions;
+};
diff --git a/index.js b/index.js
index f5ba9b2..62ede55 100644
--- a/index.js
+++ b/index.js
@@ -17,12 +17,14 @@
  * @property {Object} uniqueKeyConditions - An object mapping table names to unique key conditions. Format: { tableName: [column1, column2, ...], ... }
  * @property {Object} dataFreshnessConditions - An object mapping table names to data freshness conditions. Format: { tableName: { delayCondition, timeUnit, dateColumn }, ... }
  * @property {Object} dataCompletenessConditions - An object mapping table names to data completeness conditions. Format: { tableName: { columnName: allowedPercentageNull, ... }, ... }
+ * @property {Object} referentialIntegrityConditions - An object mapping parent table names to referential integrity conditions. Format: { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
  */
 
 const row_condition_assertions = require("./includes/row_condition_assertions");
 const unique_key_assertions = require("./includes/unique_key_assertions");
 const data_freshness_assertions = require("./includes/data_freshness_assertions");
 const data_completeness_assertions = require("./includes/data_completeness_assertions");
+const referential_integrity_assertions = require("./includes/referential_integrity_assertions");
 
 module.exports = ({
   globalAssertionsParams = {
@@ -35,17 +37,20 @@ module.exports = ({
   rowConditions = {},
   uniqueKeyConditions = {},
   dataFreshnessConditions = {},
-  dataCompletenessConditions = {}
+  dataCompletenessConditions = {},
+  referentialIntegrityConditions = {}
 }) => {
   const rowConditionAssertionsResult = row_condition_assertions(globalAssertionsParams, rowConditions);
   const uniqueKeyAssertionsResult = unique_key_assertions(globalAssertionsParams, uniqueKeyConditions);
   const dataFreshnessAssertionsResult = data_freshness_assertions(globalAssertionsParams, dataFreshnessConditions);
   const dataCompletenessAssertionsResult = data_completeness_assertions(globalAssertionsParams, dataCompletenessConditions);
+  const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, referentialIntegrityConditions);
 
   return {
     rowConditionAssertions: rowConditionAssertionsResult,
     uniqueKeyAssertions: uniqueKeyAssertionsResult,
     dataFreshnessAssertions: dataFreshnessAssertionsResult,
-    dataCompletenessAssertions: dataCompletenessAssertionsResult
+    dataCompletenessAssertions: dataCompletenessAssertionsResult,
+    referentialIntegrityAssertions: referentialIntegrityAssertionsResult
   };
 }
diff --git a/package-lock.json b/package-lock.json
index 81ffe4a..793decc 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@devoteamgcloud/dataform-assertions",
-  "version": "1.0.2",
+  "version": "1.1.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@devoteamgcloud/dataform-assertions",
-      "version": "1.0.2",
+      "version": "1.1.0",
       "dependencies": {
         "@dataform/core": "2.9.0"
       }
diff --git a/package.json b/package.json
index a99907b..0977955 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@devoteamgcloud/dataform-assertions",
-  "version": "1.0.2",
+  "version": "1.1.0",
   "repository": {
     "type": "git",
     "url": "https://github.com/devoteamgcloud/dataform-assertions.git"