Skip to content

Commit

Permalink
Merge pull request #1 from devoteamgcloud/dv
Browse files Browse the repository at this point in the history
New assertion type
  • Loading branch information
hrialan committed Mar 8, 2024
2 parents 1c47146 + 231d3ec commit 7a9b9bd
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 6 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Dataform Assertions

Unlock advanced data testing capabilities with this Dataform package, offering a comprehensive and common suite of assertions designed for testing various facets of your warehouse data, including data freshness, unique keys, row conditions, and data completeness.
Unlock advanced data testing capabilities with this Dataform package, offering a comprehensive and common suite of assertions designed for testing various facets of your warehouse data, including data freshness, unique keys, row conditions, data completeness and referential integrity.

Your contributions are highly encouraged – whether you have an innovative assertion idea or wish to enhance the existing ones, feel free to open an issue or submit a pull request to enrich the Dataform community.

Expand Down Expand Up @@ -72,6 +72,7 @@ This package includes the following types of assertions:
- **Unique key conditions**: Check if a given primary key (can be a set of columns) is not duplicated in a table.
- **Data freshness conditions**: Check if the data in a table is fresh enough given some conditions.
- **Data completeness conditions**: Check if a column contains less than a given percentage of null values.
- **Referential integrity conditions**: Check if foreign key relationships are maintained between tables.

## Warning

Expand Down
14 changes: 14 additions & 0 deletions definitions/example.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const commonAssertionsResult = commonAssertions({
"tags": ["assertions"],
// Sometimes data quality is not good in some environments,
// assertions can be disabled in those environments.
// Set the 'dataform.projectConfig.vars.env' var in 'dataform.json' for this to work.
// "disabledInEnvs": ["dv", "qa"]
},
rowConditions: {
Expand Down Expand Up @@ -44,6 +45,19 @@ const commonAssertionsResult = commonAssertions({
"second_table": {
"id": 30
}
},
referentialIntegrityConditions: {
"first_table": [{
"parentKey": "id",
"childTable": "second_table",
"childKey": "id"
},
{
"parentKey": "id",
"childTable": "third_table",
"childKey": "parent_id"
}
]
}
});

Expand Down
4 changes: 4 additions & 0 deletions definitions/first_table.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ config {
type: "table"
}

SELECT
1 AS id,
CURRENT_DATE() AS updated_date
UNION ALL
SELECT
1 AS id,
CURRENT_DATE() AS updated_date
Expand Down
11 changes: 11 additions & 0 deletions definitions/third_table.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
config {
type: "table"
}

-- Example fixture table. definitions/example.js declares it as a child table of
-- first_table in referentialIntegrityConditions, with parent_id as the child key.
SELECT
1 AS parent_id,
CURRENT_DATE() AS updated_date
UNION ALL
SELECT
2 AS parent_id,
CURRENT_DATE() AS updated_date
56 changes: 56 additions & 0 deletions includes/referential_integrity_assertions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* referential_integrity_assertions.js
*
* This file contains a function to create referential integrity assertions for specific tables in a database.
* The assertions are used to check if the foreign key relationships are maintained between tables.
* The conditions for referential integrity checks are defined in an object format:
* { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
*
* The function `createReferentialIntegrityAssertions` takes in global parameters and the referential integrity conditions.
*/

/**
* @param {Object} globalParams - See index.js for details.
* @param {Object} parentTable - The name of the parent table in the foreign key relationship.
* @param {Object} parentKey - The name of the column in the parent table that is the primary key.
* @param {Object} childTable - The name of the child table in the foreign key relationship.
* @param {Object} childKey - The name of the column in the child table that is the foreign key.
*/

const assertions = [];

const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKey, childTable, childKey) => {

const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`)
.database(globalParams.database)
.schema(globalParams.schema)
.description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`)
.tags("assert-referential-integrity")
.query(ctx => `
SELECT pt.${parentKey}
FROM ${ctx.ref(parentTable)} AS pt
LEFT JOIN ${ctx.ref(childTable)} AS t ON t.${childKey} = pt.${parentKey}
WHERE t.${childKey} IS NULL
`);

(globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag)));

(globalParams.disabledInEnvs && globalParams.disabledInEnvs.includes(dataform.projectConfig.vars.env)) && assertion.disabled();

assertions.push(assertion);
};

module.exports = (globalParams, referentialIntegrityConditions) => {
for (let parentTable in referentialIntegrityConditions) {
const relationships = referentialIntegrityConditions[parentTable];

relationships.forEach(({
parentKey,
childTable,
childKey
}) => {
createReferentialIntegrityAssertion(globalParams, parentTable, parentKey, childTable, childKey);
})
}
return assertions;
};
9 changes: 7 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
* @property {Object} uniqueKeyConditions - An object mapping table names to unique key conditions. Format: { tableName: [column1, column2, ...], ... }
* @property {Object} dataFreshnessConditions - An object mapping table names to data freshness conditions. Format: { tableName: { delayCondition, timeUnit, dateColumn }, ... }
* @property {Object} dataCompletenessConditions - An object mapping table names to data completeness conditions. Format: { tableName: { columnName: allowedPercentageNull, ... }, ... }
* @property {Object} referentialIntegrityConditions - An object mapping parent table names to referential integrity conditions. Format: { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
*/

const row_condition_assertions = require("./includes/row_condition_assertions");
const unique_key_assertions = require("./includes/unique_key_assertions");
const data_freshness_assertions = require("./includes/data_freshness_assertions");
const data_completeness_assertions = require("./includes/data_completeness_assertions");
const referential_integrity_assertions = require("./includes/referential_integrity_assertions");

module.exports = ({
globalAssertionsParams = {
Expand All @@ -35,17 +37,20 @@ module.exports = ({
rowConditions = {},
uniqueKeyConditions = {},
dataFreshnessConditions = {},
dataCompletenessConditions = {}
dataCompletenessConditions = {},
referentialIntegrityConditions = {}
}) => {
const rowConditionAssertionsResult = row_condition_assertions(globalAssertionsParams, rowConditions);
const uniqueKeyAssertionsResult = unique_key_assertions(globalAssertionsParams, uniqueKeyConditions);
const dataFreshnessAssertionsResult = data_freshness_assertions(globalAssertionsParams, dataFreshnessConditions);
const dataCompletenessAssertionsResult = data_completeness_assertions(globalAssertionsParams, dataCompletenessConditions);
const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, referentialIntegrityConditions); // New assertion

return {
rowConditionAssertions: rowConditionAssertionsResult,
uniqueKeyAssertions: uniqueKeyAssertionsResult,
dataFreshnessAssertions: dataFreshnessAssertionsResult,
dataCompletenessAssertions: dataCompletenessAssertionsResult
dataCompletenessAssertions: dataCompletenessAssertionsResult,
referentialIntegrityAssertions: referentialIntegrityAssertionsResult
};
}
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@devoteamgcloud/dataform-assertions",
"version": "1.0.2",
"version": "1.1.0",
"repository": {
"type": "git",
"url": "https://github.com/devoteamgcloud/dataform-assertions.git"
Expand Down

0 comments on commit 7a9b9bd

Please sign in to comment.