From 8e5ec82c1634dff1fa6de9ab0c73c17771add9d1 Mon Sep 17 00:00:00 2001 From: Elias Kassell Date: Wed, 4 Jan 2023 09:05:51 +0000 Subject: [PATCH 1/4] Remove sql package usage --- environments.json | 9 --- includes/sessionized_events.js | 4 +- includes/sessions.js | 4 +- includes/sql.js | 121 +++++++++++++++++++++++++++++++++ includes/user_map.js | 2 +- includes/users.js | 2 +- 6 files changed, 127 insertions(+), 15 deletions(-) delete mode 100644 environments.json create mode 100644 includes/sql.js diff --git a/environments.json b/environments.json deleted file mode 100644 index b825e2f..0000000 --- a/environments.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "environments": [ - { - "name": "production", - "configOverride": {}, - "gitRef": "master" - } - ] -} diff --git a/includes/sessionized_events.js b/includes/sessionized_events.js index 5b8c818..00c2b3f 100644 --- a/includes/sessionized_events.js +++ b/includes/sessionized_events.js @@ -1,4 +1,4 @@ -const sql = require("@dataform/sql")(); +const sql = require("./sql") const segmentCommon = require("./common"); module.exports = (params) => { @@ -42,7 +42,7 @@ select *, coalesce( ( - ${sql.timestamps.diff(`millisecond`, + ${sql.timestampDiff(`millisecond`, sql.windowFunction( "lag", "timestamp", diff --git a/includes/sessions.js b/includes/sessions.js index f4caf96..39f207c 100644 --- a/includes/sessions.js +++ b/includes/sessions.js @@ -1,5 +1,5 @@ const segmentCommon = require("./common"); -const sql = require("@dataform/sql")(); +const sql = require("./sql") module.exports = (params) => { @@ -92,7 +92,7 @@ select ${ctx.when(global.dataform.projectConfig.warehouse == "bigquery", `struct(\n `)} ${segmentCommon.enabledEvents(params).map((event) => `count(segment_sessionized_events.${event}_id) as total_${event}s`).join(`,\n `)}, - ${sql.timestamps.diff("millisecond", "min(segment_sessionized_events.timestamp)", "max(segment_sessionized_events.timestamp)")} as duration_millis + ${sql.timestampDiff("millisecond", "min(segment_sessionized_events.timestamp)", "max(segment_sessionized_events.timestamp)")} as duration_millis ${ctx.when(global.dataform.projectConfig.warehouse == "bigquery", `) as stats`)} -- first values in the session for page fields diff --git a/includes/sql.js b/includes/sql.js new file mode 100644 index 0000000..a513758 --- /dev/null +++ b/includes/sql.js @@ -0,0 +1,121 @@ +const getDialect = () => { + const dataformWarehouse = global.dataform.projectConfig.warehouse; + if (!dataformWarehouse) { + return "standard"; + } + return { + bigquery: "standard", + redshift: "redshift", + postgres: "postgres", + snowflake: "snowflake", + sqldatawarehouse: "mssql", + }[dataformWarehouse]; +}; + +const timestampDiff = (datePart, start, end) => { + const dialect = getDialect(); + if (dialect === "snowflake" || dialect === "redshift") { + return `datediff(${datePart}, ${start}, ${end})`; + } + return `timestamp_diff(${end}, ${start}, ${datePart})`; +}; + +const windowFunction = ( + name, + value, + ignoreNulls = false, + windowSpecification +) => { + const dialect = getDialect(); + const partitionFieldsAsString = windowSpecification.partitionFields + ? [...windowSpecification.partitionFields].join(`, `) + : ""; + const orderFieldsAsString = windowSpecification.orderFields + ? [...windowSpecification.orderFields].join(`, `) + : ""; + + if ( + dialect === "standard" || + dialect === "mssql" || + dialect === "snowflake" + ) { + return `${name}(${value} ${ignoreNulls ? `ignore nulls` : ``}) over (${ + windowSpecification.partitionFields + ? `partition by ${partitionFieldsAsString}` + : `` + } ${ + windowSpecification.orderFields ? `order by ${orderFieldsAsString}` : `` + } ${ + windowSpecification.frameClause ? windowSpecification.frameClause : `` + })`; + } + + // For some window functions in Redshift, a frame clause is always required + const requiresFrame = [ + "avg", + "count", + "first_value", + "last_value", + "max", + "min", + "nth_value", + "stddev_samp", + "stddev_pop", + "stddev", + "sum", + "variance", + "var_samp", + "var_pop", + ].includes(name.toLowerCase()); + + if (dialect === "redshift") { + return `${name}(${value} ${ignoreNulls ? `ignore nulls` : ``}) over (${ + windowSpecification.partitionFields + ? `partition by ${partitionFieldsAsString}` + : `` + } ${ + windowSpecification.orderFields ? `order by ${orderFieldsAsString}` : `` + } ${ + windowSpecification.orderFields + ? windowSpecification.frameClause + ? windowSpecification.frameClause + : requiresFrame + ? `rows between unbounded preceding and unbounded following` + : `` + : `` + })`; + } + + if (dialect === "postgres") { + return `${name}(${value}) over (${ + windowSpecification.partitionFields + ? `partition by ${partitionFieldsAsString}` + : `` + } ${windowSpecification.orderFields || ignoreNulls ? `order by` : ``} ${ + ignoreNulls ? `case when ${value} is not null then 0 else 1 end asc` : `` + } ${orderFieldsAsString && ignoreNulls ? `,` : ``} ${orderFieldsAsString} ${ + windowSpecification.orderFields + ? windowSpecification.frameClause + ? windowSpecification.frameClause + : requiresFrame + ? `rows between unbounded preceding and unbounded following` + : `` + : `` + })`; + } +}; + +const surrogateKey = (columnNames) => { + const dialect = getDialect(); + const columnsAsStrings = columnNames.map((id) => this.asString(id)).join(`,`); + if (dialect === "standard") { + return this.asString(`farm_fingerprint(concat(${columnsAsStrings}))`); + } + return this.asString(`md5(concat(${columnsAsStrings}))`); +}; + +module.exports = { + timestampDiff, + windowFunction, + surrogateKey, +}; diff --git a/includes/user_map.js b/includes/user_map.js index d45f6f4..20432b6 100644 --- a/includes/user_map.js +++ b/includes/user_map.js @@ -1,4 +1,4 @@ -const sql = require("@dataform/sql")(); +const sql = require("./sql") const segmentCommon = require("./common"); module.exports = (params) => { diff --git a/includes/users.js b/includes/users.js index 0ee1539..583a08b 100644 --- a/includes/users.js +++ b/includes/users.js @@ -1,4 +1,4 @@ -const sql = require("@dataform/sql")(); +const sql = require("./sql") let USER = `coalesce( identifies.user_id, From 370d1c4170af86bda6e267c5e054fe542d339cda Mon Sep 17 00:00:00 2001 From: Elias Kassell Date: Wed, 4 Jan 2023 09:07:35 +0000 Subject: [PATCH 2/4] npm uninstall --- package-lock.json | 139 ++++++++++++++++++++++++++++++++++++++++++++-- package.json | 3 +- 2 files changed, 134 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index 6c59241..62d466d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,138 @@ { + "name": "dataform-segment", + "lockfileVersion": 2, "requires": true, - "lockfileVersion": 1, + "packages": { + "": { + "name": "dataform-segment", + "dependencies": { + "@dataform/core": "1.15.5" + } + }, + "node_modules/@dataform/core": { + "version": "1.15.5", + "resolved": "https://registry.npmjs.org/@dataform/core/-/core-1.15.5.tgz", + "integrity": "sha512-Tn9v1Voxojgr9lEb738+0+BFfY3mx7XDC09nRgMmxYrErtmdrJcTKzsVPDZad3r7axyaZafSrd7rRo9aGsNnow==", + "dependencies": { + "moo": "^0.5.0", + "protobufjs": "6.8.8", + "semver": "^5.6.0", + "tarjan-graph": "^2.0.0" + } + }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha1-m4sMxmPWaafY9vXQiToU00jzD78=" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha1-NVy8mLr61ZePntCV85diHx0Ga3A=" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha1-upn7WYYUr2VwDBYZ/wbUVLDYTEU=", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha1-Xp4avctz/Ap8uLKR33jIy9l7h9E=" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha1-/yAOPnzyQp4tyvwRQIKOjMY48Ik=" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha1-bMKyDFya1q0NzP0hynZz2Nf79o0=" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha1-Cf0V8tbTq/qbZbw2ZQbWrXhG/1Q=" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA=" + }, + "node_modules/@types/long": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.1.tgz", + "integrity": "sha512-5tXH6Bx/kNGd3MgffdmP4dy2Z+G4eaXw0SE81Tq3BNadtnMR5/ySMzX4SLEzHJzSmPNn4HIdpQsBvXMUykr58w==" + }, + "node_modules/@types/node": { + "version": "10.17.44", + "resolved": "https://registry.npmjs.org/@types/node/-/node-10.17.44.tgz", + "integrity": "sha512-vHPAyBX1ffLcy4fQHmDyIUMUb42gHZjPHU66nhvbMzAWJqHnySGZ6STwN3rwrnSd1FHB0DI/RWgGELgKSYRDmw==" + }, + "node_modules/long": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz", + "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==" + }, + "node_modules/moo": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/moo/-/moo-0.5.1.tgz", + "integrity": "sha512-I1mnb5xn4fO80BH9BLcF0yLypy2UKl+Cb01Fu0hJRkJjlCRtxZMWkTdAtDd5ZqCOxtCkhmRwyI57vWT+1iZ67w==" + }, + "node_modules/protobufjs": { + "version": "6.8.8", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.8.8.tgz", + "integrity": "sha512-AAmHtD5pXgZfi7GMpllpO3q1Xw1OYldr+dMUlAnffGTAhqkg72WdmSY71uKBF/JuyiKs8psYbtKrhi0ASCD8qw==", + "hasInstallScript": true, + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/long": "^4.0.0", + "@types/node": "^10.1.0", + "long": "^4.0.0" + }, + "bin": { + "pbjs": "bin/pbjs", + "pbts": "bin/pbts" + } + }, + "node_modules/semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "bin": { + "semver": "bin/semver" + } + }, + "node_modules/tarjan-graph": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/tarjan-graph/-/tarjan-graph-2.0.0.tgz", + "integrity": "sha512-fDe57nO2Ukw2A/jHwVeiEgERGrGHukf3aHmR/YZ9BrveOtHVlFs289AnVeb1wD2aj9g01ZZ6f7VyMJ2QxI2NBQ==" + } + }, "dependencies": { "@dataform/core": { "version": "1.15.5", @@ -13,11 +145,6 @@ "tarjan-graph": "^2.0.0" } }, - "@dataform/sql": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/@dataform/sql/-/sql-0.2.0.tgz", - "integrity": "sha512-4blUV2+aZklI5wB5MWP+0ZZFlCfQWUPfTIPghGA+4oRfWtGmZTdo1y+7kx9yjms15E14B40Vax86wtn7sUzqSg==" - }, "@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", diff --git a/package.json b/package.json index f74b1f2..8a754ba 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,6 @@ { "name": "dataform-segment", "dependencies": { - "@dataform/core": "1.15.5", - "@dataform/sql": "0.2.0" + "@dataform/core": "1.15.5" } } From a28ca8e04f2891d1729b5f25bff1d009db6c342f Mon Sep 17 00:00:00 2001 From: Elias Kassell Date: Wed, 4 Jan 2023 09:09:09 +0000 Subject: [PATCH 3/4] Add asString and verify --- dataform.json | 4 ++-- includes/sql.js | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dataform.json b/dataform.json index d96877b..2a0ff1f 100644 --- a/dataform.json +++ b/dataform.json @@ -1,6 +1,6 @@ { "defaultSchema": "segment_dataform_package", "assertionSchema": "segment_dataform_package", - "warehouse": "bigquery", + "warehouse": "redshift", "gcloudProjectId": "dataform-corp" -} \ No newline at end of file +} diff --git a/includes/sql.js b/includes/sql.js index a513758..e06cbc9 100644 --- a/includes/sql.js +++ b/includes/sql.js @@ -105,13 +105,21 @@ const windowFunction = ( } }; +const asString = (castableToString) => { + const dialect = getDialect(); + if (dialect === "postgres" || dialect === "redshift") { + return `cast(${castableToString} as varchar)`; + } + return `cast(${castableToString} as string)`; +}; + const surrogateKey = (columnNames) => { const dialect = getDialect(); - const columnsAsStrings = columnNames.map((id) => this.asString(id)).join(`,`); + const columnsAsStrings = columnNames.map((id) => asString(id)).join(`,`); if (dialect === "standard") { - return this.asString(`farm_fingerprint(concat(${columnsAsStrings}))`); + return asString(`farm_fingerprint(concat(${columnsAsStrings}))`); } - return this.asString(`md5(concat(${columnsAsStrings}))`); + return asString(`md5(concat(${columnsAsStrings}))`); }; module.exports = { From 9e78a3e594ef2a617867ed4f090df31c225473a8 Mon Sep 17 00:00:00 2001 From: Elias Kassell Date: Wed, 4 Jan 2023 11:12:11 +0000 Subject: [PATCH 4/4] Change default warehouse back to bigquery --- dataform.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataform.json b/dataform.json index 2a0ff1f..0ee66a3 100644 --- a/dataform.json +++ b/dataform.json @@ -1,6 +1,6 @@ { "defaultSchema": "segment_dataform_package", "assertionSchema": "segment_dataform_package", - "warehouse": "redshift", + "warehouse": "bigquery", "gcloudProjectId": "dataform-corp" }