-
Notifications
You must be signed in to change notification settings - Fork 1.9k
clickhouse dialect implementation #98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
209 changes: 209 additions & 0 deletions
209
packages/cubejs-schema-compiler/adapter/ClickHouseQuery.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
const moment = require('moment-timezone'); // NOTE(review): not referenced anywhere in this file — confirm before removing
const R = require('ramda'); // NOTE(review): not referenced anywhere in this file — confirm before removing

const BaseQuery = require('./BaseQuery');
const BaseFilter = require('./BaseFilter');

// Maps a cube.js granularity name to the suffix of the corresponding
// ClickHouse truncation function, e.g. 'month' -> toStartOfMonth(...).
// 'week' is intentionally absent: ClickHouse truncates weeks with
// toMonday, which timeGroupedColumn handles as a special case.
const GRANULARITY_TO_INTERVAL = {
  date: 'Day',
  hour: 'Hour',
  month: 'Month',
  quarter: 'Quarter',
  year: 'Year',
};
|
|
||
class ClickHouseFilter extends BaseFilter {
  /**
   * Builds a case-insensitive (NOT) LIKE predicate for `column`.
   *
   * Both sides are lower-cased and the bound parameter (`?`) is wrapped in
   * '%' wildcards, giving a case-insensitive "contains" match.
   *
   * @param {string} column - SQL expression for the column being filtered.
   * @param {boolean} not - when truthy, emits NOT LIKE instead of LIKE.
   * @returns {string} SQL predicate with a single `?` placeholder.
   */
  likeIgnoreCase(column, not) {
    const negation = not ? 'NOT' : '';
    return `lower(${column}) ${negation} LIKE CONCAT('%', lower(?), '%')`;
  }
}
|
|
||
|
|
||
/**
 * ClickHouse dialect for the cube.js schema compiler.
 *
 * Overrides the parts of BaseQuery where ClickHouse SQL differs from the
 * default dialect: identifier quoting, timezone conversion, date
 * truncation, interval arithmetic, ISO8601 parsing, and GROUP BY /
 * ORDER BY by alias (ClickHouse does not support ordinal column
 * references in those clauses).
 */
class ClickHouseQuery extends BaseQuery {
  /** @returns {ClickHouseFilter} the dialect-specific filter implementation. */
  newFilter(filter) {
    return new ClickHouseFilter(this, filter);
  }

  /** ClickHouse quotes identifiers with backticks. */
  escapeColumnName(name) {
    return `\`${name}\``;
  }

  /**
   * Converts a Date/DateTime expression to the query timezone.
   *
   * `field` may yield a Date or a DateTime, so the extra toDateTime wraps
   * the Date case.
   *
   * https://clickhouse.yandex/docs/en/data_types/datetime/
   * https://clickhouse.yandex/docs/en/query_language/functions/date_time_functions/
   */
  convertTz(field) {
    return `toTimeZone(toDateTime(${field}), '${this.timezone}')`;
  }

  /**
   * Truncates `dimension` to the start of `granularity` in the query
   * timezone. 'week' has no entry in GRANULARITY_TO_INTERVAL and is mapped
   * to toMonday; every other granularity goes through the corresponding
   * toStartOf* function (toStartOfDay, toStartOfMonth, ...).
   */
  timeGroupedColumn(granularity, dimension) {
    if (granularity === 'week') {
      return `toDateTime(toMonday(${dimension}, '${this.timezone}'), '${this.timezone}')`;
    }
    const interval = GRANULARITY_TO_INTERVAL[granularity];
    return `toDateTime(toStartOf${interval}(${dimension}, '${this.timezone}'), '${this.timezone}')`;
  }

  /**
   * Builds ClickHouse interval arithmetic (addDays, subtractMonths, ...)
   * from a SQL-style interval string.
   *
   * Handles every "<value> <unit>" pair in the string, so '1 year 2 months'
   * composes nested calls; plural units ('2 days') are normalized so the
   * generated function name is still addDays, not addDayss.
   *
   * @param {string} operation - 'add' or 'subtract'.
   * @param {string} date - SQL expression yielding a DateTime.
   * @param {string} interval - e.g. '1 day', '2 months', '1 year 2 months'.
   * @returns {string} SQL expression applying the interval to `date`.
   */
  _calcInterval(operation, date, interval) {
    const parts = interval.split(' ');
    let sql = date;
    for (let i = 0; i + 1 < parts.length; i += 2) {
      const value = parts[i];
      // Strip a trailing 's' so both '1 day' and '2 days' yield addDays.
      const unit = parts[i + 1].replace(/s$/, '');
      const fn = `${operation}${unit[0].toUpperCase()}${unit.substring(1)}s`;
      sql = `${fn}(${sql}, ${value})`;
    }
    return sql;
  }

  subtractInterval(date, interval) {
    return this._calcInterval('subtract', date, interval);
  }

  addInterval(date, interval) {
    return this._calcInterval('add', date, interval);
  }

  /**
   * `value` yields an ISO8601 string. ClickHouse's toDateTime expects
   * 'YYYY-MM-DD HH:MM:SS' in UTC, but parseDateTimeBestEffort accepts
   * ISO8601 directly, so it is used for parsing instead.
   */
  timeStampCast(value) {
    return `parseDateTimeBestEffort(${value})`;
  }

  /** Same contract as timeStampCast: parse an ISO8601 string to a DateTime. */
  dateTimeCast(value) {
    return `parseDateTimeBestEffort(${value})`;
  }

  /**
   * Resolves a member id to its SELECT-list alias, matching
   * case-insensitively against selected dimensions first, then measures.
   *
   * @param {string} id - member id as referenced by the query hash.
   * @returns {?string} the alias name, or null when nothing matches.
   */
  getFieldAlias(id) {
    const equalIgnoreCase = (a, b) => (
      typeof a === 'string' && typeof b === 'string'
      && a.toUpperCase() === b.toUpperCase()
    );

    const dimension = this.dimensionsForSelect().find(
      d => equalIgnoreCase(d.dimension, id)
    );
    if (dimension) {
      return dimension.aliasName();
    }

    const measure = this.measures.find(
      m => equalIgnoreCase(m.measure, id) || equalIgnoreCase(m.expressionName, id)
    );
    if (measure) {
      return measure.aliasName();
    }

    return null;
  }

  /**
   * Renders one ORDER BY entry. ClickHouse doesn't support ordering by
   * column index, so the member id is mapped to its alias name instead.
   *
   * @param {{id: string, desc: boolean}} hash
   * @returns {?string} '<alias> ASC|DESC', or null when unresolvable.
   */
  orderHashToString(hash) {
    if (!hash || !hash.id) {
      return null;
    }

    const fieldAlias = this.getFieldAlias(hash.id);
    if (fieldAlias === null) {
      return null;
    }

    const direction = hash.desc ? 'DESC' : 'ASC';
    return `${fieldAlias} ${direction}`;
  }

  /**
   * Renders the GROUP BY clause. ClickHouse doesn't support grouping by
   * column index, so dimension alias names are used instead.
   */
  groupByClause() {
    const names = this.dimensionAliasNames();
    return names.length ? ` GROUP BY ${names.join(', ')}` : '';
  }

  /**
   * Counts primary keys of `cubeName`; uniqExact is ClickHouse's exact
   * COUNT(DISTINCT ...) equivalent.
   */
  primaryKeyCount(cubeName, distinct) {
    const primaryKeySql = this.primaryKeySql(this.cubeEvaluator.primaryKeys[cubeName], cubeName);
    return distinct ? `uniqExact(${primaryKeySql})` : `count(${primaryKeySql})`;
  }

  /**
   * Generates the date-series subquery for a time dimension.
   *
   * The Postgres dialect emits a VALUES list of (date_from, date_to) rows;
   * ClickHouse has no VALUES-style derived table, so the same rows are
   * produced by arrayJoin over two parallel literal arrays:
   *
   *   SELECT parseDateTimeBestEffort(arrayJoin(['...','...'])) as date_from,
   *          parseDateTimeBestEffort(arrayJoin(['...','...'])) as date_to
   */
  seriesSql(timeDimension) {
    const series = timeDimension.timeSeries();
    const datesFrom = series.map(([from]) => from);
    const datesTo = series.map(([, to]) => to);
    return `SELECT parseDateTimeBestEffort(arrayJoin(['${datesFrom.join("','")}'])) as date_from, parseDateTimeBestEffort(arrayJoin(['${datesTo.join("','")}'])) as date_to`;
  }

  /**
   * ClickHouse string concatenation. Every operand is wrapped in toString
   * because the || operator requires string arguments and measure
   * expressions may yield numbers.
   */
  concatStringsSql(strings) {
    return `toString(${strings.join(') || toString(')})`;
  }
}

module.exports = ClickHouseQuery;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
version: "3"

# Local test infrastructure for the ClickHouse dialect:
# a ClickHouse server plus a Postgres instance (the schema-compiler test
# suites run against both backends).
services:

  clickhouse-server:
    image: yandex/clickhouse-server
    ports:
      # Host 18123 -> container 8123 (ClickHouse HTTP interface).
      # Non-default host port avoids clashing with a locally installed server.
      - "18123:8123"

  postgresql-server:
    image: postgres:11.2
    ports:
      - "5432:5432"
    environment:
      POSTGRES_DB: model_test
      POSTGRES_PASSWORD: test
      POSTGRES_USER: test
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why numbers have to be converted to strings?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lots of the cubejs-schema-compiler tests that deal with measures assert that they are strings.
examples
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe that this is due to the fact that the range of values that the javascript number type supports can overflow. Clickhouse caters for this by returning strings for UInt64,Int64 already. I assume postgres does the same, and assumed that this was the protocol for cube.js
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I understand this is only an issue with the
`Int64` and `UInt64` data types. How is it handled for Postgres, which also has support for 64-bit numbers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@paveltiunov what should we be doing here with numbers vs strings ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably have some variations from driver to driver but as a rule of thumb all numeric types should be returned as strings due to JS can't handle correctly big integers and decimal numbers at all.
Also regarding timestamp format: the driver should always return only local dates in
`YYYY-MM-DDTHH:MM:SS.sss` format, as all conversions are done in SQL. There's only one case in which a driver can return UTC: the UTC timestamp representation in the cube.js server's local timezone. Otherwise there should be no 'Z' at the end.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like something to be added to driver development guidelines :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@inikolaev Yep. Definitely worth adding it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Re timestamps - I am confused as for example
The following test uses timezone America/Los_Angeles
https://github.com/statsbotco/cube.js/blob/bde5ea1dc493f4af599c6fcdcaecbcd399dfcebd/packages/cubejs-schema-compiler/test/GraphBuilderTest.js#L302
However the timestamp asserted is UTC (including a Z)
https://github.com/statsbotco/cube.js/blob/bde5ea1dc493f4af599c6fcdcaecbcd399dfcebd/packages/cubejs-schema-compiler/test/GraphBuilderTest.js#L315
Actually - Every date in that test is formatted the same.
Is there an example
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cameronbraid Yep. It's confusing. The thing is Postgres is that exception from the rule: it provides UTC timestamp representation in cube.js server local timezone. Actually it causing problems so we're considering to fix this in the Postgres driver itself.