BigQuery procedural-language support for the statement parser.

What this patch does:
  * src/parser.ts
      - Adds 'UNKNOWN' to statementsWithEnds, so a semicolon only terminates an
        UNKNOWN statement once openBlocks is back to 0.
      - Adds IF, LOOP, REPEAT, WHILE and FOR to the bigquery block openers.
      - Passes nextToken into createStatementParserByToken. BEGIN now creates a
        block statement parser for bigquery and oracle unless the next token is
        TRANSACTION; DECLARE creates one for oracle only.
      - In the state machine, a DECLARE ... BEGIN pair is treated as a single
        block for oracle only, the previous token is recorded while waiting for
        the end of an identified statement, and the early return after a block
        opener only happens once the statement type is known.
  * src/tokenizer.ts
      - Registers LOOP, IF, REPEAT, WHILE and FOR as keywords.
  * test/parser/bigquery.spec.ts
      - New spec covering the control structures and BEGIN handling.

Review fix folded in:
  * The BEGIN check compared nextToken.value to 'TRANSACTION' case-sensitively,
    so `begin transaction;` was parsed as a block that waits for END. The
    comparison now upper-cases the token value, like the other token checks in
    this patch, and a lowercase regression test is added to the spec.

NOTE(review): leading whitespace was reconstructed with conventional 2-space
indentation, and `Record` on the context lines carries no type arguments in
this copy. Confirm both against the working tree before applying (or apply
with --ignore-whitespace). The `index` lines keep the original blob ids.

diff --git a/src/parser.ts b/src/parser.ts
index 06e9d61..db5acfc 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -57,7 +57,14 @@ export const EXECUTION_TYPES: Record = {
   ANON_BLOCK: 'ANON_BLOCK',
 };
 
-const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'CREATE_PROCEDURE', 'ANON_BLOCK'];
+const statementsWithEnds = [
+  'CREATE_TRIGGER',
+  'CREATE_FUNCTION',
+  'CREATE_PROCEDURE',
+  'ANON_BLOCK',
+  'UNKNOWN',
+];
+
 const blockOpeners: Record = {
   generic: ['BEGIN', 'CASE'],
   psql: ['BEGIN', 'CASE', 'LOOP', 'IF'],
@@ -65,7 +72,7 @@ const blockOpeners: Record = {
   mssql: ['BEGIN', 'CASE'],
   sqlite: ['BEGIN', 'CASE'],
   oracle: ['DECLARE', 'BEGIN', 'CASE'],
-  bigquery: ['BEGIN', 'CASE'],
+  bigquery: ['BEGIN', 'CASE', 'IF', 'LOOP', 'REPEAT', 'WHILE', 'FOR'],
 };
 
 interface ParseOptions {
@@ -194,7 +201,7 @@ export function parse(input: string, isStrict = true, dialect: Dialect = 'generi
         continue;
       }
 
-      statementParser = createStatementParserByToken(token, { isStrict, dialect });
+      statementParser = createStatementParserByToken(token, nextToken, { isStrict, dialect });
       if (cteState.isCte) {
         statementParser.getStatement().start = cteState.state.start;
         cteState.isCte = false;
@@ -247,7 +254,11 @@ function initState({ input, prevState }: { input?: string; prevState?: State }):
   };
 }
 
-function createStatementParserByToken(token: Token, options: ParseOptions): StatementParser {
+function createStatementParserByToken(
+  token: Token,
+  nextToken: Token,
+  options: ParseOptions,
+): StatementParser {
   if (token.type === 'keyword') {
     switch (token.value.toUpperCase()) {
       case 'SELECT':
@@ -266,12 +277,19 @@ function createStatementParserByToken(token: Token, options: ParseOptions): Stat
         return createDeleteStatementParser(options);
       case 'TRUNCATE':
         return createTruncateStatementParser(options);
-      case 'DECLARE':
       case 'BEGIN':
+        if (
+          ['bigquery', 'oracle'].includes(options.dialect) &&
+          nextToken.value.toUpperCase() !== 'TRANSACTION'
+        ) {
+          return createBlockStatementParser(options);
+        }
+        break;
+      case 'DECLARE':
         if (options.dialect === 'oracle') {
           return createBlockStatementParser(options);
         }
-      // eslint-disable-next-line no-fallthrough
+        break;
       default:
         break;
     }
@@ -317,7 +335,7 @@ function createBlockStatementParser(options: ParseOptions) {
       preCanGoToNext: () => false,
       validation: {
         acceptTokens: [
-          { type: 'keyword', value: 'DECLARE' },
+          ...(options.dialect === 'oracle' ? [{ type: 'keyword', value: 'DECLARE' }] : []),
           { type: 'keyword', value: 'BEGIN' },
         ],
       },
@@ -635,7 +653,8 @@ function stateMachineStatementParser(
       if (
         statement.type &&
         token.type === 'semicolon' &&
-        (!statementsWithEnds.includes(statement.type) || (openBlocks === 0 && statement.canEnd))
+        (!statementsWithEnds.includes(statement.type) ||
+          (openBlocks === 0 && (statement.type === 'UNKNOWN' || statement.canEnd)))
       ) {
         statement.endStatement = ';';
         return;
@@ -667,7 +686,7 @@ function stateMachineStatementParser(
                 !['DEFERRED', 'IMMEDIATE', 'EXCLUSIVE'].includes(nextToken.value.toUpperCase())))))
       ) {
         if (
-          ['oracle', 'bigquery'].includes(dialect) &&
+          dialect === 'oracle' &&
           lastBlockOpener?.value === 'DECLARE' &&
           token.value.toUpperCase() === 'BEGIN'
         ) {
@@ -681,8 +700,7 @@ function stateMachineStatementParser(
         setPrevToken(token);
         if (statement.type === 'ANON_BLOCK' && !anonBlockStarted) {
           anonBlockStarted = true;
-          // don't return
-        } else {
+        } else if (statement.type) {
           return;
         }
       }
@@ -697,6 +715,7 @@ function stateMachineStatementParser(
       if (statement.type && statement.start >= 0) {
         // statement has already been identified
         // just wait until end of the statement
+        setPrevToken(token);
         return;
       }
 
diff --git a/src/tokenizer.ts b/src/tokenizer.ts
index 4f17a9c..cf5fbff 100644
--- a/src/tokenizer.ts
+++ b/src/tokenizer.ts
@@ -28,6 +28,11 @@ const KEYWORDS = [
   'BEGIN',
   'DECLARE',
   'CASE',
+  'LOOP',
+  'IF',
+  'REPEAT',
+  'WHILE',
+  'FOR',
   'PROCEDURE',
 ];
 
diff --git a/test/parser/bigquery.spec.ts b/test/parser/bigquery.spec.ts
new file mode 100644
index 0000000..d1af5a5
--- /dev/null
+++ b/test/parser/bigquery.spec.ts
@@ -0,0 +1,87 @@
+import { parse } from '../../src/parser';
+import { expect } from 'chai';
+
+describe('Parser for bigquery', () => {
+  // all testcases are taken straight from bigquery docs on procedural language
+  // see https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language
+  describe('control structures', () => {
+    [
+      `CASE
+        WHEN
+          EXISTS(SELECT 1 FROM schema.products_a WHERE product_id = target_product_id)
+          THEN SELECT 'found product in products_a table';
+        WHEN
+          EXISTS(SELECT 1 FROM schema.products_b WHERE product_id = target_product_id)
+          THEN SELECT 'found product in products_b table';
+        ELSE
+          SELECT 'did not find product';
+      END CASE;`,
+      `IF EXISTS(SELECT 1 FROM schema.products
+        WHERE product_id = target_product_id) THEN
+        SELECT CONCAT('found product ', CAST(target_product_id AS STRING));
+      ELSEIF EXISTS(SELECT 1 FROM schema.more_products
+        WHERE product_id = target_product_id) THEN
+        SELECT CONCAT('found product from more_products table',
+        CAST(target_product_id AS STRING));
+      ELSE
+        SELECT CONCAT('did not find product ', CAST(target_product_id AS STRING));
+      END IF;`,
+      `LOOP
+        SET x = x + 1;
+        IF x >= 10 THEN
+          LEAVE;
+        END IF;
+      END LOOP;`,
+      `REPEAT
+        SET x = x + 1;
+        SELECT x;
+        UNTIL x >= 3
+      END REPEAT;`,
+      `WHILE x < 0 DO
+        SET x = x + 1;
+        SELECT x;
+      END WHILE;`,
+      `FOR record IN
+        (SELECT word, word_count
+        FROM bigquery-public-data.samples.shakespeare
+        LIMIT 5)
+      DO
+        SELECT record.word, record.word_count;
+      END FOR;`,
+    ].forEach((sql) => {
+      it(`parses ${sql.substring(
+        0,
+        Math.min(sql.indexOf(' '), sql.indexOf('\n')),
+      )} structure`, () => {
+        const result = parse(`${sql}\nSELECT 1;`, false, 'bigquery');
+        expect(result.body.length).to.eql(2);
+        expect(sql.substring(result.body[0].start, result.body[0].end + 1)).to.eql(sql);
+        expect(result.body[0].type).to.eql('UNKNOWN');
+        expect(result.body[1].type).to.eql('SELECT');
+      });
+    });
+  });
+
+  it('parses BEGIN statement as ANON_BLOCK', () => {
+    const result = parse(`BEGIN SELECT 1; END; SELECT 1;`, false, 'bigquery');
+    expect(result.body.length).to.eql(2);
+    expect(result.body[0].type).to.eql('ANON_BLOCK');
+    expect(result.body[1].type).to.eql('SELECT');
+  });
+
+  it('parses BEGIN TRANSACTION as UNKNOWN', () => {
+    const result = parse(`BEGIN TRANSACTION; SELECT 1; COMMIT;`, false, 'bigquery');
+    expect(result.body.length).to.eql(3);
+    expect(result.body[0].type).to.eql('UNKNOWN');
+    expect(result.body[1].type).to.eql('SELECT');
+    expect(result.body[2].type).to.eql('UNKNOWN');
+  });
+
+  it('parses lowercase begin transaction as UNKNOWN', () => {
+    const result = parse(`begin transaction; select 1; commit;`, false, 'bigquery');
+    expect(result.body.length).to.eql(3);
+    expect(result.body[0].type).to.eql('UNKNOWN');
+    expect(result.body[1].type).to.eql('SELECT');
+    expect(result.body[2].type).to.eql('UNKNOWN');
+  });
+});