diff --git a/README.md b/README.md index d8e26ac..3fbda46 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ This way you have sure is a valid query before trying to identify the types. * CREATE_TRIGGER * CREATE_FUNCTION * CREATE_INDEX +* CREATE_PROCEDURE * DROP_DATABASE * DROP_SCHEMA * DROP_TABLE @@ -48,6 +49,7 @@ This way you have sure is a valid query before trying to identify the types. * DROP_TRIGGER * DROP_FUNCTION * DROP_INDEX +* DROP_PROCEDURE * ALTER_DATABASE * ALTER_SCHEMA * ALTER_TABLE @@ -55,6 +57,7 @@ This way you have sure is a valid query before trying to identify the types. * ALTER_TRIGGER * ALTER_FUNCTION * ALTER_INDEX +* ALTER_PROCEDURE * ANON_BLOCK (Oracle Database only) * UNKNOWN (only available if strict mode is disabled) diff --git a/src/defines.ts b/src/defines.ts index 558f12f..2b45648 100644 --- a/src/defines.ts +++ b/src/defines.ts @@ -1,4 +1,12 @@ -export const DIALECTS = ['mssql', 'sqlite', 'mysql', 'oracle', 'psql', 'generic'] as const; +export const DIALECTS = [ + 'mssql', + 'sqlite', + 'mysql', + 'oracle', + 'psql', + 'bigquery', + 'generic', +] as const; export type Dialect = typeof DIALECTS[number]; export type StatementType = | 'INSERT' @@ -13,6 +21,7 @@ export type StatementType = | 'CREATE_TRIGGER' | 'CREATE_FUNCTION' | 'CREATE_INDEX' + | 'CREATE_PROCEDURE' | 'DROP_DATABASE' | 'DROP_SCHEMA' | 'DROP_TABLE' @@ -20,6 +29,7 @@ export type StatementType = | 'DROP_TRIGGER' | 'DROP_FUNCTION' | 'DROP_INDEX' + | 'DROP_PROCEDURE' | 'ALTER_DATABASE' | 'ALTER_SCHEMA' | 'ALTER_TABLE' @@ -27,6 +37,7 @@ export type StatementType = | 'ALTER_TRIGGER' | 'ALTER_FUNCTION' | 'ALTER_INDEX' + | 'ALTER_PROCEDURE' | 'ANON_BLOCK' | 'UNKNOWN'; diff --git a/src/parser.ts b/src/parser.ts index d8a6d3a..dd29a12 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -36,6 +36,7 @@ export const EXECUTION_TYPES: Record = { CREATE_TRIGGER: 'MODIFICATION', CREATE_FUNCTION: 'MODIFICATION', CREATE_INDEX: 'MODIFICATION', + CREATE_PROCEDURE: 'MODIFICATION', 
DROP_DATABASE: 'MODIFICATION', DROP_SCHEMA: 'MODIFICATION', DROP_TABLE: 'MODIFICATION', @@ -43,6 +44,7 @@ export const EXECUTION_TYPES: Record = { DROP_TRIGGER: 'MODIFICATION', DROP_FUNCTION: 'MODIFICATION', DROP_INDEX: 'MODIFICATION', + DROP_PROCEDURE: 'MODIFICATION', ALTER_DATABASE: 'MODIFICATION', ALTER_SCHEMA: 'MODIFICATION', ALTER_TABLE: 'MODIFICATION', @@ -50,11 +52,12 @@ export const EXECUTION_TYPES: Record = { ALTER_TRIGGER: 'MODIFICATION', ALTER_FUNCTION: 'MODIFICATION', ALTER_INDEX: 'MODIFICATION', + ALTER_PROCEDURE: 'MODIFICATION', UNKNOWN: 'UNKNOWN', ANON_BLOCK: 'ANON_BLOCK', }; -const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'ANON_BLOCK']; +const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'CREATE_PROCEDURE', 'ANON_BLOCK']; const blockOpeners: Record = { generic: ['BEGIN', 'CASE'], psql: ['BEGIN', 'CASE', 'LOOP', 'IF'], @@ -62,6 +65,7 @@ const blockOpeners: Record = { mssql: ['BEGIN', 'CASE'], sqlite: ['BEGIN', 'CASE'], oracle: ['DECLARE', 'BEGIN', 'CASE'], + bigquery: ['DECLARE', 'BEGIN', 'CASE'], }; interface ParseOptions { @@ -415,6 +419,7 @@ function createCreateStatementParser(options: ParseOptions) { ? [ { type: 'keyword', value: 'DATABASE' }, { type: 'keyword', value: 'SCHEMA' }, + { type: 'keyword', value: 'PROCEDURE' }, ] : []), { type: 'keyword', value: 'TABLE' }, @@ -461,6 +466,7 @@ function createDropStatementParser(options: ParseOptions) { ? [ { type: 'keyword', value: 'DATABASE' }, { type: 'keyword', value: 'SCHEMA' }, + { type: 'keyword', value: 'PROCEDURE' }, ] : []), { type: 'keyword', value: 'TABLE' }, @@ -508,6 +514,9 @@ function createAlterStatementParser(options: ParseOptions) { { type: 'keyword', value: 'TRIGGER' }, { type: 'keyword', value: 'FUNCTION' }, { type: 'keyword', value: 'INDEX' }, + ...(options.dialect !== 'bigquery' + ? 
[{ type: 'keyword', value: 'PROCEDURE' }] + : []), ] : []), { type: 'keyword', value: 'TABLE' }, @@ -571,10 +580,11 @@ function stateMachineStatementParser( { isStrict, dialect }: ParseOptions, ): StatementParser { let currentStepIndex = 0; - let prevToken: Token; - let prevPrevToken: Token; + let prevToken: Token | undefined; + let prevPrevToken: Token | undefined; + let prevNonWhitespaceToken: Token | undefined; - let lastBlockOpener: Token; + let lastBlockOpener: Token | undefined; let anonBlockStarted = false; let openBlocks = 0; @@ -598,6 +608,9 @@ function stateMachineStatementParser( const setPrevToken = (token: Token) => { prevPrevToken = prevToken; prevToken = token; + if (token.type !== 'whitespace') { + prevNonWhitespaceToken = token; + } }; return { @@ -640,7 +653,7 @@ function stateMachineStatementParser( prevPrevToken?.value.toUpperCase() !== 'END' ) { if ( - dialect === 'oracle' && + ['oracle', 'bigquery'].includes(dialect) && lastBlockOpener?.value === 'DECLARE' && token.value.toUpperCase() === 'BEGIN' ) { @@ -683,16 +696,25 @@ function stateMachineStatementParser( return; } - if (['psql', 'mssql'].includes(dialect) && token.value.toUpperCase() === 'MATERIALIZED') { + if ( + ['psql', 'mssql', 'bigquery'].includes(dialect) && + token.value.toUpperCase() === 'MATERIALIZED' + ) { setPrevToken(token); return; } - // psql allows for optional "OR REPLACE" between "CREATE" and "FUNCTION" - // mysql and psql allow it between "CREATE" and "VIEW" + // technically these dialects don't allow "OR REPLACE" or "OR ALTER" between all statement + // types, but we'll allow it for now. 
+ // For "ALTER", we need to make sure we only catch it here if it directly follows "OR", so + // we don't catch it for "ALTER TABLE" statements if ( - ['psql', 'mysql'].includes(dialect) && - ['OR', 'REPLACE'].includes(token.value.toUpperCase()) + (['psql', 'mysql', 'bigquery'].includes(dialect) && + ['OR', 'REPLACE'].includes(token.value.toUpperCase())) || + (dialect === 'mssql' && + (token.value.toUpperCase() === 'OR' || + (prevNonWhitespaceToken?.value.toUpperCase() === 'OR' && + token.value.toUpperCase() === 'ALTER'))) ) { setPrevToken(token); return; @@ -721,13 +743,13 @@ function stateMachineStatementParser( } if (statement.definer !== undefined && statement.definer > 0) { - if (statement.definer === 1 && prevToken.type === 'whitespace') { + if (statement.definer === 1 && prevToken?.type === 'whitespace') { statement.definer++; setPrevToken(token); return; } - if (statement.definer > 1 && prevToken.type !== 'whitespace') { + if (statement.definer > 1 && prevToken?.type !== 'whitespace') { setPrevToken(token); return; } @@ -748,7 +770,7 @@ function stateMachineStatementParser( } if (statement.algorithm !== undefined && statement.algorithm > 0) { - if (statement.algorithm === 1 && prevToken.type === 'whitespace') { + if (statement.algorithm === 1 && prevToken?.type === 'whitespace') { statement.algorithm++; setPrevToken(token); return; @@ -756,6 +778,7 @@ function stateMachineStatementParser( if ( statement.algorithm > 1 && + prevToken && ['UNDEFINED', 'MERGE', 'TEMPTABLE'].includes(prevToken.value.toUpperCase()) ) { setPrevToken(token); @@ -792,6 +815,7 @@ function stateMachineStatementParser( } if ( + prevToken && currentStep.validation && currentStep.validation.requireBefore && !currentStep.validation.requireBefore.includes(prevToken.type) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 7b94977..4f17a9c 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -28,6 +28,7 @@ const KEYWORDS = [ 'BEGIN', 'DECLARE', 'CASE', + 'PROCEDURE', ]; const 
INDIVIDUALS: Record = { diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index ae8941f..fc45c77 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -130,7 +130,7 @@ describe('identifier', () => { describe('identifying "CREATE MATERIALIZED VIEW" statement', () => { const query = "CREATE MATERIALIZED VIEW vista AS SELECT 'Hello World';"; - (['psql', 'mssql'] as Dialect[]).forEach((dialect) => { + (['bigquery', 'psql', 'mssql'] as Dialect[]).forEach((dialect) => { it(`should identify for ${dialect}`, () => { const actual = identify(query, { dialect }); const expected = [ @@ -159,7 +159,7 @@ describe('identifier', () => { describe('identify "CREATE OR REPLACE VIEW" statement', () => { const query = "CREATE OR REPLACE VIEW vista AS SELECT 'Hello world';"; - (['mysql', 'psql'] as Dialect[]).forEach((dialect) => { + (['bigquery', 'mysql', 'psql'] as Dialect[]).forEach((dialect) => { it(`should identify for ${dialect}`, () => { const actual = identify(query, { dialect }); const expected = [ @@ -177,13 +177,19 @@ describe('identifier', () => { }); }); - (['generic', 'sqlite', 'mssql'] as Dialect[]).forEach((dialect) => { + (['generic', 'sqlite'] as Dialect[]).forEach((dialect) => { it(`should throw error for ${dialect}`, () => { expect(() => identify(query, { dialect })).to.throw( /^Expected any of these tokens .* instead of type="unknown" value="OR" \(currentStep=1\)/, ); }); }); + + it(`should throw error for mssql`, () => { + expect(() => identify(query, { dialect: 'mssql' })).to.throw( + /^Expected any of these tokens .* instead of type="unknown" value="REPLACE" \(currentStep=1\)/, + ); + }); }); ['TEMP', 'TEMPORARY'].forEach((temp) => { @@ -207,7 +213,7 @@ describe('identifier', () => { }); }); - (['generic', 'mysql', 'mssql'] as Dialect[]).forEach((dialect) => { + (['generic', 'mysql', 'mssql', 'bigquery', 'oracle'] as Dialect[]).forEach((dialect) => { it(`should throw 
error for ${dialect}`, () => { const regex = new RegExp( `Expected any of these tokens .* instead of type="unknown" value="${temp}" \\(currentStep=1\\)`, @@ -357,6 +363,165 @@ describe('identifier', () => { }); }); + describe('identify PROCEDURE statements', () => { + describe('identify "CREATE PROCEDURE" statements', () => { + (['bigquery', 'generic', 'mssql', 'mysql', 'oracle', 'psql'] as Dialect[]).forEach( + (dialect) => { + it(`should identify statement for ${dialect}`, () => { + const sql = `CREATE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 308, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }, + ); + + (['bigquery', 'mysql', 'psql'] as Dialect[]).forEach((dialect) => { + it(`should identify statement with "OR REPLACE" for ${dialect}`, () => { + const sql = `CREATE OR REPLACE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 305, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); + + it('should identify statement with "OR ALTER" for mssql', () => { + const sql = `CREATE OR ALTER PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect: 'mssql' }); + const expected = [ + { +
start: 0, + end: 289, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + + it('should error for sqlite', () => { + const sql = `CREATE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( + 'Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="PROCEDURE" (currentStep=1)', + ); + }); + }); + + describe('identify "DROP PROCEDURE" statements', () => { + (['bigquery', 'generic', 'mssql', 'mysql', 'oracle', 'psql'] as Dialect[]).forEach( + (dialect) => { + it(`should identify the statement for ${dialect}`, () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; + + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 39, + text: sql, + type: 'DROP_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }, + ); + + it('should error for sqlite', () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; + expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( + 'Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="PROCEDURE" (currentStep=1)', + ); + }); + }); + + describe('identify "ALTER PROCEDURE" statements', () => { + const sql = `ALTER PROCEDURE mydataset.create_customer`; + (['generic', 'mssql', 'mysql', 'oracle', 'psql'] as Dialect[]).forEach((dialect) => { +
it(`should identify "ALTER PROCEDURE" statement for ${dialect}`, () => { + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 40, + text: sql, + type: 'ALTER_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); + + it('should throw error for bigquery', () => { + expect(() => identify(sql, { dialect: 'bigquery' })).to.throw( + `Expected any of these tokens (type="keyword" value="DATABASE") or (type="keyword" value="SCHEMA") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="TABLE") or (type="keyword" value="VIEW") instead of type="keyword" value="PROCEDURE`, + ); + }); + + it('should error for sqlite', () => { + // Reuse the outer ALTER statement; a local DROP query here would only re-test DROP PROCEDURE. + expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( + 'instead of type="keyword" value="PROCEDURE"', + ); + }); + }); + }); + describe('identify "CREATE FUNCTION" statements', () => { it('should identify postgres "CREATE FUNCTION" statement with LANGUAGE at end', () => { const sql = `CREATE FUNCTION quarterly_summary_func(start_date date DEFAULT CURRENT_TIMESTAMP) diff --git a/test/index.spec.ts b/test/index.spec.ts index 5ef8026..2e4ba1e 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -4,7 +4,7 @@ import { expect } from 'chai'; describe('identify', () => { it('should throw error for invalid dialect', () => { expect(() => identify('SELECT * FROM foo', { dialect: 'invalid' as Dialect })).to.throw( - 'Unknown dialect. Allowed values: mssql, sqlite, mysql, oracle, psql, generic', + 'Unknown dialect. Allowed values: mssql, sqlite, mysql, oracle, psql, bigquery, generic', ); });