From 7d6c7d08aa74783466731c141923128d79144023 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Mon, 1 Aug 2022 14:41:37 +0200 Subject: [PATCH 01/10] Add bigquery dialect --- README.md | 1 + src/defines.ts | 11 +++++++++- src/parser.ts | 12 ++++++---- src/tokenizer.ts | 1 + test/identifier/single-statement.spec.ts | 28 +++++++++++++++++++++++- test/index.spec.ts | 2 +- 6 files changed, 48 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d8e26ac..0df7bba 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ This way you have sure is a valid query before trying to identify the types. * CREATE_TRIGGER * CREATE_FUNCTION * CREATE_INDEX +* CREATE_PROCEDURE * DROP_DATABASE * DROP_SCHEMA * DROP_TABLE diff --git a/src/defines.ts b/src/defines.ts index 558f12f..e141da6 100644 --- a/src/defines.ts +++ b/src/defines.ts @@ -1,4 +1,12 @@ -export const DIALECTS = ['mssql', 'sqlite', 'mysql', 'oracle', 'psql', 'generic'] as const; +export const DIALECTS = [ + 'mssql', + 'sqlite', + 'mysql', + 'oracle', + 'psql', + 'bigquery', + 'generic', +] as const; export type Dialect = typeof DIALECTS[number]; export type StatementType = | 'INSERT' @@ -13,6 +21,7 @@ export type StatementType = | 'CREATE_TRIGGER' | 'CREATE_FUNCTION' | 'CREATE_INDEX' + | 'CREATE_PROCEDURE' | 'DROP_DATABASE' | 'DROP_SCHEMA' | 'DROP_TABLE' diff --git a/src/parser.ts b/src/parser.ts index d8a6d3a..fc57c43 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -36,6 +36,7 @@ export const EXECUTION_TYPES: Record = { CREATE_TRIGGER: 'MODIFICATION', CREATE_FUNCTION: 'MODIFICATION', CREATE_INDEX: 'MODIFICATION', + CREATE_PROCEDURE: 'MODIFICATION', DROP_DATABASE: 'MODIFICATION', DROP_SCHEMA: 'MODIFICATION', DROP_TABLE: 'MODIFICATION', @@ -54,7 +55,7 @@ export const EXECUTION_TYPES: Record = { ANON_BLOCK: 'ANON_BLOCK', }; -const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'ANON_BLOCK']; +const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'CREATE_PROCEDURE', 'ANON_BLOCK']; const blockOpeners: Record = { generic: ['BEGIN', 'CASE'], psql: ['BEGIN', 'CASE', 'LOOP', 'IF'], @@ -62,6 +63,7 @@ const blockOpeners: Record = { mssql: ['BEGIN', 'CASE'], sqlite: ['BEGIN', 'CASE'], oracle: ['DECLARE', 'BEGIN', 'CASE'], + bigquery: ['DECLARE', 'BEGIN', 'CASE'], }; interface ParseOptions { @@ -254,7 +256,7 @@ function createStatementParserByToken(token: Token, options: ParseOptions): Stat return createTruncateStatementParser(options); case 'DECLARE': case 'BEGIN': - if (options.dialect === 'oracle') { + if (['oracle', 'bigquery'].includes(options.dialect)) { return createBlockStatementParser(options); } // eslint-disable-next-line no-fallthrough @@ -422,6 +424,7 @@ function createCreateStatementParser(options: ParseOptions) { { type: 'keyword', value: 'TRIGGER' }, { type: 'keyword', value: 'FUNCTION' }, { type: 'keyword', value: 'INDEX' }, + { type: 'keyword', value: 'PROCEDURE' }, ], }, add: (token) => { @@ -508,6 +511,7 @@ function createAlterStatementParser(options: ParseOptions) { { type: 'keyword', value: 'TRIGGER' }, { type: 'keyword', value: 'FUNCTION' }, { type: 'keyword', value: 'INDEX' }, + { type: 'keyword', value: 'PROCEDURE' }, ] : []), { type: 'keyword', value: 'TABLE' }, @@ -640,7 +644,7 @@ function stateMachineStatementParser( prevPrevToken?.value.toUpperCase() !== 'END' ) { if ( - dialect === 'oracle' && + ['oracle', 'bigquery'].includes(dialect) && lastBlockOpener?.value === 'DECLARE' && token.value.toUpperCase() === 'BEGIN' ) { @@ -691,7 +695,7 @@ function stateMachineStatementParser( // psql allows for optional "OR REPLACE" between "CREATE" and "FUNCTION" // mysql and psql allow it between "CREATE" and "VIEW" if ( - ['psql', 'mysql'].includes(dialect) && + ['psql', 'mysql', 'bigquery'].includes(dialect) && ['OR', 'REPLACE'].includes(token.value.toUpperCase()) ) { setPrevToken(token); diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 79dc64e..f44a2bd 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -28,6 +28,7 @@ const KEYWORDS = [ 'BEGIN', 'DECLARE', 'CASE', + 'PROCEDURE', ]; const INDIVIDUALS: Record = { diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index d292a3f..8c5f822 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -89,7 +89,7 @@ describe('identifier', () => { it('should throw error for sqlite', () => { expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( - `Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="${type}" (currentStep=1)`, + `Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="PROCEDURE") instead of type="keyword" value="${type}" (currentStep=1)`, ); }); }); @@ -357,6 +357,32 @@ describe('identifier', () => { }); }); + describe('identify bigquery "CREATE PROCEDURE" statements', () => { + it('should identify bigquery "CREATE PROCEDURE" statement', () => { + const sql = `CREATE OR REPLACE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect: 'bigquery' }); + const expected = [ + { + start: 0, + end: 277, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); + describe('identify "CREATE FUNCTION" statements', () => { it('should identify postgres "CREATE FUNCTION" statement with LANGUAGE at end', () => { const sql = `CREATE FUNCTION quarterly_summary_func(start_date date DEFAULT CURRENT_TIMESTAMP) diff --git a/test/index.spec.ts b/test/index.spec.ts index 5ef8026..2e4ba1e 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -4,7 +4,7 @@ import { expect } from 'chai'; describe('identify', () => { it('should throw error for invalid dialect', () => { expect(() => identify('SELECT * FROM foo', { dialect: 'invalid' as Dialect })).to.throw( - 'Unknown dialect. Allowed values: mssql, sqlite, mysql, oracle, psql, generic', + 'Unknown dialect. Allowed values: mssql, sqlite, mysql, oracle, psql, bigquery, generic', ); }); From 7f405431bbcdff78bd297c28c1a4ffa2040f3f75 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 15:12:36 +0200 Subject: [PATCH 02/10] add lib --- lib/defines.d.ts | 63 ++++ lib/defines.js | 12 + lib/index.d.ts | 7 + lib/index.js | 33 +++ lib/parser.d.ts | 13 + lib/parser.js | 699 +++++++++++++++++++++++++++++++++++++++++++++ lib/tokenizer.d.ts | 5 + lib/tokenizer.js | 365 +++++++++++++++++++++++ 8 files changed, 1197 insertions(+) create mode 100644 lib/defines.d.ts create mode 100644 lib/defines.js create mode 100644 lib/index.d.ts create mode 100644 lib/index.js create mode 100644 lib/parser.d.ts create mode 100644 lib/parser.js create mode 100644 lib/tokenizer.d.ts create mode 100644 lib/tokenizer.js diff --git a/lib/defines.d.ts b/lib/defines.d.ts new file mode 100644 index 0000000..d65d7f8 --- /dev/null +++ b/lib/defines.d.ts @@ -0,0 +1,63 @@ +export declare const DIALECTS: readonly ["mssql", "sqlite", "mysql", "oracle", "psql", "bigquery", "generic"]; +export declare type Dialect = typeof DIALECTS[number]; +export declare type StatementType = 'INSERT' | 'UPDATE' | 'DELETE' | 'SELECT' | 'TRUNCATE' | 'CREATE_DATABASE' | 'CREATE_SCHEMA' | 'CREATE_TABLE' | 'CREATE_VIEW' | 'CREATE_TRIGGER' | 'CREATE_FUNCTION' | 'CREATE_INDEX' | 'CREATE_PROCEDURE' | 'DROP_DATABASE' | 'DROP_SCHEMA' | 'DROP_TABLE' | 'DROP_VIEW' | 'DROP_TRIGGER' | 'DROP_FUNCTION' | 'DROP_INDEX' | 'ALTER_DATABASE' | 'ALTER_SCHEMA' | 'ALTER_TABLE' | 'ALTER_VIEW' | 'ALTER_TRIGGER' | 'ALTER_FUNCTION' | 'ALTER_INDEX' | 'ANON_BLOCK' | 'UNKNOWN'; +export declare type ExecutionType = 'LISTING' | 'MODIFICATION' | 'ANON_BLOCK' | 'UNKNOWN'; +export interface IdentifyOptions { + strict?: boolean; + dialect?: Dialect; +} +export interface IdentifyResult { + start: number; + end: number; + text: string; + type: StatementType; + executionType: ExecutionType; + parameters: string[]; +} +export interface Statement { + start: number; + end: number; + type?: StatementType; + executionType?: ExecutionType; + endStatement?: string; + canEnd?: boolean; + definer?: number; + algorithm?: number; + sqlSecurity?: number; + parameters: string[]; +} +export interface ConcreteStatement extends Statement { + type: StatementType; + executionType: ExecutionType; +} +export interface State { + start: number; + end: number; + position: number; + input: string; +} +export interface Token { + type: 'whitespace' | 'comment-inline' | 'comment-block' | 'string' | 'semicolon' | 'keyword' | 'parameter' | 'unknown'; + value: string; + start: number; + end: number; +} +export interface ParseResult { + type: 'QUERY'; + start: number; + end: number; + body: ConcreteStatement[]; + tokens: Token[]; +} +export interface Step { + preCanGoToNext: (token?: Token) => boolean; + validation?: { + requireBefore?: string[]; + acceptTokens: { + type: string; + value: string; + }[]; + }; + add: (token: Token) => void; + postCanGoToNext: (token?: Token) => boolean; +} diff --git a/lib/defines.js b/lib/defines.js new file mode 100644 index 0000000..e5868a6 --- /dev/null +++ b/lib/defines.js @@ -0,0 +1,12 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.DIALECTS = void 0; +exports.DIALECTS = [ + 'mssql', + 'sqlite', + 'mysql', + 'oracle', + 'psql', + 'bigquery', + 'generic', +]; diff --git a/lib/index.d.ts b/lib/index.d.ts new file mode 100644 index 0000000..0e7a0c3 --- /dev/null +++ b/lib/index.d.ts @@ -0,0 +1,7 @@ +import type { ExecutionType, IdentifyOptions, IdentifyResult } from './defines'; +export type { ExecutionType, Dialect, IdentifyOptions as Options, IdentifyResult as Result, StatementType, } from './defines'; +/** + * Identifier + */ +export declare function identify(query: string, options?: IdentifyOptions): IdentifyResult[]; +export declare function getExecutionType(command: string): ExecutionType; diff --git a/lib/index.js b/lib/index.js new file mode 100644 index 0000000..af20570 --- /dev/null +++ b/lib/index.js @@ -0,0 +1,33 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.getExecutionType = exports.identify = void 0; +const parser_1 = require("./parser"); +const defines_1 = require("./defines"); +/** + * Identifier + */ +function identify(query, options = {}) { + const isStrict = typeof options.strict === 'undefined' ? true : options.strict === true; + const dialect = typeof options.dialect === 'undefined' ? 'generic' : options.dialect; + if (!defines_1.DIALECTS.includes(dialect)) { + throw new Error(`Unknown dialect. Allowed values: ${defines_1.DIALECTS.join(', ')}`); + } + const result = (0, parser_1.parse)(query, isStrict, dialect); + return result.body.map((statement) => { + const result = { + start: statement.start, + end: statement.end, + text: query.substring(statement.start, statement.end + 1), + type: statement.type, + executionType: statement.executionType, + // we want to sort the postgres params: $1 $2 $3, regardless of the order they appear + parameters: dialect === 'psql' ? statement.parameters.sort() : statement.parameters, + }; + return result; + }); +} +exports.identify = identify; +function getExecutionType(command) { + return parser_1.EXECUTION_TYPES[command] || 'UNKNOWN'; +} +exports.getExecutionType = getExecutionType; diff --git a/lib/parser.d.ts b/lib/parser.d.ts new file mode 100644 index 0000000..7d23efa --- /dev/null +++ b/lib/parser.d.ts @@ -0,0 +1,13 @@ +import type { ExecutionType, Dialect, StatementType, ParseResult } from './defines'; +/** + * Execution types allow to know what is the query behavior + * - LISTING: is when the query list the data + * - MODIFICATION: is when the query modificate the database somehow (structure or data) + * - INFORMATION: is show some data information such as a profile data + * - UNKNOWN + */ +export declare const EXECUTION_TYPES: Record; +/** + * Parser + */ +export declare function parse(input: string, isStrict?: boolean, dialect?: Dialect): ParseResult; diff --git a/lib/parser.js b/lib/parser.js new file mode 100644 index 0000000..5ea87fe --- /dev/null +++ b/lib/parser.js @@ -0,0 +1,699 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.parse = exports.EXECUTION_TYPES = void 0; +const tokenizer_1 = require("./tokenizer"); +/** + * Execution types allow to know what is the query behavior + * - LISTING: is when the query list the data + * - MODIFICATION: is when the query modificate the database somehow (structure or data) + * - INFORMATION: is show some data information such as a profile data + * - UNKNOWN + */ +exports.EXECUTION_TYPES = { + SELECT: 'LISTING', + INSERT: 'MODIFICATION', + DELETE: 'MODIFICATION', + UPDATE: 'MODIFICATION', + TRUNCATE: 'MODIFICATION', + CREATE_DATABASE: 'MODIFICATION', + CREATE_SCHEMA: 'MODIFICATION', + CREATE_TABLE: 'MODIFICATION', + CREATE_VIEW: 'MODIFICATION', + CREATE_TRIGGER: 'MODIFICATION', + CREATE_FUNCTION: 'MODIFICATION', + CREATE_INDEX: 'MODIFICATION', + CREATE_PROCEDURE: 'MODIFICATION', + DROP_DATABASE: 'MODIFICATION', + DROP_SCHEMA: 'MODIFICATION', + DROP_TABLE: 'MODIFICATION', + DROP_VIEW: 'MODIFICATION', + DROP_TRIGGER: 'MODIFICATION', + DROP_FUNCTION: 'MODIFICATION', + DROP_INDEX: 'MODIFICATION', + ALTER_DATABASE: 'MODIFICATION', + ALTER_SCHEMA: 'MODIFICATION', + ALTER_TABLE: 'MODIFICATION', + ALTER_VIEW: 'MODIFICATION', + ALTER_TRIGGER: 'MODIFICATION', + ALTER_FUNCTION: 'MODIFICATION', + ALTER_INDEX: 'MODIFICATION', + UNKNOWN: 'UNKNOWN', + ANON_BLOCK: 'ANON_BLOCK', +}; +const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'CREATE_PROCEDURE', 'ANON_BLOCK']; +const blockOpeners = { + generic: ['BEGIN', 'CASE'], + psql: ['BEGIN', 'CASE', 'LOOP', 'IF'], + mysql: ['BEGIN', 'CASE', 'LOOP', 'IF'], + mssql: ['BEGIN', 'CASE'], + sqlite: ['BEGIN', 'CASE'], + oracle: ['DECLARE', 'BEGIN', 'CASE'], + bigquery: ['DECLARE', 'BEGIN', 'CASE'], +}; +function createInitialStatement() { + return { + start: -1, + end: 0, + parameters: [], + }; +} +/** + * Parser + */ +function parse(input, isStrict = true, dialect = 'generic') { + const topLevelState = initState({ input }); + const topLevelStatement = { + type: 'QUERY', + start: 0, + end: input.length - 1, + body: [], + tokens: [], + }; + let prevState = topLevelState; + let statementParser = null; + const cteState = { + isCte: false, + asSeen: false, + statementEnd: false, + parens: 0, + state: topLevelState, + }; + const ignoreOutsideBlankTokens = ['whitespace', 'comment-inline', 'comment-block', 'semicolon']; + while (prevState.position < topLevelState.end) { + const tokenState = initState({ prevState }); + const token = (0, tokenizer_1.scanToken)(tokenState, dialect); + if (!statementParser) { + // ignore blank tokens before the start of a CTE / not part of a statement + if (!cteState.isCte && ignoreOutsideBlankTokens.includes(token.type)) { + topLevelStatement.tokens.push(token); + prevState = tokenState; + continue; + } + else if (!cteState.isCte && + token.type === 'keyword' && + token.value.toUpperCase() === 'WITH') { + cteState.isCte = true; + topLevelStatement.tokens.push(token); + cteState.state = tokenState; + prevState = tokenState; + continue; + // If we're scanning in a CTE, handle someone putting a semicolon anywhere (after 'with', + // after semicolon, etc.) along it to "early terminate". + } + else if (cteState.isCte && token.type === 'semicolon') { + topLevelStatement.tokens.push(token); + prevState = tokenState; + topLevelStatement.body.push({ + start: cteState.state.start, + end: token.end, + type: 'UNKNOWN', + executionType: 'UNKNOWN', + parameters: [], + }); + cteState.isCte = false; + cteState.asSeen = false; + cteState.statementEnd = false; + cteState.parens = 0; + continue; + } + else if (cteState.isCte && !cteState.statementEnd) { + if (cteState.asSeen) { + if (token.value === '(') { + cteState.parens++; + } + else if (token.value === ')') { + cteState.parens--; + if (cteState.parens === 0) { + cteState.statementEnd = true; + } + } + } + else if (token.value.toUpperCase() === 'AS') { + cteState.asSeen = true; + } + topLevelStatement.tokens.push(token); + prevState = tokenState; + continue; + } + else if (cteState.isCte && cteState.statementEnd && token.value === ',') { + cteState.asSeen = false; + cteState.statementEnd = false; + topLevelStatement.tokens.push(token); + prevState = tokenState; + continue; + // Ignore blank tokens after the end of the CTE till start of statement + } + else if (cteState.isCte && + cteState.statementEnd && + ignoreOutsideBlankTokens.includes(token.type)) { + topLevelStatement.tokens.push(token); + prevState = tokenState; + continue; + } + statementParser = createStatementParserByToken(token, { isStrict, dialect }); + if (cteState.isCte) { + statementParser.getStatement().start = cteState.state.start; + cteState.isCte = false; + cteState.asSeen = false; + cteState.statementEnd = false; + } + } + statementParser.addToken(token); + topLevelStatement.tokens.push(token); + prevState = tokenState; + const statement = statementParser.getStatement(); + if (statement.endStatement) { + statement.end = token.end; + topLevelStatement.body.push(statement); + statementParser = null; + } + } + // last statement without ending key + if (statementParser) { + const statement = statementParser.getStatement(); + if (!statement.endStatement) { + statement.end = topLevelStatement.end; + topLevelStatement.body.push(statement); + } + } + return topLevelStatement; +} +exports.parse = parse; +function initState({ input, prevState }) { + if (prevState) { + return { + input: prevState.input, + position: prevState.position, + start: prevState.position + 1, + end: prevState.input.length - 1, + }; + } + else if (input === undefined) { + throw new Error('You must define either input or prevState'); + } + return { + input, + position: -1, + start: 0, + end: input.length - 1, + }; +} +function createStatementParserByToken(token, options) { + if (token.type === 'keyword') { + switch (token.value.toUpperCase()) { + case 'SELECT': + return createSelectStatementParser(options); + case 'CREATE': + return createCreateStatementParser(options); + case 'DROP': + return createDropStatementParser(options); + case 'ALTER': + return createAlterStatementParser(options); + case 'INSERT': + return createInsertStatementParser(options); + case 'UPDATE': + return createUpdateStatementParser(options); + case 'DELETE': + return createDeleteStatementParser(options); + case 'TRUNCATE': + return createTruncateStatementParser(options); + case 'DECLARE': + case 'BEGIN': + if (['oracle', 'bigquery'].includes(options.dialect)) { + return createBlockStatementParser(options); + } + // eslint-disable-next-line no-fallthrough + default: + break; + } + } + if (!options.isStrict) { + return createUnknownStatementParser(options); + } + throw new Error(`Invalid statement parser "${token.value}"`); +} +function createSelectStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + // Select + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'SELECT' }], + }, + add: (token) => { + statement.type = 'SELECT'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createBlockStatementParser(options) { + const statement = createInitialStatement(); + statement.type = 'ANON_BLOCK'; + const steps = [ + // Select + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [ + { type: 'keyword', value: 'DECLARE' }, + { type: 'keyword', value: 'BEGIN' }, + ], + }, + add: (token) => { + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createInsertStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + // Insert + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'INSERT' }], + }, + add: (token) => { + statement.type = 'INSERT'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createUpdateStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + // Update + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'UPDATE' }], + }, + add: (token) => { + statement.type = 'UPDATE'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createDeleteStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + // Delete + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'DELETE' }], + }, + add: (token) => { + statement.type = 'DELETE'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createCreateStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + // Create + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'CREATE' }], + }, + add: (token) => { + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + // Table/Database + { + preCanGoToNext: () => false, + validation: { + requireBefore: ['whitespace'], + acceptTokens: [ + ...(options.dialect !== 'sqlite' + ? [ + { type: 'keyword', value: 'DATABASE' }, + { type: 'keyword', value: 'SCHEMA' }, + ] + : []), + { type: 'keyword', value: 'TABLE' }, + { type: 'keyword', value: 'VIEW' }, + { type: 'keyword', value: 'TRIGGER' }, + { type: 'keyword', value: 'FUNCTION' }, + { type: 'keyword', value: 'INDEX' }, + { type: 'keyword', value: 'PROCEDURE' }, + ], + }, + add: (token) => { + statement.type = `CREATE_${token.value.toUpperCase()}`; + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createDropStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + // Drop + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'DROP' }], + }, + add: (token) => { + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + // Table/Database + { + preCanGoToNext: () => false, + validation: { + requireBefore: ['whitespace'], + acceptTokens: [ + ...(options.dialect !== 'sqlite' + ? [ + { type: 'keyword', value: 'DATABASE' }, + { type: 'keyword', value: 'SCHEMA' }, + ] + : []), + { type: 'keyword', value: 'TABLE' }, + { type: 'keyword', value: 'VIEW' }, + { type: 'keyword', value: 'TRIGGER' }, + { type: 'keyword', value: 'FUNCTION' }, + { type: 'keyword', value: 'INDEX' }, + ], + }, + add: (token) => { + statement.type = `DROP_${token.value.toUpperCase()}`; + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createAlterStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'ALTER' }], + }, + add: (token) => { + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + { + preCanGoToNext: () => false, + validation: { + requireBefore: ['whitespace'], + acceptTokens: [ + ...(options.dialect !== 'sqlite' + ? [ + { type: 'keyword', value: 'DATABASE' }, + { type: 'keyword', value: 'SCHEMA' }, + { type: 'keyword', value: 'TRIGGER' }, + { type: 'keyword', value: 'FUNCTION' }, + { type: 'keyword', value: 'INDEX' }, + { type: 'keyword', value: 'PROCEDURE' }, + ] + : []), + { type: 'keyword', value: 'TABLE' }, + { type: 'keyword', value: 'VIEW' }, + ], + }, + add: (token) => { + statement.type = `ALTER_${token.value.toUpperCase()}`; + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createTruncateStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + { + preCanGoToNext: () => false, + validation: { + acceptTokens: [{ type: 'keyword', value: 'TRUNCATE' }], + }, + add: (token) => { + statement.type = 'TRUNCATE'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function createUnknownStatementParser(options) { + const statement = createInitialStatement(); + const steps = [ + { + preCanGoToNext: () => false, + add: (token) => { + statement.type = 'UNKNOWN'; + if (statement.start < 0) { + statement.start = token.start; + } + }, + postCanGoToNext: () => true, + }, + ]; + return stateMachineStatementParser(statement, steps, options); +} +function stateMachineStatementParser(statement, steps, { isStrict, dialect }) { + let currentStepIndex = 0; + let prevToken; + let prevPrevToken; + let lastBlockOpener; + let anonBlockStarted = false; + let openBlocks = 0; + /* eslint arrow-body-style: 0, no-extra-parens: 0 */ + const isValidToken = (step, token) => { + if (!step.validation) { + return true; + } + return (step.validation.acceptTokens.filter((accept) => { + const isValidType = token.type === accept.type; + const isValidValue = !accept.value || token.value.toUpperCase() === accept.value; + return isValidType && isValidValue; + }).length > 0); + }; + const setPrevToken = (token) => { + prevPrevToken = prevToken; + prevToken = token; + }; + return { + getStatement() { + return statement; + }, + addToken(token) { + /* eslint no-param-reassign: 0 */ + if (statement.endStatement) { + throw new Error('This statement has already got to the end.'); + } + if (statement.type && + token.type === 'semicolon' && + (!statementsWithEnds.includes(statement.type) || (openBlocks === 0 && statement.canEnd))) { + statement.endStatement = ';'; + return; + } + if (token.value.toUpperCase() === 'END') { + openBlocks--; + if (openBlocks === 0) { + statement.canEnd = true; + } + setPrevToken(token); + return; + } + if (token.type === 'whitespace') { + setPrevToken(token); + return; + } + if (token.type === 'keyword' && + blockOpeners[dialect].includes(token.value) && + (prevPrevToken === null || prevPrevToken === void 0 ? void 0 : prevPrevToken.value.toUpperCase()) !== 'END') { + if (['oracle', 'bigquery'].includes(dialect) && + (lastBlockOpener === null || lastBlockOpener === void 0 ? void 0 : lastBlockOpener.value) === 'DECLARE' && + token.value.toUpperCase() === 'BEGIN') { + // don't open a new block! + setPrevToken(token); + lastBlockOpener = token; + return; + } + openBlocks++; + lastBlockOpener = token; + setPrevToken(token); + if (statement.type === 'ANON_BLOCK' && !anonBlockStarted) { + anonBlockStarted = true; + // don't return + } + else { + return; + } + } + if (token.type === 'parameter' && + (token.value === '?' || !statement.parameters.includes(token.value))) { + statement.parameters.push(token.value); + } + if (statement.type && statement.start >= 0) { + // statement has already been identified + // just wait until end of the statement + return; + } + // index modifiers + if (token.value.toUpperCase() === 'UNIQUE' || + (dialect === 'mysql' && ['FULLTEXT', 'SPATIAL'].includes(token.value.toUpperCase())) || + (dialect === 'mssql' && ['CLUSTERED', 'NONCLUSTERED'].includes(token.value.toUpperCase()))) { + setPrevToken(token); + return; + } + if (['psql', 'mssql'].includes(dialect) && token.value.toUpperCase() === 'MATERIALIZED') { + setPrevToken(token); + return; + } + // psql allows for optional "OR REPLACE" between "CREATE" and "FUNCTION" + // mysql and psql allow it between "CREATE" and "VIEW" + if (['psql', 'mysql', 'bigquery'].includes(dialect) && + ['OR', 'REPLACE'].includes(token.value.toUpperCase())) { + setPrevToken(token); + return; + } + if (['psql', 'sqlite'].includes(dialect) && + ['TEMP', 'TEMPORARY'].includes(token.value.toUpperCase())) { + setPrevToken(token); + return; + } + // MySQL allows for setting a definer for a function which specifies who the function is executed as. + // This clause is optional, and is defined between the "CREATE" and "FUNCTION" keywords for the statement. + if (dialect === 'mysql' && token.value.toUpperCase() === 'DEFINER') { + statement.definer = 0; + setPrevToken(token); + return; + } + if (statement.definer === 0 && token.value === '=') { + statement.definer++; + setPrevToken(token); + return; + } + if (statement.definer !== undefined && statement.definer > 0) { + if (statement.definer === 1 && prevToken.type === 'whitespace') { + statement.definer++; + setPrevToken(token); + return; + } + if (statement.definer > 1 && prevToken.type !== 'whitespace') { + setPrevToken(token); + return; + } + delete statement.definer; + } + if (dialect === 'mysql' && token.value.toUpperCase() === 'ALGORITHM') { + statement.algorithm = 0; + setPrevToken(token); + return; + } + if (statement.algorithm === 0 && token.value === '=') { + statement.algorithm++; + setPrevToken(token); + return; + } + if (statement.algorithm !== undefined && statement.algorithm > 0) { + if (statement.algorithm === 1 && prevToken.type === 'whitespace') { + statement.algorithm++; + setPrevToken(token); + return; + } + if (statement.algorithm > 1 && + ['UNDEFINED', 'MERGE', 'TEMPTABLE'].includes(prevToken.value.toUpperCase())) { + setPrevToken(token); + return; + } + delete statement.algorithm; + } + if (dialect === 'mysql' && token.value.toUpperCase() === 'SQL') { + statement.sqlSecurity = 0; + setPrevToken(token); + return; + } + if (statement.sqlSecurity !== undefined) { + if ((statement.sqlSecurity === 0 && token.value.toUpperCase() === 'SECURITY') || + (statement.sqlSecurity === 1 && + ['DEFINER', 'INVOKER'].includes(token.value.toUpperCase()))) { + statement.sqlSecurity++; + setPrevToken(token); + return; + } + else if (statement.sqlSecurity === 2) { + delete statement.sqlSecurity; + } + } + let currentStep = steps[currentStepIndex]; + if (currentStep.preCanGoToNext(token)) { + currentStepIndex++; + currentStep = steps[currentStepIndex]; + } + if (currentStep.validation && + currentStep.validation.requireBefore && + !currentStep.validation.requireBefore.includes(prevToken.type)) { + const requireds = currentStep.validation.requireBefore.join(' or '); + throw new Error(`Expected any of these tokens ${requireds} before "${token.value}" (currentStep=${currentStepIndex}).`); + } + if (!isValidToken(currentStep, token) && isStrict) { + const expecteds = currentStep.validation + ? currentStep.validation.acceptTokens + .map((accept) => `(type="${accept.type}" value="${accept.value}")`) + .join(' or ') + : '()'; + throw new Error(`Expected any of these tokens ${expecteds} instead of type="${token.type}" value="${token.value}" (currentStep=${currentStepIndex}).`); + } + currentStep.add(token); + statement.executionType = + statement.type && exports.EXECUTION_TYPES[statement.type] + ? exports.EXECUTION_TYPES[statement.type] + : 'UNKNOWN'; + if (currentStep.postCanGoToNext(token)) { + currentStepIndex++; + } + setPrevToken(token); + }, + }; +} diff --git a/lib/tokenizer.d.ts b/lib/tokenizer.d.ts new file mode 100644 index 0000000..d065550 --- /dev/null +++ b/lib/tokenizer.d.ts @@ -0,0 +1,5 @@ +/** + * Tokenizer + */ +import type { Token, State, Dialect } from './defines'; +export declare function scanToken(state: State, dialect?: Dialect): Token; diff --git a/lib/tokenizer.js b/lib/tokenizer.js new file mode 100644 index 0000000..6ad8e23 --- /dev/null +++ b/lib/tokenizer.js @@ -0,0 +1,365 @@ +"use strict"; +/** + * Tokenizer + */ +Object.defineProperty(exports, "__esModule", { value: true }); +exports.scanToken = void 0; +const KEYWORDS = [ + 'SELECT', + 'INSERT', + 'DELETE', + 'UPDATE', + 'CREATE', + 'DROP', + 'DATABASE', + 'SCHEMA', + 'TABLE', + 'VIEW', + 'TRIGGER', + 'FUNCTION', + 'INDEX', + 'ALTER', + 'TRUNCATE', + 'WITH', + 'AS', + 'MATERIALIZED', + 'BEGIN', + 'DECLARE', + 'CASE', + 'PROCEDURE', +]; +const INDIVIDUALS = { + ';': 'semicolon', +}; +const ENDTOKENS = { + '"': '"', + "'": "'", + '`': '`', + '[': ']', +}; +function scanToken(state, dialect = 'generic') { + const ch = read(state); + if (isWhitespace(ch)) { + return scanWhitespace(state); + } + if (isCommentInline(ch, state)) { + return scanCommentInline(state); + } + if (isCommentBlock(ch, state)) { + return scanCommentBlock(state); + } + if (isString(ch, dialect) && ch !== null) { + return scanString(state, ENDTOKENS[ch]); + } + if (isParameter(ch, state, dialect)) { + return scanParameter(state, dialect); + } + if (isDollarQuotedString(state)) { + return scanDollarQuotedString(state); + } + if (isQuotedIdentifier(ch, dialect) && ch !== null) { + return scanQuotedIdentifier(state, ENDTOKENS[ch]); + } + if (isLetter(ch)) { + return scanWord(state); + } + const individual = scanIndividualCharacter(state); + if (individual) { + return individual; + } + return skipChar(state); +} +exports.scanToken = scanToken; +function read(state, skip = 0) { + if (state.position + skip === state.input.length - 1) { + return null; + } + state.position += 1 + skip; + return state.input[state.position]; +} +function unread(state) { + if (state.position === state.start) { + return; + } + state.position--; +} +function peek(state) { + if (state.position >= state.input.length - 1) { + return null; + } + return state.input[state.position + 1]; +} +function isKeyword(word) { + return KEYWORDS.includes(word.toUpperCase()); +} +function resolveIndividualTokenType(ch) { + return INDIVIDUALS[ch]; +} +function scanWhitespace(state) { + let nextChar; + do { + nextChar = read(state); + } while (isWhitespace(nextChar)); + if (nextChar !== null && !isWhitespace(nextChar)) { + unread(state); + } + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'whitespace', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanCommentInline(state) { + let nextChar; + do { + nextChar = read(state); + } while (nextChar !== '\n' && nextChar !== null); + if (nextChar !== null && nextChar !== '\n') { + unread(state); + } + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'comment-inline', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanDollarQuotedString(state) { + const match = /^(\$[a-zA-Z0-9_]*\$)/.exec(state.input.slice(state.start)); + if (!match) { + throw new Error('Could not find dollar quoted string opener'); + } + const label = match[1]; + for (let i = 0; i < label.length - 1; i++) { + read(state); + } + let nextChar = ''; + while (state.input.slice(state.position, state.position + label.length) !== label && + nextChar !== null) { + do { + nextChar = read(state); + } while (nextChar !== '$' && nextChar !== null); + if (nextChar !== '$' && nextChar !== null) { + unread(state); + } + } + for (let i = 0; i < label.length - 1; i++) { + read(state); + } + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'string', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanString(state, endToken) { + let nextChar; + do { + nextChar = read(state); + // supporting double quote escaping: 'str''ing' + if (nextChar === endToken) { + if (peek(state) === endToken) { + nextChar = read(state, 1); + } + } + } while (nextChar !== endToken && nextChar !== null); + if (nextChar !== null && endToken !== nextChar) { + unread(state); + } + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'string', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanParameter(state, dialect) { + if (['mysql', 'generic', 'sqlite'].includes(dialect)) { + return { + type: 'parameter', + value: state.input.slice(state.start, state.position + 1), + start: state.start, + end: state.start, + }; + } + if (dialect === 'psql') { + let nextChar; + do { + nextChar = read(state); + } while (!isNaN(Number(nextChar)) && !isWhitespace(nextChar) && nextChar !== null); + if (isWhitespace(nextChar)) + unread(state); + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'parameter', + value, + start: state.start, + end: state.start + value.length - 1, + }; + } + if (dialect === 'mssql') { + let nextChar; + do { + nextChar = read(state); + } while (!isWhitespace(nextChar) && nextChar !== null); + if (isWhitespace(nextChar)) + unread(state); + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'parameter', + value, + start: state.start, + end: state.start + value.length - 1, + }; + } + return { + type: 'parameter', + value: 'unknown', + start: state.start, + end: state.end, + }; +} +function scanCommentBlock(state) { + let nextChar = ''; + let prevChar; + do { + prevChar = nextChar; + nextChar = read(state); + } while ((prevChar || '') + (nextChar || '') !== '*/' && nextChar !== null); + if (nextChar !== null && nextChar !== '/') { + unread(state); + } + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'comment-block', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanQuotedIdentifier(state, endToken) { + let nextChar; + do { + nextChar = read(state); + } while (endToken !== nextChar && nextChar !== null); + if (nextChar !== null && endToken !== nextChar) { + unread(state); + } + const value = state.input.slice(state.start, state.position + 1); + return { + type: 'keyword', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanWord(state) { + let nextChar; + do { + nextChar = read(state); + } while (isLetter(nextChar)); + if (nextChar !== null && !isLetter(nextChar)) { + unread(state); + } + const value = state.input.slice(state.start, state.position + 1); + if (!isKeyword(value)) { + return skipWord(state, value); + } + return { + type: 'keyword', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function scanIndividualCharacter(state) { + const value = state.input.slice(state.start, state.position + 1); + const type = resolveIndividualTokenType(value); + if (!type) { + return null; + } + return { + type, + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function skipChar(state) { + return { + type: 'unknown', + value: state.input.slice(state.start, state.position + 1), + start: state.start, + end: state.start, + }; +} +function skipWord(state, value) { + return { + type: 'unknown', + value, + start: state.start, + end: state.start + value.length - 1, + }; +} +function isWhitespace(ch) { + return ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r'; +} +function isString(ch, dialect) { + const stringStart = dialect === 'mysql' ? ["'", '"'] : ["'"]; + return stringStart.includes(ch); +} +function isParameter(ch, state, dialect) { + let pStart = '?'; // ansi standard - sqlite, mysql + if (dialect === 'psql') { + pStart = '$'; + const nextChar = peek(state); + if (nextChar === null || isNaN(Number(nextChar))) { + return false; + } + } + if (dialect === 'mssql') + pStart = ':'; + return ch === pStart; +} +function isDollarQuotedString(state) { + return /^\$[\w]*\$/.exec(state.input.slice(state.start)) !== null; +} +function isQuotedIdentifier(ch, dialect) { + const startQuoteChars = dialect === 'mssql' ? ['"', '['] : ['"', '`']; + return startQuoteChars.includes(ch); +} +function isCommentInline(ch, state) { + let isComment = ch === '-'; + if (!isComment) { + return false; + } + // lookahead + const nextChar = read(state); + isComment = nextChar === '-'; + if (!isComment) { + unread(state); + } + return isComment; +} +function isCommentBlock(ch, state) { + let isComment = ch === '/'; + if (!isComment) { + return false; + } + // lookahead + const nextChar = read(state); + isComment = nextChar === '*'; + if (!isComment) { + unread(state); + } + return isComment; +} +function isLetter(ch) { + return ch !== null && ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_'); +} From 0abbfd026df4b0e8696063e780cc5f53f5e1c41d Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 16:14:32 +0200 Subject: [PATCH 03/10] add create_procedure to all dialects except sqlite --- src/parser.ts | 4 ++-- test/identifier/single-statement.spec.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index fc57c43..3ecb218 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -256,7 +256,7 @@ function createStatementParserByToken(token: Token, options: ParseOptions): Stat return createTruncateStatementParser(options); case 'DECLARE': case 'BEGIN': - if (['oracle', 'bigquery'].includes(options.dialect)) { + if (['oracle'].includes(options.dialect)) { return createBlockStatementParser(options); } // eslint-disable-next-line no-fallthrough @@ -417,6 +417,7 @@ function createCreateStatementParser(options: ParseOptions) { ? [ { type: 'keyword', value: 'DATABASE' }, { type: 'keyword', value: 'SCHEMA' }, + { type: 'keyword', value: 'PROCEDURE' }, ] : []), { type: 'keyword', value: 'TABLE' }, @@ -424,7 +425,6 @@ function createCreateStatementParser(options: ParseOptions) { { type: 'keyword', value: 'TRIGGER' }, { type: 'keyword', value: 'FUNCTION' }, { type: 'keyword', value: 'INDEX' }, - { type: 'keyword', value: 'PROCEDURE' }, ], }, add: (token) => { diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index 8c5f822..183c367 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -89,7 +89,7 @@ describe('identifier', () => { it('should throw error for sqlite', () => { expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( - `Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="PROCEDURE") instead of type="keyword" value="${type}" (currentStep=1)`, + `Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="${type}" (currentStep=1)`, ); }); }); From a030c8e38778b04880f0d42836badfafc1a184d8 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 17:38:19 +0200 Subject: [PATCH 04/10] remove lib --- lib/defines.d.ts | 63 ---- lib/defines.js | 12 - lib/index.d.ts | 7 - lib/index.js | 33 --- lib/parser.d.ts | 13 - lib/parser.js | 699 --------------------------------------------- lib/tokenizer.d.ts | 5 - lib/tokenizer.js | 365 ----------------------- 8 files changed, 1197 deletions(-) delete mode 100644 lib/defines.d.ts delete mode 100644 lib/defines.js delete mode 100644 lib/index.d.ts delete mode 100644 lib/index.js delete mode 100644 lib/parser.d.ts delete mode 100644 lib/parser.js delete mode 100644 lib/tokenizer.d.ts delete mode 100644 lib/tokenizer.js diff --git a/lib/defines.d.ts b/lib/defines.d.ts deleted file mode 100644 index d65d7f8..0000000 --- a/lib/defines.d.ts +++ /dev/null @@ -1,63 +0,0 @@ -export declare const DIALECTS: readonly ["mssql", "sqlite", "mysql", "oracle", "psql", "bigquery", "generic"]; -export declare type Dialect = typeof DIALECTS[number]; -export declare type StatementType = 'INSERT' | 'UPDATE' | 'DELETE' | 'SELECT' | 'TRUNCATE' | 'CREATE_DATABASE' | 'CREATE_SCHEMA' | 'CREATE_TABLE' | 'CREATE_VIEW' | 'CREATE_TRIGGER' | 'CREATE_FUNCTION' | 'CREATE_INDEX' | 'CREATE_PROCEDURE' | 'DROP_DATABASE' | 'DROP_SCHEMA' | 'DROP_TABLE' | 'DROP_VIEW' | 'DROP_TRIGGER' | 'DROP_FUNCTION' | 'DROP_INDEX' | 'ALTER_DATABASE' | 'ALTER_SCHEMA' | 'ALTER_TABLE' | 'ALTER_VIEW' | 'ALTER_TRIGGER' | 'ALTER_FUNCTION' | 'ALTER_INDEX' | 'ANON_BLOCK' | 'UNKNOWN'; -export declare type ExecutionType = 'LISTING' | 'MODIFICATION' | 'ANON_BLOCK' | 'UNKNOWN'; -export interface IdentifyOptions { - strict?: boolean; - dialect?: Dialect; -} -export interface IdentifyResult { - start: number; - end: number; - text: string; - type: StatementType; - executionType: ExecutionType; - parameters: string[]; -} -export interface Statement { - start: number; - end: number; - type?: StatementType; - executionType?: ExecutionType; - endStatement?: string; - canEnd?: boolean; - definer?: number; - algorithm?: number; - sqlSecurity?: number; - parameters: string[]; -} -export interface ConcreteStatement extends Statement { - type: StatementType; - executionType: ExecutionType; -} -export interface State { - start: number; - end: number; - position: number; - input: string; -} -export interface Token { - type: 'whitespace' | 'comment-inline' | 'comment-block' | 'string' | 'semicolon' | 'keyword' | 'parameter' | 'unknown'; - value: string; - start: number; - end: number; -} -export interface ParseResult { - type: 'QUERY'; - start: number; - end: number; - body: ConcreteStatement[]; - tokens: Token[]; -} -export interface Step { - preCanGoToNext: (token?: Token) => boolean; - validation?: { - requireBefore?: string[]; - acceptTokens: { - type: string; - value: string; - }[]; - }; - add: (token: Token) => void; - postCanGoToNext: (token?: Token) => boolean; -} diff --git a/lib/defines.js b/lib/defines.js deleted file mode 100644 index e5868a6..0000000 --- a/lib/defines.js +++ /dev/null @@ -1,12 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.DIALECTS = void 0; -exports.DIALECTS = [ - 'mssql', - 'sqlite', - 'mysql', - 'oracle', - 'psql', - 'bigquery', - 'generic', -]; diff --git a/lib/index.d.ts b/lib/index.d.ts deleted file mode 100644 index 0e7a0c3..0000000 --- a/lib/index.d.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { ExecutionType, IdentifyOptions, IdentifyResult } from './defines'; -export type { ExecutionType, Dialect, IdentifyOptions as Options, IdentifyResult as Result, StatementType, } from './defines'; -/** - * Identifier - */ -export declare function identify(query: string, options?: IdentifyOptions): IdentifyResult[]; -export declare function getExecutionType(command: string): ExecutionType; diff --git a/lib/index.js b/lib/index.js deleted file mode 100644 index af20570..0000000 --- a/lib/index.js +++ /dev/null @@ -1,33 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.getExecutionType = exports.identify = void 0; -const parser_1 = require("./parser"); -const defines_1 = require("./defines"); -/** - * Identifier - */ -function identify(query, options = {}) { - const isStrict = typeof options.strict === 'undefined' ? true : options.strict === true; - const dialect = typeof options.dialect === 'undefined' ? 'generic' : options.dialect; - if (!defines_1.DIALECTS.includes(dialect)) { - throw new Error(`Unknown dialect. Allowed values: ${defines_1.DIALECTS.join(', ')}`); - } - const result = (0, parser_1.parse)(query, isStrict, dialect); - return result.body.map((statement) => { - const result = { - start: statement.start, - end: statement.end, - text: query.substring(statement.start, statement.end + 1), - type: statement.type, - executionType: statement.executionType, - // we want to sort the postgres params: $1 $2 $3, regardless of the order they appear - parameters: dialect === 'psql' ? statement.parameters.sort() : statement.parameters, - }; - return result; - }); -} -exports.identify = identify; -function getExecutionType(command) { - return parser_1.EXECUTION_TYPES[command] || 'UNKNOWN'; -} -exports.getExecutionType = getExecutionType; diff --git a/lib/parser.d.ts b/lib/parser.d.ts deleted file mode 100644 index 7d23efa..0000000 --- a/lib/parser.d.ts +++ /dev/null @@ -1,13 +0,0 @@ -import type { ExecutionType, Dialect, StatementType, ParseResult } from './defines'; -/** - * Execution types allow to know what is the query behavior - * - LISTING: is when the query list the data - * - MODIFICATION: is when the query modificate the database somehow (structure or data) - * - INFORMATION: is show some data information such as a profile data - * - UNKNOWN - */ -export declare const EXECUTION_TYPES: Record; -/** - * Parser - */ -export declare function parse(input: string, isStrict?: boolean, dialect?: Dialect): ParseResult; diff --git a/lib/parser.js b/lib/parser.js deleted file mode 100644 index 5ea87fe..0000000 --- a/lib/parser.js +++ /dev/null @@ -1,699 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.parse = exports.EXECUTION_TYPES = void 0; -const tokenizer_1 = require("./tokenizer"); -/** - * Execution types allow to know what is the query behavior - * - LISTING: is when the query list the data - * - MODIFICATION: is when the query modificate the database somehow (structure or data) - * - INFORMATION: is show some data information such as a profile data - * - UNKNOWN - */ -exports.EXECUTION_TYPES = { - SELECT: 'LISTING', - INSERT: 'MODIFICATION', - DELETE: 'MODIFICATION', - UPDATE: 'MODIFICATION', - TRUNCATE: 'MODIFICATION', - CREATE_DATABASE: 'MODIFICATION', - CREATE_SCHEMA: 'MODIFICATION', - CREATE_TABLE: 'MODIFICATION', - CREATE_VIEW: 'MODIFICATION', - CREATE_TRIGGER: 'MODIFICATION', - CREATE_FUNCTION: 'MODIFICATION', - CREATE_INDEX: 'MODIFICATION', - CREATE_PROCEDURE: 'MODIFICATION', - DROP_DATABASE: 'MODIFICATION', - DROP_SCHEMA: 'MODIFICATION', - DROP_TABLE: 'MODIFICATION', - DROP_VIEW: 'MODIFICATION', - DROP_TRIGGER: 'MODIFICATION', - DROP_FUNCTION: 'MODIFICATION', - DROP_INDEX: 'MODIFICATION', - ALTER_DATABASE: 'MODIFICATION', - ALTER_SCHEMA: 'MODIFICATION', - ALTER_TABLE: 'MODIFICATION', - ALTER_VIEW: 'MODIFICATION', - ALTER_TRIGGER: 'MODIFICATION', - ALTER_FUNCTION: 'MODIFICATION', - ALTER_INDEX: 'MODIFICATION', - UNKNOWN: 'UNKNOWN', - ANON_BLOCK: 'ANON_BLOCK', -}; -const statementsWithEnds = ['CREATE_TRIGGER', 'CREATE_FUNCTION', 'CREATE_PROCEDURE', 'ANON_BLOCK']; -const blockOpeners = { - generic: ['BEGIN', 'CASE'], - psql: ['BEGIN', 'CASE', 'LOOP', 'IF'], - mysql: ['BEGIN', 'CASE', 'LOOP', 'IF'], - mssql: ['BEGIN', 'CASE'], - sqlite: ['BEGIN', 'CASE'], - oracle: ['DECLARE', 'BEGIN', 'CASE'], - bigquery: ['DECLARE', 'BEGIN', 'CASE'], -}; -function createInitialStatement() { - return { - start: -1, - end: 0, - parameters: [], - }; -} -/** - * Parser - */ -function parse(input, isStrict = true, dialect = 'generic') { - const topLevelState = initState({ input }); - const topLevelStatement = { - type: 'QUERY', - start: 0, - end: input.length - 1, - body: [], - tokens: [], - }; - let prevState = topLevelState; - let statementParser = null; - const cteState = { - isCte: false, - asSeen: false, - statementEnd: false, - parens: 0, - state: topLevelState, - }; - const ignoreOutsideBlankTokens = ['whitespace', 'comment-inline', 'comment-block', 'semicolon']; - while (prevState.position < topLevelState.end) { - const tokenState = initState({ prevState }); - const token = (0, tokenizer_1.scanToken)(tokenState, dialect); - if (!statementParser) { - // ignore blank tokens before the start of a CTE / not part of a statement - if (!cteState.isCte && ignoreOutsideBlankTokens.includes(token.type)) { - topLevelStatement.tokens.push(token); - prevState = tokenState; - continue; - } - else if (!cteState.isCte && - token.type === 'keyword' && - token.value.toUpperCase() === 'WITH') { - cteState.isCte = true; - topLevelStatement.tokens.push(token); - cteState.state = tokenState; - prevState = tokenState; - continue; - // If we're scanning in a CTE, handle someone putting a semicolon anywhere (after 'with', - // after semicolon, etc.) along it to "early terminate". - } - else if (cteState.isCte && token.type === 'semicolon') { - topLevelStatement.tokens.push(token); - prevState = tokenState; - topLevelStatement.body.push({ - start: cteState.state.start, - end: token.end, - type: 'UNKNOWN', - executionType: 'UNKNOWN', - parameters: [], - }); - cteState.isCte = false; - cteState.asSeen = false; - cteState.statementEnd = false; - cteState.parens = 0; - continue; - } - else if (cteState.isCte && !cteState.statementEnd) { - if (cteState.asSeen) { - if (token.value === '(') { - cteState.parens++; - } - else if (token.value === ')') { - cteState.parens--; - if (cteState.parens === 0) { - cteState.statementEnd = true; - } - } - } - else if (token.value.toUpperCase() === 'AS') { - cteState.asSeen = true; - } - topLevelStatement.tokens.push(token); - prevState = tokenState; - continue; - } - else if (cteState.isCte && cteState.statementEnd && token.value === ',') { - cteState.asSeen = false; - cteState.statementEnd = false; - topLevelStatement.tokens.push(token); - prevState = tokenState; - continue; - // Ignore blank tokens after the end of the CTE till start of statement - } - else if (cteState.isCte && - cteState.statementEnd && - ignoreOutsideBlankTokens.includes(token.type)) { - topLevelStatement.tokens.push(token); - prevState = tokenState; - continue; - } - statementParser = createStatementParserByToken(token, { isStrict, dialect }); - if (cteState.isCte) { - statementParser.getStatement().start = cteState.state.start; - cteState.isCte = false; - cteState.asSeen = false; - cteState.statementEnd = false; - } - } - statementParser.addToken(token); - topLevelStatement.tokens.push(token); - prevState = tokenState; - const statement = statementParser.getStatement(); - if (statement.endStatement) { - statement.end = token.end; - topLevelStatement.body.push(statement); - statementParser = null; - } - } - // last statement without ending key - if (statementParser) { - const statement = statementParser.getStatement(); - if (!statement.endStatement) { - statement.end = topLevelStatement.end; - topLevelStatement.body.push(statement); - } - } - return topLevelStatement; -} -exports.parse = parse; -function initState({ input, prevState }) { - if (prevState) { - return { - input: prevState.input, - position: prevState.position, - start: prevState.position + 1, - end: prevState.input.length - 1, - }; - } - else if (input === undefined) { - throw new Error('You must define either input or prevState'); - } - return { - input, - position: -1, - start: 0, - end: input.length - 1, - }; -} -function createStatementParserByToken(token, options) { - if (token.type === 'keyword') { - switch (token.value.toUpperCase()) { - case 'SELECT': - return createSelectStatementParser(options); - case 'CREATE': - return createCreateStatementParser(options); - case 'DROP': - return createDropStatementParser(options); - case 'ALTER': - return createAlterStatementParser(options); - case 'INSERT': - return createInsertStatementParser(options); - case 'UPDATE': - return createUpdateStatementParser(options); - case 'DELETE': - return createDeleteStatementParser(options); - case 'TRUNCATE': - return createTruncateStatementParser(options); - case 'DECLARE': - case 'BEGIN': - if (['oracle', 'bigquery'].includes(options.dialect)) { - return createBlockStatementParser(options); - } - // eslint-disable-next-line no-fallthrough - default: - break; - } - } - if (!options.isStrict) { - return createUnknownStatementParser(options); - } - throw new Error(`Invalid statement parser "${token.value}"`); -} -function createSelectStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - // Select - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'SELECT' }], - }, - add: (token) => { - statement.type = 'SELECT'; - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createBlockStatementParser(options) { - const statement = createInitialStatement(); - statement.type = 'ANON_BLOCK'; - const steps = [ - // Select - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [ - { type: 'keyword', value: 'DECLARE' }, - { type: 'keyword', value: 'BEGIN' }, - ], - }, - add: (token) => { - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createInsertStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - // Insert - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'INSERT' }], - }, - add: (token) => { - statement.type = 'INSERT'; - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createUpdateStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - // Update - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'UPDATE' }], - }, - add: (token) => { - statement.type = 'UPDATE'; - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createDeleteStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - // Delete - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'DELETE' }], - }, - add: (token) => { - statement.type = 'DELETE'; - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createCreateStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - // Create - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'CREATE' }], - }, - add: (token) => { - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - // Table/Database - { - preCanGoToNext: () => false, - validation: { - requireBefore: ['whitespace'], - acceptTokens: [ - ...(options.dialect !== 'sqlite' - ? [ - { type: 'keyword', value: 'DATABASE' }, - { type: 'keyword', value: 'SCHEMA' }, - ] - : []), - { type: 'keyword', value: 'TABLE' }, - { type: 'keyword', value: 'VIEW' }, - { type: 'keyword', value: 'TRIGGER' }, - { type: 'keyword', value: 'FUNCTION' }, - { type: 'keyword', value: 'INDEX' }, - { type: 'keyword', value: 'PROCEDURE' }, - ], - }, - add: (token) => { - statement.type = `CREATE_${token.value.toUpperCase()}`; - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createDropStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - // Drop - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'DROP' }], - }, - add: (token) => { - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - // Table/Database - { - preCanGoToNext: () => false, - validation: { - requireBefore: ['whitespace'], - acceptTokens: [ - ...(options.dialect !== 'sqlite' - ? [ - { type: 'keyword', value: 'DATABASE' }, - { type: 'keyword', value: 'SCHEMA' }, - ] - : []), - { type: 'keyword', value: 'TABLE' }, - { type: 'keyword', value: 'VIEW' }, - { type: 'keyword', value: 'TRIGGER' }, - { type: 'keyword', value: 'FUNCTION' }, - { type: 'keyword', value: 'INDEX' }, - ], - }, - add: (token) => { - statement.type = `DROP_${token.value.toUpperCase()}`; - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createAlterStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'ALTER' }], - }, - add: (token) => { - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - { - preCanGoToNext: () => false, - validation: { - requireBefore: ['whitespace'], - acceptTokens: [ - ...(options.dialect !== 'sqlite' - ? [ - { type: 'keyword', value: 'DATABASE' }, - { type: 'keyword', value: 'SCHEMA' }, - { type: 'keyword', value: 'TRIGGER' }, - { type: 'keyword', value: 'FUNCTION' }, - { type: 'keyword', value: 'INDEX' }, - { type: 'keyword', value: 'PROCEDURE' }, - ] - : []), - { type: 'keyword', value: 'TABLE' }, - { type: 'keyword', value: 'VIEW' }, - ], - }, - add: (token) => { - statement.type = `ALTER_${token.value.toUpperCase()}`; - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createTruncateStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - { - preCanGoToNext: () => false, - validation: { - acceptTokens: [{ type: 'keyword', value: 'TRUNCATE' }], - }, - add: (token) => { - statement.type = 'TRUNCATE'; - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function createUnknownStatementParser(options) { - const statement = createInitialStatement(); - const steps = [ - { - preCanGoToNext: () => false, - add: (token) => { - statement.type = 'UNKNOWN'; - if (statement.start < 0) { - statement.start = token.start; - } - }, - postCanGoToNext: () => true, - }, - ]; - return stateMachineStatementParser(statement, steps, options); -} -function stateMachineStatementParser(statement, steps, { isStrict, dialect }) { - let currentStepIndex = 0; - let prevToken; - let prevPrevToken; - let lastBlockOpener; - let anonBlockStarted = false; - let openBlocks = 0; - /* eslint arrow-body-style: 0, no-extra-parens: 0 */ - const isValidToken = (step, token) => { - if (!step.validation) { - return true; - } - return (step.validation.acceptTokens.filter((accept) => { - const isValidType = token.type === accept.type; - const isValidValue = !accept.value || token.value.toUpperCase() === accept.value; - return isValidType && isValidValue; - }).length > 0); - }; - const setPrevToken = (token) => { - prevPrevToken = prevToken; - prevToken = token; - }; - return { - getStatement() { - return statement; - }, - addToken(token) { - /* eslint no-param-reassign: 0 */ - if (statement.endStatement) { - throw new Error('This statement has already got to the end.'); - } - if (statement.type && - token.type === 'semicolon' && - (!statementsWithEnds.includes(statement.type) || (openBlocks === 0 && statement.canEnd))) { - statement.endStatement = ';'; - return; - } - if (token.value.toUpperCase() === 'END') { - openBlocks--; - if (openBlocks === 0) { - statement.canEnd = true; - } - setPrevToken(token); - return; - } - if (token.type === 'whitespace') { - setPrevToken(token); - return; - } - if (token.type === 'keyword' && - blockOpeners[dialect].includes(token.value) && - (prevPrevToken === null || prevPrevToken === void 0 ? void 0 : prevPrevToken.value.toUpperCase()) !== 'END') { - if (['oracle', 'bigquery'].includes(dialect) && - (lastBlockOpener === null || lastBlockOpener === void 0 ? void 0 : lastBlockOpener.value) === 'DECLARE' && - token.value.toUpperCase() === 'BEGIN') { - // don't open a new block! - setPrevToken(token); - lastBlockOpener = token; - return; - } - openBlocks++; - lastBlockOpener = token; - setPrevToken(token); - if (statement.type === 'ANON_BLOCK' && !anonBlockStarted) { - anonBlockStarted = true; - // don't return - } - else { - return; - } - } - if (token.type === 'parameter' && - (token.value === '?' || !statement.parameters.includes(token.value))) { - statement.parameters.push(token.value); - } - if (statement.type && statement.start >= 0) { - // statement has already been identified - // just wait until end of the statement - return; - } - // index modifiers - if (token.value.toUpperCase() === 'UNIQUE' || - (dialect === 'mysql' && ['FULLTEXT', 'SPATIAL'].includes(token.value.toUpperCase())) || - (dialect === 'mssql' && ['CLUSTERED', 'NONCLUSTERED'].includes(token.value.toUpperCase()))) { - setPrevToken(token); - return; - } - if (['psql', 'mssql'].includes(dialect) && token.value.toUpperCase() === 'MATERIALIZED') { - setPrevToken(token); - return; - } - // psql allows for optional "OR REPLACE" between "CREATE" and "FUNCTION" - // mysql and psql allow it between "CREATE" and "VIEW" - if (['psql', 'mysql', 'bigquery'].includes(dialect) && - ['OR', 'REPLACE'].includes(token.value.toUpperCase())) { - setPrevToken(token); - return; - } - if (['psql', 'sqlite'].includes(dialect) && - ['TEMP', 'TEMPORARY'].includes(token.value.toUpperCase())) { - setPrevToken(token); - return; - } - // MySQL allows for setting a definer for a function which specifies who the function is executed as. - // This clause is optional, and is defined between the "CREATE" and "FUNCTION" keywords for the statement. - if (dialect === 'mysql' && token.value.toUpperCase() === 'DEFINER') { - statement.definer = 0; - setPrevToken(token); - return; - } - if (statement.definer === 0 && token.value === '=') { - statement.definer++; - setPrevToken(token); - return; - } - if (statement.definer !== undefined && statement.definer > 0) { - if (statement.definer === 1 && prevToken.type === 'whitespace') { - statement.definer++; - setPrevToken(token); - return; - } - if (statement.definer > 1 && prevToken.type !== 'whitespace') { - setPrevToken(token); - return; - } - delete statement.definer; - } - if (dialect === 'mysql' && token.value.toUpperCase() === 'ALGORITHM') { - statement.algorithm = 0; - setPrevToken(token); - return; - } - if (statement.algorithm === 0 && token.value === '=') { - statement.algorithm++; - setPrevToken(token); - return; - } - if (statement.algorithm !== undefined && statement.algorithm > 0) { - if (statement.algorithm === 1 && prevToken.type === 'whitespace') { - statement.algorithm++; - setPrevToken(token); - return; - } - if (statement.algorithm > 1 && - ['UNDEFINED', 'MERGE', 'TEMPTABLE'].includes(prevToken.value.toUpperCase())) { - setPrevToken(token); - return; - } - delete statement.algorithm; - } - if (dialect === 'mysql' && token.value.toUpperCase() === 'SQL') { - statement.sqlSecurity = 0; - setPrevToken(token); - return; - } - if (statement.sqlSecurity !== undefined) { - if ((statement.sqlSecurity === 0 && token.value.toUpperCase() === 'SECURITY') || - (statement.sqlSecurity === 1 && - ['DEFINER', 'INVOKER'].includes(token.value.toUpperCase()))) { - statement.sqlSecurity++; - setPrevToken(token); - return; - } - else if (statement.sqlSecurity === 2) { - delete statement.sqlSecurity; - } - } - let currentStep = steps[currentStepIndex]; - if (currentStep.preCanGoToNext(token)) { - currentStepIndex++; - currentStep = steps[currentStepIndex]; - } - if (currentStep.validation && - currentStep.validation.requireBefore && - !currentStep.validation.requireBefore.includes(prevToken.type)) { - const requireds = currentStep.validation.requireBefore.join(' or '); - throw new Error(`Expected any of these tokens ${requireds} before "${token.value}" (currentStep=${currentStepIndex}).`); - } - if (!isValidToken(currentStep, token) && isStrict) { - const expecteds = currentStep.validation - ? currentStep.validation.acceptTokens - .map((accept) => `(type="${accept.type}" value="${accept.value}")`) - .join(' or ') - : '()'; - throw new Error(`Expected any of these tokens ${expecteds} instead of type="${token.type}" value="${token.value}" (currentStep=${currentStepIndex}).`); - } - currentStep.add(token); - statement.executionType = - statement.type && exports.EXECUTION_TYPES[statement.type] - ? exports.EXECUTION_TYPES[statement.type] - : 'UNKNOWN'; - if (currentStep.postCanGoToNext(token)) { - currentStepIndex++; - } - setPrevToken(token); - }, - }; -} diff --git a/lib/tokenizer.d.ts b/lib/tokenizer.d.ts deleted file mode 100644 index d065550..0000000 --- a/lib/tokenizer.d.ts +++ /dev/null @@ -1,5 +0,0 @@ -/** - * Tokenizer - */ -import type { Token, State, Dialect } from './defines'; -export declare function scanToken(state: State, dialect?: Dialect): Token; diff --git a/lib/tokenizer.js b/lib/tokenizer.js deleted file mode 100644 index 6ad8e23..0000000 --- a/lib/tokenizer.js +++ /dev/null @@ -1,365 +0,0 @@ -"use strict"; -/** - * Tokenizer - */ -Object.defineProperty(exports, "__esModule", { value: true }); -exports.scanToken = void 0; -const KEYWORDS = [ - 'SELECT', - 'INSERT', - 'DELETE', - 'UPDATE', - 'CREATE', - 'DROP', - 'DATABASE', - 'SCHEMA', - 'TABLE', - 'VIEW', - 'TRIGGER', - 'FUNCTION', - 'INDEX', - 'ALTER', - 'TRUNCATE', - 'WITH', - 'AS', - 'MATERIALIZED', - 'BEGIN', - 'DECLARE', - 'CASE', - 'PROCEDURE', -]; -const INDIVIDUALS = { - ';': 'semicolon', -}; -const ENDTOKENS = { - '"': '"', - "'": "'", - '`': '`', - '[': ']', -}; -function scanToken(state, dialect = 'generic') { - const ch = read(state); - if (isWhitespace(ch)) { - return scanWhitespace(state); - } - if (isCommentInline(ch, state)) { - return scanCommentInline(state); - } - if (isCommentBlock(ch, state)) { - return scanCommentBlock(state); - } - if (isString(ch, dialect) && ch !== null) { - return scanString(state, ENDTOKENS[ch]); - } - if (isParameter(ch, state, dialect)) { - return scanParameter(state, dialect); - } - if (isDollarQuotedString(state)) { - return scanDollarQuotedString(state); - } - if (isQuotedIdentifier(ch, dialect) && ch !== null) { - return scanQuotedIdentifier(state, ENDTOKENS[ch]); - } - if (isLetter(ch)) { - return scanWord(state); - } - const individual = scanIndividualCharacter(state); - if (individual) { - return individual; - } - return skipChar(state); -} -exports.scanToken = scanToken; -function read(state, skip = 0) { - if (state.position + skip === state.input.length - 1) { - return null; - } - state.position += 1 + skip; - return state.input[state.position]; -} -function unread(state) { - if (state.position === state.start) { - return; - } - state.position--; -} -function peek(state) { - if (state.position >= state.input.length - 1) { - return null; - } - return state.input[state.position + 1]; -} -function isKeyword(word) { - return KEYWORDS.includes(word.toUpperCase()); -} -function resolveIndividualTokenType(ch) { - return INDIVIDUALS[ch]; -} -function scanWhitespace(state) { - let nextChar; - do { - nextChar = read(state); - } while (isWhitespace(nextChar)); - if (nextChar !== null && !isWhitespace(nextChar)) { - unread(state); - } - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'whitespace', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanCommentInline(state) { - let nextChar; - do { - nextChar = read(state); - } while (nextChar !== '\n' && nextChar !== null); - if (nextChar !== null && nextChar !== '\n') { - unread(state); - } - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'comment-inline', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanDollarQuotedString(state) { - const match = /^(\$[a-zA-Z0-9_]*\$)/.exec(state.input.slice(state.start)); - if (!match) { - throw new Error('Could not find dollar quoted string opener'); - } - const label = match[1]; - for (let i = 0; i < label.length - 1; i++) { - read(state); - } - let nextChar = ''; - while (state.input.slice(state.position, state.position + label.length) !== label && - nextChar !== null) { - do { - nextChar = read(state); - } while (nextChar !== '$' && nextChar !== null); - if (nextChar !== '$' && nextChar !== null) { - unread(state); - } - } - for (let i = 0; i < label.length - 1; i++) { - read(state); - } - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'string', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanString(state, endToken) { - let nextChar; - do { - nextChar = read(state); - // supporting double quote escaping: 'str''ing' - if (nextChar === endToken) { - if (peek(state) === endToken) { - nextChar = read(state, 1); - } - } - } while (nextChar !== endToken && nextChar !== null); - if (nextChar !== null && endToken !== nextChar) { - unread(state); - } - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'string', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanParameter(state, dialect) { - if (['mysql', 'generic', 'sqlite'].includes(dialect)) { - return { - type: 'parameter', - value: state.input.slice(state.start, state.position + 1), - start: state.start, - end: state.start, - }; - } - if (dialect === 'psql') { - let nextChar; - do { - nextChar = read(state); - } while (!isNaN(Number(nextChar)) && !isWhitespace(nextChar) && nextChar !== null); - if (isWhitespace(nextChar)) - unread(state); - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'parameter', - value, - start: state.start, - end: state.start + value.length - 1, - }; - } - if (dialect === 'mssql') { - let nextChar; - do { - nextChar = read(state); - } while (!isWhitespace(nextChar) && nextChar !== null); - if (isWhitespace(nextChar)) - unread(state); - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'parameter', - value, - start: state.start, - end: state.start + value.length - 1, - }; - } - return { - type: 'parameter', - value: 'unknown', - start: state.start, - end: state.end, - }; -} -function scanCommentBlock(state) { - let nextChar = ''; - let prevChar; - do { - prevChar = nextChar; - nextChar = read(state); - } while ((prevChar || '') + (nextChar || '') !== '*/' && nextChar !== null); - if (nextChar !== null && nextChar !== '/') { - unread(state); - } - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'comment-block', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanQuotedIdentifier(state, endToken) { - let nextChar; - do { - nextChar = read(state); - } while (endToken !== nextChar && nextChar !== null); - if (nextChar !== null && endToken !== nextChar) { - unread(state); - } - const value = state.input.slice(state.start, state.position + 1); - return { - type: 'keyword', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanWord(state) { - let nextChar; - do { - nextChar = read(state); - } while (isLetter(nextChar)); - if (nextChar !== null && !isLetter(nextChar)) { - unread(state); - } - const value = state.input.slice(state.start, state.position + 1); - if (!isKeyword(value)) { - return skipWord(state, value); - } - return { - type: 'keyword', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function scanIndividualCharacter(state) { - const value = state.input.slice(state.start, state.position + 1); - const type = resolveIndividualTokenType(value); - if (!type) { - return null; - } - return { - type, - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function skipChar(state) { - return { - type: 'unknown', - value: state.input.slice(state.start, state.position + 1), - start: state.start, - end: state.start, - }; -} -function skipWord(state, value) { - return { - type: 'unknown', - value, - start: state.start, - end: state.start + value.length - 1, - }; -} -function isWhitespace(ch) { - return ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r'; -} -function isString(ch, dialect) { - const stringStart = dialect === 'mysql' ? ["'", '"'] : ["'"]; - return stringStart.includes(ch); -} -function isParameter(ch, state, dialect) { - let pStart = '?'; // ansi standard - sqlite, mysql - if (dialect === 'psql') { - pStart = '$'; - const nextChar = peek(state); - if (nextChar === null || isNaN(Number(nextChar))) { - return false; - } - } - if (dialect === 'mssql') - pStart = ':'; - return ch === pStart; -} -function isDollarQuotedString(state) { - return /^\$[\w]*\$/.exec(state.input.slice(state.start)) !== null; -} -function isQuotedIdentifier(ch, dialect) { - const startQuoteChars = dialect === 'mssql' ? ['"', '['] : ['"', '`']; - return startQuoteChars.includes(ch); -} -function isCommentInline(ch, state) { - let isComment = ch === '-'; - if (!isComment) { - return false; - } - // lookahead - const nextChar = read(state); - isComment = nextChar === '-'; - if (!isComment) { - unread(state); - } - return isComment; -} -function isCommentBlock(ch, state) { - let isComment = ch === '/'; - if (!isComment) { - return false; - } - // lookahead - const nextChar = read(state); - isComment = nextChar === '*'; - if (!isComment) { - unread(state); - } - return isComment; -} -function isLetter(ch) { - return ch !== null && ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_'); -} From ea973a81ead5cb561ceb4fb1e722e3ad8d8356c8 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 17:42:34 +0200 Subject: [PATCH 05/10] undo add bigquery to BEGIN statement --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 3ecb218..b3b1f7b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -256,7 +256,7 @@ function createStatementParserByToken(token: Token, options: ParseOptions): Stat return createTruncateStatementParser(options); case 'DECLARE': case 'BEGIN': - if (['oracle'].includes(options.dialect)) { + if (options.dialect === 'oracle') { return createBlockStatementParser(options); } // eslint-disable-next-line no-fallthrough From ee29727ce64e9fea82c4e8689952dc4996911273 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 17:50:02 +0200 Subject: [PATCH 06/10] add DROP PROCEDURE and ALTER PROCEDURE --- README.md | 2 ++ src/defines.ts | 2 ++ src/parser.ts | 3 ++ test/identifier/single-statement.spec.ts | 40 +++++++++++++++++++++++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0df7bba..3fbda46 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ This way you have sure is a valid query before trying to identify the types. * DROP_TRIGGER * DROP_FUNCTION * DROP_INDEX +* DROP_PROCEDURE * ALTER_DATABASE * ALTER_SCHEMA * ALTER_TABLE @@ -56,6 +57,7 @@ This way you have sure is a valid query before trying to identify the types. * ALTER_TRIGGER * ALTER_FUNCTION * ALTER_INDEX +* ALTER_PROCEDURE * ANON_BLOCK (Oracle Database only) * UNKNOWN (only available if strict mode is disabled) diff --git a/src/defines.ts b/src/defines.ts index e141da6..2b45648 100644 --- a/src/defines.ts +++ b/src/defines.ts @@ -29,6 +29,7 @@ export type StatementType = | 'DROP_TRIGGER' | 'DROP_FUNCTION' | 'DROP_INDEX' + | 'DROP_PROCEDURE' | 'ALTER_DATABASE' | 'ALTER_SCHEMA' | 'ALTER_TABLE' @@ -36,6 +37,7 @@ export type StatementType = | 'ALTER_TRIGGER' | 'ALTER_FUNCTION' | 'ALTER_INDEX' + | 'ALTER_PROCEDURE' | 'ANON_BLOCK' | 'UNKNOWN'; diff --git a/src/parser.ts b/src/parser.ts index b3b1f7b..c850328 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -44,6 +44,7 @@ export const EXECUTION_TYPES: Record = { DROP_TRIGGER: 'MODIFICATION', DROP_FUNCTION: 'MODIFICATION', DROP_INDEX: 'MODIFICATION', + DROP_PROCEDURE: 'MODIFICATION', ALTER_DATABASE: 'MODIFICATION', ALTER_SCHEMA: 'MODIFICATION', ALTER_TABLE: 'MODIFICATION', @@ -51,6 +52,7 @@ export const EXECUTION_TYPES: Record = { ALTER_TRIGGER: 'MODIFICATION', ALTER_FUNCTION: 'MODIFICATION', ALTER_INDEX: 'MODIFICATION', + ALTER_PROCEDURE: 'MODIFICATION', UNKNOWN: 'UNKNOWN', ANON_BLOCK: 'ANON_BLOCK', }; @@ -464,6 +466,7 @@ function createDropStatementParser(options: ParseOptions) { ? [ { type: 'keyword', value: 'DATABASE' }, { type: 'keyword', value: 'SCHEMA' }, + { type: 'keyword', value: 'PROCEDURE' }, ] : []), { type: 'keyword', value: 'TABLE' }, diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index 183c367..3201212 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -357,7 +357,7 @@ describe('identifier', () => { }); }); - describe('identify bigquery "CREATE PROCEDURE" statements', () => { + describe('identify "CREATE PROCEDURE" statements', () => { it('should identify bigquery "CREATE PROCEDURE" statement', () => { const sql = `CREATE OR REPLACE PROCEDURE mydataset.create_customer() BEGIN @@ -383,6 +383,44 @@ describe('identifier', () => { }); }); + describe('identify "DROP PROCEDURE" statements', () => { + it('should identify "DROP PROCEDURE" statement', () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; + + const actual = identify(sql, { dialect: 'bigquery' }); + const expected = [ + { + start: 0, + end: 39, + text: sql, + type: 'DROP_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); + + describe('identify "ALTER PROCEDURE" statements', () => { + it('should identify "ALTER PROCEDURE" statement', () => { + const sql = `ALTER PROCEDURE mydataset.create_customer`; + + const actual = identify(sql, { dialect: 'bigquery' }); + const expected = [ + { + start: 0, + end: 39, + text: sql, + type: 'ALTER_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); + describe('identify "CREATE FUNCTION" statements', () => { it('should identify postgres "CREATE FUNCTION" statement with LANGUAGE at end', () => { const sql = `CREATE FUNCTION quarterly_summary_func(start_date date DEFAULT CURRENT_TIMESTAMP) From c9c8eee992fedde43d82c2ac8c097f09f00035f6 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 17:57:18 +0200 Subject: [PATCH 07/10] bigquery doesn't supports alter procedure --- src/parser.ts | 4 +++- test/identifier/single-statement.spec.ts | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index c850328..4b6b682 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -514,7 +514,9 @@ function createAlterStatementParser(options: ParseOptions) { { type: 'keyword', value: 'TRIGGER' }, { type: 'keyword', value: 'FUNCTION' }, { type: 'keyword', value: 'INDEX' }, - { type: 'keyword', value: 'PROCEDURE' }, + ...(options.dialect !== 'bigquery' + ? [{ type: 'keyword', value: 'PROCEDURE' }] + : []), ] : []), { type: 'keyword', value: 'TABLE' }, diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index 3201212..3f0c008 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -406,11 +406,11 @@ describe('identifier', () => { it('should identify "ALTER PROCEDURE" statement', () => { const sql = `ALTER PROCEDURE mydataset.create_customer`; - const actual = identify(sql, { dialect: 'bigquery' }); + const actual = identify(sql, { dialect: 'mysql' }); const expected = [ { start: 0, - end: 39, + end: 40, text: sql, type: 'ALTER_PROCEDURE', executionType: 'MODIFICATION', From 5a3c5802799d0455456a52222ae07a6bc210f209 Mon Sep 17 00:00:00 2001 From: Nicolas Garnil Date: Tue, 2 Aug 2022 18:05:44 +0200 Subject: [PATCH 08/10] add tests for ALTER PROCEDURE --- test/identifier/single-statement.spec.ts | 35 ++++++++++++++---------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index 3f0c008..dad2e82 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -403,21 +403,28 @@ describe('identifier', () => { }); describe('identify "ALTER PROCEDURE" statements', () => { - it('should identify "ALTER PROCEDURE" statement', () => { - const sql = `ALTER PROCEDURE mydataset.create_customer`; + const sql = `ALTER PROCEDURE mydataset.create_customer`; + (['oracle', 'psql', 'mysql', 'mssql'] as Dialect[]).forEach((dialect) => { + it('should identify "ALTER PROCEDURE" statement', () => { + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 40, + text: sql, + type: 'ALTER_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); - const actual = identify(sql, { dialect: 'mysql' }); - const expected = [ - { - start: 0, - end: 40, - text: sql, - type: 'ALTER_PROCEDURE', - executionType: 'MODIFICATION', - parameters: [], - }, - ]; - expect(actual).to.eql(expected); + it('should throw error for bigquery', () => { + expect(() => identify(sql, { dialect: 'bigquery' })).to.throw( + `Expected any of these tokens (type="keyword" value="DATABASE") or (type="keyword" value="SCHEMA") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="TABLE") or (type="keyword" value="VIEW") instead of type="keyword" value="PROCEDURE`, + ); }); }); From 9e2da11da10a3f9896c5cb39e4bbcd1c61e61904 Mon Sep 17 00:00:00 2001 From: Matthew Peveler Date: Fri, 5 Aug 2022 11:20:28 -0400 Subject: [PATCH 09/10] increase test coverage Signed-off-by: Matthew Peveler --- src/parser.ts | 37 +++-- test/identifier/single-statement.spec.ts | 186 ++++++++++++++++------- 2 files changed, 157 insertions(+), 66 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 4b6b682..dd29a12 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -580,10 +580,11 @@ function stateMachineStatementParser( { isStrict, dialect }: ParseOptions, ): StatementParser { let currentStepIndex = 0; - let prevToken: Token; - let prevPrevToken: Token; + let prevToken: Token | undefined; + let prevPrevToken: Token | undefined; + let prevNonWhitespaceToken: Token | undefined; - let lastBlockOpener: Token; + let lastBlockOpener: Token | undefined; let anonBlockStarted = false; let openBlocks = 0; @@ -607,6 +608,9 @@ function stateMachineStatementParser( const setPrevToken = (token: Token) => { prevPrevToken = prevToken; prevToken = token; + if (token.type !== 'whitespace') { + prevNonWhitespaceToken = token; + } }; return { @@ -692,16 +696,25 @@ function stateMachineStatementParser( return; } - if (['psql', 'mssql'].includes(dialect) && token.value.toUpperCase() === 'MATERIALIZED') { + if ( + ['psql', 'mssql', 'bigquery'].includes(dialect) && + token.value.toUpperCase() === 'MATERIALIZED' + ) { setPrevToken(token); return; } - // psql allows for optional "OR REPLACE" between "CREATE" and "FUNCTION" - // mysql and psql allow it between "CREATE" and "VIEW" + // technically these dialects don't allow "OR REPLACE" or "OR ALTER" between all statement + // types, but we'll allow it for now. + // For "ALTER", we need to make sure we only catch it here if it directly follows "OR", so + // we don't catch it for "ALTER TABLE" statements if ( - ['psql', 'mysql', 'bigquery'].includes(dialect) && - ['OR', 'REPLACE'].includes(token.value.toUpperCase()) + (['psql', 'mysql', 'bigquery'].includes(dialect) && + ['OR', 'REPLACE'].includes(token.value.toUpperCase())) || + (dialect === 'mssql' && + (token.value.toUpperCase() === 'OR' || + (prevNonWhitespaceToken?.value.toUpperCase() === 'OR' && + token.value.toUpperCase() === 'ALTER'))) ) { setPrevToken(token); return; @@ -730,13 +743,13 @@ function stateMachineStatementParser( } if (statement.definer !== undefined && statement.definer > 0) { - if (statement.definer === 1 && prevToken.type === 'whitespace') { + if (statement.definer === 1 && prevToken?.type === 'whitespace') { statement.definer++; setPrevToken(token); return; } - if (statement.definer > 1 && prevToken.type !== 'whitespace') { + if (statement.definer > 1 && prevToken?.type !== 'whitespace') { setPrevToken(token); return; } @@ -757,7 +770,7 @@ function stateMachineStatementParser( } if (statement.algorithm !== undefined && statement.algorithm > 0) { - if (statement.algorithm === 1 && prevToken.type === 'whitespace') { + if (statement.algorithm === 1 && prevToken?.type === 'whitespace') { statement.algorithm++; setPrevToken(token); return; @@ -765,6 +778,7 @@ function stateMachineStatementParser( if ( statement.algorithm > 1 && + prevToken && ['UNDEFINED', 'MERGE', 'TEMPTABLE'].includes(prevToken.value.toUpperCase()) ) { setPrevToken(token); @@ -801,6 +815,7 @@ function stateMachineStatementParser( } if ( + prevToken && currentStep.validation && currentStep.validation.requireBefore && !currentStep.validation.requireBefore.includes(prevToken.type) diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index 0c02556..861d3cd 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -130,7 +130,7 @@ describe('identifier', () => { describe('identifying "CREATE MATERIALIZED VIEW" statement', () => { const query = "CREATE MATERIALIZED VIEW vista AS SELECT 'Hello World';"; - (['psql', 'mssql'] as Dialect[]).forEach((dialect) => { + (['bigquery', 'psql', 'mssql'] as Dialect[]).forEach((dialect) => { it(`should identify for ${dialect}`, () => { const actual = identify(query, { dialect }); const expected = [ @@ -159,7 +159,7 @@ describe('identifier', () => { describe('identify "CREATE OR REPLACE VIEW" statement', () => { const query = "CREATE OR REPLACE VIEW vista AS SELECT 'Hello world';"; - (['mysql', 'psql'] as Dialect[]).forEach((dialect) => { + (['bigquery', 'mysql', 'psql'] as Dialect[]).forEach((dialect) => { it(`should identify for ${dialect}`, () => { const actual = identify(query, { dialect }); const expected = [ @@ -177,13 +177,19 @@ describe('identifier', () => { }); }); - (['generic', 'sqlite', 'mssql'] as Dialect[]).forEach((dialect) => { + (['generic', 'sqlite'] as Dialect[]).forEach((dialect) => { it(`should throw error for ${dialect}`, () => { expect(() => identify(query, { dialect })).to.throw( /^Expected any of these tokens .* instead of type="unknown" value="OR" \(currentStep=1\)/, ); }); }); + + it(`should throw error for mssql`, () => { + expect(() => identify(query, { dialect: 'mssql' })).to.throw( + /^Expected any of these tokens .* instead of type="unknown" value="REPLACE" \(currentStep=1\)/, + ); + }); }); ['TEMP', 'TEMPORARY'].forEach((temp) => { @@ -207,7 +213,7 @@ describe('identifier', () => { }); }); - (['generic', 'mysql', 'mssql'] as Dialect[]).forEach((dialect) => { + (['generic', 'mysql', 'mssql', 'bigquery', 'oracle'] as Dialect[]).forEach((dialect) => { it(`should throw error for ${dialect}`, () => { const regex = new RegExp( `Expected any of these tokens .* instead of type="unknown" value="${temp}" \\(currentStep=1\\)`, @@ -357,62 +363,112 @@ describe('identifier', () => { }); }); - describe('identify "CREATE PROCEDURE" statements', () => { - it('should identify bigquery "CREATE PROCEDURE" statement', () => { - const sql = `CREATE OR REPLACE PROCEDURE mydataset.create_customer() - BEGIN - DECLARE id STRING; - SET id = GENERATE_UUID(); - INSERT INTO mydataset.customers (customer_id) - VALUES(id); - SELECT FORMAT("Created customer %s", id); - END`; - - const actual = identify(sql, { dialect: 'bigquery' }); - const expected = [ - { - start: 0, - end: 277, - text: sql, - type: 'CREATE_PROCEDURE', - executionType: 'MODIFICATION', - parameters: [], + describe('identity PROCEDURE statements', () => { + describe('identify "CREATE PROCEDURE" statements', () => { + (['bigquery', 'generic', 'mssql', 'mysql', 'oracle', 'psql'] as Dialect[]).forEach( + (dialect) => { + it(`should identify statement for ${dialect}`, () => { + const sql = `CREATE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 308, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); }, - ]; - expect(actual).to.eql(expected); - }); - }); + ); - describe('identify "DROP PROCEDURE" statements', () => { - it('should identify "DROP PROCEDURE" statement', () => { - const sql = `DROP PROCEDURE mydataset.create_customer`; + (['bigquery', 'mysql', 'psql'] as Dialect[]).forEach((dialect) => { + it(`should identify statement with "OR REPLACE" for ${dialect}`, () => { + const sql = `CREATE OR REPLACE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 305, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); - const actual = identify(sql, { dialect: 'bigquery' }); - const expected = [ - { - start: 0, - end: 39, - text: sql, - type: 'DROP_PROCEDURE', - executionType: 'MODIFICATION', - parameters: [], - }, - ]; - expect(actual).to.eql(expected); + it('should identify statement with "OR ALTER" for mssql', () => { + const sql = `CREATE OR ALTER PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + + const actual = identify(sql, { dialect: 'mssql' }); + const expected = [ + { + start: 0, + end: 289, + text: sql, + type: 'CREATE_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + + it('should error for sqlite', () => { + const sql = `CREATE PROCEDURE mydataset.create_customer() + BEGIN + DECLARE id STRING; + SET id = GENERATE_UUID(); + INSERT INTO mydataset.customers (customer_id) + VALUES(id); + SELECT FORMAT("Created customer %s", id); + END`; + expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( + 'Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="PROCEDURE" (currentStep=1)', + ); + }); }); - }); - describe('identify "ALTER PROCEDURE" statements', () => { - const sql = `ALTER PROCEDURE mydataset.create_customer`; - (['oracle', 'psql', 'mysql', 'mssql'] as Dialect[]).forEach((dialect) => { - it('should identify "ALTER PROCEDURE" statement', () => { - const actual = identify(sql, { dialect }); + describe('identify "DROP PROCEDURE" statements', () => { + it('should identify "DROP PROCEDURE" statement', () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; + + const actual = identify(sql, { dialect: 'bigquery' }); const expected = [ { start: 0, - end: 40, + end: 39, text: sql, - type: 'ALTER_PROCEDURE', + type: 'DROP_PROCEDURE', executionType: 'MODIFICATION', parameters: [], }, @@ -421,10 +477,30 @@ describe('identifier', () => { }); }); - it('should throw error for bigquery', () => { - expect(() => identify(sql, { dialect: 'bigquery' })).to.throw( - `Expected any of these tokens (type="keyword" value="DATABASE") or (type="keyword" value="SCHEMA") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="TABLE") or (type="keyword" value="VIEW") instead of type="keyword" value="PROCEDURE`, - ); + describe('identify "ALTER PROCEDURE" statements', () => { + const sql = `ALTER PROCEDURE mydataset.create_customer`; + (['oracle', 'psql', 'mysql', 'mssql'] as Dialect[]).forEach((dialect) => { + it('should identify "ALTER PROCEDURE" statement', () => { + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 40, + text: sql, + type: 'ALTER_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }); + + it('should throw error for bigquery', () => { + expect(() => identify(sql, { dialect: 'bigquery' })).to.throw( + `Expected any of these tokens (type="keyword" value="DATABASE") or (type="keyword" value="SCHEMA") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="TABLE") or (type="keyword" value="VIEW") instead of type="keyword" value="PROCEDURE`, + ); + }); }); }); From 7af0557461f9f7a6a55fdc20c2e89bb85dda7fdf Mon Sep 17 00:00:00 2001 From: Matthew Peveler Date: Fri, 5 Aug 2022 11:25:32 -0400 Subject: [PATCH 10/10] add more tests --- test/identifier/single-statement.spec.ts | 48 ++++++++++++++++-------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/test/identifier/single-statement.spec.ts b/test/identifier/single-statement.spec.ts index 861d3cd..fc45c77 100644 --- a/test/identifier/single-statement.spec.ts +++ b/test/identifier/single-statement.spec.ts @@ -459,27 +459,38 @@ describe('identifier', () => { }); describe('identify "DROP PROCEDURE" statements', () => { - it('should identify "DROP PROCEDURE" statement', () => { - const sql = `DROP PROCEDURE mydataset.create_customer`; + (['bigquery', 'generic', 'mssql', 'mysql', 'oracle', 'psql'] as Dialect[]).forEach( + (dialect) => { + it(`should identify the statement for ${dialect}`, () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; - const actual = identify(sql, { dialect: 'bigquery' }); - const expected = [ - { - start: 0, - end: 39, - text: sql, - type: 'DROP_PROCEDURE', - executionType: 'MODIFICATION', - parameters: [], - }, - ]; - expect(actual).to.eql(expected); + const actual = identify(sql, { dialect }); + const expected = [ + { + start: 0, + end: 39, + text: sql, + type: 'DROP_PROCEDURE', + executionType: 'MODIFICATION', + parameters: [], + }, + ]; + expect(actual).to.eql(expected); + }); + }, + ); + + it('should error for sqlite', () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; + expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( + 'Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="PROCEDURE" (currentStep=1)', + ); }); }); describe('identify "ALTER PROCEDURE" statements', () => { const sql = `ALTER PROCEDURE mydataset.create_customer`; - (['oracle', 'psql', 'mysql', 'mssql'] as Dialect[]).forEach((dialect) => { + (['generic', 'mssql', 'mysql', 'oracle', 'psql'] as Dialect[]).forEach((dialect) => { it('should identify "ALTER PROCEDURE" statement', () => { const actual = identify(sql, { dialect }); const expected = [ @@ -501,6 +512,13 @@ describe('identifier', () => { `Expected any of these tokens (type="keyword" value="DATABASE") or (type="keyword" value="SCHEMA") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") or (type="keyword" value="TABLE") or (type="keyword" value="VIEW") instead of type="keyword" value="PROCEDURE`, ); }); + + it('should error for sqlite', () => { + const sql = `DROP PROCEDURE mydataset.create_customer`; + expect(() => identify(sql, { dialect: 'sqlite' })).to.throw( + 'Expected any of these tokens (type="keyword" value="TABLE") or (type="keyword" value="VIEW") or (type="keyword" value="TRIGGER") or (type="keyword" value="FUNCTION") or (type="keyword" value="INDEX") instead of type="keyword" value="PROCEDURE" (currentStep=1)', + ); + }); }); });