// Characters that separate tokens when they appear outside quotes/brackets.
const rBlank = /\s/
// Characters that make up comparison operators (`==`, `!=`, `<`, `<=`, `>`, `>=`).
const rPunctuation = /[<>=!]/

/**
 * Lexer states, kept on a stack so quotes may nest inside brackets
 * (e.g. `c[a["b"]]`). The values are bit flags so that `QUOTE` can be used
 * as a mask matching either quote state.
 */
enum ParseState {
  INIT = 1,
  SINGLE_QUOTE = 2,
  DOUBLE_QUOTE = 4,
  QUOTE = 6, // SINGLE_QUOTE | DOUBLE_QUOTE
  BRACKET = 8
}

/**
 * Split an expression into tokens.
 *
 * Tokens are separated by blanks and by the boundary between a run of
 * comparison punctuation (`<`, `>`, `=`, `!`) and anything else, so both
 * `a == 1` and `a==1` produce `['a', '==', '1']`. Quoted strings and
 * bracketed property accesses are kept intact as part of a single token,
 * including any blanks or punctuation they contain. A backslash always
 * carries the following character with it verbatim (escapes are not decoded
 * here).
 *
 * @param expr the raw expression text
 * @returns an iterator over the token strings, in source order
 */
export function * tokenize (expr: string): IterableIterator<string> {
  const N = expr.length
  const stack: ParseState[] = [ParseState.INIT]
  let str = ''
  let lastIsPunc = false

  for (let i = 0; i < N; i++) {
    const c = expr[i]
    const top = stack[stack.length - 1]
    const isPunc = rPunctuation.test(c)
    if (c === '\\') {
      // Copy the backslash together with the escaped character and skip it,
      // so an escaped quote never terminates the surrounding string.
      str += expr.slice(i, i + 2)
      i++
    } else if (top === ParseState.SINGLE_QUOTE && c === "'") {
      str += c
      stack.pop()
    } else if (top === ParseState.DOUBLE_QUOTE && c === '"') {
      str += c
      stack.pop()
    } else if (ParseState.QUOTE & top) {
      // Anything else inside a quoted string is literal content.
      str += c
    } else if (top === ParseState.BRACKET && c === ']') {
      str += c
      stack.pop()
    } else if (top === ParseState.INIT && rBlank.test(c)) {
      if (str) yield str
      str = ''
    } else {
      // Operator <-> operand boundary without a separating blank: flush the
      // current token first. The character is then processed normally below,
      // so a quote or bracket that directly follows an operator (`a=="b c"`)
      // still opens its state instead of being treated as a plain character.
      if (top === ParseState.INIT && isPunc !== lastIsPunc) {
        if (str) yield str
        str = ''
      }
      if (c === '"') stack.push(ParseState.DOUBLE_QUOTE)
      else if (c === "'") stack.push(ParseState.SINGLE_QUOTE)
      else if (c === '[') stack.push(ParseState.BRACKET)
      str += c
    }
    lastIsPunc = isPunc
  }
  // Flush the trailing token (also covers unterminated quotes/brackets).
  if (str) yield str
}
'\\') { + ret += str[i] + continue + } + if (escapeChar[str[i + 1]] !== undefined) { + ret += escapeChar[str[++i]] + } else if (str[i + 1] === 'u') { + let val = 0 + let j = i + 2 + while (j <= i + 5 && rHex.test(str[j])) { + val = val * 16 + hexVal(str[j++]) + } + i = j - 1 + ret += String.fromCharCode(val) + } else if (!rOct.test(str[i + 1])) { + ret += str[++i] + } else { + let j = i + 1 + let val = 0 + while (j <= i + 3 && rOct.test(str[j])) { + val = val * 8 + hexVal(str[j++]) + } + i = j - 1 + ret += String.fromCharCode(val) + } + } + return ret } diff --git a/src/render/expression.ts b/src/render/expression.ts index d79dcf655a..144c735ae5 100644 --- a/src/render/expression.ts +++ b/src/render/expression.ts @@ -4,20 +4,24 @@ import { Value } from './value' import { Context } from '../context/context' import { toValue } from '../util/underscore' import { isOperator, precedence, operatorImpls } from './operator' +import { tokenize } from '../parser/expression-tokenizer' export class Expression { private operands: any[] = [] private postfix: string[] public constructor (str = '') { - this.postfix = [...toPostfix(str)] + this.postfix = [...toPostfix(tokenize(str))] } public * evaluate (ctx: Context) { assert(ctx, 'unable to evaluate: context not defined') for (const token of this.postfix) { if (isOperator(token)) { - this.evaluateOnce(token) + const r = this.operands.pop() + const l = this.operands.pop() + const result = operatorImpls[token](l, r) + this.operands.push(result) } else if (isRange(token)) { this.operands.push(yield rangeValue(token, ctx)) } else this.operands.push(yield new Value(token).evaluate(ctx)) @@ -27,47 +31,11 @@ export class Expression { public * value (ctx: Context) { return toValue(yield this.evaluate(ctx)) } - private evaluateOnce (token: string) { - const r = this.operands.pop() - const l = this.operands.pop() - const result = operatorImpls[token](l, r) - this.operands.push(result) - } -} - -function * tokenize (expr: string): 
IterableIterator { - const N = expr.length - let str = '' - const pairs = { '"': '"', "'": "'", '[': ']', '(': ')' } - - for (let i = 0; i < N; i++) { - const c = expr[i] - switch (c) { - case '[': - case '"': - case "'": - str += c - while (i + 1 < N) { - str += expr[++i] - if (expr[i] === pairs[c]) break - } - break - case ' ': - case '\t': - case '\n': - if (str) yield str - str = '' - break - default: - str += c - } - } - if (str) yield str } -function * toPostfix (expr: string): IterableIterator { +function * toPostfix (tokens: IterableIterator): IterableIterator { const ops = [] - for (const token of tokenize(expr)) { + for (const token of tokens) { if (isOperator(token)) { while (ops.length && precedence[ops[ops.length - 1]] > precedence[token]) { yield ops.pop()! diff --git a/test/integration/builtin/tags/if.ts b/test/integration/builtin/tags/if.ts index 65cf09625e..66f2c251e9 100644 --- a/test/integration/builtin/tags/if.ts +++ b/test/integration/builtin/tags/if.ts @@ -65,8 +65,18 @@ describe('tags/if', function () { const html = await liquid.parseAndRender(src) return expect(html).to.equal('') }) + it('should allow no spaces around operator for literal', async function () { + const src = `{% if true==true %}success{%else%}fail{% endif %}` + const html = await liquid.parseAndRender(src) + return expect(html).to.equal('success') + }) + it('should allow no spaces around operator for variables', async function () { + const src = `{%assign var = 1%}{%if var ==1%}success{%else%}fail{%endif%}` + const html = await liquid.parseAndRender(src) + return expect(html).to.equal('success') + }) }) - describe('comparasion to null', function () { + describe('compare to null', function () { it('should evaluate false for null < 10', async function () { const src = '{% if null < 10 %}yes{% else %}no{% endif %}' const html = await liquid.parseAndRender(src, scope) diff --git a/test/unit/parser/expression-tokenizer.ts b/test/unit/parser/expression-tokenizer.ts new file mode 
100644 index 0000000000..8e817bfc14 --- /dev/null +++ b/test/unit/parser/expression-tokenizer.ts @@ -0,0 +1,43 @@ +import { tokenize } from '../../../src/parser/expression-tokenizer' +import { expect } from 'chai' + +describe('expression tokenizer', () => { + describe('spaces', () => { + it('should tokenize a + b', () => { + expect([...tokenize('a + b')]).to.deep.equal(['a', '+', 'b']) + }) + it('should tokenize a==1', () => { + expect([...tokenize('a==1')]).to.deep.equal(['a', '==', '1']) + }) + }) + + describe('range', () => { + it('should tokenize (1..3) contains 3', () => { + expect([...tokenize('(1..3)')]).to.deep.equal(['(1..3)']) + }) + }) + + describe('bracket', () => { + it('should tokenize a[b] = c', () => { + expect([...tokenize('a[b] = c')]).to.deep.equal(['a[b]', '=', 'c']) + }) + it('should tokenize c[a["b"]] < c', () => { + expect([...tokenize('c[a["b"]] < c')]).to.deep.equal(['c[a["b"]]', '<', 'c']) + }) + it('should tokenize "][" == var', () => { + expect([...tokenize('"][" == var')]).to.deep.equal(['"]["', '==', 'var']) + }) + }) + + describe('quotes', () => { + it('should tokenize " " == var', () => { + expect([...tokenize('" " == var')]).to.deep.equal(['" "', '==', 'var']) + }) + it('should tokenize "\\\'" == var', () => { + expect([...tokenize('"\\\'" == var')]).to.deep.equal(['"\\\'"', '==', 'var']) + }) + it('should tokenize "\\"" == var', () => { + expect([...tokenize('"\\"" == var')]).to.deep.equal(['"\\""', '==', 'var']) + }) + }) +}) diff --git a/test/unit/parser/literal.ts b/test/unit/parser/literal.ts index 265eabe142..c5b6cc9c93 100644 --- a/test/unit/parser/literal.ts +++ b/test/unit/parser/literal.ts @@ -1,8 +1,8 @@ import { expect } from 'chai' -import { parseLiteral } from '../../../src/parser/literal' +import { parseLiteral, parseStringLiteral } from '../../../src/parser/literal' import { NullDrop } from '../../../src/drop/null-drop' -describe('parseLiteral', function () { +describe('parseLiteral()', function () { it('should eval 
boolean literal', async function () { expect(parseLiteral('true')).to.equal(true) expect(parseLiteral('TrUE')).to.equal(undefined) @@ -25,3 +25,36 @@ describe('parseLiteral', function () { expect(parseLiteral('null')).to.be.instanceOf(NullDrop) }) }) + +describe('parseStringLiteral()', function () { + it('should parse octal escape', () => { + expect(parseStringLiteral(String.raw`"\1010"`)).to.equal('A0') + expect(parseStringLiteral(String.raw`"\12"`)).to.equal('\n') + expect(parseStringLiteral(String.raw`"\01"`)).to.equal('\u0001') + expect(parseStringLiteral(String.raw`"\0"`)).to.equal('\0') + }) + it('should skip invalid octal escape', () => { + expect(parseStringLiteral(String.raw`"\9"`)).to.equal('9') + }) + it('should parse \n, \t, \r', () => { + expect(parseStringLiteral(String.raw`"fo\no"`)).to.equal('fo\no') + expect(parseStringLiteral(String.raw`'fo\to'`)).to.equal('fo\to') + expect(parseStringLiteral(String.raw`'fo\ro'`)).to.equal('fo\ro') + }) + it('should parse unicode(hex) escape', () => { + expect(parseStringLiteral('"\\u003C"')).to.equal('<') + expect(parseStringLiteral('"\\u003cZ"')).to.equal(' { + expect(parseStringLiteral('"\\u41Z"')).to.equal('AZ') + expect(parseStringLiteral('"\\uZ"')).to.equal('\0Z') + }) + it('should parse quote escape', () => { + expect(parseStringLiteral(String.raw`"fo\'o"`)).to.equal("fo'o") + expect(parseStringLiteral(String.raw`'fo\"o'`)).to.equal('fo"o') + }) + it('should parse slash escape', () => { + expect(parseStringLiteral(String.raw`'fo\\o'`)).to.equal('fo\\o') + }) +}) \ No newline at end of file diff --git a/test/unit/render/expression.ts b/test/unit/render/expression.ts index c208bf91b2..ebddef6e0f 100644 --- a/test/unit/render/expression.ts +++ b/test/unit/render/expression.ts @@ -11,9 +11,14 @@ describe('Expression', function () { one: 1, two: 2, empty: '', + quote: '"', + space: ' ', x: 'XXX', y: undefined, - z: null + z: null, + obj: { + ']': 'right bracket' + } }) }) @@ -26,6 +31,8 @@ describe('Expression', 
function () { }) it('should eval simple expression', async function () { + expect(await toThenable(new Expression('1==2').value(ctx))).to.equal(false) + expect(await toThenable(new Expression('1<2').value(ctx))).to.equal(true) expect(await toThenable(new Expression('1 < 2').value(ctx))).to.equal(true) expect(await toThenable(new Expression('1 < 2').value(ctx))).to.equal(true) expect(await toThenable(new Expression('2 <= 2').value(ctx))).to.equal(true) @@ -40,6 +47,19 @@ describe('Expression', function () { expect(await toThenable(new Expression('"<=" == "<="').value(ctx))).to.equal(true) }) + it('should allow space in quoted value', async function () { + expect(await toThenable(new Expression('" " == space').value(ctx))).to.equal(true) + }) + + describe('escape', () => { + it('should escape quote', async function () { + expect(await toThenable(new Expression('"\\"" == quote').value(ctx))).to.equal(true) + }) + it('should escape square bracket', async function () { + expect(await toThenable(new Expression('obj["]"] == "right bracket"').value(ctx))).to.equal(true) + }) + }) + describe('complex expression', function () { it('should support value or value', async function () { expect(await toThenable(new Expression('false or true').value(ctx))).to.equal(true)