From 27e59886f80a599454f240809a28dadef9ac949b Mon Sep 17 00:00:00 2001 From: gerrard Date: Mon, 23 Mar 2020 16:03:19 +0800 Subject: [PATCH 1/2] support unicode length validation --- lib/jsonschema.lua | 38 ++++++++++++++++++++++++++++++-------- t/draft4.lua | 6 ------ t/draft6.lua | 6 ------ t/draft7.lua | 6 ------ 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/lib/jsonschema.lua b/lib/jsonschema.lua index 7d2bb58..06ecb32 100644 --- a/lib/jsonschema.lua +++ b/lib/jsonschema.lua @@ -758,16 +758,38 @@ generate_validator = function(ctx, schema) if schema.minLength or schema.maxLength or schema.pattern then ctx:stmt(sformat('if %s == "string" then', datatype)) if schema.minLength then - ctx:stmt(sformat(' if #%s < %d then', ctx:param(1), schema.minLength)) - ctx:stmt(sformat(' return false, %s("string too short, expected at least %d, got %%d", #%s)', - ctx:libfunc('string.format'), schema.minLength, ctx:param(1))) - ctx:stmt( ' end') + ctx:stmt(' local c, j = 0, 1') + ctx:stmt(sformat('for i = 1, #%s do', ctx:param(1))) + ctx:stmt(sformat(' if j > #%s then break end', ctx:param(1))) + ctx:stmt(sformat(' local cb= string.byte(%s, j) ', ctx:param(1))) + ctx:stmt(' if cb >= 0 and cb <= 127 then j = j + 1') + ctx:stmt(' elseif cb >= 192 and cb <= 223 then j = j + 2') + ctx:stmt(' elseif cb >= 224 and cb <= 239 then j = j + 3') + ctx:stmt(' elseif cb >= 240 and cb <= 247 then j = j + 4 end') + ctx:stmt(' c = c + 1') + ctx:stmt(' end') + ctx:stmt(sformat(' ngx.log(ngx.INFO, "length for %s :", c) ', ctx:param(1))) + ctx:stmt(sformat('if c < %d then', schema.minLength)) + ctx:stmt(sformat(' return false, %s("string too short, expected at least %d, got ") .. 
c', + ctx:libfunc('string.format'), schema.minLength)) + ctx:stmt( 'end') end if schema.maxLength then - ctx:stmt(sformat(' if #%s > %d then', ctx:param(1), schema.maxLength)) - ctx:stmt(sformat(' return false, %s("string too long, expected at most %d, got %%d", #%s)', - ctx:libfunc('string.format'), schema.maxLength, ctx:param(1))) - ctx:stmt( ' end') + ctx:stmt(' local c, j = 0, 1') + ctx:stmt(sformat('for i = 1, #%s do', ctx:param(1))) + ctx:stmt(sformat(' if j > #%s then break end', ctx:param(1))) + ctx:stmt(sformat(' local cb= string.byte(%s, j) ', ctx:param(1))) + ctx:stmt(' if cb >= 0 and cb <= 127 then j = j + 1') + ctx:stmt(' elseif cb >= 192 and cb <= 223 then j = j + 2') + ctx:stmt(' elseif cb >= 224 and cb <= 239 then j = j + 3') + ctx:stmt(' elseif cb >= 240 and cb <= 247 then j = j + 4 end') + ctx:stmt(' c = c + 1') + ctx:stmt(' end') + ctx:stmt(sformat(' ngx.log(ngx.INFO, "length for %s :", c) ', ctx:param(1))) + ctx:stmt(sformat('if c > %d then', schema.maxLength)) + ctx:stmt(sformat(' return false, %s("string too long, expected at most %d, got ") .. 
c', + ctx:libfunc('string.format'), schema.maxLength)) + ctx:stmt( 'end') end if schema.pattern then ctx:stmt(sformat(' if not %s(%s, %q) then', ctx:libfunc('custom.match_pattern'), ctx:param(1), schema.pattern)) diff --git a/t/draft4.lua b/t/draft4.lua index 58c3735..da0f55d 100644 --- a/t/draft4.lua +++ b/t/draft4.lua @@ -25,12 +25,6 @@ local blacklist = { ['regexes are not anchored by default and are case sensitive'] = { ['recognized members are accounted for'] = true, -- uses a unsupported pattern construct }, - ['minLength validation'] = { - ['one supplementary Unicode code point is not long enough'] = true, -- unicode handling - }, - ['maxLength validation'] = { - ['two supplementary Unicode code points is long enough'] = true, -- unicode handling - }, ['required validation'] = { ['ignores arrays'] = true }, diff --git a/t/draft6.lua b/t/draft6.lua index 466e255..5157a87 100644 --- a/t/draft6.lua +++ b/t/draft6.lua @@ -25,12 +25,6 @@ local blacklist = { ['regexes are not anchored by default and are case sensitive'] = { ['recognized members are accounted for'] = true, -- uses a unsupported pattern construct }, - ['minLength validation'] = { - ['one supplementary Unicode code point is not long enough'] = true, -- unicode handling - }, - ['maxLength validation'] = { - ['two supplementary Unicode code points is long enough'] = true, -- unicode handling - }, ['required validation'] = { ['ignores arrays'] = true }, diff --git a/t/draft7.lua b/t/draft7.lua index 91023d6..332fc92 100644 --- a/t/draft7.lua +++ b/t/draft7.lua @@ -25,12 +25,6 @@ local blacklist = { ['regexes are not anchored by default and are case sensitive'] = { ['recognized members are accounted for'] = true, -- uses a unsupported pattern construct }, - ['minLength validation'] = { - ['one supplementary Unicode code point is not long enough'] = true, -- unicode handling - }, - ['maxLength validation'] = { - ['two supplementary Unicode code points is long enough'] = true, -- unicode handling - }, 
['required validation'] = { ['ignores arrays'] = true },

From 7643ecfd7b35ef2ee0d55b67dc9ceec8d4b77f10 Mon Sep 17 00:00:00 2001
From: gerrard
Date: Tue, 24 Mar 2020 11:13:51 +0800
Subject: [PATCH 2/2] optimize

---
 lib/jsonschema.lua | 69 ++++++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/lib/jsonschema.lua b/lib/jsonschema.lua
index 06ecb32..4b485dc 100644
--- a/lib/jsonschema.lua
+++ b/lib/jsonschema.lua
@@ -406,6 +406,33 @@ local function to_lua_code(var)
   return code .. "}"
 end
 
+--- Build the source text of a Lua function that returns the length of a
+-- string in UTF-8 characters rather than in bytes.
+--
+-- The generated function steps through the string one lead byte at a time.
+-- Any byte that is not a recognised lead byte (for example a stray
+-- continuation byte) is counted as one character and skipped, so the scan
+-- always advances and terminates on malformed input.
+-- @param ctx generator context; ctx:libfunc("string.byte") provides the
+--            expression the generated code uses to read a byte
+-- @return Lua source text for a `function(s)` that returns the count
+local function utf8_len_func(ctx)
+  return sformat([[function(s)
+    local c, j=0, 1
+    while j <= #s do
+      local cb = %s(s, j)
+      if cb >= 0 and cb <= 127 then j = j + 1
+      elseif cb >= 192 and cb <= 223 then j = j + 2
+      elseif cb >= 224 and cb <= 239 then j = j + 3
+      elseif cb >= 240 and cb <= 247 then j = j + 4
+      else j = j + 1
+      end
+      c = c + 1
+    end
+    return c
+  end]], ctx:libfunc("string.byte"))
+end
+
 generate_validator = function(ctx, schema)
   -- get type informations as they will be necessary anyway
   local datatype = ctx:localvar(sformat('%s(%s)',
@@ -758,38 +785,20 @@ generate_validator = function(ctx, schema)
   if schema.minLength or schema.maxLength or schema.pattern then
     ctx:stmt(sformat('if %s == "string" then', datatype))
     if schema.minLength then
-      ctx:stmt(' local c, j = 0, 1')
-      ctx:stmt(sformat('for i = 1, #%s do', ctx:param(1)))
-      ctx:stmt(sformat(' if j > #%s then break end', ctx:param(1)))
-      ctx:stmt(sformat(' local cb= string.byte(%s, j) ', ctx:param(1)))
-      ctx:stmt(' if cb >= 0 and cb <= 127 then j = j + 1')
-      ctx:stmt(' elseif cb >= 192 and cb <= 223 then j = j + 2')
-      ctx:stmt(' elseif cb >= 224 and cb <= 239 then j = j + 3')
-      ctx:stmt(' elseif cb >= 240 and cb <= 247 then j = j + 4 end')
-      ctx:stmt(' c = c + 1')
-      ctx:stmt(' end')
-      ctx:stmt(sformat(' ngx.log(ngx.INFO, "length for %s :", c) ', ctx:param(1)))
-      ctx:stmt(sformat('if c < %d then', schema.minLength))
-      ctx:stmt(sformat(' return false, %s("string too short, expected at least %d, got ") .. c',
-                       ctx:libfunc('string.format'), schema.minLength))
-      ctx:stmt( 'end')
+      ctx:stmt(sformat(' local utf8_len_func = %s', utf8_len_func(ctx)))
+      ctx:stmt(sformat(' local c = utf8_len_func(%s)',ctx:param(1)))
+      ctx:stmt(sformat(' if c < %d then', schema.minLength))
+      ctx:stmt(sformat(' return false, %s("string too short, expected at least %d, got ") ..c',
+                       ctx:libfunc('string.format'), schema.minLength))
+      ctx:stmt( ' end')
     end
     if schema.maxLength then
-      ctx:stmt(' local c, j = 0, 1')
-      ctx:stmt(sformat('for i = 1, #%s do', ctx:param(1)))
-      ctx:stmt(sformat(' if j > #%s then break end', ctx:param(1)))
-      ctx:stmt(sformat(' local cb= string.byte(%s, j) ', ctx:param(1)))
-      ctx:stmt(' if cb >= 0 and cb <= 127 then j = j + 1')
-      ctx:stmt(' elseif cb >= 192 and cb <= 223 then j = j + 2')
-      ctx:stmt(' elseif cb >= 224 and cb <= 239 then j = j + 3')
-      ctx:stmt(' elseif cb >= 240 and cb <= 247 then j = j + 4 end')
-      ctx:stmt(' c = c + 1')
-      ctx:stmt(' end')
-      ctx:stmt(sformat(' ngx.log(ngx.INFO, "length for %s :", c) ', ctx:param(1)))
-      ctx:stmt(sformat('if c > %d then', schema.maxLength))
-      ctx:stmt(sformat(' return false, %s("string too long, expected at most %d, got ") .. c',
-                       ctx:libfunc('string.format'), schema.maxLength))
-      ctx:stmt( 'end')
+      ctx:stmt(sformat(' local utf8_len_func = %s', utf8_len_func(ctx)))
+      ctx:stmt(sformat(' local c = utf8_len_func(%s)',ctx:param(1)))
+      ctx:stmt(sformat(' if c > %d then', schema.maxLength))
+      ctx:stmt(sformat(' return false, %s("string too long, expected at most %d, got ") .. c',
+                       ctx:libfunc('string.format'), schema.maxLength))
+      ctx:stmt( ' end')
     end
     if schema.pattern then
       ctx:stmt(sformat(' if not %s(%s, %q) then', ctx:libfunc('custom.match_pattern'), ctx:param(1), schema.pattern))