diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index a7a94e93e..5eef456c3 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -18,6 +18,12 @@ type Query struct { Parts []Part } +// utf.DecodeRune returns the utf8.RuneError for errors. But that is actually rune U+FFFD -- the unicode replacement +// character. utf8.RuneError is not an error if it is also width 3. +// +// https://github.com/jackc/pgx/issues/1380 +const replacementcharacterwidth = 3 + func (q *Query) Sanitize(args ...interface{}) (string, error) { argUse := make([]bool, len(args)) buf := &bytes.Buffer{} @@ -138,11 +144,13 @@ func rawState(l *sqlLexer) stateFn { return multilineCommentState } case utf8.RuneError: - if l.pos-l.start > 0 { - l.parts = append(l.parts, l.src[l.start:l.pos]) - l.start = l.pos + if width != replacementcharacterwidth { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil } - return nil } } } @@ -160,11 +168,13 @@ func singleQuoteState(l *sqlLexer) stateFn { } l.pos += width case utf8.RuneError: - if l.pos-l.start > 0 { - l.parts = append(l.parts, l.src[l.start:l.pos]) - l.start = l.pos + if width != replacementcharacterwidth { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil } - return nil } } } @@ -182,11 +192,13 @@ func doubleQuoteState(l *sqlLexer) stateFn { } l.pos += width case utf8.RuneError: - if l.pos-l.start > 0 { - l.parts = append(l.parts, l.src[l.start:l.pos]) - l.start = l.pos + if width != replacementcharacterwidth { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil } - return nil } } } @@ -228,11 +240,13 @@ func escapeStringState(l *sqlLexer) stateFn { } l.pos += width case utf8.RuneError: - if l.pos-l.start > 0 { - l.parts = append(l.parts, l.src[l.start:l.pos]) - l.start = l.pos + if width != replacementcharacterwidth { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil } - return nil } } } @@ -249,11 +263,13 @@ func oneLineCommentState(l *sqlLexer) stateFn { case '\n', '\r': return rawState case utf8.RuneError: - if l.pos-l.start > 0 { - l.parts = append(l.parts, l.src[l.start:l.pos]) - l.start = l.pos + if width != replacementcharacterwidth { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil } - return nil } } } @@ -283,11 +299,13 @@ func multilineCommentState(l *sqlLexer) stateFn { l.nested-- case utf8.RuneError: - if l.pos-l.start > 0 { - l.parts = append(l.parts, l.src[l.start:l.pos]) - l.start = l.pos + if width != replacementcharacterwidth { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil } - return nil } } } diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go index bbac84b02..1dc0f95fc 100644 --- a/internal/sanitize/sanitize_test.go +++ b/internal/sanitize/sanitize_test.go @@ -88,6 +88,16 @@ func TestNewQuery(t *testing.T) { sql: "select 42, -- \\nis a Deep Thought's favorite number\r$1", expected: sanitize.Query{Parts: []sanitize.Part{"select 42, -- \\nis a Deep Thought's favorite number\r", 1}}, }, + { + // https://github.com/jackc/pgx/issues/1380 + sql: "select 'hello w�rld'", + expected: sanitize.Query{Parts: []sanitize.Part{"select 'hello w�rld'"}}, + }, + { + // Unterminated quoted string + sql: "select 'hello world", + expected: sanitize.Query{Parts: []sanitize.Part{"select 'hello world"}}, + }, } for i, tt := range successTests {