Skip to content

Commit

Permalink
Fix query sanitizer
Browse files Browse the repository at this point in the history
...when query text has contains Unicode replacement character.
uft8.RuneError actually is a valid character.
  • Loading branch information
jackc committed Nov 15, 2022
1 parent f2b3210 commit 3edc1b5
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 24 deletions.
66 changes: 42 additions & 24 deletions internal/sanitize/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ type Query struct {
Parts []Part
}

// utf.DecodeRune returns the utf8.RuneError for errors. But that is actually rune U+FFFD -- the unicode replacement
// character. utf8.RuneError is not an error if it is also width 3.
//
// https://github.com/jackc/pgx/issues/1380
const replacementcharacterwidth = 3

func (q *Query) Sanitize(args ...interface{}) (string, error) {
argUse := make([]bool, len(args))
buf := &bytes.Buffer{}
Expand Down Expand Up @@ -138,11 +144,13 @@ func rawState(l *sqlLexer) stateFn {
return multilineCommentState
}
case utf8.RuneError:
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
if width != replacementcharacterwidth {
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
}
return nil
}
return nil
}
}
}
Expand All @@ -160,11 +168,13 @@ func singleQuoteState(l *sqlLexer) stateFn {
}
l.pos += width
case utf8.RuneError:
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
if width != replacementcharacterwidth {
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
}
return nil
}
return nil
}
}
}
Expand All @@ -182,11 +192,13 @@ func doubleQuoteState(l *sqlLexer) stateFn {
}
l.pos += width
case utf8.RuneError:
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
if width != replacementcharacterwidth {
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
}
return nil
}
return nil
}
}
}
Expand Down Expand Up @@ -228,11 +240,13 @@ func escapeStringState(l *sqlLexer) stateFn {
}
l.pos += width
case utf8.RuneError:
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
if width != replacementcharacterwidth {
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
}
return nil
}
return nil
}
}
}
Expand All @@ -249,11 +263,13 @@ func oneLineCommentState(l *sqlLexer) stateFn {
case '\n', '\r':
return rawState
case utf8.RuneError:
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
if width != replacementcharacterwidth {
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
}
return nil
}
return nil
}
}
}
Expand Down Expand Up @@ -283,11 +299,13 @@ func multilineCommentState(l *sqlLexer) stateFn {
l.nested--

case utf8.RuneError:
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
if width != replacementcharacterwidth {
if l.pos-l.start > 0 {
l.parts = append(l.parts, l.src[l.start:l.pos])
l.start = l.pos
}
return nil
}
return nil
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions internal/sanitize/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ func TestNewQuery(t *testing.T) {
sql: "select 42, -- \\nis a Deep Thought's favorite number\r$1",
expected: sanitize.Query{Parts: []sanitize.Part{"select 42, -- \\nis a Deep Thought's favorite number\r", 1}},
},
{
// https://github.com/jackc/pgx/issues/1380
sql: "select 'hello w�rld'",
expected: sanitize.Query{Parts: []sanitize.Part{"select 'hello w�rld'"}},
},
{
// Unterminated quoted string
sql: "select 'hello world",
expected: sanitize.Query{Parts: []sanitize.Part{"select 'hello world"}},
},
}

for i, tt := range successTests {
Expand Down

0 comments on commit 3edc1b5

Please sign in to comment.