Skip to content

Commit

Permalink
Parse trace data from SQL comments (#226)
Browse files Browse the repository at this point in the history
  • Loading branch information
endor committed Nov 2, 2021
1 parent 1eb8fad commit 983d4b5
Show file tree
Hide file tree
Showing 2 changed files with 205 additions and 60 deletions.
83 changes: 61 additions & 22 deletions parsers/postgresql/postgresql.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,27 @@ import (
"sync"
"time"

"github.com/sirupsen/logrus"
"github.com/honeycombio/honeytail/event"
"github.com/honeycombio/honeytail/parsers"
"github.com/honeycombio/mysqltools/query/normalizer"
"github.com/sirupsen/logrus"
)

const (
// Regex string that matches timestamps in log
timestampRe = `\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[.0-9]* [A-Z]+`
defaultPrefix = "%t [%p-%l] %u@%d"
// Regex string that matches the header of a slow query log line
slowQueryHeader = `\s*(?P<level>[A-Z0-9]+):\s+duration: (?P<duration>[0-9\.]+) ms\s+(?:(statement)|(execute \S+)): `
slowQueryHeader = `\s*(?P<level>[A-Z0-9]+):\s+duration: (?P<duration>[0-9\.]+) ms\s+(?:(statement)|(execute \S+)): `
traceFromTrace1 = `\/\*.*trace_id=['"](?P<trace_id>[^'"]+)['"].*parent_id=['"](?P<parent_id>[^'"]+)['"].*\*\/`
traceFromTrace2 = `\/\*.*parent_id=['"](?P<parent_id>[^'"]+)['"].*trace_id=['"](?P<trace_id>[^'"]+)['"].*\*\/`
traceFromTraceParent = `\/\*.*traceparent=['"]\d{2}-(?P<trace_id>[^-]+)-(?P<parent_id>[^-]+)-\d{2}['"].*\*\/`
)

var slowQueryHeaderRegex = &parsers.ExtRegexp{regexp.MustCompile(slowQueryHeader)}
var slowQueryHeaderRegex = &parsers.ExtRegexp{Regexp: regexp.MustCompile(slowQueryHeader)}
var traceFromTraceRegex1 = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTrace1)}
var traceFromTraceRegex2 = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTrace2)}
var traceFromTraceParentRegex = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTraceParent)}

// prefixField represents a specific format specifier in the log_line_prefix string
// (see module comment for details).
Expand All @@ -85,22 +91,22 @@ func (p *prefixField) ReString() string {
}

var prefixValues = map[string]prefixField{
"%a": prefixField{Name: "application", Pattern: "\\S+"},
"%u": prefixField{Name: "user", Pattern: "\\S+"},
"%d": prefixField{Name: "database", Pattern: "\\S+"},
"%r": prefixField{Name: "host_port", Pattern: "\\S+"},
"%h": prefixField{Name: "host", Pattern: "\\S+"},
"%p": prefixField{Name: "pid", Pattern: "\\d+"},
"%t": prefixField{Name: "timestamp", Pattern: timestampRe},
"%m": prefixField{Name: "timestamp_millis", Pattern: timestampRe},
"%n": prefixField{Name: "timestamp_unix", Pattern: "\\d+"},
"%i": prefixField{Name: "command_tag", Pattern: "\\S+"},
"%e": prefixField{Name: "sql_state", Pattern: "\\S+"},
"%c": prefixField{Name: "session_id", Pattern: "\\d+"},
"%l": prefixField{Name: "session_line_number", Pattern: "\\d+"},
"%s": prefixField{Name: "session_start", Pattern: timestampRe},
"%v": prefixField{Name: "virtual_transaction_id", Pattern: "\\S+"},
"%x": prefixField{Name: "transaction_id", Pattern: "\\S+"},
"%a": {Name: "application", Pattern: "\\S+"},
"%u": {Name: "user", Pattern: "\\S+"},
"%d": {Name: "database", Pattern: "\\S+"},
"%r": {Name: "host_port", Pattern: "\\S+"},
"%h": {Name: "host", Pattern: "\\S+"},
"%p": {Name: "pid", Pattern: "\\d+"},
"%t": {Name: "timestamp", Pattern: timestampRe},
"%m": {Name: "timestamp_millis", Pattern: timestampRe},
"%n": {Name: "timestamp_unix", Pattern: "\\d+"},
"%i": {Name: "command_tag", Pattern: "\\S+"},
"%e": {Name: "sql_state", Pattern: "\\S+"},
"%c": {Name: "session_id", Pattern: "\\d+"},
"%l": {Name: "session_line_number", Pattern: "\\d+"},
"%s": {Name: "session_start", Pattern: timestampRe},
"%v": {Name: "virtual_transaction_id", Pattern: "\\S+"},
"%x": {Name: "transaction_id", Pattern: "\\S+"},
}

type Options struct {
Expand Down Expand Up @@ -138,8 +144,7 @@ func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, pref
// of database logs. Don't confuse this with p.pgPrefixRegex, which
// is a compiled regex for parsing the postgres-specific line
// prefix.
var prefix string
prefix = prefixRegex.FindString(line)
var prefix = prefixRegex.FindString(line)
line = strings.TrimPrefix(line, prefix)
}
if !isContinuationLine(line) && len(groupedLines) > 0 {
Expand Down Expand Up @@ -214,20 +219,54 @@ func (p *Parser) handleEvent(rawEvent []string) *event.Event {
for _, line := range rawEvent[1:] {
query += " " + strings.TrimLeft(line, " \t")
}

// Also try to parse the trace data from a SQL comment
match, traceData := parseTraceData(query)

if match {
if traceId, ok := traceData["trace_id"]; ok {
ev.Data["trace.trace_id"] = traceId
}

if parentId, ok := traceData["parent_id"]; ok {
ev.Data["trace.parent_id"] = parentId
}
}

normalizedQuery := normalizer.NormalizeQuery(query)

ev.Data["query"] = query
ev.Data["normalized_query"] = normalizedQuery

if len(normalizer.LastTables) > 0 {
ev.Data["tables"] = strings.Join(normalizer.LastTables, " ")
}

if len(normalizer.LastComments) > 0 {
ev.Data["comments"] = "/* " + strings.Join(normalizer.LastComments, " */ /* ") + " */"
}

return ev
}

func parseTraceData(query string) (matched bool, fields map[string]string) {
match, _, traceData := parsePrefix(traceFromTraceRegex1, query)

if match {
return match, traceData
}

match, _, traceData = parsePrefix(traceFromTraceRegex2, query)

if match {
return match, traceData
}

match, _, traceData = parsePrefix(traceFromTraceParentRegex, query)

return match, traceData
}

func isContinuationLine(line string) bool {
return strings.HasPrefix(line, "\t")
}
Expand Down Expand Up @@ -291,5 +330,5 @@ func buildPrefixRegexp(prefixFormat string) (*parsers.ExtRegexp, error) {
if err != nil {
return nil, err
}
return &parsers.ExtRegexp{re}, nil
return &parsers.ExtRegexp{Regexp: re}, nil
}
182 changes: 144 additions & 38 deletions parsers/postgresql/postgresql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 7, 23, 5, 16, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"session_line_number": 3,
"query": "SELECT d.datname as \"Name\", pg_catalog.pg_get_userbyid(d.datdba) as \"Owner\", pg_catalog.pg_encoding_to_char(d.encoding) as \"Encoding\", d.datcollate as \"Collate\", d.datctype as \"Ctype\", pg_catalog.array_to_string(d.datacl, E'\\n') AS \"Access privileges\" FROM pg_catalog.pg_database d ORDER BY 1;",
"normalized_query": "select d.datname as ?, pg_catalog.pg_get_userbyid(d.datdba) as ?, pg_catalog.pg_encoding_to_char(d.encoding) as ?, d.datcollate as ?, d.datctype as ?, pg_catalog.array_to_string(d.datacl, e?) as ? from pg_catalog.pg_database d order by ?;",
Expand All @@ -49,10 +49,10 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 8, 3, 2, 49, 314000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"virtual_transaction_id": "3/0",
"transaction_id": "0",
Expand All @@ -71,10 +71,10 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test;",
"normalized_query": "select * from test;",
Expand All @@ -88,16 +88,122 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 7, 23, 5, 16, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"session_line_number": 3,
"query": "SELECT c FROM sbtest1 WHERE id=$1",
"normalized_query": "select c from sbtest1 where id=$?",
},
},
},
{
description: "extract the trace id and parent id from a flat comment",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', parent_id='c532cb4098ac3dd2' */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', parent_id='c532cb4098ac3dd2' */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a flat comment with different quotes",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* trace_id="5bd66ef5095369c7b0d1f8f4bd33716a", parent_id="c532cb4098ac3dd2" */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* trace_id=\"5bd66ef5095369c7b0d1f8f4bd33716a\", parent_id=\"c532cb4098ac3dd2\" */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a flat comment in different order",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* parent_id='c532cb4098ac3dd2' trace_id='5bd66ef5095369c7b0d1f8f4bd33716a' */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* parent_id='c532cb4098ac3dd2' trace_id='5bd66ef5095369c7b0d1f8f4bd33716a' */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a deep comment",
in: `2017-11-07 23:05:16 UTC [3053-3] postgres@postgres LOG: duration: 0.681 ms statement: SELECT d.datname as "Name",
pg_catalog.pg_get_userbyid(d.datdba) as "Owner",
pg_catalog.pg_encoding_to_char(d.encoding) as "Encoding",
d.datcollate as "Collate",
d.datctype as "Ctype",
pg_catalog.array_to_string(d.datacl, E'\n') AS "Access privileges"
FROM pg_catalog.pg_database d
ORDER BY 1 /* trace.trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', trace.parent_id='c532cb4098ac3dd2' */;`,
prefixFormat: "%t [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 7, 23, 5, 16, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"session_line_number": 3,
"query": "SELECT d.datname as \"Name\", pg_catalog.pg_get_userbyid(d.datdba) as \"Owner\", pg_catalog.pg_encoding_to_char(d.encoding) as \"Encoding\", d.datcollate as \"Collate\", d.datctype as \"Ctype\", pg_catalog.array_to_string(d.datacl, E'\\n') AS \"Access privileges\" FROM pg_catalog.pg_database d ORDER BY 1 /* trace.trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', trace.parent_id='c532cb4098ac3dd2' */;",
"normalized_query": "select d.datname as ?, pg_catalog.pg_get_userbyid(d.datdba) as ?, pg_catalog.pg_encoding_to_char(d.encoding) as ?, d.datcollate as ?, d.datctype as ?, pg_catalog.array_to_string(d.datacl, e?) as ? from pg_catalog.pg_database d order by ? /* trace.trace_id=?, trace.parent_id=? */;",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a traceparent",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01', */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01', */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
}

for _, tc := range testcases {
Expand Down Expand Up @@ -128,49 +234,49 @@ func TestMultipleQueryParsing(t *testing.T) {
WHERE id=2;
`
out := []event.Event{
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 18, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 9.263,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 9.263,
"pid": 3542,
"session_line_number": 5,
"query": "INSERT INTO test (id, name, value) VALUES (1, 'Alice', 'foo');",
"normalized_query": "insert into test (id, name, value) values (?, ?, ?);",
},
},
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 27, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 0.841,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 0.841,
"pid": 3542,
"session_line_number": 6,
"query": "INSERT INTO test (id, name, value) VALUES (2, 'Bob', 'bar');",
"normalized_query": "insert into test (id, name, value) values (?, ?, ?);",
},
},
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 39, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 15.577,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 15.577,
"pid": 3542,
"session_line_number": 7,
"query": "SELECT * FROM test WHERE id=1;",
"normalized_query": "select * from test where id=?;",
},
},
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 42, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 0.501,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 0.501,
"pid": 3542,
"session_line_number": 8,
"query": "SELECT * FROM test WHERE id=2;",
"normalized_query": "select * from test where id=?;",
Expand Down Expand Up @@ -232,8 +338,8 @@ func TestCustomLogLinePrefix(t *testing.T) {
expected := &event.Event{
Timestamp: time.Date(0001, 1, 1, 0, 0, 0, 0, time.UTC),
Data: map[string]interface{}{
"duration": 1050.729,
"pid": 200,
"duration": 1050.729,
"pid": 200,
"session_line_number": 1,
"query": "UPDATE \"repositories\" SET \"current_build_id\" = 341933279, \"updated_at\" = '2018-02-15 15:21:55.174858' WHERE \"repositories\".\"id\" = 16235973",
"normalized_query": "update ? set ? = ?, ? = ? where ?.? = ?",
Expand Down

0 comments on commit 983d4b5

Please sign in to comment.