Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse trace data from SQL comments #226

Merged
merged 1 commit into from
Nov 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 61 additions & 22 deletions parsers/postgresql/postgresql.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,27 @@ import (
"sync"
"time"

"github.com/sirupsen/logrus"
"github.com/honeycombio/honeytail/event"
"github.com/honeycombio/honeytail/parsers"
"github.com/honeycombio/mysqltools/query/normalizer"
"github.com/sirupsen/logrus"
)

const (
// Regex string that matches timestamps in log
timestampRe = `\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[.0-9]* [A-Z]+`
defaultPrefix = "%t [%p-%l] %u@%d"
// Regex string that matches the header of a slow query log line
slowQueryHeader = `\s*(?P<level>[A-Z0-9]+):\s+duration: (?P<duration>[0-9\.]+) ms\s+(?:(statement)|(execute \S+)): `
slowQueryHeader = `\s*(?P<level>[A-Z0-9]+):\s+duration: (?P<duration>[0-9\.]+) ms\s+(?:(statement)|(execute \S+)): `
traceFromTrace1 = `\/\*.*trace_id=['"](?P<trace_id>[^'"]+)['"].*parent_id=['"](?P<parent_id>[^'"]+)['"].*\*\/`
traceFromTrace2 = `\/\*.*parent_id=['"](?P<parent_id>[^'"]+)['"].*trace_id=['"](?P<trace_id>[^'"]+)['"].*\*\/`
traceFromTraceParent = `\/\*.*traceparent=['"]\d{2}-(?P<trace_id>[^-]+)-(?P<parent_id>[^-]+)-\d{2}['"].*\*\/`
)

var slowQueryHeaderRegex = &parsers.ExtRegexp{regexp.MustCompile(slowQueryHeader)}
var slowQueryHeaderRegex = &parsers.ExtRegexp{Regexp: regexp.MustCompile(slowQueryHeader)}
var traceFromTraceRegex1 = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTrace1)}
var traceFromTraceRegex2 = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTrace2)}
var traceFromTraceParentRegex = &parsers.ExtRegexp{Regexp: regexp.MustCompile(traceFromTraceParent)}

// prefixField represents a specific format specifier in the log_line_prefix string
// (see module comment for details).
Expand All @@ -85,22 +91,22 @@ func (p *prefixField) ReString() string {
}

var prefixValues = map[string]prefixField{
"%a": prefixField{Name: "application", Pattern: "\\S+"},
"%u": prefixField{Name: "user", Pattern: "\\S+"},
"%d": prefixField{Name: "database", Pattern: "\\S+"},
"%r": prefixField{Name: "host_port", Pattern: "\\S+"},
"%h": prefixField{Name: "host", Pattern: "\\S+"},
"%p": prefixField{Name: "pid", Pattern: "\\d+"},
"%t": prefixField{Name: "timestamp", Pattern: timestampRe},
"%m": prefixField{Name: "timestamp_millis", Pattern: timestampRe},
"%n": prefixField{Name: "timestamp_unix", Pattern: "\\d+"},
"%i": prefixField{Name: "command_tag", Pattern: "\\S+"},
"%e": prefixField{Name: "sql_state", Pattern: "\\S+"},
"%c": prefixField{Name: "session_id", Pattern: "\\d+"},
"%l": prefixField{Name: "session_line_number", Pattern: "\\d+"},
"%s": prefixField{Name: "session_start", Pattern: timestampRe},
"%v": prefixField{Name: "virtual_transaction_id", Pattern: "\\S+"},
"%x": prefixField{Name: "transaction_id", Pattern: "\\S+"},
"%a": {Name: "application", Pattern: "\\S+"},
"%u": {Name: "user", Pattern: "\\S+"},
"%d": {Name: "database", Pattern: "\\S+"},
"%r": {Name: "host_port", Pattern: "\\S+"},
"%h": {Name: "host", Pattern: "\\S+"},
"%p": {Name: "pid", Pattern: "\\d+"},
"%t": {Name: "timestamp", Pattern: timestampRe},
"%m": {Name: "timestamp_millis", Pattern: timestampRe},
"%n": {Name: "timestamp_unix", Pattern: "\\d+"},
"%i": {Name: "command_tag", Pattern: "\\S+"},
"%e": {Name: "sql_state", Pattern: "\\S+"},
"%c": {Name: "session_id", Pattern: "\\d+"},
"%l": {Name: "session_line_number", Pattern: "\\d+"},
"%s": {Name: "session_start", Pattern: timestampRe},
"%v": {Name: "virtual_transaction_id", Pattern: "\\S+"},
"%x": {Name: "transaction_id", Pattern: "\\S+"},
}

type Options struct {
Expand Down Expand Up @@ -138,8 +144,7 @@ func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, pref
// of database logs. Don't confuse this with p.pgPrefixRegex, which
// is a compiled regex for parsing the postgres-specific line
// prefix.
var prefix string
prefix = prefixRegex.FindString(line)
var prefix = prefixRegex.FindString(line)
line = strings.TrimPrefix(line, prefix)
}
if !isContinuationLine(line) && len(groupedLines) > 0 {
Expand Down Expand Up @@ -214,20 +219,54 @@ func (p *Parser) handleEvent(rawEvent []string) *event.Event {
for _, line := range rawEvent[1:] {
query += " " + strings.TrimLeft(line, " \t")
}

// Also try to parse the trace data from a SQL comment
match, traceData := parseTraceData(query)

if match {
if traceId, ok := traceData["trace_id"]; ok {
ev.Data["trace.trace_id"] = traceId
}

if parentId, ok := traceData["parent_id"]; ok {
ev.Data["trace.parent_id"] = parentId
}
}

normalizedQuery := normalizer.NormalizeQuery(query)

ev.Data["query"] = query
ev.Data["normalized_query"] = normalizedQuery

if len(normalizer.LastTables) > 0 {
ev.Data["tables"] = strings.Join(normalizer.LastTables, " ")
}

if len(normalizer.LastComments) > 0 {
ev.Data["comments"] = "/* " + strings.Join(normalizer.LastComments, " */ /* ") + " */"
}

return ev
}

func parseTraceData(query string) (matched bool, fields map[string]string) {
match, _, traceData := parsePrefix(traceFromTraceRegex1, query)

if match {
return match, traceData
}

match, _, traceData = parsePrefix(traceFromTraceRegex2, query)

if match {
return match, traceData
}

match, _, traceData = parsePrefix(traceFromTraceParentRegex, query)

return match, traceData
}

func isContinuationLine(line string) bool {
return strings.HasPrefix(line, "\t")
}
Expand Down Expand Up @@ -291,5 +330,5 @@ func buildPrefixRegexp(prefixFormat string) (*parsers.ExtRegexp, error) {
if err != nil {
return nil, err
}
return &parsers.ExtRegexp{re}, nil
return &parsers.ExtRegexp{Regexp: re}, nil
}
182 changes: 144 additions & 38 deletions parsers/postgresql/postgresql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 7, 23, 5, 16, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"session_line_number": 3,
"query": "SELECT d.datname as \"Name\", pg_catalog.pg_get_userbyid(d.datdba) as \"Owner\", pg_catalog.pg_encoding_to_char(d.encoding) as \"Encoding\", d.datcollate as \"Collate\", d.datctype as \"Ctype\", pg_catalog.array_to_string(d.datacl, E'\\n') AS \"Access privileges\" FROM pg_catalog.pg_database d ORDER BY 1;",
"normalized_query": "select d.datname as ?, pg_catalog.pg_get_userbyid(d.datdba) as ?, pg_catalog.pg_encoding_to_char(d.encoding) as ?, d.datcollate as ?, d.datctype as ?, pg_catalog.array_to_string(d.datacl, e?) as ? from pg_catalog.pg_database d order by ?;",
Expand All @@ -49,10 +49,10 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 8, 3, 2, 49, 314000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"virtual_transaction_id": "3/0",
"transaction_id": "0",
Expand All @@ -71,10 +71,10 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test;",
"normalized_query": "select * from test;",
Expand All @@ -88,16 +88,122 @@ func TestSingleQueryParsing(t *testing.T) {
expected: event.Event{
Timestamp: time.Date(2017, 11, 7, 23, 5, 16, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"session_line_number": 3,
"query": "SELECT c FROM sbtest1 WHERE id=$1",
"normalized_query": "select c from sbtest1 where id=$?",
},
},
},
{
description: "extract the trace id and parent id from a flat comment",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', parent_id='c532cb4098ac3dd2' */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', parent_id='c532cb4098ac3dd2' */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a flat comment with different quotes",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* trace_id="5bd66ef5095369c7b0d1f8f4bd33716a", parent_id="c532cb4098ac3dd2" */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* trace_id=\"5bd66ef5095369c7b0d1f8f4bd33716a\", parent_id=\"c532cb4098ac3dd2\" */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a flat comment in different order",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* parent_id='c532cb4098ac3dd2' trace_id='5bd66ef5095369c7b0d1f8f4bd33716a' */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* parent_id='c532cb4098ac3dd2' trace_id='5bd66ef5095369c7b0d1f8f4bd33716a' */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a deep comment",
in: `2017-11-07 23:05:16 UTC [3053-3] postgres@postgres LOG: duration: 0.681 ms statement: SELECT d.datname as "Name",
pg_catalog.pg_get_userbyid(d.datdba) as "Owner",
pg_catalog.pg_encoding_to_char(d.encoding) as "Encoding",
d.datcollate as "Collate",
d.datctype as "Ctype",
pg_catalog.array_to_string(d.datacl, E'\n') AS "Access privileges"
FROM pg_catalog.pg_database d
ORDER BY 1 /* trace.trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', trace.parent_id='c532cb4098ac3dd2' */;`,
prefixFormat: "%t [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 7, 23, 5, 16, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "postgres",
"duration": 0.681,
"pid": 3053,
"session_line_number": 3,
"query": "SELECT d.datname as \"Name\", pg_catalog.pg_get_userbyid(d.datdba) as \"Owner\", pg_catalog.pg_encoding_to_char(d.encoding) as \"Encoding\", d.datcollate as \"Collate\", d.datctype as \"Ctype\", pg_catalog.array_to_string(d.datacl, E'\\n') AS \"Access privileges\" FROM pg_catalog.pg_database d ORDER BY 1 /* trace.trace_id='5bd66ef5095369c7b0d1f8f4bd33716a', trace.parent_id='c532cb4098ac3dd2' */;",
"normalized_query": "select d.datname as ?, pg_catalog.pg_get_userbyid(d.datdba) as ?, pg_catalog.pg_encoding_to_char(d.encoding) as ?, d.datcollate as ?, d.datctype as ?, pg_catalog.array_to_string(d.datacl, e?) as ? from pg_catalog.pg_database d order by ? /* trace.trace_id=?, trace.parent_id=? */;",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
{
description: "extract the trace id and parent id from a traceparent",
in: `1510258541402 [8544-1] postgres@test LOG: duration: 2.753 ms statement: select * from test /* traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01', */`,
prefixFormat: "%n [%p-%l] %u@%d",
expected: event.Event{
Timestamp: time.Date(2017, 11, 9, 20, 15, 41, 402000000, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 2.753,
"pid": 8544,
"session_line_number": 1,
"query": "select * from test /* traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01', */",
"normalized_query": "select * from test",
"tables": "test",
"trace.trace_id": "5bd66ef5095369c7b0d1f8f4bd33716a",
"trace.parent_id": "c532cb4098ac3dd2",
},
},
},
}

for _, tc := range testcases {
Expand Down Expand Up @@ -128,49 +234,49 @@ func TestMultipleQueryParsing(t *testing.T) {
WHERE id=2;
`
out := []event.Event{
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 18, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 9.263,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 9.263,
"pid": 3542,
"session_line_number": 5,
"query": "INSERT INTO test (id, name, value) VALUES (1, 'Alice', 'foo');",
"normalized_query": "insert into test (id, name, value) values (?, ?, ?);",
},
},
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 27, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 0.841,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 0.841,
"pid": 3542,
"session_line_number": 6,
"query": "INSERT INTO test (id, name, value) VALUES (2, 'Bob', 'bar');",
"normalized_query": "insert into test (id, name, value) values (?, ?, ?);",
},
},
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 39, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 15.577,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 15.577,
"pid": 3542,
"session_line_number": 7,
"query": "SELECT * FROM test WHERE id=1;",
"normalized_query": "select * from test where id=?;",
},
},
event.Event{
{
Timestamp: time.Date(2017, 11, 7, 1, 43, 42, 0, time.UTC),
Data: map[string]interface{}{
"user": "postgres",
"database": "test",
"duration": 0.501,
"pid": 3542,
"user": "postgres",
"database": "test",
"duration": 0.501,
"pid": 3542,
"session_line_number": 8,
"query": "SELECT * FROM test WHERE id=2;",
"normalized_query": "select * from test where id=?;",
Expand Down Expand Up @@ -232,8 +338,8 @@ func TestCustomLogLinePrefix(t *testing.T) {
expected := &event.Event{
Timestamp: time.Date(0001, 1, 1, 0, 0, 0, 0, time.UTC),
Data: map[string]interface{}{
"duration": 1050.729,
"pid": 200,
"duration": 1050.729,
"pid": 200,
"session_line_number": 1,
"query": "UPDATE \"repositories\" SET \"current_build_id\" = 341933279, \"updated_at\" = '2018-02-15 15:21:55.174858' WHERE \"repositories\".\"id\" = 16235973",
"normalized_query": "update ? set ? = ?, ? = ? where ?.? = ?",
Expand Down