Skip to content

Commit

Permalink
feat: Only keep HTTP headers we want to keep (#254)
Browse files Browse the repository at this point in the history
## Which problem is this PR solving?
When processing HTTP request/response pairs, the header structs contain
a lot of information we do not care about. From profiling data, we can
see a large portion of the agent's live bytes are coming from
`textproto.readMimeHeader()`, where headers are created. These bytes are
held until the request/response pair is processed.

This PR extracts a copy of the header entries we care about and
replaces the Header struct on the request/response, meaning the previous
larger Header struct can be GC'd earlier and not wait for pairing to
complete.

- Closes #253

## Short description of the changes
- Add new extractHeaders function that creates a new Header and copies
header key/values we want to keep
- Initially we only keep "User-Agent" - more can be added in the future
- Update HTTP request and response parse funcs to use new extractHeaders
utility
- Add test to verify behaviour of extractHeaders

## How to verify that this has the expected result
The header that's part of the HTTP request / response now only contains
entries we care about and allows the expensive original Header to be
cleaned up earlier.
  • Loading branch information
MikeGoldsmith committed Oct 2, 2023
1 parent 5b9cd06 commit 2cdf976
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
24 changes: 24 additions & 0 deletions assemblers/tcp_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ func (reader *tcpReader) reassembledSG(sg reassembly.ScatterGather, ac reassembl
Msg("Error reading HTTP request")
return
}
// We only care about a few headers, so recreate the header with just the ones we need
req.Header = extractHeaders(req.Header)
// We don't need the body, so just close it if set
if req.Body != nil {
req.Body.Close()
Expand Down Expand Up @@ -99,6 +101,8 @@ func (reader *tcpReader) reassembledSG(sg reassembly.ScatterGather, ac reassembl
Msg("Error reading HTTP response")
return
}
// We only care about a few headers, so recreate the header with just the ones we need
res.Header = extractHeaders(res.Header)
// We don't need the body, so just close it if set
if res.Body != nil {
res.Body.Close()
Expand All @@ -123,3 +127,23 @@ func (reader *tcpReader) processEvent(requestId int64, entry *entry) {
DstIp: reader.dstIp,
}
}

// headersToExtract lists the names of the HTTP headers that extractHeaders
// copies into the trimmed-down header. Add a name here to keep another header.
var headersToExtract = []string{
	"User-Agent",
}

// extractHeaders returns a new http.Header containing only the headers named
// in headersToExtract, copied from header.
//
// The original request/response header contains a lot of stuff we don't really
// care about and stays in memory until the request/response pair is processed,
// so callers replace it with the smaller copy returned here.
//
// Every non-empty value of a kept header is preserved, in order, so headers
// that were sent on several lines are not truncated to their first value.
// Empty values are dropped, and a header with no non-empty value is omitted.
// The result is never nil, even when header is nil or empty.
func extractHeaders(header http.Header) http.Header {
	cleanHeader := make(http.Header, len(headersToExtract))
	if len(header) == 0 {
		return cleanHeader
	}
	for _, headerName := range headersToExtract {
		// Build a fresh slice rather than reusing the original one, so the
		// result does not reference the original header's value storage.
		var kept []string
		for _, value := range header.Values(headerName) {
			if value != "" {
				kept = append(kept, value)
			}
		}
		if len(kept) > 0 {
			cleanHeader[http.CanonicalHeaderKey(headerName)] = kept
		}
	}
	return cleanHeader
}
46 changes: 46 additions & 0 deletions assemblers/tcp_reader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package assemblers

import (
"net/http"
"testing"

"github.com/stretchr/testify/assert"
)

func TestExtractHeader(t *testing.T) {
testCases := []struct {
name string
header http.Header
expected http.Header
}{
{
name: "nil header",
header: nil,
expected: http.Header{},
},
{
name: "empty header",
header: http.Header{},
expected: http.Header{},
},
{
name: "only extracts headers we want to keep",
header: http.Header{
"Accept": []string{"test"},
"Host": []string{"test"},
"Cookie": []string{"test"},
"User-Agent": []string{"test"},
},
expected: http.Header{
"User-Agent": []string{"test"},
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
result := extractHeaders(tc.header)
assert.Equal(t, tc.expected, result)
})
}
}

0 comments on commit 2cdf976

Please sign in to comment.