Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat/mediatype: avoid printing binary files. #7

Merged
merged 2 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# httpretty
[![GoDoc](https://godoc.org/github.com/henvic/httpretty?status.svg)](https://godoc.org/github.com/henvic/httpretty) [![Build Status](https://travis-ci.org/henvic/httpretty.svg?branch=master)](https://travis-ci.org/henvic/httpretty) [![Coverage Status](https://coveralls.io/repos/henvic/httpretty/badge.svg)](https://coveralls.io/r/henvic/httpretty) [![Go Report Card](https://goreportcard.com/badge/github.com/henvic/httpretty)](https://goreportcard.com/report/github.com/henvic/httpretty)
[![GoDoc](https://godoc.org/github.com/henvic/httpretty?status.svg)](https://godoc.org/github.com/henvic/httpretty) [![Build Status](https://travis-ci.org/henvic/httpretty.svg?branch=master)](https://travis-ci.org/henvic/httpretty) [![Coverage Status](https://coveralls.io/repos/henvic/httpretty/badge.svg)](https://coveralls.io/r/henvic/httpretty) [![Go Report Card](https://goreportcard.com/badge/github.com/henvic/httpretty)](https://goreportcard.com/report/github.com/henvic/httpretty) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3669/badge)](https://bestpractices.coreinfrastructure.org/projects/3669)

Package httpretty pretty-prints the HTTP requests of your Go programs on your terminal screen. It is mostly inspired by [curl](https://curl.haxx.se)'s `--verbose` mode, and also by [httputil.DumpRequest](https://golang.org/pkg/net/http/httputil/) and similar functions.

Expand Down
81 changes: 81 additions & 0 deletions binary_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package httpretty

import (
"bytes"
"testing"
)

// TestIsBinary feeds isBinary a table of textual and binary samples and
// verifies the verdict for each one in its own subtest.
func TestIsBinary(t *testing.T) {
	// sample pairs an input payload with the verdict isBinary must return.
	type sample struct {
		desc   string
		data   []byte
		binary bool
	}

	samples := []sample{
		// Textual payloads, including ones that start with a byte order mark.
		{desc: "Empty", binary: false},
		{desc: "Text", data: []byte("plain text"), binary: false},
		{desc: "More text", data: []byte("plain text\n"), binary: false},
		{desc: "Text with UTF16 Big Endian BOM", data: []byte("\xFE\xFFevil plain text"), binary: false},
		{desc: "Text with UTF16 Little Endian BOM", data: []byte("\xFF\xFEevil plain text"), binary: false},
		{desc: "Text with UTF8 BOM", data: []byte("\xEF\xBB\xBFevil plain text"), binary: false},

		// Binary payloads: raw control bytes and well-known file signatures.
		{desc: "Binary", data: []byte{1, 2, 3}, binary: true},
		{desc: "Binary over 512bytes", data: bytes.Repeat([]byte{1, 2, 3, 4, 5, 6, 7, 8}, 65), binary: true},
		{desc: "JPEG image", data: []byte("\xFF\xD8\xFF"), binary: true},
		{desc: "AVI video", data: []byte("RIFF,O\n\x00AVI LISTÀ"), binary: true},
		{desc: "RAR", data: []byte("Rar!\x1A\x07\x00"), binary: true},
		{desc: "PDF", data: []byte("\x25\x50\x44\x46\x2d\x31\x2e\x33\x0a\x25\xc4\xe5\xf2\xe5\xeb\xa7"), binary: true},
	}

	for _, s := range samples {
		t.Run(s.desc, func(t *testing.T) {
			got := isBinary(s.data)
			if got == s.binary {
				return
			}
			t.Errorf("wanted isBinary(%v) = %v, got %v instead", s.data, s.binary, got)
		})
	}
}
110 changes: 110 additions & 0 deletions client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,116 @@ form received
}
}

// TestOutgoingBinaryBody checks that when the request is sent with a binary
// Content-Type (image/webp) and the response comes back as application/pdf,
// the logger prints "* body contains binary data" in place of each body.
func TestOutgoingBinaryBody(t *testing.T) {
	t.Parallel()

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// A nil Date entry keeps the header out of the response so the
		// expected output below stays deterministic.
		w.Header()["Date"] = nil
		// PDF signature bytes; no Content-Type is set here, the expected
		// output relies on net/http sniffing it as application/pdf.
		fmt.Fprint(w, "\x25\x50\x44\x46\x2d\x31\x2e\x33\x0a\x25\xc4\xe5\xf2\xe5\xeb\xa7")
	}))
	defer ts.Close()

	logger := &Logger{
		RequestHeader:  true,
		RequestBody:    true,
		ResponseHeader: true,
		ResponseBody:   true,
	}

	var buf bytes.Buffer
	logger.SetOutput(&buf)

	client := &http.Client{
		Transport: logger.RoundTripper(newTransport()),
	}

	b := []byte("RIFF\x00\x00\x00\x00WEBPVP")
	uri := fmt.Sprintf("%s/convert", ts.URL)
	req, err := http.NewRequest(http.MethodPost, uri, bytes.NewReader(b))
	if err != nil {
		// Fatal: req is nil on error, so touching req.Header would panic.
		t.Fatalf("cannot create request: %v", err)
	}
	req.Header.Add("Content-Type", "image/webp")

	resp, err := client.Do(req)
	if err != nil {
		// Fatal: resp is nil on error and there is nothing left to compare.
		t.Fatalf("cannot connect to the server: %v", err)
	}
	// Close the body so the connection is released; this runs after the
	// comparison below, so it cannot influence the captured log.
	defer resp.Body.Close()

	want := fmt.Sprintf(`* Request to %s
> POST /convert HTTP/1.1
> Host: %s
> Content-Type: image/webp

* body contains binary data
< HTTP/1.1 200 OK
< Content-Length: 16
< Content-Type: application/pdf

* body contains binary data
`, uri, ts.Listener.Addr())

	if got := buf.String(); got != want {
		t.Errorf("logged HTTP request %s; want %s", got, want)
	}
}

// TestOutgoingBinaryBodyNoMediatypeHeader checks that binary bodies are still
// reported as "* body contains binary data" when neither the request nor the
// response carries a Content-Type header.
func TestOutgoingBinaryBodyNoMediatypeHeader(t *testing.T) {
	t.Parallel()

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Nil entries keep both headers out of the response: Date for
		// determinism, Content-Type so that no media type is advertised.
		w.Header()["Date"] = nil
		w.Header()["Content-Type"] = nil
		// PDF signature bytes.
		fmt.Fprint(w, "\x25\x50\x44\x46\x2d\x31\x2e\x33\x0a\x25\xc4\xe5\xf2\xe5\xeb\xa7")
	}))
	defer ts.Close()

	logger := &Logger{
		RequestHeader:  true,
		RequestBody:    true,
		ResponseHeader: true,
		ResponseBody:   true,
	}

	var buf bytes.Buffer
	logger.SetOutput(&buf)

	client := &http.Client{
		Transport: logger.RoundTripper(newTransport()),
	}

	b := []byte("RIFF\x00\x00\x00\x00WEBPVP")
	uri := fmt.Sprintf("%s/convert", ts.URL)
	req, err := http.NewRequest(http.MethodPost, uri, bytes.NewReader(b))
	if err != nil {
		// Fatal: a nil request cannot be sent.
		t.Fatalf("cannot create request: %v", err)
	}

	resp, err := client.Do(req)
	if err != nil {
		// Fatal: resp is nil on error and there is nothing left to compare.
		t.Fatalf("cannot connect to the server: %v", err)
	}
	// Close the body so the connection is released; this runs after the
	// comparison below, so it cannot influence the captured log.
	defer resp.Body.Close()

	want := fmt.Sprintf(`* Request to %s
> POST /convert HTTP/1.1
> Host: %s

* body contains binary data
< HTTP/1.1 200 OK
< Content-Length: 16

* body contains binary data
`, uri, ts.Listener.Addr())

	if got := buf.String(); got != want {
		t.Errorf("logged HTTP request %s; want %s", got, want)
	}
}

// longRequestHandler is an empty struct type; the ServeHTTP method declared
// on it lets a value of this type be used as an http.Handler.
type longRequestHandler struct{}

func (h longRequestHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
Expand Down
87 changes: 87 additions & 0 deletions printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"net"
"net/http"
"sort"
"strings"
"time"

"github.com/henvic/httpretty/internal/color"
Expand Down Expand Up @@ -204,6 +205,11 @@ func (p *printer) printResponseBodyOut(resp *http.Response) {
return
}

if contentType := resp.Header.Get("Content-Type"); contentType != "" && isBinaryMediatype(contentType) {
p.println("* body contains binary data")
return
}

if p.logger.MaxResponseBody > 0 && resp.ContentLength > p.logger.MaxResponseBody {
p.printf("* body is too long (%d bytes) to print, skipping (longer than %d bytes)\n", resp.ContentLength, p.logger.MaxResponseBody)
return
Expand All @@ -230,6 +236,72 @@ func (p *printer) printResponseBodyOut(resp *http.Response) {
p.printBodyReader(contentType, tee)
}

// isBinary uses heuristics to guess whether body is binary, meaning not
// "printable" in the terminal. Only the first 512 bytes are inspected: byte
// order marks and file signatures live at the start of the data, and
// http.DetectContentType considers at most that many bytes as well.
// See discussion at https://groups.google.com/forum/#!topic/golang-nuts/YeLL7L7SwWs
func isBinary(body []byte) bool {
	// Keep the leading 512 bytes (not the tail), so the prefix checks below
	// see the real beginning of the body.
	if len(body) > 512 {
		body = body[:512]
	}

	// If the body starts with a UTF-8 or UTF-16 BOM, consider it non-binary.
	// Reference: https://tools.ietf.org/html/draft-ietf-websec-mime-sniff-03#section-5
	if bytes.HasPrefix(body, []byte{0xFE, 0xFF}) || // UTF-16BE BOM
		bytes.HasPrefix(body, []byte{0xFF, 0xFE}) || // UTF-16LE BOM
		bytes.HasPrefix(body, []byte{0xEF, 0xBB, 0xBF}) { // UTF-8 BOM
		return false
	}

	// If any of the sniffed octets is a binary data octet, consider it binary.
	// Reference: https://github.com/golang/go/blob/349e7df2c3d0f9b5429e7c86121499c137faac7e/src/net/http/sniff.go#L297-L309
	// c.f. section 5, step 4.
	for _, b := range body {
		switch {
		case b <= 0x08,
			b == 0x0B,
			0x0E <= b && b <= 0x1A,
			0x1C <= b && b <= 0x1F:
			return true
		}
	}

	// Otherwise, sniff the content type and check it against the list of
	// media types that are known to be binary.
	mediatype, _, err := mime.ParseMediaType(http.DetectContentType(body))
	if err != nil {
		return false
	}

	return isBinaryMediatype(mediatype)
}

// binaryMediatypes is the set of media types treated as binary. A key is
// either a full "type/subtype" pair or a bare top-level type (such as "image")
// that covers every subtype.
var binaryMediatypes = map[string]struct{}{
	"application/pdf":                {},
	"application/postscript":         {},
	"image":                          {}, // for practical reasons, any image (including SVG) is considered binary data
	"audio":                          {},
	"application/ogg":                {},
	"video":                          {},
	"application/vnd.ms-fontobject":  {},
	"font":                           {},
	"application/x-gzip":             {},
	"application/zip":                {},
	"application/x-rar-compressed":   {},
	"application/wasm":               {},
}

// isBinaryMediatype reports whether mediatype is listed in binaryMediatypes,
// either as a full "type/subtype" or through its top-level type.
//
// mediatype may be a raw Content-Type header value: anything after the first
// ';' (parameters such as charset) is ignored, surrounding whitespace is
// trimmed, and the comparison is case-insensitive.
func isBinaryMediatype(mediatype string) bool {
	// Drop parameters, e.g. "application/pdf; charset=binary".
	if i := strings.IndexByte(mediatype, ';'); i >= 0 {
		mediatype = mediatype[:i]
	}
	mediatype = strings.ToLower(strings.TrimSpace(mediatype))

	if _, ok := binaryMediatypes[mediatype]; ok {
		return true
	}

	// Fall back to the top-level type, e.g. "image" for "image/png".
	if i := strings.IndexByte(mediatype, '/'); i >= 0 {
		_, ok := binaryMediatypes[mediatype[:i]]
		return ok
	}

	return false
}

const maxDefaultUnknownReadable = 4096 // bytes

func (p *printer) printBodyUnknownLength(contentType string, maxLength int64, r io.ReadCloser) (newBody io.ReadCloser) {
Expand Down Expand Up @@ -416,6 +488,11 @@ func (p *printer) printServerResponse(req *http.Request, rec *responseRecorder)
return
}

if mediatype := req.Header.Get("Content-Type"); mediatype != "" && isBinaryMediatype(mediatype) {
p.println("* body contains binary data")
return
}

if p.logger.MaxResponseBody > 0 && rec.size > p.logger.MaxResponseBody {
p.printf("* body is too long (%d bytes) to print, skipping (longer than %d bytes)\n", rec.size, p.logger.MaxResponseBody)
return
Expand All @@ -442,6 +519,11 @@ func (p *printer) printBodyReader(contentType string, r io.Reader) {
return
}

if isBinary(body) {
p.println("* body contains binary data")
return
}

for _, f := range p.logger.Formatters {
if ok := p.safeBodyMatch(f, mediatype); !ok {
continue
Expand Down Expand Up @@ -561,6 +643,11 @@ func (p *printer) printRequestBody(req *http.Request) {
return
}

if mediatype := req.Header.Get("Content-Type"); mediatype != "" && isBinaryMediatype(mediatype) {
p.println("* body contains binary data")
return
}

// TODO(henvic): add support for printing multipart/formdata information as body (to responses too).
if p.logger.MaxRequestBody > 0 && req.ContentLength > p.logger.MaxRequestBody {
p.printf("* body is too long (%d bytes) to print, skipping (longer than %d bytes)\n",
Expand Down