Skip to content

Commit

Permalink
Merge pull request #344 from hearchco/as/fix/proxy-encoding
Browse files — browse the repository at this point in the history
feat: zstd and disabled reencoding of `/proxy`
  • Loading branch information
aleksasiriski committed Jul 2, 2024
2 parents ef2a2ad + a8836cf commit 0083a60
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 28 deletions.
12 changes: 7 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ require (
github.com/gocolly/colly/v2 v2.1.1-0.20231020184023-3c987f1982ed // Hearchco's PR
github.com/knadh/koanf/parsers/yaml v0.1.0
github.com/knadh/koanf/providers/env v0.1.0
github.com/knadh/koanf/providers/file v0.1.0
github.com/knadh/koanf/providers/file v1.0.0
github.com/knadh/koanf/providers/structs v0.1.0
github.com/knadh/koanf/v2 v2.1.1
github.com/pkg/profile v1.7.0
Expand All @@ -25,11 +25,13 @@ require (
golang.org/x/tools v0.22.0
)

require github.com/klauspost/compress v1.17.9

require (
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/antchfx/htmlquery v1.3.1 // indirect
github.com/antchfx/xmlquery v1.4.0 // indirect
github.com/antchfx/xpath v1.3.0 // indirect
github.com/antchfx/htmlquery v1.3.2 // indirect
github.com/antchfx/xmlquery v1.4.1 // indirect
github.com/antchfx/xpath v1.3.1 // indirect
github.com/bits-and-blooms/bitset v1.13.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
Expand All @@ -40,7 +42,7 @@ require (
github.com/gobwas/glob v0.2.3 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/pprof v0.0.0-20240528025155-186aa0362fba // indirect
github.com/google/pprof v0.0.0-20240625030939-27f56978b8b0 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
Expand Down
22 changes: 12 additions & 10 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEq
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
github.com/antchfx/htmlquery v1.3.1 h1:wm0LxjLMsZhRHfQKKZscDf2COyH4vDYA3wyH+qZ+Ylc=
github.com/antchfx/htmlquery v1.3.1/go.mod h1:PTj+f1V2zksPlwNt7uVvZPsxpKNa7mlVliCRxLX6Nx8=
github.com/antchfx/htmlquery v1.3.2 h1:85YdttVkR1rAY+Oiv/nKI4FCimID+NXhDn82kz3mEvs=
github.com/antchfx/htmlquery v1.3.2/go.mod h1:1mbkcEgEarAokJiWhTfr4hR06w/q2ZZjnYLrDt6CTUk=
github.com/antchfx/xmlquery v1.3.4/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
github.com/antchfx/xmlquery v1.4.0 h1:xg2HkfcRK2TeTbdb0m1jxCYnvsPaGY/oeZWTGqX/0hA=
github.com/antchfx/xmlquery v1.4.0/go.mod h1:Ax2aeaeDjfIw3CwXKDQ0GkwZ6QlxoChlIBP+mGnDFjI=
github.com/antchfx/xmlquery v1.4.1 h1:YgpSwbeWvLp557YFTi8E3z6t6/hYjmFEtiEKbDfEbl0=
github.com/antchfx/xmlquery v1.4.1/go.mod h1:lKezcT8ELGt8kW5L+ckFMTbgdR61/odpPgDv8Gvi1fI=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/xpath v1.3.0 h1:nTMlzGAK3IJ0bPpME2urTuFL76o4A96iYvoKFHRXJgc=
github.com/antchfx/xpath v1.3.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/antchfx/xpath v1.3.1 h1:PNbFuUqHwWl0xRjvUPjJ95Agbmdj2uzzIwmQKgu4oCk=
github.com/antchfx/xpath v1.3.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/aws/aws-lambda-go v1.47.0 h1:0H8s0vumYx/YKs4sE7YM0ktwL2eWse+kfopsRI1sXVI=
github.com/aws/aws-lambda-go v1.47.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A=
github.com/awslabs/aws-lambda-go-api-proxy v0.16.2 h1:CJyGEyO1CIwOnXTU40urf0mchf6t3voxpvUDikOU9LY=
Expand Down Expand Up @@ -106,8 +106,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik=
github.com/google/pprof v0.0.0-20240528025155-186aa0362fba h1:ql1qNgCyOB7iAEk8JTNM+zJrgIbnyCKX/wdlyPufP5g=
github.com/google/pprof v0.0.0-20240528025155-186aa0362fba/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
github.com/google/pprof v0.0.0-20240625030939-27f56978b8b0 h1:e+8XbKB6IMn8A4OAyZccO4pYfB3s7bt6azNIPE7AnPg=
github.com/google/pprof v0.0.0-20240625030939-27f56978b8b0/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
Expand All @@ -116,14 +116,16 @@ github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6Pyu
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
github.com/knadh/koanf/maps v0.1.1/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI=
github.com/knadh/koanf/parsers/yaml v0.1.0 h1:ZZ8/iGfRLvKSaMEECEBPM1HQslrZADk8fP1XFUxVI5w=
github.com/knadh/koanf/parsers/yaml v0.1.0/go.mod h1:cvbUDC7AL23pImuQP0oRw/hPuccrNBS2bps8asS0CwY=
github.com/knadh/koanf/providers/env v0.1.0 h1:LqKteXqfOWyx5Ab9VfGHmjY9BvRXi+clwyZozgVRiKg=
github.com/knadh/koanf/providers/env v0.1.0/go.mod h1:RE8K9GbACJkeEnkl8L/Qcj8p4ZyPXZIQ191HJi44ZaQ=
github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf3ODrc//Lp+c=
github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA=
github.com/knadh/koanf/providers/file v1.0.0 h1:DtPvSQBeF+N0QLPMz0yf2bx0nFSxUcncpqQvzCxfCyk=
github.com/knadh/koanf/providers/file v1.0.0/go.mod h1:/faSBcv2mxPVjFrXck95qeoyoZ5myJ6uxN8OOVNJJCI=
github.com/knadh/koanf/providers/structs v0.1.0 h1:wJRteCNn1qvLtE5h8KQBvLJovidSdntfdyIbbCzEyE0=
github.com/knadh/koanf/providers/structs v0.1.0/go.mod h1:sw2YZ3txUcqA3Z27gPlmmBzWn1h8Nt9O6EP/91MkcWE=
github.com/knadh/koanf/v2 v2.1.1 h1:/R8eXqasSTsmDCsAyYj+81Wteg8AqrV9CP6gvsTsOmM=
Expand Down
24 changes: 17 additions & 7 deletions src/router/middlewares/compress.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,29 @@ import (

"github.com/andybalholm/brotli"
"github.com/go-chi/chi/v5/middleware"
"github.com/klauspost/compress/zstd"
"github.com/rs/zerolog/log"
)

// compress builds the HTTP response-compression middleware for the router.
// lvl is the compression level given to chi's compressor and to each custom
// encoder; types is forwarded unchanged to chi's compressor.
//
// This span is a rendered diff hunk: lines of the previous and the new
// implementation are interleaved, so both signatures appear below.
//
// Previous signature: returned a slice of two middlewares.
func compress(lvl int, types ...string) [](func(http.Handler) http.Handler) {
// Deflate & GZIP, via chi's stock Compress middleware.
dig := middleware.Compress(lvl, types...)
// New signature: returns a single middleware backed by one compressor.
func compress(lvl int, types ...string) func(next http.Handler) http.Handler {
// Already has deflate and gzip.
comp := middleware.NewCompressor(lvl, types...)

// Brotli (previous version): registered on a second, separate compressor.
br := middleware.NewCompressor(lvl, types...)
br.SetEncoder("br", func(w io.Writer, lvl int) io.Writer {
// Add brotli (new version): registered on the shared compressor.
comp.SetEncoder("br", func(w io.Writer, lvl int) io.Writer {
// The compression level is used directly as the brotli quality.
return brotli.NewWriterOptions(w, brotli.WriterOptions{
Quality: lvl,
})
})

// Previous return value: both middlewares as a slice.
return [](func(http.Handler) http.Handler){dig, br.Handler}
// Add zstd.
comp.SetEncoder("zstd", func(w io.Writer, lvl int) io.Writer {
// NOTE(review): lvl is converted straight to zstd.EncoderLevel with no range
// check — confirm every level passed to compress is a valid zstd level.
writer, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.EncoderLevel(lvl)))
if err != nil {
// The encoder callback has no error return, so the failure is reported through log.Panic.
log.Panic().Err(err).Msg("Failed to create zstd writer")
}
return writer
})

return comp.Handler
}
6 changes: 4 additions & 2 deletions src/router/middlewares/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ func Setup(mux *chi.Mux, lgr zerolog.Logger, frontendUrls []string, serveProfile
// Use recovery middleware.
mux.Use(middleware.Recoverer)

// Use compression middleware.
mux.Use(compress(5)...)
// Use compression middleware, except for image proxy since the response is copied over.
mux.Use(middleware.Maybe(compress(3), func(r *http.Request) bool {
return !strings.HasPrefix(r.URL.Path, "/proxy")
}))

// Use CORS middleware.
mux.Use(cors.Handler(cors.Options{
Expand Down
5 changes: 5 additions & 0 deletions src/router/routes/route_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ func routeProxy(w http.ResponseWriter, r *http.Request, salt string, timeout tim
return writeResponse(w, http.StatusInternalServerError, fmt.Sprintf("failed to proxy request: %v", err))
}

log.Trace().
Caller().
Str("response", fmt.Sprint(resp)).
Msg("Got a response")

// Proxy the response.
return writeResponseImageProxy(w, resp)
}
Expand Down
15 changes: 12 additions & 3 deletions src/router/routes/writers.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,18 @@ func writeResponseSuggestions(w http.ResponseWriter, status int, query string, s
}

func writeResponseImageProxy(w http.ResponseWriter, resp *http.Response) error {
w.Header().Set("Content-Encoding", resp.Header.Get("Content-Encoding"))
w.Header().Set("Content-Length", resp.Header.Get("Content-Length"))
w.Header().Set("Content-Type", resp.Header.Get("Content-Type"))
if ce := resp.Header.Get("Content-Encoding"); ce != "" {
w.Header().Set("Content-Encoding", ce)
}

if cl := resp.Header.Get("Content-Length"); cl != "" {
w.Header().Set("Content-Length", cl)
}

if ct := resp.Header.Get("Content-Type"); ct != "" {
w.Header().Set("Content-Type", ct)
}

w.WriteHeader(resp.StatusCode)
_, err := io.Copy(w, resp.Body)
return err
Expand Down
25 changes: 24 additions & 1 deletion src/search/scraper/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/andybalholm/brotli"
"github.com/gocolly/colly/v2"
"github.com/klauspost/compress/zstd"
"github.com/rs/zerolog/log"

"github.com/hearchco/agent/src/config"
Expand Down Expand Up @@ -39,7 +40,7 @@ func (e *EngineBase) initCollector(ctx context.Context, acceptS string) {
colly.UserAgent(ua.UserAgent),
colly.Headers(map[string]string{
"Accept": acceptS,
"Accept-Encoding": "gzip, deflate, br",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Accept-Language": "en-US,en;q=0.9",
"Sec-Ch-Ua": ua.SecCHUA,
"Sec-Ch-Ua-Mobile": ua.SecCHUAMobile,
Expand Down Expand Up @@ -105,6 +106,28 @@ func (e *EngineBase) initCollectorOnResponse() {
return
}

r.Body = body
} else if strings.Contains(r.Headers.Get("Content-Encoding"), "zstd") {
reader, err := zstd.NewReader(bytes.NewReader(r.Body))
if err != nil {
log.Error().
Caller().
Err(err).
Str("engine", e.Name.String()).
Msg("Failed to create zstd reader")
return
}

body, err := io.ReadAll(reader)
if err != nil {
log.Error().
Caller().
Err(err).
Str("engine", e.Name.String()).
Msg("Failed to decode zstd response")
return
}

r.Body = body
}
})
Expand Down

0 comments on commit 0083a60

Please sign in to comment.