From caa9d5a8cc998b345ce020aa0d50ae1543e730bd Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Thu, 15 Jan 2026 21:01:30 +1100 Subject: [PATCH] fix: cloning large repos failed with `fatal: early EOF` This only manifested with `--bare --mirror` because the clones are so large, due to including all refs. Also switched to using `httputil.ReverseProxy`, which is more robust. ``` ~/dev/cachew $ rm -rf git-source ; time git clone --bare --mirror http://127.0.0.1:8080/git/github.com/git/git.git git-source Cloning into bare repository 'git-source'... remote: Enumerating objects: 807474, done. remote: Counting objects: 100% (8533/8533), done. remote: Compressing objects: 100% (7947/7947), done. error: RPC failed; curl 18 transfer closed with outstanding read data remaining error: 297 bytes of body are still expected fetch-pack: unexpected disconnect while reading sideband packet fatal: early EOF fatal: fetch-pack: invalid index-pack output git clone --bare --mirror http://127.0.0.1:8080/git/github.com/git/git.git 5.26s user 1.17s system 21% cpu 30.509 total ``` Once the cache is populated, cloning git takes half the time: ``` ~/dev/cachew $ rm -rf git-source ; time git clone http://127.0.0.1:8080/git/github.com/git/git.git git-source Cloning into 'git-source'... remote: Enumerating objects: 403536, done. remote: Counting objects: 100% (756/756), done. remote: Compressing objects: 100% (363/363), done. remote: Total 403536 (delta 532), reused 498 (delta 393), pack-reused 402780 (from 4) Receiving objects: 100% (403536/403536), 282.29 MiB | 19.06 MiB/s, done. Resolving deltas: 100% (305003/305003), done. git clone http://127.0.0.1:8080/git/github.com/git/git.git git-source 18.61s user 2.65s system 96% cpu 21.940 total ~/dev/cachew $ rm -rf git-source ; time git clone http://127.0.0.1:8080/git/github.com/git/git.git git-source Cloning into 'git-source'... remote: Enumerating objects: 403536, done. remote: Counting objects: 100% (403536/403536), done. 
remote: Compressing objects: 100% (95986/95986), done. remote: Total 403536 (delta 305001), reused 403485 (delta 304954), pack-reused 0 (from 0) Receiving objects: 100% (403536/403536), 282.15 MiB | 117.21 MiB/s, done. Resolving deltas: 100% (305001/305001), done. git clone http://127.0.0.1:8080/git/github.com/git/git.git git-source 18.11s user 2.34s system 206% cpu 9.916 total ``` --- internal/strategy/git/backend.go | 15 ++++++++-- internal/strategy/git/git.go | 16 +++++++++++ internal/strategy/git/proxy.go | 49 ++++---------------------------- 3 files changed, 34 insertions(+), 46 deletions(-) diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go index dff362f..859a729 100644 --- a/internal/strategy/git/backend.go +++ b/internal/strategy/git/backend.go @@ -69,7 +69,13 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error { } // #nosec G204 - c.upstreamURL and c.path are controlled by us - cmd := exec.CommandContext(ctx, "git", "clone", "--bare", "--mirror", c.upstreamURL, c.path) + // Configure git for large repositories to avoid network buffer issues + cmd := exec.CommandContext(ctx, "git", "clone", + "--bare", "--mirror", + "-c", "http.postBuffer=524288000", // 500MB buffer + "-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed + "-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed + c.upstreamURL, c.path) output, err := cmd.CombinedOutput() if err != nil { logger.ErrorContext(ctx, "git clone failed", @@ -87,7 +93,12 @@ func (s *Strategy) executeFetch(ctx context.Context, c *clone) error { logger := logging.FromContext(ctx) // #nosec G204 - c.path is controlled by us - cmd := exec.CommandContext(ctx, "git", "-C", c.path, "fetch", "--all") + // Configure git for large repositories to avoid network buffer issues + cmd := exec.CommandContext(ctx, "git", "-C", c.path, + "-c", "http.postBuffer=524288000", // 500MB buffer + "-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed + "-c", 
"http.lowSpeedTime=600", // 10 minute timeout at low speed + "fetch", "--all") output, err := cmd.CombinedOutput() if err != nil { logger.ErrorContext(ctx, "git fetch failed", diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 22179b4..7b7149b 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -5,6 +5,7 @@ import ( "context" "log/slog" "net/http" + "net/http/httputil" "net/url" "os" "path/filepath" @@ -54,6 +55,7 @@ type Strategy struct { clones map[string]*clone clonesMu sync.RWMutex httpClient *http.Client + proxy *httputil.ReverseProxy } // New creates a new Git caching strategy. @@ -79,6 +81,20 @@ func New(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux httpClient: http.DefaultClient, } + s.proxy = &httputil.ReverseProxy{ + Director: func(req *http.Request) { + req.URL.Scheme = "https" + req.URL.Host = req.PathValue("host") + req.URL.Path = "/" + req.PathValue("path") + req.Host = req.URL.Host + }, + Transport: s.httpClient.Transport, + ErrorHandler: func(w http.ResponseWriter, r *http.Request, err error) { + logging.FromContext(r.Context()).ErrorContext(r.Context(), "Upstream request failed", slog.String("error", err.Error())) + w.WriteHeader(http.StatusBadGateway) + }, + } + mux.Handle("GET /git/{host}/{path...}", http.HandlerFunc(s.handleRequest)) mux.Handle("POST /git/{host}/{path...}", http.HandlerFunc(s.handleRequest)) diff --git a/internal/strategy/git/proxy.go b/internal/strategy/git/proxy.go index 4a3e5aa..2db0b96 100644 --- a/internal/strategy/git/proxy.go +++ b/internal/strategy/git/proxy.go @@ -1,59 +1,20 @@ package git import ( - "io" "log/slog" "net/http" - "github.com/block/cachew/internal/httputil" "github.com/block/cachew/internal/logging" ) // forwardToUpstream forwards a request to the upstream Git server. 
func (s *Strategy) forwardToUpstream(w http.ResponseWriter, r *http.Request, host, pathValue string) { - ctx := r.Context() - logger := logging.FromContext(ctx) + logger := logging.FromContext(r.Context()) - upstreamURL := "https://" + host + "/" + pathValue - if r.URL.RawQuery != "" { - upstreamURL += "?" + r.URL.RawQuery - } - - logger.DebugContext(ctx, "Forwarding to upstream", + logger.DebugContext(r.Context(), "Forwarding to upstream", slog.String("method", r.Method), - slog.String("upstream_url", upstreamURL)) - - upstreamReq, err := http.NewRequestWithContext(ctx, r.Method, upstreamURL, r.Body) - if err != nil { - httputil.ErrorResponse(w, r, http.StatusInternalServerError, "failed to create upstream request") - return - } - - // Copy relevant headers - for _, header := range []string{"Content-Type", "Content-Length", "Content-Encoding", "Accept", "Accept-Encoding", "Git-Protocol"} { - if v := r.Header.Get(header); v != "" { - upstreamReq.Header.Set(header, v) - } - } - - resp, err := s.httpClient.Do(upstreamReq) - if err != nil { - logger.ErrorContext(ctx, "Upstream request failed", slog.String("error", err.Error())) - httputil.ErrorResponse(w, r, http.StatusBadGateway, "upstream request failed") - return - } - defer resp.Body.Close() - - // Copy response headers - for key, values := range resp.Header { - for _, value := range values { - w.Header().Add(key, value) - } - } - - w.WriteHeader(resp.StatusCode) + slog.String("host", host), + slog.String("path", pathValue)) - if _, err := io.Copy(w, resp.Body); err != nil { - logger.ErrorContext(ctx, "Failed to stream upstream response", slog.String("error", err.Error())) - } + s.proxy.ServeHTTP(w, r) }