Skip to content

Commit

Permalink
Refactor route path normalization (#31381)
Browse files Browse the repository at this point in the history
Refactor route path normalization and decouple it from the chi router.
Fix the TODO, fix the legacy strange path behavior.
  • Loading branch information
wxiaoguang committed Jun 17, 2024
1 parent 5a7376c commit d32648b
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 157 deletions.
59 changes: 58 additions & 1 deletion modules/web/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ package web

import (
"net/http"
"net/url"
"strings"

"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/web/middleware"

"gitea.com/go-chi/binding"
Expand Down Expand Up @@ -160,14 +162,69 @@ func (r *Route) Patch(pattern string, h ...any) {

// ServeHTTP implements http.Handler
func (r *Route) ServeHTTP(w http.ResponseWriter, req *http.Request) {
r.R.ServeHTTP(w, req)
r.normalizeRequestPath(w, req, r.R)
}

// NotFound defines a handler to respond whenever a route could not be found.
func (r *Route) NotFound(h http.HandlerFunc) {
r.R.NotFound(h)
}

func (r *Route) normalizeRequestPath(resp http.ResponseWriter, req *http.Request, next http.Handler) {
normalized := false
normalizedPath := req.URL.EscapedPath()
if normalizedPath == "" {
normalizedPath, normalized = "/", true
} else if normalizedPath != "/" {
normalized = strings.HasSuffix(normalizedPath, "/")
normalizedPath = strings.TrimRight(normalizedPath, "/")
}
removeRepeatedSlashes := strings.Contains(normalizedPath, "//")
normalized = normalized || removeRepeatedSlashes

// the following code block is a slow-path for replacing all repeated slashes "//" to one single "/"
// if the path doesn't have repeated slashes, then no need to execute it
if removeRepeatedSlashes {
buf := &strings.Builder{}
for i := 0; i < len(normalizedPath); i++ {
if i == 0 || normalizedPath[i-1] != '/' || normalizedPath[i] != '/' {
buf.WriteByte(normalizedPath[i])
}
}
normalizedPath = buf.String()
}

// If the config tells Gitea to use a sub-url path directly without reverse proxy,
// then we need to remove the sub-url path from the request URL path.
// But "/v2" is special for OCI container registry, it should always be in the root of the site.
if setting.UseSubURLPath {
remainingPath, ok := strings.CutPrefix(normalizedPath, setting.AppSubURL+"/")
if ok {
normalizedPath = "/" + remainingPath
} else if normalizedPath == setting.AppSubURL {
normalizedPath = "/"
} else if !strings.HasPrefix(normalizedPath+"/", "/v2/") {
// do not respond to other requests, to simulate a real sub-path environment
http.Error(resp, "404 page not found, sub-path is: "+setting.AppSubURL, http.StatusNotFound)
return
}
normalized = true
}

// if the path is normalized, then fill it back to the request
if normalized {
decodedPath, err := url.PathUnescape(normalizedPath)
if err != nil {
http.Error(resp, "400 Bad Request: unable to unescape path "+normalizedPath, http.StatusBadRequest)
return
}
req.URL.RawPath = normalizedPath
req.URL.Path = decodedPath
}

next.ServeHTTP(resp, req)
}

// Combo delegates requests to Combo
func (r *Route) Combo(pattern string, h ...any) *Combo {
return &Combo{r, pattern, h}
Expand Down
44 changes: 44 additions & 0 deletions modules/web/route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import (
"strconv"
"testing"

"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"

chi "github.com/go-chi/chi/v5"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -176,3 +179,44 @@ func TestRoute3(t *testing.T) {
assert.EqualValues(t, http.StatusOK, recorder.Code)
assert.EqualValues(t, 4, hit)
}

func TestRouteNormalizePath(t *testing.T) {
type paths struct {
EscapedPath, RawPath, Path string
}
testPath := func(reqPath string, expectedPaths paths) {
recorder := httptest.NewRecorder()
recorder.Body = bytes.NewBuffer(nil)

actualPaths := paths{EscapedPath: "(none)", RawPath: "(none)", Path: "(none)"}
r := NewRoute()
r.Get("/*", func(resp http.ResponseWriter, req *http.Request) {
actualPaths.EscapedPath = req.URL.EscapedPath()
actualPaths.RawPath = req.URL.RawPath
actualPaths.Path = req.URL.Path
})

req, err := http.NewRequest("GET", reqPath, nil)
assert.NoError(t, err)
r.ServeHTTP(recorder, req)
assert.Equal(t, expectedPaths, actualPaths, "req path = %q", reqPath)
}

// RawPath could be empty if the EscapedPath is the same as escape(Path) and it is already normalized
testPath("/", paths{EscapedPath: "/", RawPath: "", Path: "/"})
testPath("//", paths{EscapedPath: "/", RawPath: "/", Path: "/"})
testPath("/%2f", paths{EscapedPath: "/%2f", RawPath: "/%2f", Path: "//"})
testPath("///a//b/", paths{EscapedPath: "/a/b", RawPath: "/a/b", Path: "/a/b"})

defer test.MockVariableValue(&setting.UseSubURLPath, true)()
defer test.MockVariableValue(&setting.AppSubURL, "/sub-path")()
testPath("/", paths{EscapedPath: "(none)", RawPath: "(none)", Path: "(none)"}) // 404
testPath("/sub-path", paths{EscapedPath: "/", RawPath: "/", Path: "/"})
testPath("/sub-path/", paths{EscapedPath: "/", RawPath: "/", Path: "/"})
testPath("/sub-path//a/b///", paths{EscapedPath: "/a/b", RawPath: "/a/b", Path: "/a/b"})
testPath("/sub-path/%2f/", paths{EscapedPath: "/%2f", RawPath: "/%2f", Path: "//"})
// "/v2" is special for OCI container registry, it should always be in the root of the site
testPath("/v2", paths{EscapedPath: "/v2", RawPath: "/v2", Path: "/v2"})
testPath("/v2/", paths{EscapedPath: "/v2", RawPath: "/v2", Path: "/v2"})
testPath("/v2/%2f", paths{EscapedPath: "/v2/%2f", RawPath: "/v2/%2f", Path: "/v2//"})
}
68 changes: 13 additions & 55 deletions routers/common/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,23 @@ import (

"gitea.com/go-chi/session"
"github.com/chi-middleware/proxy"
chi "github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5"
)

// ProtocolMiddlewares returns HTTP protocol related middlewares, and it provides a global panic recovery
func ProtocolMiddlewares() (handlers []any) {
// first, normalize the URL path
handlers = append(handlers, normalizeRequestPathMiddleware)
// make sure chi uses EscapedPath(RawPath) as RoutePath, then "%2f" could be handled correctly
handlers = append(handlers, func(next http.Handler) http.Handler {
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
ctx := chi.RouteContext(req.Context())
if req.URL.RawPath == "" {
ctx.RoutePath = req.URL.EscapedPath()
} else {
ctx.RoutePath = req.URL.RawPath
}
next.ServeHTTP(resp, req)
})
})

// prepare the ContextData and panic recovery
handlers = append(handlers, func(next http.Handler) http.Handler {
Expand Down Expand Up @@ -75,58 +85,6 @@ func ProtocolMiddlewares() (handlers []any) {
return handlers
}

func normalizeRequestPathMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) {
// escape the URL RawPath to ensure that all routing is done using a correctly escaped URL
req.URL.RawPath = req.URL.EscapedPath()

urlPath := req.URL.RawPath
rctx := chi.RouteContext(req.Context())
if rctx != nil && rctx.RoutePath != "" {
urlPath = rctx.RoutePath
}

normalizedPath := strings.TrimRight(urlPath, "/")
// the following code block is a slow-path for replacing all repeated slashes "//" to one single "/"
// if the path doesn't have repeated slashes, then no need to execute it
if strings.Contains(normalizedPath, "//") {
buf := &strings.Builder{}
prevWasSlash := false
for _, chr := range normalizedPath {
if chr != '/' || !prevWasSlash {
buf.WriteRune(chr)
}
prevWasSlash = chr == '/'
}
normalizedPath = buf.String()
}

if setting.UseSubURLPath {
remainingPath, ok := strings.CutPrefix(normalizedPath, setting.AppSubURL+"/")
if ok {
normalizedPath = "/" + remainingPath
} else if normalizedPath == setting.AppSubURL {
normalizedPath = "/"
} else if !strings.HasPrefix(normalizedPath+"/", "/v2/") {
// do not respond to other requests, to simulate a real sub-path environment
http.Error(resp, "404 page not found, sub-path is: "+setting.AppSubURL, http.StatusNotFound)
return
}
// TODO: it's not quite clear about how req.URL and rctx.RoutePath work together.
// Fortunately, it is only used for debug purpose, we have enough time to figure it out in the future.
req.URL.RawPath = normalizedPath
req.URL.Path = normalizedPath
}

if rctx == nil {
req.URL.Path = normalizedPath
} else {
rctx.RoutePath = normalizedPath
}
next.ServeHTTP(resp, req)
})
}

func Sessioner() func(next http.Handler) http.Handler {
return session.Sessioner(session.Options{
Provider: setting.SessionConfig.Provider,
Expand Down
70 changes: 0 additions & 70 deletions routers/common/middleware_test.go

This file was deleted.

3 changes: 2 additions & 1 deletion routers/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ func NormalRoutes() *web.Route {
if setting.Packages.Enabled {
// This implements package support for most package managers
r.Mount("/api/packages", packages_router.CommonRoutes())
// This implements the OCI API (Note this is not preceded by /api but is instead /v2)
// This implements the OCI API, this container registry "/v2" endpoint must be in the root of the site.
// If site admin deploys Gitea in a sub-path, they must configure their reverse proxy to map the "https://host/v2" endpoint to Gitea.
r.Mount("/v2", packages_router.ContainerRoutes())
}

Expand Down
19 changes: 12 additions & 7 deletions services/context/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/web/middleware"

Expand Down Expand Up @@ -142,23 +143,27 @@ func (b *Base) RemoteAddr() string {
return b.Req.RemoteAddr
}

// Params returns the param on route
func (b *Base) Params(p string) string {
s, _ := url.PathUnescape(chi.URLParam(b.Req, strings.TrimPrefix(p, ":")))
// Params returns the param in request path, eg: "/{var}" => "/a%2fb", then `var == "a/b"`
func (b *Base) Params(name string) string {
s, err := url.PathUnescape(b.PathParamRaw(name))
if err != nil && !setting.IsProd {
panic("Failed to unescape path param: " + err.Error() + ", there seems to be a double-unescaping bug")
}
return s
}

func (b *Base) PathParamRaw(p string) string {
return chi.URLParam(b.Req, strings.TrimPrefix(p, ":"))
// PathParamRaw returns the raw param in request path, eg: "/{var}" => "/a%2fb", then `var == "a%2fb"`
func (b *Base) PathParamRaw(name string) string {
return chi.URLParam(b.Req, strings.TrimPrefix(name, ":"))
}

// ParamsInt64 returns the param on route as int64
// ParamsInt64 returns the param in request path as int64
func (b *Base) ParamsInt64(p string) int64 {
v, _ := strconv.ParseInt(b.Params(p), 10, 64)
return v
}

// SetParams set params into routes
// SetParams set request path params into routes
func (b *Base) SetParams(k, v string) {
chiCtx := chi.RouteContext(b)
chiCtx.URLParams.Add(strings.TrimPrefix(k, ":"), url.PathEscape(v))
Expand Down
6 changes: 3 additions & 3 deletions services/context/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -1006,12 +1006,12 @@ func RepoRefByType(refType RepoRefType, ignoreNotExistErr ...bool) func(*Context
if refType == RepoRefLegacy {
// redirect from old URL scheme to new URL scheme
prefix := strings.TrimPrefix(setting.AppSubURL+strings.ToLower(strings.TrimSuffix(ctx.Req.URL.Path, ctx.Params("*"))), strings.ToLower(ctx.Repo.RepoLink))

ctx.Redirect(path.Join(
redirect := path.Join(
ctx.Repo.RepoLink,
util.PathEscapeSegments(prefix),
ctx.Repo.BranchNameSubURL(),
util.PathEscapeSegments(ctx.Repo.TreePath)))
util.PathEscapeSegments(ctx.Repo.TreePath))
ctx.Redirect(redirect)
return cancel
}
}
Expand Down
Loading

0 comments on commit d32648b

Please sign in to comment.