Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serve LFS/attachment with http.ServeContent to Support Range-Request #18448

Closed
wants to merge 13 commits into from
30 changes: 30 additions & 0 deletions integrations/download_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package integrations

import (
"mime"
"net/http"
"testing"

Expand Down Expand Up @@ -91,3 +92,32 @@ func TestDownloadRawTextFileWithMimeTypeMapping(t *testing.T) {
delete(setting.MimeTypeMap.Map, ".xml")
setting.MimeTypeMap.Enabled = false
}

// TestDownloadRawBinaryFileWithoutMimeTypeMapping logs in as user2, requests
// the raw bin.foo file of user2/repo2 and asserts that the response
// Content-Type header is "application/octet-stream".
func TestDownloadRawBinaryFileWithoutMimeTypeMapping(t *testing.T) {
defer prepareTestEnv(t)()

session := loginUser(t, "user2")

req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/bin.foo")
6543 marked this conversation as resolved.
Show resolved Hide resolved
resp := session.MakeRequest(t, req, http.StatusOK)

assert.Equal(t, "application/octet-stream", resp.HeaderMap.Get("Content-Type"))
}

// TestDownloadRawBinaryFileWithMimeTypeMapping enables the custom MIME type
// mapping with ".foo" mapped to "audio/foo", requests the raw bin.foo file of
// user2/repo2 and asserts that the response Content-Type header is "audio/foo".
func TestDownloadRawBinaryFileWithMimeTypeMapping(t *testing.T) {
	defer prepareTestEnv(t)()

	setting.MimeTypeMap.Map[".foo"] = "audio/foo"
	setting.MimeTypeMap.Enabled = true
	// Undo the global override in a defer so it is restored even when a helper
	// below aborts the test early; otherwise it would leak into later tests.
	defer func() {
		delete(setting.MimeTypeMap.Map, ".foo")
		setting.MimeTypeMap.Enabled = false
	}()

	// The registration in the mime package stays in place after the test; only
	// the setting override above is undone.
	if err := mime.AddExtensionType(".foo", "audio/foo"); err != nil {
		t.Fatalf("mime.AddExtensionType: %v", err)
	}

	session := loginUser(t, "user2")

	req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/bin.foo")
	resp := session.MakeRequest(t, req, http.StatusOK)

	assert.Equal(t, "audio/foo", resp.HeaderMap.Get("Content-Type"))
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1032bbf17fbc0d9c95bb5418dabe8f8c99278700
808eedf6b8dd519aa89b59af2d815ed668580fc2
12 changes: 12 additions & 0 deletions modules/charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,18 @@ func RemoveBOMIfPresent(content []byte) []byte {
return content
}

// DetectEncodingFromReader reads at most the first 1024 bytes from reader and
// passes them to DetectEncoding to determine their encoding.
//
// The bytes are consumed from reader, so callers that need the content again
// may have to rewind it first, e.g. with reader.Seek(0, io.SeekStart).
func DetectEncodingFromReader(reader io.Reader) (string, error) {
	const headSize = 1024

	head := make([]byte, headSize)
	read, readErr := util.ReadAtMost(reader, head)
	if readErr != nil {
		return "", fmt.Errorf("DetectEncoding io error: %w", readErr)
	}

	return DetectEncoding(head[:read])
}

// DetectEncoding detect the encoding of content
func DetectEncoding(content []byte) (string, error) {
// First we check if the content represents valid utf8 content excepting a truncated character at the end.
Expand Down
2 changes: 1 addition & 1 deletion modules/lfs/content_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (s *ContentStore) Verify(pointer Pointer) (bool, error) {
}

// ReadMetaObject creates a new ContentStore and returns the result of its Get
// call for the given pointer: a reader for the stored object, or an error.
func ReadMetaObject(pointer Pointer) (io.ReadCloser, error) {
func ReadMetaObject(pointer Pointer) (io.ReadSeekCloser, error) {
contentStore := NewContentStore()
return contentStore.Get(pointer)
}
Expand Down
11 changes: 10 additions & 1 deletion modules/setting/mime_type_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

package setting

import "strings"
import (
"mime"
"strings"

"code.gitea.io/gitea/modules/log"
)

// MimeTypeMap defines custom mime type mapping settings
var MimeTypeMap = struct {
Expand All @@ -21,6 +26,10 @@ func newMimeTypeMap() {
m := make(map[string]string, len(keys))
for _, key := range keys {
m[strings.ToLower(key.Name())] = key.Value()
err := mime.AddExtensionType(key.Name(), key.Value())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't add unrelated changes here.

if err != nil {
log.Warn("mime.AddExtensionType(%s,%s): %v", key.Name(), key.Value(), err)
}
}
MimeTypeMap.Map = m
if len(keys) > 0 {
Expand Down
43 changes: 35 additions & 8 deletions modules/typesniffer/typesniffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ package typesniffer
import (
"fmt"
"io"
"mime"
"net/http"
"path/filepath"
"regexp"
"strings"

Expand Down Expand Up @@ -36,32 +38,32 @@ type SniffedType struct {

// IsText detects if the content format is plain text, i.e. the sniffed
// content type is a "text/" type.
func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
return strings.HasPrefix(ct.contentType, "text/")
}

// IsImage detects if the data is an image format, i.e. the sniffed content
// type is an "image/" type.
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
return strings.HasPrefix(ct.contentType, "image/")
}

// IsSvgImage detects if the data is an SVG image, i.e. the sniffed content
// type matches SvgMimeType.
func (ct SniffedType) IsSvgImage() bool {
return strings.Contains(ct.contentType, SvgMimeType)
return strings.HasPrefix(ct.contentType, SvgMimeType)
}

// IsPDF detects if the data is a PDF document, i.e. the sniffed content type
// is "application/pdf".
func (ct SniffedType) IsPDF() bool {
return strings.Contains(ct.contentType, "application/pdf")
return strings.HasPrefix(ct.contentType, "application/pdf")
}

// IsVideo detects if the data is a video format, i.e. the sniffed content
// type is a "video/" type.
func (ct SniffedType) IsVideo() bool {
return strings.Contains(ct.contentType, "video/")
return strings.HasPrefix(ct.contentType, "video/")
}

// IsAudio detects if the data is an audio format, i.e. the sniffed content
// type is an "audio/" type.
func (ct SniffedType) IsAudio() bool {
return strings.Contains(ct.contentType, "audio/")
return strings.HasPrefix(ct.contentType, "audio/")
}

// IsRepresentableAsText returns true if file content can be represented as
Expand All @@ -70,6 +72,11 @@ func (ct SniffedType) IsRepresentableAsText() bool {
return ct.IsText() || ct.IsSvgImage()
}

// Mime returns the content type without its parameters, i.e. everything
// before the first ";" (for example "text/plain" for
// "text/plain; charset=utf-8"). A content type without ";" is returned as is.
func (ct SniffedType) Mime() string {
	if end := strings.IndexByte(ct.contentType, ';'); end >= 0 {
		return ct.contentType[:end]
	}
	return ct.contentType
}

// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
Expand All @@ -82,15 +89,35 @@ func DetectContentType(data []byte) SniffedType {
data = data[:sniffLen]
}

if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
if (strings.HasPrefix(ct, "text/plain") || strings.HasPrefix(ct, "text/html")) && svgTagRegex.Match(data) ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not keep Contains?

strings.HasPrefix(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
// SVG is unsupported. https://github.com/golang/go/issues/15888
ct = SvgMimeType
}

return SniffedType{ct}
}

// DetectContentTypeExtFirst determines the content type from the extension of
// name first and falls back to sniffing the content only when the mime package
// knows no type for that extension.
//
// bytesOrReader must be an io.Reader or a []byte; any other value, including
// nil, yields an error instead of a panic. Sniffing reads from the reader, so
// callers may need reader.Seek(0, io.SeekStart) afterwards to reset the offset.
func DetectContentTypeExtFirst(name string, bytesOrReader interface{}) (SniffedType, error) {
	// FIXME: Not sure if it's necessary to keep the old behavior.
	// if ct != "" && !strings.HasPrefix(ct, "text/") {
	if ct := mime.TypeByExtension(filepath.Ext(name)); ct != "" {
		return SniffedType{ct}, nil
	}

	switch content := bytesOrReader.(type) {
	case io.Reader:
		st, err := DetectContentTypeFromReader(content)
		if err != nil {
			return SniffedType{}, err
		}
		return st, nil
	case []byte:
		return DetectContentType(content), nil
	default:
		// Report the misuse to the caller rather than panicking on a failed
		// type assertion.
		return SniffedType{}, fmt.Errorf("unsupported content source type %T", bytesOrReader)
	}
}

// DetectContentTypeFromReader guesses the content type contained in the reader.
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
buf := make([]byte, sniffLen)
Expand Down
6 changes: 3 additions & 3 deletions routers/api/v1/repo/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func GetRawFileOrLFS(ctx *context.APIContext) {
}

// OK not cached - serve!
if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
if err := common.ServeLargeFile(ctx.Context, ctx.Repo.TreePath, lastModified, bytes.NewReader(buf)); err != nil {
ctx.ServerError("ServeBlob", err)
}
return
Expand All @@ -189,7 +189,7 @@ func GetRawFileOrLFS(ctx *context.APIContext) {
return
}

if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
if err := common.ServeLargeFile(ctx.Context, ctx.Repo.TreePath, lastModified, bytes.NewReader(buf)); err != nil {
ctx.ServerError("ServeBlob", err)
}
return
Expand Down Expand Up @@ -219,7 +219,7 @@ func GetRawFileOrLFS(ctx *context.APIContext) {
}
defer lfsDataRc.Close()

if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, meta.Size, lfsDataRc); err != nil {
if err := common.ServeLargeFile(ctx.Context, ctx.Repo.TreePath, lastModified, lfsDataRc); err != nil {
ctx.ServerError("ServeData", err)
}
}
Expand Down
70 changes: 51 additions & 19 deletions routers/common/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ package common
import (
"fmt"
"io"
"path"
"net/http"
"path/filepath"
"strconv"
"strings"
"time"

Expand All @@ -22,7 +23,8 @@ import (
"code.gitea.io/gitea/modules/util"
)

// ServeBlob download a git.Blob
// ServeBlob serve git.Blob which represents a normal(non-lfs) file stored in repositories
// todo: implement io.Seeker for git.Blob.blobReader to support Range-Request
func ServeBlob(ctx *context.Context, blob *git.Blob, lastModified time.Time) error {
if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
return nil
Expand All @@ -38,49 +40,75 @@ func ServeBlob(ctx *context.Context, blob *git.Blob, lastModified time.Time) err
}
}()

return ServeData(ctx, ctx.Repo.TreePath, blob.Size(), dataRc)
}

// ServeData download file from io.Reader
func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error {
buf := make([]byte, 1024)
n, err := util.ReadAtMost(reader, buf)
n, err := util.ReadAtMost(dataRc, buf)
if err != nil {
return err
}
if n >= 0 {
buf = buf[:n]
}

ctx.Resp.Header().Set("Cache-Control", "public,max-age=86400")

size := blob.Size()
if size >= 0 {
ctx.Resp.Header().Set("Content-Length", fmt.Sprintf("%d", size))
ctx.Resp.Header().Set("Content-Length", strconv.FormatInt(size, 10))
} else {
log.Error("ServeData called to serve data: %s with size < 0: %d", name, size)
log.Error("ServeData called to serve data: %s with size < 0: %d", ctx.Repo.TreePath, size)
}

if err := setCommonHeaders(ctx, ctx.Repo.TreePath, buf); err != nil {
return err
}

_, err = ctx.Resp.Write(buf)
if err != nil {
return err
}
name = path.Base(name)
_, err = io.Copy(ctx.Resp, dataRc)
return err
}

func setCommonHeaders(ctx *context.Context, name string, data interface{}) error {
// Google Chrome dislikes commas in filenames, so let's change them to spaces
name = strings.ReplaceAll(name, ",", " ")

st := typesniffer.DetectContentType(buf)
ctx.Resp.Header().Set("Cache-Control", "public, max-age=300")

// reset the offset to the start of served file
if seeker, ok := data.(io.ReadSeeker); ok {
_, _ = seeker.Seek(0, io.SeekStart)
}

st, err := typesniffer.DetectContentTypeExtFirst(name, data)
if nil != err {
return err
}

mappedMimeType := ""
if setting.MimeTypeMap.Enabled {
fileExtension := strings.ToLower(filepath.Ext(name))
mappedMimeType = setting.MimeTypeMap.Map[fileExtension]
}

if st.IsText() || ctx.FormBool("render") {
cs, err := charset.DetectEncoding(buf)
var cs string
var err error
if reader, ok := data.(io.ReadSeeker); ok {
cs, err = charset.DetectEncodingFromReader(reader)
_, _ = reader.Seek(0, io.SeekStart)
} else {
cs, err = charset.DetectEncoding(data.([]byte))
}
if err != nil {
log.Error("Detect raw file %s charset failed: %v, using by default utf-8", name, err)
cs = "utf-8"
}

if mappedMimeType == "" {
mappedMimeType = "text/plain"
}
ctx.Resp.Header().Set("Content-Type", mappedMimeType+"; charset="+strings.ToLower(cs))

} else {
ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
if mappedMimeType != "" {
Expand All @@ -102,10 +130,14 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader)
}
}

_, err = ctx.Resp.Write(buf)
if err != nil {
return nil
}

// ServeLargeFile serves files stored with Git LFS and attachments uploaded on
// the Releases page. It calls setCommonHeaders for name and reader and hands
// reader, together with the given modification time, to http.ServeContent.
func ServeLargeFile(ctx *context.Context, name string, time time.Time, reader io.ReadSeeker) error {
if err := setCommonHeaders(ctx, name, reader); err != nil {
return err
}
_, err = io.Copy(ctx.Resp, reader)
return err
http.ServeContent(ctx.Resp, ctx.Req, name, time, reader)
return nil
}
2 changes: 1 addition & 1 deletion routers/web/repo/attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func GetAttachment(ctx *context.Context) {
}
defer fr.Close()

if err = common.ServeData(ctx, attach.Name, attach.Size, fr); err != nil {
if err = common.ServeLargeFile(ctx, attach.Name, attach.CreatedUnix.AsTime(), fr); err != nil {
ctx.ServerError("ServeData", err)
return
}
Expand Down
2 changes: 1 addition & 1 deletion routers/web/repo/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob, lastModified time.Time
log.Error("ServeBlobOrLFS: Close: %v", err)
}
}()
return common.ServeData(ctx, ctx.Repo.TreePath, meta.Size, lfsDataRc)
return common.ServeLargeFile(ctx, ctx.Repo.TreePath, meta.CreatedUnix.AsTime(), lfsDataRc)
}
if err = dataRc.Close(); err != nil {
log.Error("ServeBlobOrLFS: Close: %v", err)
Expand Down