Skip to content

Commit

Permalink
Add content-based mime-type detection
Browse files Browse the repository at this point in the history
  • Loading branch information
dsoprea committed Jul 5, 2019
1 parent e7b8fc1 commit e692ee1
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
4 changes: 4 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@
# lru

An LRU implementation. Accessors available for the first item, last item, list of all keys, count, fullness, membership checks, explicit drops, and popping the oldest in a loop. You can also set a callback for when items are dropped and dump the current contents of the LRU.

# mimetype

Convenience function for determining a mime-type from an `io.Reader`.
41 changes: 41 additions & 0 deletions data/mime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package ridata

import (
"io"

"net/http"

"github.com/dsoprea/go-logging"
)

// MimetypeLeadBytes is the default number of leading bytes that
// GetMimetypeFromContent reads from its input in order to detect the
// mime-type.
const MimetypeLeadBytes = 512

// GetMimetypeFromContent uses net/http to map from magic-bytes to mime-type.
//
// The leading bytes of `r` are read and handed to http.DetectContentType. By
// default MimetypeLeadBytes bytes are read. If `fileSize` is positive and
// smaller than MimetypeLeadBytes, only `fileSize` bytes are read; any other
// value (including zero) leaves the default in place.
//
// The read is all-or-nothing (io.ReadFull): if `r` cannot supply the number of
// bytes being requested, no mime-type is returned and the read failure (io.EOF
// for an empty reader, io.ErrUnexpectedEOF for a short one) is reported
// through `err`. Callers with inputs shorter than MimetypeLeadBytes must
// therefore pass the real size in `fileSize`.
//
// The bytes that are read are consumed from `r`.
func GetMimetypeFromContent(r io.Reader, fileSize int) (mimetype string, err error) {
	defer func() {
		if state := recover(); state != nil {
			// Only an error can be translated into our return value. Any
			// other panic value is re-raised untouched rather than being
			// masked by a failed type-assertion.
			panicErr, ok := state.(error)
			if !ok {
				panic(state)
			}

			err = log.Wrap(panicErr)
		}
	}()

	leadCount := MimetypeLeadBytes
	if fileSize > 0 && fileSize < leadCount {
		leadCount = fileSize
	}

	buffer := make([]byte, leadCount)

	// Fails unless exactly `leadCount` bytes could be read.
	n, err := io.ReadFull(r, buffer)
	log.PanicIf(err)

	buffer = buffer[:n]

	// Always returns a valid mime-type.
	contentType := http.DetectContentType(buffer)

	return contentType, nil
}
63 changes: 63 additions & 0 deletions data/mime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package ridata

import (
"bytes"
"testing"

"encoding/hex"

"github.com/dsoprea/go-logging"
"github.com/dsoprea/go-utility/filesystem"
)

// TestGetMimetypeFromContent checks that the leading bytes of a JPEG (JFIF)
// file are identified as "image/jpeg" when no file-size hint is given.
func TestGetMimetypeFromContent(t *testing.T) {
	const encoded = "ffd8ffe000104a46494600010101004800480000ffe1001845786966000049492a00080000000000000000000000ffed004850686f746f73686f7020332e30003842494d040400000000000f1c015a00031b25471c020000020002003842494d0425000000000010fce11f89c8b7c9782f346234075877ebffdb0043000403030403030404030405040405060a07060606060d090a080a0f0d10100f0d0f0e11131814111217120e0f151c151719191b1b1b10141d1f1d1a1f181a1b1affdb0043010405050605060c07070c1a110f111a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1affc20011080313057803012200021101031101ffc4001c0000000701010000000000000000000000000102030405060708ffc4001b01010101010101010100000000000000000001020304050607ffda000c03010002100310000001d2b6b73c7eb94fa55e6f4d25cd2683a48d16533c6cc4854e95ea42dcdaafb5abd490c496196c386c4427c544789d582b52ed4224b4a4971329a8094a8f429db0b49d5074e7c460f7daeebcb8a1f4da3edcf1e57755be6c2907a1181000140c800cca12879c21aa63234b5245bb1d79932656ab17b0f60f244ee4f638e4bd4bad9000eb800000000000000085a21600a000000000000000057c5833cb3957176be558"

	decoded, err := hex.DecodeString(encoded)
	log.PanicIf(err)

	// A size of zero means "unknown", so the default lead-byte count is read.
	mimetype, err := GetMimetypeFromContent(bytes.NewBuffer(decoded), 0)
	log.PanicIf(err)

	if expected := "image/jpeg"; mimetype != expected {
		t.Fatalf("Mime-type not correct: [%s]", mimetype)
	}
}

// TestGetMimetypeFromContent__ShortData_Error checks that detection fails with
// "unexpected EOF" when only 256 bytes are available and no file-size hint is
// given.
func TestGetMimetypeFromContent__ShortData_Error(t *testing.T) {
	const encoded = "ffd8ffe000104a46494600010101004800480000ffe1001845786966000049492a00080000000000000000000000ffed004850686f746f73686f7020332e30003842494d040400000000000f1c015a00031b25471c020000020002003842494d0425000000000010fce11f89c8b7c9782f346234075877ebffdb0043000403030403030404030405040405060a07060606060d090a080a0f0d10100f0d0f0e11131814111217120e0f151c151719191b1b1b10141d1f1d1a1f181a1b1affdb0043010405050605060c07070c1a110f111a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1affc20011080313057803012200021101031101ffc4001c0000000701010000000000000000000000000102030405060708ffc4001b01010101010101010100000000000000000001020304050607ffda000c03010002100310000001d2b6b73c7eb94fa55e6f4d25cd2683a48d16533c6cc4854e95ea42dcdaafb5abd490c496196c386c4427c544789d582b52ed4224b4a4971329a8094a8f429db0b49d5074e7c460f7daeebcb8a1f4da3edcf1e57755be6c2907a1181000140c800cca12879c21aa63234b5245bb1d79932656ab17b0f60f244ee4f638e4bd4bad9000eb800000000000000085a21600a000000000000000057c5833cb3957176be558"

	decoded, err := hex.DecodeString(encoded)
	log.PanicIf(err)

	// Keep only the first 256 bytes so that the input is shorter than what is
	// requested when the size is unknown.
	truncated := decoded[:256]
	source := rifs.NewSeekableBufferWithBytes(truncated)

	if _, err = GetMimetypeFromContent(source, 0); err == nil {
		t.Fatalf("Expected error for not enough data.")
	}

	// Any failure other than the short read is unexpected.
	if err.Error() != "unexpected EOF" {
		log.Panic(err)
	}
}

// TestGetMimetypeFromContent__ShortData_Success checks that 256 bytes of JPEG
// data are still identified as "image/jpeg" when the actual size is passed as
// the file-size hint.
func TestGetMimetypeFromContent__ShortData_Success(t *testing.T) {
	const encoded = "ffd8ffe000104a46494600010101004800480000ffe1001845786966000049492a00080000000000000000000000ffed004850686f746f73686f7020332e30003842494d040400000000000f1c015a00031b25471c020000020002003842494d0425000000000010fce11f89c8b7c9782f346234075877ebffdb0043000403030403030404030405040405060a07060606060d090a080a0f0d10100f0d0f0e11131814111217120e0f151c151719191b1b1b10141d1f1d1a1f181a1b1affdb0043010405050605060c07070c1a110f111a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1affc20011080313057803012200021101031101ffc4001c0000000701010000000000000000000000000102030405060708ffc4001b01010101010101010100000000000000000001020304050607ffda000c03010002100310000001d2b6b73c7eb94fa55e6f4d25cd2683a48d16533c6cc4854e95ea42dcdaafb5abd490c496196c386c4427c544789d582b52ed4224b4a4971329a8094a8f429db0b49d5074e7c460f7daeebcb8a1f4da3edcf1e57755be6c2907a1181000140c800cca12879c21aa63234b5245bb1d79932656ab17b0f60f244ee4f638e4bd4bad9000eb800000000000000085a21600a000000000000000057c5833cb3957176be558"

	decoded, err := hex.DecodeString(encoded)
	log.PanicIf(err)

	// Keep only the first 256 bytes; the size hint below tells the function
	// not to expect more than that.
	truncated := decoded[:256]
	source := rifs.NewSeekableBufferWithBytes(truncated)

	mimetype, err := GetMimetypeFromContent(source, len(truncated))
	log.PanicIf(err)

	if expected := "image/jpeg"; mimetype != expected {
		t.Fatalf("Mime-type not correct: [%s]", mimetype)
	}
}

0 comments on commit e692ee1

Please sign in to comment.