Skip to content

Commit

Permalink
Change library interface for v1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-vasile committed Dec 7, 2019
1 parent 514ead3 commit 408d157
Show file tree
Hide file tree
Showing 5 changed files with 375 additions and 406 deletions.
117 changes: 20 additions & 97 deletions mime.go
Original file line number Diff line number Diff line change
@@ -1,125 +1,48 @@
// Package mimetype uses magic number signatures to detect and
// to check the MIME type of a file.
// Package mimetype uses magic number signatures to detect the MIME type of a file.
//
// mimetype stores the list of MIME types in a tree structure with
// "application/octet-stream" at the root of the hierarchy. When the detection
// fails to find any result for an input, the MIME type "application/octet-stream"
// is returned. The hierarchy approach minimises the number of checks that need
// to be done on the input and allows for more precise results once the base
// type of file has been identified.
package mimetype

import (
"io"
"mime"
"os"

"github.com/gabriel-vasile/mimetype/internal/matchers"
)

// Detect returns the MIME type and extension of the provided byte slice.
//
// mime is always a valid MIME type, with application/octet-stream as fallback.
// extension is empty string if detected file format does not have an extension.
func Detect(in []byte) (mime, extension string) {
// Detect returns the MIME type of the provided byte slice.
func Detect(in []byte) (mime *MIME) {
if len(in) == 0 {
return "inode/x-empty", ""
}
n := root.match(in, root)
return n.mime, n.extension
}

// DetectReader returns the MIME type and extension
// of the byte slice read from the provided reader.
//
// mime is always a valid MIME type, with application/octet-stream as fallback.
// extension is empty string if detection failed with an error or
// detected file format does not have an extension.
func DetectReader(r io.Reader) (mime, extension string, err error) {
in := make([]byte, matchers.ReadLimit)
n, err := r.Read(in)
if err != nil && err != io.EOF {
return root.mime, root.extension, err
}
in = in[:n]

mime, extension = Detect(in)
return mime, extension, nil
}

// DetectFile returns the MIME type and extension of the provided file.
//
// mime is always a valid MIME type, with application/octet-stream as fallback.
// extension is empty string if detection failed with an error or
// detected file format does not have an extension.
func DetectFile(file string) (mime, extension string, err error) {
f, err := os.Open(file)
if err != nil {
return root.mime, root.extension, err
}
defer f.Close()

return DetectReader(f)
}

// Matches returns whether the MIME type detected from the slice, or any of its
// aliases, is the same as any of the expected MIME types.
//
// MIME type equality test is done on the "type/subtype" sections, ignores any
// optional MIME parameters, ignores any leading and trailing whitespace,
// and is case insensitive.
// Any error returned is related to the parsing of the expected MIME type.
func Matches(in []byte, expectedMimes ...string) (match bool, err error) {
for i := 0; i < len(expectedMimes); i++ {
expectedMimes[i], _, err = mime.ParseMediaType(expectedMimes[i])
if err != nil {
return false, err
}
return newMIME("inode/x-empty", "", matchers.True)
}

n := root.match(in, root)
// This parsing is needed because some detected MIME types contain parameters.
found, _, err := mime.ParseMediaType(n.mime)
if err != nil {
return false, err
}

for _, expected := range expectedMimes {
if expected == found {
return true, nil
}
for _, alias := range n.aliases {
if alias == expected {
return true, nil
}
}
}

return false, nil
return root.match(in, root)
}

// MatchesReader returns whether the MIME type detected from the reader, or any of its
// aliases, is the same as any of the expected MIME types.
//
// MIME type equality test is done on the "type/subtype" sections, ignores any
// optional MIME parameters, ignores any leading and trailing whitespace,
// and is case insensitive.
func MatchesReader(r io.Reader, expectedMimes ...string) (match bool, err error) {
// DetectReader returns the MIME type of the provided reader.
//
// At most matchers.ReadLimit bytes are consumed from r. The header is read
// with io.ReadFull, which keeps reading until the buffer is full or the input
// ends; a single Read call is allowed to return fewer bytes than are actually
// available, which would make detection run on a truncated header.
//
// Input shorter than the read limit (including empty input) is not an error.
// On any other read error the root MIME type is returned together with the
// error.
func DetectReader(r io.Reader) (mime *MIME, err error) {
	in := make([]byte, matchers.ReadLimit)
	n, err := io.ReadFull(r, in)
	// io.EOF means nothing was read, io.ErrUnexpectedEOF means the input ended
	// before the buffer was filled. Both simply describe a short input.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return root, err
	}

	return Detect(in[:n]), nil
}

// MatchesFile returns whether the MIME type detected from the file, or any of its
// aliases, is the same as any of the expected MIME types.
//
// MIME type equality test is done on the "type/subtype" sections, ignores any
// optional MIME parameters, ignores any leading and trailing whitespace,
// and is case insensitive.
func MatchesFile(file string, expectedMimes ...string) (match bool, err error) {
// DetectFile opens the file at the given path and returns the MIME type
// detected from its content. When the file cannot be opened, the root MIME
// type is returned together with the error from os.Open.
func DetectFile(file string) (mime *MIME, err error) {
	f, openErr := os.Open(file)
	if openErr != nil {
		return root, openErr
	}
	// The file is only needed for the duration of the detection.
	defer f.Close()

	mime, err = DetectReader(f)
	return mime, err
}
53 changes: 27 additions & 26 deletions mime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (

const testDataDir = "testdata"

var files = map[string]*node{
var files = map[string]*MIME{
// archives
"pdf.pdf": pdf,
"zip.zip": zip,
Expand Down Expand Up @@ -187,45 +187,45 @@ func TestDetect(t *testing.T) {
t.Fatal(err)
}

if dMime, _ := Detect(data); dMime != node.mime {
t.Errorf(errStr, fName, node.mime, dMime, nil)
if mime := Detect(data); mime.String() != node.mime {
t.Errorf(errStr, fName, node.mime, mime.String(), nil)
}

if _, err := f.Seek(0, io.SeekStart); err != nil {
t.Errorf(errStr, fName, node.mime, root.mime, err)
}

if dMime, _, err := DetectReader(f); dMime != node.mime {
t.Errorf(errStr, fName, node.mime, dMime, err)
if mime, err := DetectReader(f); mime.String() != node.mime {
t.Errorf(errStr, fName, node.mime, mime.String(), err)
}
f.Close()

if dMime, _, err := DetectFile(fileName); dMime != node.mime {
t.Errorf(errStr, fName, node.mime, dMime, err)
if mime, err := DetectFile(fileName); mime.String() != node.mime {
t.Errorf(errStr, fName, node.mime, mime.String(), err)
}
}
}

func TestFaultyInput(t *testing.T) {
inexistent := "inexistent.file"
if _, _, err := DetectFile(inexistent); err == nil {
if _, err := DetectFile(inexistent); err == nil {
t.Errorf("%s should not match successfully", inexistent)
}

f, _ := os.Open(inexistent)
if _, _, err := DetectReader(f); err == nil {
if _, err := DetectReader(f); err == nil {
t.Errorf("%s reader should not match successfully", inexistent)
}
}

func TestEmptyInput(t *testing.T) {
if m, _ := Detect([]byte{}); m != "inode/x-empty" {
if mime := Detect([]byte{}); mime.String() != "inode/x-empty" {
t.Errorf("failed to detect empty file")
}
}

func TestBadBdfInput(t *testing.T) {
if m, _, _ := DetectFile("testdata/bad.dbf"); m != "application/octet-stream" {
if mime, _ := DetectFile("testdata/bad.dbf"); mime.String() != "application/octet-stream" {
t.Errorf("failed to detect bad DBF file")
}
}
Expand Down Expand Up @@ -272,28 +272,29 @@ func TestIndexOutOfRange(t *testing.T) {
}

// MIME type equality ignores any optional MIME parameters, so, in order to not
// parse each alias when testing for equality, we must ensure they are registered
// with no parameters.
func TestAliasesParameters(t *testing.T) {
// parse each alias when testing for equality, we must ensure they are
// registered with no parameters.
func TestMIMEFormat(t *testing.T) {
for _, n := range root.flatten() {
// All extensions must be dot prefixed so they are compatible
// with the stdlib mime package.
if n.Extension() != "" && !strings.HasPrefix(n.Extension(), ".") {
t.Fatalf("")
}
// All MIMEs must be correctly formatted.
_, _, err := mime.ParseMediaType(n.String())
if err != nil {
t.Fatalf("error parsing node MIME: %s", err)
}
// Aliases must have no optional MIME parameters.
for _, a := range n.aliases {
parsed, params, err := mime.ParseMediaType(a)
if err != nil {
t.Fatalf("error parsing node alias: %s", err)
t.Fatalf("error parsing node alias MIME: %s", err)
}
if parsed != a || len(params) > 0 {
t.Fatalf("node alias should have no optional params; alias: %s, params: %v", a, params)
t.Fatalf("node alias MIME should have no optional params; alias: %s, params: %v", a, params)
}
}
}
}

// TestMatch checks that MatchesFile reports a match for the zip fixture when
// the expected MIME type is given with an optional parameter
// ("application/x-zip; charset=utf-8"), i.e. that parameters on the expected
// value do not prevent the comparison from succeeding.
func TestMatch(t *testing.T) {
	matches, err := MatchesFile("testdata/zip.zip", "application/x-zip; charset=utf-8")
	if err != nil {
		t.Fatal(err)
	}
	if !matches {
		t.Errorf("zip file should match application/x-zip")
	}
}
79 changes: 62 additions & 17 deletions node.go
Original file line number Diff line number Diff line change
@@ -1,35 +1,80 @@
package mimetype

type (
// node represents a vertex in the matchers tree structure.
// It holds the MIME type, the extension and the function
// to check whether a byte slice has the MIME type.
node struct {
mime string
extension string
aliases []string
matchFunc func([]byte) bool
children []*node
import "mime"

// MIME represents a file format in the tree structure of formats.
// Each value is one vertex of that tree: it holds the MIME string of the
// format, its aliases and extension, the function used to recognise the
// format, and the links to its children and parent.
type MIME struct {
	// mime is the MIME type string returned by String.
	mime string
	// aliases are alternative MIME strings for the same format; Is compares
	// them as stored, without parsing them.
	aliases []string
	// extension is the file extension returned by Extension, or "" when the
	// format has none.
	extension string
	// matchFunc reports whether a byte slice is of this format.
	matchFunc func([]byte) bool
	// children are the formats tried by match once this one has matched.
	children []*MIME
	// parent is set by newMIME on every MIME passed to it as a child and is
	// nil otherwise.
	parent *MIME
}

// String returns the string representation of the MIME type, e.g., "application/zip".
// The stored value is returned as is, so any optional parameters it carries
// are part of the result; use Is for a comparison that ignores them.
func (n *MIME) String() string {
	return n.mime
}

// Extension returns the file extension associated with the MIME type.
// It includes the leading dot, as in ".html", which keeps it compatible with
// the extensions used by the standard library mime package. When the file
// format does not have an extension, the empty string is returned.
func (n *MIME) Extension() string {
	return n.extension
}

// Parent returns the parent MIME type from the tree structure.
// The parent is recorded by newMIME for every MIME passed to it as a child,
// so the result is nil for a MIME that was never registered as a child: the
// root MIME type and, for example, the standalone "inode/x-empty" value that
// Detect builds for empty input. Callers walking up the tree should stop at
// the first nil.
func (n *MIME) Parent() *MIME {
	return n.parent
}

// Is checks whether this MIME type, or any of its aliases, is equal to the
// expected MIME type. MIME type equality test is done on the "type/subtype"
// sections, ignores any optional MIME parameters, ignores any leading and
// trailing whitespace, and is case insensitive.
func (n *MIME) Is(expectedMIME string) bool {
// Parsing is needed because some detected MIME types contain parameters
// that need to be stripped for the comparison.
expectedMIME, _, _ = mime.ParseMediaType(expectedMIME)
found, _, _ := mime.ParseMediaType(n.mime)

if expectedMIME == found {
return true
}
for _, alias := range n.aliases {
if alias == expectedMIME {
return true
}
}
)

func newNode(mime, extension string, matchFunc func([]byte) bool, children ...*node) *node {
return &node{
return false
}

// newMIME creates a vertex of the formats tree from a MIME string, a file
// extension, the function that recognises the format, and the child formats.
// Every child gets the new vertex recorded as its parent. Aliases are left
// unset; they are attached separately through alias.
func newMIME(mime, extension string, matchFunc func([]byte) bool, children ...*MIME) *MIME {
	parent := new(MIME)
	parent.mime = mime
	parent.extension = extension
	parent.matchFunc = matchFunc
	parent.children = children

	// Link each child back to the vertex that owns it.
	for i := range children {
		children[i].parent = parent
	}

	return parent
}

func (n *node) alias(aliases ...string) *node {
// alias sets the alternative MIME strings of n and returns n, so the call can
// be chained onto the expression that creates the MIME. The given aliases
// replace any set previously, and the slice is stored without being copied.
func (n *MIME) alias(aliases ...string) *MIME {
	n.aliases = aliases
	return n
}

// match does a depth-first search on the matchers tree.
// It returns the deepest successful matcher for which all the children fail.
func (n *node) match(in []byte, deepestMatch *node) *node {
func (n *MIME) match(in []byte, deepestMatch *MIME) *MIME {
for _, c := range n.children {
if c.matchFunc(in) {
return c.match(in, c)
Expand All @@ -39,8 +84,8 @@ func (n *node) match(in []byte, deepestMatch *node) *node {
return deepestMatch
}

func (n *node) flatten() []*node {
out := []*node{n}
func (n *MIME) flatten() []*MIME {
out := []*MIME{n}
for _, c := range n.children {
out = append(out, c.flatten()...)
}
Expand Down

0 comments on commit 408d157

Please sign in to comment.