Skip to content

Commit

Permalink
add new option SetCustomParseMediaType (#308)
Browse files Browse the repository at this point in the history
* add new option SetCustomParseMediaType to customise mediatype parsing
  • Loading branch information
vadzappa committed Oct 18, 2023
1 parent 9d1f8fe commit b88939a
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 21 deletions.
21 changes: 10 additions & 11 deletions detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ import (
"strings"

inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"
)

// detectMultipartMessage returns true if the message has a recognized multipart Content-Type header
func detectMultipartMessage(root *Part, multipartWOBoundaryAsSinglepart bool) bool {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mtype, params, _, err := mediatype.Parse(ctype)
mtype, params, _, err := root.parseMediaType(ctype)
if err != nil {
return false
}
Expand All @@ -35,27 +34,27 @@ func detectMultipartMessage(root *Part, multipartWOBoundaryAsSinglepart bool) bo
// - Content-Disposition: attachment; filename="frog.jpg"
// - Content-Disposition: inline; filename="frog.jpg"
// - Content-Type: attachment; filename="frog.jpg"
func detectAttachmentHeader(header inttp.MIMEHeader) bool {
mtype, params, _, _ := mediatype.Parse(header.Get(hnContentDisposition))
func detectAttachmentHeader(root *Part, header inttp.MIMEHeader) bool {
mtype, params, _, _ := root.parseMediaType(header.Get(hnContentDisposition))
if strings.ToLower(mtype) == cdAttachment ||
(strings.ToLower(mtype) == cdInline && len(params) > 0) {
return true
}

mtype, _, _, _ = mediatype.Parse(header.Get(hnContentType))
mtype, _, _, _ = root.parseMediaType(header.Get(hnContentType))
return strings.ToLower(mtype) == cdAttachment
}

// detectTextHeader returns true if the MIME headers define a valid 'text/plain' or 'text/html'
// part. If the emptyContentTypeIsText argument is set to true, a missing Content-Type header will
// result in a positive plain part detection.
func detectTextHeader(header inttp.MIMEHeader, emptyContentTypeIsText bool) bool {
func detectTextHeader(root *Part, header inttp.MIMEHeader, emptyContentTypeIsText bool) bool {
ctype := header.Get(hnContentType)
if ctype == "" && emptyContentTypeIsText {
return true
}

if mtype, _, _, err := mediatype.Parse(ctype); err == nil {
if mtype, _, _, err := root.parseMediaType(ctype); err == nil {
switch mtype {
case ctTextPlain, ctTextHTML:
return true
Expand All @@ -68,23 +67,23 @@ func detectTextHeader(header inttp.MIMEHeader, emptyContentTypeIsText bool) bool
// detectBinaryBody returns true if the mail header defines a binary body.
func detectBinaryBody(root *Part) bool {
header := inttp.MIMEHeader(root.Header) // Use internal header methods.
if detectTextHeader(header, true) {
if detectTextHeader(root, header, true) {
// It is text/plain, but an attachment.
// Content-Type: text/plain; name="test.csv"
// Content-Disposition: attachment; filename="test.csv"
// Check for attachment only, or inline body is marked
// as attachment, too.
mtype, _, _, _ := mediatype.Parse(header.Get(hnContentDisposition))
mtype, _, _, _ := root.parseMediaType(header.Get(hnContentDisposition))
return strings.ToLower(mtype) == cdAttachment
}

isBin := detectAttachmentHeader(header)
isBin := detectAttachmentHeader(root, header)
if !isBin {
// This must be an attachment, if the Content-Type is not
// 'text/plain' or 'text/html'.
// Example:
// Content-Type: application/pdf; name="doc.pdf"
mtype, _, _, _ := mediatype.Parse(header.Get(hnContentType))
mtype, _, _, _ := root.parseMediaType(header.Get(hnContentType))
mtype = strings.ToLower(mtype)
if mtype != ctTextPlain && mtype != ctTextHTML {
return true
Expand Down
8 changes: 6 additions & 2 deletions detect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,10 @@ func TestDetectAttachmentHeader(t *testing.T) {
},
}

root := &Part{parser: &defaultParser}

for _, s := range htests {
got := detectAttachmentHeader(s.header)
got := detectAttachmentHeader(root, s.header)
if got != s.want {
t.Errorf("detectAttachmentHeader(%v) == %v, want: %v", s.header, got, s.want)
}
Expand Down Expand Up @@ -192,8 +194,10 @@ func TestDetectTextHeader(t *testing.T) {
},
}

root := &Part{parser: &defaultParser}

for _, s := range htests {
got := detectTextHeader(s.header, s.emptyIsPlain)
got := detectTextHeader(root, s.header, s.emptyIsPlain)
if got != s.want {
t.Errorf("detectTextHeader(%v, %v) == %v, want: %v",
s.header, s.emptyIsPlain, got, s.want)
Expand Down
6 changes: 2 additions & 4 deletions envelope.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ import (
"github.com/jaytaylor/html2text"
"github.com/jhillyerd/enmime/internal/coding"
inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"

"github.com/pkg/errors"
)

Expand Down Expand Up @@ -232,7 +230,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
var charset string
var isHTML bool
if ctype := root.Header.Get(hnContentType); ctype != "" {
if mediatype, mparams, _, err := mediatype.Parse(ctype); err == nil {
if mediatype, mparams, _, err := root.parseMediaType(ctype); err == nil {
isHTML = (mediatype == ctTextHTML)
if mparams[hpCharset] != "" {
charset = mparams[hpCharset]
Expand Down Expand Up @@ -271,7 +269,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
func parseMultiPartBody(root *Part, e *Envelope) error {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mediatype, params, _, err := mediatype.Parse(ctype)
mediatype, params, _, err := root.parseMediaType(ctype)
if err != nil {
return fmt.Errorf("unable to parse media type: %v", err)
}
Expand Down
13 changes: 13 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,16 @@ type rawContentOption bool
// apply sets the parser's rawContent flag to the option's boolean value.
func (o rawContentOption) apply(p *Parser) {
p.rawContent = bool(o)
}

// SetCustomParseMediaType returns an Option that installs customParseMediaType as the function
// used to parse media types, in place of the default ParseMediaType. This may be used to parse
// media type parameters that would otherwise be considered malformed. When no custom function is
// set (or it is nil), parsing falls back to ParseMediaType.
func SetCustomParseMediaType(customParseMediaType CustomParseMediaType) Option {
return parseMediaTypeOption(customParseMediaType)
}

// parseMediaTypeOption is the Option implementation behind SetCustomParseMediaType; it carries
// the custom media type parsing function.
type parseMediaTypeOption CustomParseMediaType

// apply stores the wrapped function in the parser's customParseMediaType field.
func (o parseMediaTypeOption) apply(p *Parser) {
p.customParseMediaType = CustomParseMediaType(o)
}
90 changes: 90 additions & 0 deletions options_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package enmime

import (
"fmt"
"strings"
"testing"
)

// TestSetCustomParseMediaType verifies that Part.parseMediaType uses the function installed via
// SetCustomParseMediaType, and falls back to the default ParseMediaType when the option is nil.
func TestSetCustomParseMediaType(t *testing.T) {
	// alwaysReturnHTML ignores its input and reports text/html without error.
	alwaysReturnHTML := func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
		return "text/html", nil, nil, nil
	}
	// changeAndUtilizeDefault repairs one specific invalid content type and then delegates to
	// the default parser.
	changeAndUtilizeDefault := func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
		modifiedStr := strings.ReplaceAll(ctype, "application/Pamir Viewer", "application/PamirViewer")
		return ParseMediaType(modifiedStr)
	}
	tcases := []struct {
		ctype                string
		want                 string
		customParseMediaType CustomParseMediaType
	}{
		{
			ctype:                "text/plain",
			want:                 "text/plain",
			customParseMediaType: nil,
		},
		{
			ctype:                "text/plain",
			want:                 "text/html",
			customParseMediaType: alwaysReturnHTML,
		},
		{
			ctype:                "text/plain; charset=utf-8",
			want:                 "text/html",
			customParseMediaType: alwaysReturnHTML,
		},
		{
			ctype:                "application/Pamir Viewer; name=\"2023-384.pmrv\"",
			want:                 "application/pamirviewer",
			customParseMediaType: changeAndUtilizeDefault,
		},
	}

	for _, tcase := range tcases {
		p := &Part{parser: NewParser(SetCustomParseMediaType(tcase.customParseMediaType))}

		got, _, _, err := p.parseMediaType(tcase.ctype)
		// Surface parser failures explicitly instead of letting them show up only as a
		// confusing media type mismatch.
		if err != nil {
			t.Errorf("Part.parseMediaType(%v) returned unexpected error: %v", tcase.ctype, err)
			continue
		}
		if got != tcase.want {
			t.Errorf("Part.parseMediaType(%v) == %v, want: %v",
				tcase.ctype, got, tcase.want)
		}
	}
}

// ExampleSetCustomParseMediaType demonstrates installing a custom media type parser so that a
// message carrying the invalid content type "application/Pamir Viewer" can still be read.
func ExampleSetCustomParseMediaType() {
// For the sake of simplicity, this only removes the space from one very specific invalid
// content type, "application/Pamir Viewer", and then delegates to the default ParseMediaType.
replaceSpecificContentType := func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
modifiedStr := strings.ReplaceAll(ctype, "application/Pamir Viewer", "application/PamirViewer")

return ParseMediaType(modifiedStr)
}

invalidMessageContent := `From: <enmime@parser.git>
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_000F_01D9FAC6.09EB3B60"
------=_NextPart_000_000F_01D9FAC6.09EB3B60
Content-Type: application/Pamir Viewer;
name="2023-10-13.pmrv"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="2023-10-13.pmrv"
f6En7vFpNql3tfMkoKABP1iBEf+M/qF6LCAIvyRbpH6uDCqcKKGmH3e6OiqN5eCfqUk=
`

// Read the message with the custom media type parser installed on the Parser.
p := NewParser(SetCustomParseMediaType(replaceSpecificContentType))
e, err := p.ReadEnvelope(strings.NewReader(invalidMessageContent))

fmt.Println(err)
fmt.Println(len(e.Attachments))
fmt.Println(e.Attachments[0].ContentType)
fmt.Println(e.Attachments[0].FileName)

// Output:
// <nil>
// 1
// application/pamirviewer
// 2023-10-13.pmrv
}
4 changes: 4 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ func AllowCorruptTextPartErrorPolicy(p *Part, err error) bool {
return false
}

// CustomParseMediaType is the signature of a function that parses a media type string (ctype)
// into its media type, parameters, and invalid parameters, or returns an error. It can be
// installed on a Parser via SetCustomParseMediaType. See ParseMediaType for more details.
type CustomParseMediaType func(ctype string) (mtype string, params map[string]string, invalidParams []string, err error)

// Parser parses MIME.
// Default parser is a valid one.
type Parser struct {
Expand All @@ -21,6 +24,7 @@ type Parser struct {
readPartErrorPolicy ReadPartErrorPolicy
skipMalformedParts bool
rawContent bool
customParseMediaType CustomParseMediaType
}

// defaultParser is a Parser with default configuration.
Expand Down
15 changes: 11 additions & 4 deletions part.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ import (
"github.com/gogs/chardet"
"github.com/jhillyerd/enmime/internal/coding"
inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"

"github.com/pkg/errors"
)

Expand Down Expand Up @@ -126,7 +124,7 @@ func (p *Part) setupHeaders(r *bufio.Reader, defaultContentType string) error {
ctype = defaultContentType
}
// Parse Content-Type header.
mtype, mparams, minvalidParams, err := mediatype.Parse(ctype)
mtype, mparams, minvalidParams, err := p.parseMediaType(ctype)
if err != nil {
return err
}
Expand All @@ -149,7 +147,7 @@ func (p *Part) setupHeaders(r *bufio.Reader, defaultContentType string) error {
func (p *Part) setupContentHeaders(mediaParams map[string]string) {
header := inttp.MIMEHeader(p.Header)
// Determine content disposition, filename, character set.
disposition, dparams, _, err := mediatype.Parse(header.Get(hnContentDisposition))
disposition, dparams, _, err := p.parseMediaType(header.Get(hnContentDisposition))
if err == nil {
// Disposition is optional
p.Disposition = disposition
Expand Down Expand Up @@ -327,6 +325,15 @@ func (p *Part) decodeContent(r io.Reader, readPartErrorPolicy ReadPartErrorPolic
return nil
}

// parseMediaType parses ctype with the custom media type parser configured on the Part's parser,
// falling back to the default ParseMediaType when no parser or no custom function is set.
func (p *Part) parseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
	if cfg := p.parser; cfg != nil {
		if custom := cfg.customParseMediaType; custom != nil {
			return custom(ctype)
		}
	}

	return ParseMediaType(ctype)
}

// IsBase64CorruptInputError returns true when err is of type base64.CorruptInputError.
//
// It can be used to create ReadPartErrorPolicy functions.
Expand Down

0 comments on commit b88939a

Please sign in to comment.