Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove tspecials from type/subtype #317

Merged
merged 8 commits into from Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions header.go
Expand Up @@ -113,6 +113,8 @@ func ParseAddressList(list string) ([]*mail.Address, error) {
// - Missing ';' between content-type and media parameters
// - Repeating media parameters
// - Unquoted values in media parameters containing 'tspecials' characters
jhillyerd marked this conversation as resolved.
Show resolved Hide resolved
//
// Deprecated: Use mediatype.Parse instead.
func ParseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string,
jhillyerd marked this conversation as resolved.
Show resolved Hide resolved
err error) {
// Export of internal function.
Expand Down
25 changes: 23 additions & 2 deletions mediatype/mediatype.go
Expand Up @@ -30,6 +30,10 @@ const (
utf8 = "utf-8"
)

// MediaTypeParseOptions holds the optional settings accepted by ParseWithOptions.
// Parse passes the zero value, which leaves every option disabled.
type MediaTypeParseOptions struct {
// StripMediaTypeInvalidCharacters, when true, makes fixMangledMediaType run
// removeTypeSpecials over the media type before it is handed to
// mime.ParseMediaType.
StripMediaTypeInvalidCharacters bool
}

// Parse is a more tolerant implementation of Go's mime.ParseMediaType function.
//
// Tolerances accounted for:
Expand All @@ -38,8 +42,13 @@ const (
// - Unquoted values in media parameters containing 'tspecials' characters
// - Newline characters
func Parse(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
jhillyerd marked this conversation as resolved.
Show resolved Hide resolved
return ParseWithOptions(ctype, MediaTypeParseOptions{})
}

// ParseWithOptions parses media-type with additional options controlling the parsing behavior.
func ParseWithOptions(ctype string, options MediaTypeParseOptions) (mtype string, params map[string]string, invalidParams []string, err error) {
mtype, params, err = mime.ParseMediaType(
fixNewlines(fixUnescapedQuotes(fixUnquotedSpecials(fixMangledMediaType(removeTrailingHTMLTags(ctype), ';')))))
fixNewlines(fixUnescapedQuotes(fixUnquotedSpecials(fixMangledMediaType(removeTrailingHTMLTags(ctype), ';', options)))))
if err != nil {
if err.Error() == "mime: no media type" {
return "", nil, nil, nil
Expand All @@ -63,7 +72,7 @@ func Parse(ctype string) (mtype string, params map[string]string, invalidParams

// fixMangledMediaType is used to insert ; separators into media type strings that lack them, and
// remove repeated parameters.
func fixMangledMediaType(mtype string, sep rune) string {
func fixMangledMediaType(mtype string, sep rune, options MediaTypeParseOptions) string {
strsep := string([]rune{sep})
if mtype == "" {
return ""
Expand All @@ -84,6 +93,10 @@ func fixMangledMediaType(mtype string, sep rune) string {
// The content type is completely missing. Put in a placeholder.
p = ctPlaceholder
}
// Remove invalid characters (specials)
if options.StripMediaTypeInvalidCharacters {
p = removeTypeSpecials(p)
}
// Check for missing token after slash.
if strings.HasSuffix(p, "/") {
switch p {
Expand Down Expand Up @@ -525,3 +538,11 @@ loop:

return value
}

// removeTypeSpecials returns value with every occurrence of the characters
// ( ) < > @ , : \ " [ ] ? = deleted. All other bytes, including '/' and ';',
// are copied through unchanged.
func removeTypeSpecials(value string) string {
	var cleaned strings.Builder
	cleaned.Grow(len(value))

	// Every character being dropped is a single ASCII byte, so a byte-wise scan
	// is safe: such bytes never occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(value); i++ {
		switch c := value[i]; c {
		case '(', ')', '<', '>', '@', ',', ':', '\\', '"', '[', ']', '?', '=':
			// Dropped.
		default:
			cleaned.WriteByte(c)
		}
	}

	return cleaned.String()
}
26 changes: 20 additions & 6 deletions mediatype/mediatype_test.go
Expand Up @@ -6,9 +6,10 @@ import (

func TestFixMangledMediaType(t *testing.T) {
testCases := []struct {
input string
sep rune
want string
input string
sep rune
want string
options MediaTypeParseOptions
}{
{
input: "",
Expand Down Expand Up @@ -48,9 +49,9 @@ func TestFixMangledMediaType(t *testing.T) {
},
{
// Removes empty parameters in the middle
input: `Content-Type: text/html; =""; charset=""`,
input: `text/html; =""; charset=""`,
sep: ';',
want: `Content-Type: text/html; charset=""`,
want: `text/html; charset=""`,
},
{
input: "application/octet-stream;=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=You've got a new voice miss call.msg",
Expand Down Expand Up @@ -133,10 +134,23 @@ func TestFixMangledMediaType(t *testing.T) {
sep: ';',
want: `application/pdf; name=1337.pdf`,
},
// invalid media type characters
{
input: `text/html>`,
sep: ';',
want: `text/html>`,
},
// invalid media type characters, with StripMediaTypeInvalidCharacters enabled
{
input: `text/html>`,
sep: ';',
want: `text/html`,
options: MediaTypeParseOptions{StripMediaTypeInvalidCharacters: true},
},
}
for _, tc := range testCases {
t.Run(tc.input, func(t *testing.T) {
got := fixMangledMediaType(tc.input, tc.sep)
got := fixMangledMediaType(tc.input, tc.sep, tc.options)
if got != tc.want {
t.Errorf("got %q, want %q", got, tc.want)
}
Expand Down
12 changes: 12 additions & 0 deletions options.go
Expand Up @@ -76,3 +76,15 @@ type parseMediaTypeOption CustomParseMediaType
func (o parseMediaTypeOption) apply(p *Parser) {
p.customParseMediaType = CustomParseMediaType(o)
}

// stripMediaTypeInvalidCharactersOption is the boolean value carried by the
// option that StripMediaTypeInvalidCharacters returns.
type stripMediaTypeInvalidCharactersOption bool

// apply copies the option's boolean value into p.stripMediaTypeInvalidCharacters.
func (o stripMediaTypeInvalidCharactersOption) apply(p *Parser) {
p.stripMediaTypeInvalidCharacters = bool(o)
}

// StripMediaTypeInvalidCharacters returns an Option that sets the Parser's
// stripMediaTypeInvalidCharacters flag to the given value. If true, invalid
// characters will be removed from the media type during parsing.
func StripMediaTypeInvalidCharacters(stripMediaTypeInvalidCharacters bool) Option {
return stripMediaTypeInvalidCharactersOption(stripMediaTypeInvalidCharacters)
}
1 change: 1 addition & 0 deletions parser.go
Expand Up @@ -25,6 +25,7 @@ type Parser struct {
skipMalformedParts bool
rawContent bool
customParseMediaType CustomParseMediaType
stripMediaTypeInvalidCharacters bool
}

// defaultParser is a Parser with default configuration.
Expand Down
3 changes: 2 additions & 1 deletion part.go
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/gogs/chardet"
"github.com/jhillyerd/enmime/internal/coding"
inttp "github.com/jhillyerd/enmime/internal/textproto"
"github.com/jhillyerd/enmime/mediatype"
"github.com/pkg/errors"
)

Expand Down Expand Up @@ -328,7 +329,7 @@ func (p *Part) decodeContent(r io.Reader, readPartErrorPolicy ReadPartErrorPolic
// parses media type using custom or default media type parser
func (p *Part) parseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
if p.parser == nil || p.parser.customParseMediaType == nil {
return ParseMediaType(ctype)
return mediatype.ParseWithOptions(ctype, mediatype.MediaTypeParseOptions{StripMediaTypeInvalidCharacters: p.parser.stripMediaTypeInvalidCharacters})
}

return p.parser.customParseMediaType(ctype)
Expand Down
16 changes: 16 additions & 0 deletions part_test.go
Expand Up @@ -1302,3 +1302,19 @@ func TestChardetSuccess(t *testing.T) {
test.ComparePart(t, p, wantp)
})
}

func TestCtypeInvalidCharacters(t *testing.T) {
r := test.OpenTestData("parts", "ctype-invalid-characters.raw")
parser := enmime.NewParser(enmime.StripMediaTypeInvalidCharacters(true))
p, err := parser.ReadParts(r)
if err != nil {
t.Fatal(err)
}

wantp := &enmime.Part{
PartID: "0",
ContentType: "text/plain",
}

test.ComparePart(t, p, wantp)
}
4 changes: 4 additions & 0 deletions testdata/parts/ctype-invalid-characters.raw
@@ -0,0 +1,4 @@
Content-Type: text/plain>

Test of text/plain section with invalid content type