Skip to content

Commit

Permalink
Merge pull request #231 from Slamdunk/dont_charset_decode_attachments
Browse files Browse the repository at this point in the history
Do not charset-decode attachments
  • Loading branch information
Slamdunk committed Oct 6, 2017
2 parents f6a611a + f8bc695 commit 3a050b3
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 30 deletions.
14 changes: 7 additions & 7 deletions src/Message/AbstractPart.php
Expand Up @@ -168,11 +168,11 @@ final public function getParameters(): Parameters
/**
* Part charset.
*
* @return string
* @return null|string
*/
final public function getCharset(): string
final public function getCharset()
{
return $this->parameters->get('charset') ?: 'us-ascii';
return $this->parameters->get('charset') ?: null;
}

/**
Expand Down Expand Up @@ -274,9 +274,9 @@ final public function getDecodedContent(): string
$content = \quoted_printable_decode($content);
}

// If this part is a text part, try to convert its encoding to UTF-8.
// We don't want to convert an attachment's encoding.
if (self::TYPE_TEXT === $this->getType()) {
// If this part is a text part, convert its charset to UTF-8.
// We don't want to decode an attachment's charset.
if (!$this instanceof Attachment && null !== $this->getCharset() && self::TYPE_TEXT === $this->getType()) {
$content = Transcoder::decode($content, $this->getCharset());
}

Expand Down Expand Up @@ -308,7 +308,7 @@ final protected function doGetContent(string $partNumber): string
*
* @param \stdClass $structure
*/
final protected function parseStructure(\stdClass $structure)
private function parseStructure(\stdClass $structure)
{
$this->type = $this->typesMap[$structure->type] ?? self::TYPE_UNKNOWN;

Expand Down
2 changes: 1 addition & 1 deletion src/Message/PartInterface.php
Expand Up @@ -41,7 +41,7 @@ public function getNumber(): int;
*
* @return null|string
*/
public function getCharset(): string;
public function getCharset();

/**
* Part type.
Expand Down
118 changes: 96 additions & 22 deletions tests/MessageTest.php
Expand Up @@ -6,11 +6,13 @@

use Ddeboer\Imap\Exception\InvalidDateHeaderException;
use Ddeboer\Imap\Exception\UnsupportedCharsetException;
use Ddeboer\Imap\Message;
use Ddeboer\Imap\Message\EmailAddress;
use Ddeboer\Imap\Message\Parameters;
use Ddeboer\Imap\MessageIterator;
use Ddeboer\Imap\Search;
use Zend\Mime\Mime;
use Zend\Mail;
use Zend\Mime;

/**
* @covers \Ddeboer\Imap\Connection::expunge
Expand All @@ -27,10 +29,16 @@
*/
final class MessageTest extends AbstractTest
{
/**
* @var \Ddeboer\Imap\Mailbox
*/
protected $mailbox;
private static $charsets = [
'ASCII' => '! "#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'GB18030' => " 、。〃々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〝〞〡〢〣〤〥〦〧〨〩〾一\u{200b}\u{200b}丂踰\u{200b}\u{200b}\u{200b}",
'ISO-8859-6' => 'ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي',
'ISO-8859-7' => 'ΆΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟ2ΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ',
'SJIS' => '。「」、・ヲァィゥェォャュョッBーアイウエオカキクケコサシスセソCタチツテトナニヌネノハヒフヘホマDミムメモヤユヨラリルレロワン゙゚',
'UTF-8' => '€✔',
'Windows-1251' => 'ЂЃѓЉЊЌЋЏђљњќћџЎўЈҐЁЄЇІіґёєјЅѕїАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя',
'Windows-1252' => 'ƒŠŒŽšœžŸªºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
];

protected function setUp()
{
Expand Down Expand Up @@ -88,30 +96,19 @@ public function testBodyCharsets(string $charset = null, string $charList, strin

public function provideCharsets(): array
{
$charsets = [
'ASCII' => '! "#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
'GB18030' => " 、。〃々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〝〞〡〢〣〤〥〦〧〨〩〾一\u{200b}\u{200b}丂踰\u{200b}\u{200b}\u{200b}",
'ISO-8859-6' => 'ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي',
'ISO-8859-7' => 'ΆΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟ2ΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ',
'SJIS' => '。「」、・ヲァィゥェォャュョッBーアイウエオカキクケコサシスセソCタチツテトナニヌネノハヒフヘホマDミムメモヤユヨラリルレロワン゙゚',
'UTF-8' => '€✔',
'Windows-1251' => 'ЂЃѓЉЊЌЋЏђљњќћџЎўЈҐЁЄЇІіґёєјЅѕїАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя',
'Windows-1252' => 'ƒŠŒŽšœžŸªºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
];

$provider = [];

// This first data set mimics "us-ascii" imap server default settings
$provider[] = [null, $charsets['ASCII'], null];
$provider[] = [null, self::$charsets['ASCII'], null];

$encodings = [
Mime::ENCODING_7BIT,
Mime::ENCODING_8BIT,
Mime::ENCODING_QUOTEDPRINTABLE,
Mime::ENCODING_BASE64,
Mime\Mime::ENCODING_7BIT,
Mime\Mime::ENCODING_8BIT,
Mime\Mime::ENCODING_QUOTEDPRINTABLE,
Mime\Mime::ENCODING_BASE64,
];

foreach ($charsets as $charset => $charList) {
foreach (self::$charsets as $charset => $charList) {
foreach ($encodings as $encoding) {
$provider[] = [$charset, $charList, $encoding];
}
Expand Down Expand Up @@ -594,4 +591,81 @@ public function testMultipartMessageWithoutCharset()
$this->assertSame('MyPlain', \rtrim($message->getBodyText()));
$this->assertSame('MyHtml', \rtrim($message->getBodyHtml()));
}

/**
 * Attachments must come back from getDecodedContent() as the exact bytes
 * that were sent, i.e. still in their own charset and not converted.
 *
 * One base64-encoded text/xml attachment is built per entry of
 * self::$charsets, named "<charset>.xml" so the expected charset can be
 * recovered from the filename when asserting.
 */
public function testAttachmentMustNotBeCharsetDecoded()
{
    $attachmentParts = [];
    foreach (self::$charsets as $charsetName => $utf8Sample) {
        // The payload is the sample text re-encoded from UTF-8 into the target charset.
        $mimePart = new Mime\Part(\mb_convert_encoding($utf8Sample, $charsetName, 'UTF-8'));
        $mimePart->setType('text/xml');
        $mimePart->setEncoding(Mime\Mime::ENCODING_BASE64);
        $mimePart->setCharset($charsetName);
        $mimePart->setDisposition(Mime\Mime::DISPOSITION_ATTACHMENT);
        $mimePart->setFilename(\sprintf('%s.xml', $charsetName));

        $attachmentParts[] = $mimePart;
    }

    $body = new Mime\Message();
    $body->setParts($attachmentParts);

    $mail = new Mail\Message();
    $mail->addFrom('from@here.com');
    $mail->addTo('to@there.com');
    $mail->setSubject('Charsets');
    $mail->setBody($body);

    // Strip every "; charset=..." (up to the end of its line) from the raw message.
    $rawMessage = \preg_replace('/; charset=.+/', '', $mail->toString());

    $this->mailbox->addMessage($rawMessage);

    $message = $this->mailbox->getMessage(1);

    // Also drop any charset parameter from the parsed structure of the attachments.
    $this->resetAttachmentCharset($message);

    $this->assertTrue($message->hasAttachments());

    $attachments = $message->getAttachments();
    $this->assertCount(\count(self::$charsets), $attachments);

    foreach ($attachments as $attachment) {
        // The filename is "<charset>.xml", so removing the extension yields the charset key.
        $charsetName = \str_replace('.xml', '', $attachment->getFilename());
        $expectedBytes = \mb_convert_encoding(self::$charsets[$charsetName], $charsetName, 'UTF-8');

        $this->assertSame($expectedBytes, $attachment->getDecodedContent());
    }
}

/**
 * Removes the "charset" parameter from every attachment part of the
 * message structure, then re-parses the structure.
 *
 * This mimics GMAIL, which correctly reports no charset for attachments
 * that do not declare one. All access goes through reflection because the
 * touched properties and method are not public.
 *
 * @param Message $message message whose parsed structure is rewritten in place
 */
private function resetAttachmentCharset(Message $message)
{
    // Two levels above the runtime class of $message; presumably AbstractPart,
    // which is expected to declare 'parts', 'structure' and parseStructure().
    $abstractPartClass = (new \ReflectionClass($message))
        ->getParentClass()
        ->getParentClass();

    // Reset the parts already stored on the message to an empty list.
    $partsProperty = $abstractPartClass->getProperty('parts');
    $partsProperty->setAccessible(true);
    $partsProperty->setValue($message, []);
    $partsProperty->setAccessible(false);

    $structureProperty = $abstractPartClass->getProperty('structure');
    $structureProperty->setAccessible(true);
    $structure = $structureProperty->getValue($message);

    foreach ($structure->parts as $partKey => $part) {
        // Only parts with an 'attachment' disposition are touched.
        if (!$part->ifdisposition || 'attachment' !== $part->disposition) {
            continue;
        }

        foreach ($part->parameters as $parameterKey => $parameter) {
            if ('charset' === $parameter->attribute) {
                unset($structure->parts[$partKey]->parameters[$parameterKey]);
            }
        }

        // Keep the "has parameters" flag consistent once nothing is left.
        if (0 === \count($part->parameters)) {
            $part->ifparameters = 0;
        }
    }

    $structureProperty->setValue($message, $structure);
    $structureProperty->setAccessible(false);

    // Re-run the structure parsing with the modified structure.
    $parseStructureMethod = $abstractPartClass->getMethod('parseStructure');
    $parseStructureMethod->setAccessible(true);
    $parseStructureMethod->invoke($message, $structure);
    $parseStructureMethod->setAccessible(false);
}
}

0 comments on commit 3a050b3

Please sign in to comment.