Permalink
Browse files

MIME4J-211: Add an optional fallback charset argument to DecoderUtil.decodeEncodedWords

Contributed by TzeKai Lee <chikei at gmail.com>

git-svn-id: https://svn.apache.org/repos/asf/james/mime4j/trunk@1240783 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent 54f25b5 commit f5ea3a491d251cfd97c3a7e1cc1f20b31940d672 @ok2c ok2c committed Feb 5, 2012
@@ -142,6 +142,38 @@ static String decodeEncodedWords(String body) {
* @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
*/
public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
+ return decodeEncodedWords(body, monitor, null); // delegate to the three-argument overload with no fallback charset
+ }
+
+ /**
+ * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+ * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+ * or 'q' for quoted-printable and 'B' or 'b' for base64. The fallback
+ * charset is used when the charset named in an encoded word has no
+ * corresponding Java charset.
+ * <p>
+ * This overload delegates to the three-argument variant and passes
+ * {@code null} as the DecodeMonitor.
+ * NOTE(review): confirm that a null monitor is tolerated on every decoding
+ * path reached from here.
+ *
+ * @param body the string to decode
+ * @param fallback the fallback Charset to be used.
+ * @return the decoded string.
+ * @throws IllegalArgumentException if the delegated decoding throws it
+ * (note that this overload supplies no DecodeMonitor of its own)
+ */
+ public static String decodeEncodedWords(String body, Charset fallback) throws IllegalArgumentException {
+ return decodeEncodedWords(body, null, fallback);
+ }
+
+ /**
+ * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+ * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+ * or 'q' for quoted-printable and 'B' or 'b' for base64. The fallback
+ * charset, if given, is used when the charset named in an encoded word
+ * has no corresponding Java charset.
+ *
+ * @param body the string to decode
+ * @param monitor the DecodeMonitor to be used.
+ * @param fallback the fallback Charset to be used.
+ * @return the decoded string.
+ * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
+ */
+ public static String decodeEncodedWords(String body, DecodeMonitor monitor, Charset fallback)
+ throws IllegalArgumentException {
int tailIndex = 0;
boolean lastMatchValid = false;
@@ -154,7 +186,7 @@ public static String decodeEncodedWords(String body, DecodeMonitor monitor) thro
String encodedText = matcher.group(4);
String decoded = null;
- decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
+ decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor, fallback);
if (decoded == null) {
sb.append(matcher.group(0));
} else {
@@ -178,12 +210,16 @@ public static String decodeEncodedWords(String body, DecodeMonitor monitor) thro
// return null on error
private static String tryDecodeEncodedWord(final String mimeCharset,
- final String encoding, final String encodedText, final DecodeMonitor monitor) {
+ final String encoding, final String encodedText, final DecodeMonitor monitor, final Charset fallback) {
Charset charset = CharsetUtil.lookup(mimeCharset);
if (charset == null) {
- monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
- "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
- return null;
+ if(fallback == null) {
+ monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
+ "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
+ return null;
+ } else {
+ charset = fallback;
+ }
}
if (encodedText.length() == 0) {
@@ -20,6 +20,7 @@
package org.apache.james.mime4j.codec;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import junit.framework.TestCase;
@@ -67,6 +68,18 @@ public void testDecodeJapaneseEncodedWords() {
+ "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
}
+ public void testDecodeJapaneseEncodedWordsWithFallback(){ // encoded words carry placeholder charset labels; presumably none resolves to a Java charset
+ String enc = "=?random?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?= "
+ + "=?garbage?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?= "
+ + "=?charset?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?= "
+ + "=?name?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?= "
+ + "=?trash?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";
+
+ String dec = DecoderUtil.decodeEncodedWords(enc, Charset.forName("ISO-2022-JP")); // ISO-2022-JP is supplied as the fallback charset
+ assertEquals("\u672A\u627F\u8AFE\u5E83\u544A\u203B\u30B5\u30A4\u30C9\u30D3"
+ + "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
+ }
+
public void testInvalidEncodedWordsAreIgnored() {
assertEquals("=?iso8859-1?Q?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
assertEquals("=?iso8859-1?b?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));

0 comments on commit f5ea3a4

Please sign in to comment.