Skip to content

Commit

Permalink
Added charset encoding support when reading properties; now only for quo…
Browse files Browse the repository at this point in the history
…ted-printable encoding type
  • Loading branch information
jerecui committed Feb 29, 2012
1 parent c0e7f70 commit bbd9b6e
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 52 deletions.
@@ -0,0 +1,6 @@
BEGIN:VCARD
VERSION:2.1
N;ENCODING=QUOTED-PRINTABLE;CHARSET=UTF-8:;=
=E9=99=88=E4=B8=BD=E5=90=9B;;;
TEL;PREF;VOICE;CELL:18777777719
END:VCARD
9 changes: 8 additions & 1 deletion Solution/Thought.vCards.UnitTests/SampleCards.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions Solution/Thought.vCards.UnitTests/SampleCards.resx
Expand Up @@ -112,12 +112,12 @@
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
<assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
<data name="Outlook2007" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>Resources\Outlook2007.txt;System.Byte[], mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
Expand All @@ -133,6 +133,9 @@
<data name="RfcAuthors" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>Resources\RfcAuthors.txt;System.Byte[], mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="UnicodeNameSample" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>resources\unicodenamesample.txt;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="XPalmWithPhoto" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>Resources\XPalmWithPhoto.txt;System.Byte[], mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
Expand Down
13 changes: 13 additions & 0 deletions Solution/Thought.vCards.UnitTests/Samples/OutlookTests.cs
Expand Up @@ -428,6 +428,19 @@ public void ParseOutlookSimple()

#endregion

#region [ ParseUnicodeSimple ]

/// <summary>
/// Parses the UnicodeNameSample card, whose N property carries the given
/// name as QUOTED-PRINTABLE bytes with CHARSET=UTF-8, and verifies that
/// the bytes are decoded with that charset.
/// </summary>
[Test]
public void ParseUnicodeSimple()
{
    vCard card = new vCard(
        new StreamReader(new MemoryStream(SampleCards.UnicodeNameSample)));

    Assert.NotNull(card);

    // The sample encodes =E9=99=88=E4=B8=BD=E5=90=9B, i.e. the UTF-8 bytes
    // of U+9648 U+4E3D U+541B. Unicode escapes are used so the expected
    // value does not depend on the encoding this source file is saved in.
    Assert.AreEqual("\u9648\u4E3D\u541B", card.GivenName);
}

#endregion

}
}
Expand Up @@ -102,6 +102,7 @@
</ItemGroup>
<ItemGroup>
<Content Include="ReadMe.txt" />
<Content Include="Resources\UnicodeNameSample.txt" />
<None Include="packages.config" />
<None Include="Resources\Outlook2007.txt" />
<None Include="Resources\RfcAuthors.txt" />
Expand Down
138 changes: 90 additions & 48 deletions Solution/Thought.vCards/vCardStandardReader.cs
@@ -1,8 +1,7 @@

/* =======================================================================
* vCard Library for .NET
* Copyright (c) 2007-2009 David Pinch; http://wwww.thoughtproject.com
* See LICENSE.TXT for licensing information.
/* =======================================================================
* vCard Library for .NET
* Copyright (c) 2007-2009 David Pinch; http://wwww.thoughtproject.com
* See LICENSE.TXT for licensing information.
* ======================================================================= */

using System;
Expand All @@ -26,13 +25,13 @@ public class vCardStandardReader : vCardReader
/// The DeliveryAddressTypeNames array contains the recognized
/// TYPE values for an ADR (delivery address).
/// </summary>
private readonly string[] DeliveryAddressTypeNames = new string[] {
"DOM", // Domestic address
"INTL", // International address
"POSTAL", // Postal address
"PARCEL", // Parcel delivery address
"HOME", // Home address
"WORK", // Work address
private readonly string[] DeliveryAddressTypeNames = new string[] {
"DOM", // Domestic address
"INTL", // International address
"POSTAL", // Postal address
"PARCEL", // Parcel delivery address
"HOME", // Home address
"WORK", // Work address
"PREF" }; // Preferred address


Expand All @@ -42,26 +41,26 @@ public class vCardStandardReader : vCardReader
/// classification of a phone. The names are used with
/// the TEL property.
/// </summary>
private readonly string[] PhoneTypeNames = new string[] {
"BBS",
"CAR",
"CELL",
"FAX",
"HOME",
"ISDN",
"MODEM",
"MSG",
"PAGER",
"PREF",
"VIDEO",
"VOICE",
private readonly string[] PhoneTypeNames = new string[] {
"BBS",
"CAR",
"CELL",
"FAX",
"HOME",
"ISDN",
"MODEM",
"MSG",
"PAGER",
"PREF",
"VIDEO",
"VOICE",
"WORK" };

/// <summary>
/// The state of the quoted-printable decoder (private).
/// </summary>
/// <remarks>
/// The <see cref="DecodeQuotedPrintable"/> function
/// The <see cref="DecodeQuotedPrintable(string)"/> function
/// is a utility function that parses a string that
/// has been encoded with the QUOTED-PRINTABLE format.
/// The function is implemented as a state-based parser
Expand All @@ -80,10 +79,9 @@ private enum QuotedPrintableState
/// <summary>
/// Initializes a new instance of the <see cref="vCardStandardReader"/>.
/// </summary>
public vCardStandardReader() : base()
{
}

public vCardStandardReader()
: base()
{ }

#region [ DecodeBase64(string) ]

Expand Down Expand Up @@ -368,26 +366,39 @@ public static int DecodeHexadecimal(char value)
#endregion

#region [ DecodeQuotedPrintable ]
/// <summary>
/// Decodes a string that has been encoded in QUOTED-PRINTABLE format,
/// interpreting the decoded bytes with the platform default encoding.
/// </summary>
/// <param name="value">
/// A string that has been encoded in QUOTED-PRINTABLE.
/// </param>
/// <returns>
/// The result of calling the two-argument overload with
/// <see cref="Encoding.Default"/> as the charset.
/// </returns>
public static string DecodeQuotedPrintable(string value)
{
    // No charset was supplied by the caller, so use the platform default.
    Encoding fallbackCharset = Encoding.Default;
    string decoded = DecodeQuotedPrintable(value, fallbackCharset);
    return decoded;
}

/// <summary>
/// Decodes a string that has been encoded in QUOTED-PRINTABLE format.
/// </summary>
/// <param name="value">
/// A string that has been encoded in QUOTED-PRINTABLE.
/// </param>
/// <param name="encoding">
/// The character set used to convert the decoded bytes into text.
/// </param>
/// <returns>
/// The decoded string.
/// </returns>
public static string DecodeQuotedPrintable(string value)
public static string DecodeQuotedPrintable(string value, Encoding encoding)
{

if (string.IsNullOrEmpty(value))
return value;

StringBuilder builder = new StringBuilder();
char firstHexChar = '\x0';
QuotedPrintableState state = QuotedPrintableState.None;

System.Collections.Generic.List<Char> charList = new System.Collections.Generic.List<Char>();

foreach (char c in value)
{

Expand All @@ -409,7 +420,7 @@ public static string DecodeQuotedPrintable(string value)
}
else
{
builder.Append(c);
charList.Add(c);
}
break;

Expand Down Expand Up @@ -451,7 +462,7 @@ public static string DecodeQuotedPrintable(string value)
// character and assume this equal sign marks
// the beginning of a sequence.

builder.Append('=');
charList.Add('=');
state = QuotedPrintableState.ExpectingHexChar1;

}
Expand All @@ -463,8 +474,9 @@ public static string DecodeQuotedPrintable(string value)
// not a hex digit, a carriage return, or an
// equal sign. It is bad data.

builder.Append('=');
builder.Append(c);
charList.Add('=');
charList.Add(c);

state = QuotedPrintableState.None;
}
break;
Expand All @@ -487,7 +499,8 @@ public static string DecodeQuotedPrintable(string value)
(DecodeHexadecimal(firstHexChar) << 4) +
DecodeHexadecimal(c);

builder.Append((char)charValue);
charList.Add((char)charValue);

state = QuotedPrintableState.None;

}
Expand All @@ -500,9 +513,9 @@ public static string DecodeQuotedPrintable(string value)
// the partial sequence is dumped to the output
// and skipped.

builder.Append('=');
builder.Append(firstHexChar);
builder.Append(c);
charList.Add('=');
charList.Add(firstHexChar);
charList.Add(c);
state = QuotedPrintableState.None;

}
Expand Down Expand Up @@ -531,7 +544,7 @@ public static string DecodeQuotedPrintable(string value)
}
else
{
builder.Append(c);
charList.Add(c);
state = QuotedPrintableState.None;
}

Expand All @@ -547,21 +560,29 @@ public static string DecodeQuotedPrintable(string value)
switch (state)
{
case QuotedPrintableState.ExpectingHexChar1:
builder.Append('=');
charList.Add('=');
break;

case QuotedPrintableState.ExpectingHexChar2:
builder.Append('=');
builder.Append(firstHexChar);
charList.Add('=');
charList.Add(firstHexChar);
break;

case QuotedPrintableState.ExpectingLineFeed:
builder.Append('=');
builder.Append('\r');
charList.Add('=');
charList.Add('\r');
break;
}

return builder.ToString();
var by = new byte[charList.Count];
for (int i = 0; i < charList.Count; i++)
{
by[i] = Convert.ToByte(charList[i]);
}

var ret = encoding.GetString(by);

return ret;

}

Expand Down Expand Up @@ -2077,6 +2098,14 @@ public vCardProperty ReadProperty(TextReader reader)
property.Subproperties.GetValue("ENCODING",
new string[] { "B", "BASE64", "QUOTED-PRINTABLE" });

var hasCharset = property.Subproperties.Contains("CHARSET");
var charsetEncoding = Encoding.Default;
if (hasCharset)
{
var charsetEncodingName = property.Subproperties.GetValue("CHARSET");
charsetEncoding = GetCharsetEncoding(charsetEncodingName);
}

// Convert the encoding name into its corresponding
// vCardEncoding enumeration value.

Expand Down Expand Up @@ -2135,7 +2164,7 @@ public vCardProperty ReadProperty(TextReader reader)
break;

case vCardEncoding.QuotedPrintable:
property.Value = DecodeQuotedPrintable(rawValue);
property.Value = DecodeQuotedPrintable(rawValue, charsetEncoding);
break;

default:
Expand All @@ -2149,6 +2178,19 @@ public vCardProperty ReadProperty(TextReader reader)

}


/// <summary>
/// Maps the value of a CHARSET subproperty to an <see cref="Encoding"/>.
/// </summary>
/// <param name="encodingName">
/// The charset name as read from the property (for example "UTF-8").
/// May be null, empty, or a name the platform does not recognize.
/// </param>
/// <returns>
/// The matching encoding, or <see cref="Encoding.Default"/> when the name
/// is missing or unsupported. This is the same default the reader uses
/// when a property has no CHARSET subproperty at all.
/// </returns>
private Encoding GetCharsetEncoding(string encodingName)
{
    if (encodingName == null)
        return Encoding.Default;

    string name = encodingName.Trim();

    if (name.Length == 0)
        return Encoding.Default;

    // Fast paths for the common charsets; charset names are compared
    // without regard to case.
    if (string.Equals(name, "UTF-8", StringComparison.OrdinalIgnoreCase))
        return Encoding.UTF8;

    if (string.Equals(name, "ASCII", StringComparison.OrdinalIgnoreCase))
        return Encoding.ASCII;

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        // An unknown or unsupported charset should not abort reading the
        // card; decode with the default encoding instead.
        return Encoding.Default;
    }
}
#endregion

}
Expand Down

0 comments on commit bbd9b6e

Please sign in to comment.