From e64fbadda6f4f1a9813491b6a3e2decf1aded318 Mon Sep 17 00:00:00 2001 From: Dustin Sallings Date: Sun, 23 Dec 2007 13:44:49 -0800 Subject: [PATCH] Use consistent character sets when transcoding string values. Defaults to UTF-8, but may be supplied at runtime. I'd like more testing around different character sets, but I'm going to have to get someone familiar with another character set to make me a test case. --- .../spy/memcached/SerializingTranscoder.java | 29 ++++++++++++++-- .../memcached/SerializingTranscoderTest.java | 33 +++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/main/java/net/spy/memcached/SerializingTranscoder.java b/src/main/java/net/spy/memcached/SerializingTranscoder.java index ebd63cb51..0d6362e69 100644 --- a/src/main/java/net/spy/memcached/SerializingTranscoder.java +++ b/src/main/java/net/spy/memcached/SerializingTranscoder.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.io.UnsupportedEncodingException; import java.util.Date; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -40,7 +41,10 @@ public final class SerializingTranscoder extends SpyObject private static final int SPECIAL_DOUBLE=(7<<8); private static final int SPECIAL_BYTEARRAY=(8<<8); + private static final String DEFAULT_CHARSET = "UTF-8"; + private int compressionThreshold=DEFAULT_COMPRESSION_THRESHOLD; + private String charset=DEFAULT_CHARSET; /** * Set the compression threshold to the given number of bytes. This @@ -53,6 +57,19 @@ public void setCompressionThreshold(int to) { compressionThreshold=to; } + /** + * Set the character set for string value transcoding (defaults to UTF-8). + */ + public void setCharset(String to) { + // Validate the character set. + try { + new String(new byte[97], to); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + charset=to; + } + /* (non-Javadoc) * @see net.spy.memcached.Transcoder#decode(net.spy.memcached.CachedData) */ @@ -93,7 +110,11 @@ public Object decode(CachedData d) { default: assert false; } } else { - rv=new String(data); + try { + rv=new String(data, charset); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } } return rv; } @@ -106,7 +127,11 @@ public CachedData encode(Object o) { byte[] b=null; int flags=0; if(o instanceof String) { - b=((String)o).getBytes(); + try { + b=((String)o).getBytes(charset); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } } else if(o instanceof Long) { b=encodeLong((Long)o); flags |= SPECIAL_LONG; diff --git a/src/test/java/net/spy/memcached/SerializingTranscoderTest.java b/src/test/java/net/spy/memcached/SerializingTranscoderTest.java index 4dab67397..85894c0a9 100644 --- a/src/test/java/net/spy/memcached/SerializingTranscoderTest.java +++ b/src/test/java/net/spy/memcached/SerializingTranscoderTest.java @@ -2,6 +2,7 @@ package net.spy.memcached; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; @@ -45,6 +46,38 @@ public void testStrings() throws Exception { assertEquals(s1, tc.decode(cd)); } + public void testUTF8String() throws Exception { + String s1="\u2013\u00f3\u2013\u00a5\u2014\u00c4\u2013\u221e\u2013" + + "\u2264\u2014\u00c5\u2014\u00c7\u2013\u2264\u2014\u00c9\u2013" + + "\u03c0, \u2013\u00ba\u2013\u220f\u2014\u00c4."; + CachedData cd=tc.encode(s1); + // Test the stringification while we're here. + String exp="{CachedData flags=0 data=[-30, -128, -109, -61, -77, -30, " + + "-128, -109, -62, -91, -30, -128, -108, -61, -124, -30, " + + "-128, -109, -30, -120, -98, -30, -128, -109, -30, -119, " + + "-92, -30, -128, -108, -61, -123, -30, -128, -108, -61, -121, " + + "-30, -128, -109, -30, -119, -92, -30, -128, -108, -61, -119, " + + "-30, -128, -109, -49, -128, 44, 32, -30, -128, -109, -62, -70, " + + "-30, -128, -109, -30, -120, -113, -30, -128, -108, -61, -124, " + + "46]}"; + assertEquals(exp, String.valueOf(cd)); + assertEquals(0, cd.getFlags()); + assertTrue(Arrays.equals(s1.getBytes("UTF-8"), cd.getData())); + assertEquals(s1, tc.decode(cd)); + } + + public void testValidCharacterSet() { + tc.setCharset("KOI8"); + } + + public void testInvalidCharacterSet() { + try { + tc.setCharset("Dustin's Kick Ass Character Set"); + } catch(RuntimeException e) { + assertTrue(e.getCause() instanceof UnsupportedEncodingException); + } + } + public void testCompressedStringNotSmaller() throws Exception { String s1="This is a test simple string that will not be compressed."; // Reduce the compression threshold so it'll attempt to compress it.