fixed unicode issues in csv encoder

hivesolutions · Nov 19, 2014 · 7b4501d · 7b4501d
1 parent 0574cba
commit 7b4501d
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 16 deletions.
diff --git a/misc/src/csv_c/serializer.py b/misc/src/csv_c/serializer.py
@@ -126,12 +126,13 @@ def _chunk(object, string_buffer):
         header_value = SEPARATOR_CHARACTER.join(attribute_names) + NEWLINE_CHARACTER
 
         # verifies if the header is encoded as an unicode string
-        # if that's the case it must be converted into a raw
-        # bytes string using the default encoding for writing
+        # if that's not the case it must be converted into a
+        # unicode string by decoding it as utf-8 before writing
         is_unicode = type(header_value) == colony.legacy.UNICODE
-        if is_unicode: header_value = header_value.encode(DEFAULT_ENCODING)
+        if not is_unicode: header_value = header_value.decode("utf-8")
 
-        # writes the header value to the string buffer
+        # writes the header value to the string buffer, note that
+        # this value is defined as a unicode based string
         string_buffer.write(header_value)
 
     # in case the generator mode is defined we must run the proper
@@ -198,11 +199,11 @@ def _chunk_line(string_buffer, object_item, attribute_names = None, map_mode = F
 
         # writes the separator character and then increments
         # the current index so that it's possible to count values
-        string_buffer.write(SEPARATOR_CHARACTER)
+        string_buffer.write(colony.legacy.u(SEPARATOR_CHARACTER))
         index += 1
 
     # writes the new line in the string buffer
-    string_buffer.write(NEWLINE_CHARACTER)
+    string_buffer.write(colony.legacy.u(NEWLINE_CHARACTER))
 
 def _attribute_names(object_item, object = [], sort = True):
     # creates the first and initial set of attribute names

diff --git a/misc/src/csv_c/test.py b/misc/src/csv_c/test.py
@@ -62,6 +62,9 @@ def test_dumps(self):
         result = self.system.dumps(test_mocks.SIMPLE_OBJECT, encoding = None)
         self.assertEqual(result, test_mocks.SIMPLE_CSV)
 
+        result = self.system.dumps(test_mocks.SIMPLE_RAW, encoding = None)
+        self.assertEqual(result, test_mocks.SIMPLE_CSV)
+
     def test_loads(self):
         result = self.system.loads(test_mocks.SIMPLE_CSV)
         self.assertEqual(result, test_mocks.SIMPLE_RAW)
diff --git a/misc/src/csv_c/test_mocks.py b/misc/src/csv_c/test_mocks.py
@@ -40,25 +40,25 @@
 import colony
 
 SIMPLE_OBJECT = [
-    ["name", "age", "country"],
-    ["João", 24, "Portugal"],
-    ["Michael", 12, "Ireland"]
+    ["age", "country", "name"],
+    [24, "Portugal", "João"],
+    [12, "Ireland", "Michael"]
 ]
 
-SIMPLE_CSV = colony.legacy.u("""name;age;country
-João;24;Portugal
-Michael;12;Ireland
+SIMPLE_CSV = colony.legacy.u("""age;country;name
+24;Portugal;João
+12;Ireland;Michael
 """)
 
 SIMPLE_RAW = [
     dict(
-        name = colony.legacy.u("João"),
         age = colony.legacy.u("24"),
-        country = colony.legacy.u("Portugal")
+        country = colony.legacy.u("Portugal"),
+        name = colony.legacy.u("João")
     ),
     dict(
-        name = colony.legacy.u("Michael"),
         age = colony.legacy.u("12"),
-        country = colony.legacy.u("Ireland")
+        country = colony.legacy.u("Ireland"),
+        name = colony.legacy.u("Michael")
     )
 ]