From b01b21c01024401603c892b1bc5305f39183ce35 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 15 Mar 2016 14:38:14 +0000 Subject: [PATCH] SerDeUtil must use ISO-8859-1 actually --- .../src/main/scala/org/apache/spark/api/python/SerDeUtil.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index bf9bb3d299b2a..55db938f09a91 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -69,7 +69,8 @@ private[spark] object SerDeUtil extends Logging { construct(args ++ Array("")) } else if (args.length == 2 && args(1).isInstanceOf[String]) { val typecode = args(0).asInstanceOf[String].charAt(0) - val data: Array[Byte] = args(1).asInstanceOf[String].getBytes(StandardCharsets.UTF_8) + // This must be ISO 8859-1 / Latin 1, not UTF-8, to interoperate correctly + val data = args(1).asInstanceOf[String].getBytes(StandardCharsets.ISO_8859_1) construct(typecode, machineCodes(typecode), data) } else { super.construct(args)