From 8fdddb7dc631bdd4812b2f5ecba55ae003480303 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats <16490766+yevgenypats@users.noreply.github.com> Date: Tue, 18 Apr 2023 23:21:06 +0300 Subject: [PATCH] fix: Disallow null character in strings per utf8 spec --- schema/arrow.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/schema/arrow.go b/schema/arrow.go index a5262dd327..f5fab69de1 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -2,6 +2,7 @@ package schema import ( "fmt" + "strings" "github.com/goccy/go-json" @@ -270,7 +271,9 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem } case TypeString: if c[j][i].(*Text).Status == Present { - bldr.Field(i).(*array.StringBuilder).Append(c[j][i].(*Text).Str) + // In the new type system we won't allow null characters in strings, as they are not valid UTF-8 + // https://github.com/apache/arrow/pull/35161#discussion_r1170516104 + bldr.Field(i).(*array.StringBuilder).Append(strings.ReplaceAll(c[j][i].(*Text).Str, "\x00", "")) } else { bldr.Field(i).(*array.StringBuilder).AppendNull() } @@ -285,7 +288,7 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem listBldr := bldr.Field(i).(*array.ListBuilder) listBldr.Append(true) for _, str := range c[j][i].(*TextArray).Elements { - listBldr.ValueBuilder().(*array.StringBuilder).Append(str.Str) + listBldr.ValueBuilder().(*array.StringBuilder).Append(strings.ReplaceAll(str.Str, "\x00", "")) } } else { bldr.Field(i).(*array.ListBuilder).AppendNull()