Skip to content
Permalink
Browse files
Support large list type writing (#22)
* Support large list type writing

Implements #14. It took a few different tries, but it turns out this
isn't too bad to support. Basically, we check whether incoming columns
need to be large list types and, if so, wrap them in a new `LargeList{T}`
type, which the writing methods can intercept to write out the type,
fieldnodes, and buffers correctly.

* fix tests
  • Loading branch information
quinnj committed Oct 3, 2020
1 parent ac39f5c commit d90cc595a3e7c0953b23c9eef590d38e624e3b49
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 57 deletions.
@@ -19,9 +19,8 @@ Tables = "1" # should be 1.1 for Tables.partitions
SentinelArrays = "1"

[extras]
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Random", "PooledArrays"]
test = ["Test", "Random"]
@@ -18,6 +18,11 @@ type. So each supported writing type needs to define `default`.
"""
function default end

"""
    LargeList{T}

Field-less marker type parameterized on a wrapped type `T`. Other methods in
this file dispatch on `LargeList{T}` to select the "large" variant for `T`.
"""
struct LargeList{T} end

"""
    offsettype(T)

Return the offset integer type associated with `T`: `Int64` when `T` is a
`LargeList{...}` type, `Int32` for any other argument.
"""
function offsettype(::Type{LargeList{T}}) where {T}
    return Int64
end

function offsettype(T)
    return Int32
end

# Generic fallback: the default value for `T` is `zero(T)`.
function default(T)
    return zero(T)
end

# Generic fallback: a type without a more specific method maps to itself.
function finaljuliatype(T)
    return T
end
@@ -100,11 +105,16 @@ end
# Both the regular and the large UTF-8 metadata types map to Julia `String`.
function juliaeltype(f::Meta.Field, b::Union{Meta.Utf8, Meta.LargeUtf8})
    return String
end

# Arrow type for plain `String` columns: issue the Utf8 start/end calls on `b`
# and return `(Meta.Utf8, <result of utf8End>, nothing)`; the trailing `nothing`
# is the children slot. The LargeUtf8 variant is selected by the separate
# `LargeList{String}` method.
function arrowtype(b, ::Type{String})
    Meta.utf8Start(b)
    typeoffset = Meta.utf8End(b)
    return Meta.Utf8, typeoffset, nothing
end

# Arrow type for `String` columns wrapped in `LargeList`: issue the LargeUtf8
# start/end calls on `b` and return `(Meta.LargeUtf8, <result of end call>, nothing)`.
# NOTE(review): `largUtf8Start`/`largUtf8End` are spelled without the "e", unlike
# `largeBinaryStart`/`largeListStart` — confirm these match the names `Meta` defines.
function arrowtype(b, ::Type{LargeList{String}})
    Meta.largUtf8Start(b)
    typeoffset = Meta.largUtf8End(b)
    return Meta.LargeUtf8, typeoffset, nothing
end

# A `LargeList` wrapper uses the default value of the type it wraps.
function default(::Type{LargeList{T}}) where {T}
    return default(T)
end
# The default `String` value is the empty string.
function default(::Type{String})
    return ""
end

# Generic fallback: the data size of `x` is whatever `sizeof(x)` reports.
function datasizeof(x)
    return sizeof(x)
end
@@ -113,11 +123,15 @@ datasizeof(x::AbstractVector) = sum(datasizeof, x)
# Both the regular and the large binary metadata types map to `Vector{UInt8}`.
function juliaeltype(f::Meta.Field, b::Union{Meta.Binary, Meta.LargeBinary})
    return Vector{UInt8}
end

# Arrow type for plain `Vector{UInt8}` columns: issue the Binary start/end calls
# on `b` and return `(Meta.Binary, <result of binaryEnd>, nothing)`; the trailing
# `nothing` is the children slot. The LargeBinary variant is selected by the
# separate `LargeList{Vector{UInt8}}` method.
function arrowtype(b, ::Type{Vector{UInt8}})
    Meta.binaryStart(b)
    typeoffset = Meta.binaryEnd(b)
    return Meta.Binary, typeoffset, nothing
end

# Arrow type for `Vector{UInt8}` columns wrapped in `LargeList`: issue the
# LargeBinary start/end calls on `b` and return
# `(Meta.LargeBinary, <result of largeBinaryEnd>, nothing)`; the trailing
# `nothing` is the children slot.
function arrowtype(b, ::Type{LargeList{Vector{UInt8}}})
    Meta.largeBinaryStart(b)
    # The returned tag must be `Meta.LargeBinary` (not `Meta.Binary`) so that it
    # agrees with the LargeBinary table produced by the start/end calls, the same
    # way the `LargeList{String}` method returns `Meta.LargeUtf8`.
    return Meta.LargeBinary, Meta.largeBinaryEnd(b), nothing
end

function default(::Type{A}) where {A <: AbstractVector{T}} where {T}
a = similar(A, 1)
a[1] = default(T)
@@ -301,6 +315,12 @@ function arrowtype(b, ::Type{Vector{T}}) where {T}
return Meta.List, Meta.listEnd(b), children
end

# Arrow type for `Vector{T}` columns wrapped in `LargeList`: build the single
# child field for the element type `T`, issue the LargeList start/end calls on
# `b`, and return `(Meta.LargeList, <result of largeListEnd>, children)`.
function arrowtype(b, ::Type{LargeList{Vector{T}}}) where {T}
    # The child field is created before the start call, same order as the original.
    children = [fieldoffset(b, -1, "", T, nothing, nothing)]
    Meta.largeListStart(b)
    # The returned tag must be `Meta.LargeList` (not `Meta.List`) so that it
    # agrees with the LargeList table produced by the start/end calls.
    return Meta.LargeList, Meta.largeListEnd(b), children
end

function juliaeltype(f::Meta.Field, list::Meta.FixedSizeList)
type, _ = juliaeltype(f.children[1])
return NTuple{Int(list.listSize), type}

0 comments on commit d90cc59

Please sign in to comment.