Skip to content
Permalink
Browse files
Pass "ArrowVector" through arrowvector calls (#46)
If users construct `ArrowVector` types themselves, or read Arrow data and then
write it back out, we can be a bit more efficient by passing those vectors through without making copies.
  • Loading branch information
quinnj committed Oct 23, 2020
1 parent 799ef10 commit 54ebbc617b2e933354293fd37e77b2c817289dda
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 18 deletions.
@@ -44,6 +44,8 @@ end
return v
end

# Pass-through method: `x` is already a `BoolVector`, so it is handed back
# unchanged instead of being rebuilt. All other positional arguments and any
# keyword arguments are accepted for signature compatibility and ignored.
function arrowvector(::BoolType, x::BoolVector, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::BoolType, x, i, nl, fi, de, ded, meta; kw...)
validity = ValidityBitmap(x)
len = length(x)
@@ -75,6 +75,8 @@ dictencodeid(colidx, nestedlevel, fieldid) = (Int64(nestedlevel) << 48) | (Int64
# Return the id stored on the encoding of a dict-encoded vector.
function getid(d::DictEncoded)
    return d.encoding.id
end

# Same lookup for a compressed wrapper whose payload (`c.data`) is dict-encoded:
# reach through the wrapper to the payload's encoding.
function getid(c::Compressed{Z, A}) where {Z, A <: DictEncoded}
    payload = c.data
    return payload.encoding.id
end

# Pass-through method: `x` is already a `DictEncoded` vector, so it is returned
# as-is rather than being encoded again. The remaining positional arguments and
# keyword arguments are accepted for signature compatibility and ignored.
function arrowvector(::DictEncodedType, x::DictEncoded, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::DictEncodedType, x, i, nl, fi, de, ded, meta; dictencode::Bool=false, dictencodenested::Bool=false, kw...)
@assert x isa DictEncode
id = x.id == -1 ? dictencodeid(i, nl, fi) : x.id
@@ -83,6 +83,8 @@ end
return x, (i + 1, chunk, chunk_i, len)
end

# Pass-through method: `x` is already a `FixedSizeList`, so it is returned
# unchanged instead of being converted again. The remaining positional
# arguments and keyword arguments are accepted and ignored.
function arrowvector(::FixedSizeListType, x::FixedSizeList, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::FixedSizeListType, x, i, nl, fi, de, ded, meta; kw...)
len = length(x)
validity = ValidityBitmap(x)
@@ -173,6 +173,8 @@ end
return x, (i, chunk, chunk_i, chunk_len, len)
end

# Pass-through method: `x` is already a `List`, so it is returned unchanged
# instead of being converted again. The remaining positional arguments and
# keyword arguments are accepted and ignored.
function arrowvector(::ListType, x::List, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::ListType, x, i, nl, fi, de, ded, meta; largelists::Bool=false, kw...)
len = length(x)
validity = ValidityBitmap(x)
@@ -37,6 +37,8 @@ end
# A missing map entry has no key/value pairs; propagate `missing`.
function keyvalues(KT, ::Missing)
    return missing
end

# Turn a dictionary into a vector with one `KT(key, value)` element per entry,
# in the dictionary's own iteration order.
function keyvalues(KT, x::AbstractDict)
    return [KT(key, val) for (key, val) in pairs(x)]
end

# Pass-through method: `x` is already a `Map`, so it is returned unchanged
# instead of being converted again. The remaining positional arguments and
# keyword arguments are accepted and ignored.
function arrowvector(::MapType, x::Map, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::MapType, x, i, nl, fi, de, ded, meta; largelists::Bool=false, kw...)
len = length(x)
validity = ValidityBitmap(x)
@@ -58,6 +58,8 @@ end
return v
end

# Pass-through method: `x` is already a `Primitive`, so it is returned
# unchanged instead of being wrapped again. The remaining positional arguments
# (including `meta`) and keyword arguments are accepted and ignored.
function arrowvector(::PrimitiveType, x::Primitive, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::PrimitiveType, x, i, nl, fi, de, ded, meta; kw...)
validity = ValidityBitmap(x)
return Primitive(eltype(x), UInt8[], validity, x, length(x), meta)
@@ -70,6 +70,8 @@ Base.@propagate_inbounds function Base.getindex(A::ToStruct{T, j}, i::Integer) w
return x === missing ? ArrowTypes.default(T) : getfield(x, j)
end

# Pass-through method: `x` is already a `Struct`, so it is returned unchanged
# instead of being converted again. The remaining positional arguments and
# keyword arguments are accepted and ignored.
function arrowvector(::StructType, x::Struct, i, nl, fi, de, ded, meta; kw...)
    return x
end

function arrowvector(::StructType, x, i, nl, fi, de, ded, meta; kw...)
len = length(x)
validity = ValidityBitmap(x)
@@ -151,24 +151,6 @@ Base.@propagate_inbounds function Base.getindex(A::ToSparseUnion{T}, i::Integer)
return @inbounds x isa T ? x : ArrowTypes.default(T)
end

# Entry point for union-typed input: wrap `x` in a `DenseUnionVector` (the
# default) or a `SparseUnionVector` depending on `denseunions`, then forward to
# the `arrowvector` method that takes the wrapped vector. `denseunions` is
# passed along explicitly together with the remaining keyword arguments.
function arrowvector(U::Union, x, i, nl, fi, de, ded, meta; denseunions::Bool=true, kw...)
    wrapped = if denseunions
        DenseUnionVector(x)
    else
        SparseUnionVector(x)
    end
    return arrowvector(wrapped, i, nl, fi, de, ded, meta; denseunions=denseunions, kw...)
end

# Build an Arrow union column from `x`.
#
# `unionmode(eltype(x))` selects the layout:
#   * dense  -> a `DenseUnion` built from the type ids, offsets and per-type
#               child data returned by `todense`;
#   * sparse -> a `SparseUnion` with one child per field of `eltype(UT)`, each
#               produced from a `ToSparseUnion` view over the source.
#
# Children are converted recursively with `arrowvector` at nesting level
# `nl + 1`, using their position as the field id and `nothing` as metadata;
# `meta` is attached to the union itself. `fi` is not used by this method.
#
# The unwrapped source is bound to a fresh local in each branch instead of
# reassigning the argument `x`. A variable that is assigned more than once and
# captured by a closure (the generator in the sparse branch) gets boxed, which
# makes every use of it in the function type-unstable.
function arrowvector(::UnionType, x, i, nl, fi, de, ded, meta; kw...)
    UT = eltype(x)
    if unionmode(UT) == Meta.UnionMode.Dense
        # Strip the DenseUnionVector wrapper, if present, to get at the raw data.
        dense_src = x isa DenseUnionVector ? x.itr : x
        typeids, offsets, data = todense(UT, dense_src)
        # y == (child index, child data); the index doubles as the field id.
        data2 = map(y -> arrowvector(y[2], i, nl + 1, y[1], de, ded, nothing; kw...), enumerate(data))
        return DenseUnion{UT, typeof(data2)}(UInt8[], UInt8[], typeids, offsets, data2, meta)
    else
        # Strip the SparseUnionVector wrapper, if present, to get at the raw data.
        sparse_src = x isa SparseUnionVector ? x.itr : x
        typeids = sparsetypeids(UT, sparse_src)
        data3 = Tuple(arrowvector(ToSparseUnion(fieldtype(eltype(UT), j), sparse_src), i, nl + 1, j, de, ded, nothing; kw...) for j in 1:fieldcount(eltype(UT)))
        return SparseUnion{UT, typeof(data3)}(UInt8[], typeids, data3, meta)
    end
end

function compress(Z::Meta.CompressionType, comp, x::A) where {A <: DenseUnion}
len = length(x)
nc = nullcount(x)
@@ -208,6 +190,26 @@ end
return v
end

# Entry point for union-typed input. Chooses the wrapper for `x` from the
# `denseunions` keyword (dense by default) and delegates to the `arrowvector`
# method that accepts the wrapped vector, forwarding `denseunions` and all
# other keyword arguments.
function arrowvector(U::Union, x, i, nl, fi, de, ded, meta; denseunions::Bool=true, kw...)
    if denseunions
        unionvec = DenseUnionVector(x)
    else
        unionvec = SparseUnionVector(x)
    end
    return arrowvector(unionvec, i, nl, fi, de, ded, meta; denseunions=denseunions, kw...)
end

# Pass-through method: `x` is already a `DenseUnion` or `SparseUnion`, so it is
# returned unchanged instead of being rebuilt. The remaining positional
# arguments and keyword arguments are accepted and ignored.
function arrowvector(::UnionType, x::Union{DenseUnion, SparseUnion}, i, nl, fi, de, ded, meta; kw...)
    return x
end

# Build an Arrow union column from `x`.
#
# `unionmode(eltype(x))` selects the layout:
#   * dense  -> a `DenseUnion` built from the type ids, offsets and per-type
#               child data returned by `todense`;
#   * sparse -> a `SparseUnion` with one child per field of `eltype(UT)`, each
#               produced from a `ToSparseUnion` view over the source.
#
# Children are converted recursively with `arrowvector` at nesting level
# `nl + 1`, using their position as the field id and `nothing` as metadata;
# `meta` is attached to the union itself. `fi` is not used by this method.
#
# The unwrapped source is bound to a fresh local in each branch instead of
# reassigning the argument `x`. A variable that is assigned more than once and
# captured by a closure (the generator in the sparse branch) gets boxed, which
# makes every use of it in the function type-unstable.
function arrowvector(::UnionType, x, i, nl, fi, de, ded, meta; kw...)
    UT = eltype(x)
    if unionmode(UT) == Meta.UnionMode.Dense
        # Strip the DenseUnionVector wrapper, if present, to get at the raw data.
        dense_src = x isa DenseUnionVector ? x.itr : x
        typeids, offsets, data = todense(UT, dense_src)
        # y == (child index, child data); the index doubles as the field id.
        data2 = map(y -> arrowvector(y[2], i, nl + 1, y[1], de, ded, nothing; kw...), enumerate(data))
        return DenseUnion{UT, typeof(data2)}(UInt8[], UInt8[], typeids, offsets, data2, meta)
    else
        # Strip the SparseUnionVector wrapper, if present, to get at the raw data.
        sparse_src = x isa SparseUnionVector ? x.itr : x
        typeids = sparsetypeids(UT, sparse_src)
        data3 = Tuple(arrowvector(ToSparseUnion(fieldtype(eltype(UT), j), sparse_src), i, nl + 1, j, de, ded, nothing; kw...) for j in 1:fieldcount(eltype(UT)))
        return SparseUnion{UT, typeof(data3)}(UInt8[], typeids, data3, meta)
    end
end

function compress(Z::Meta.CompressionType, comp, x::A) where {A <: SparseUnion}
len = length(x)
nc = nullcount(x)

0 comments on commit 54ebbc6

Please sign in to comment.