Skip to content
Permalink
Browse files
Add refpool, refarray and levels for DictEncoded (#161)
  • Loading branch information
dmbates committed Apr 1, 2021
1 parent 86ef345 commit df08c9c5ca22311329cd5e2cd7d8a68c16d0d583
Showing 2 changed files with 18 additions and 1 deletion.
@@ -279,6 +279,12 @@ function compress(Z::Meta.CompressionType, comp, x::A) where {A <: DictEncoded}
return Compressed{Z, A}(x, [validity, inds], len, nc, Compressed[])
end

# Return the distinct non-missing values of the dictionary backing `x`.
# When the pool's element type admits `missing`, the missing entries are
# dropped and the element type is narrowed to its non-missing counterpart.
function DataAPI.levels(x::DictEncoded)
    # refpool returns a copy, so deleting from it leaves x's encoding untouched
    pool = DataAPI.refpool(x)
    T = eltype(pool)
    # element type cannot hold `missing`: the pool already is the set of levels
    if !(Missing <: T)
        return pool
    end
    missingmask = ismissing.(pool)
    trimmed = deleteat!(pool, missingmask)
    return convert(AbstractArray{nonmissingtype(T)}, trimmed)
end

function makenodesbuffers!(col::DictEncoded{T, S}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, S}
len = length(col)
nc = nullcount(col)
@@ -297,6 +303,10 @@ function makenodesbuffers!(col::DictEncoded{T, S}, fieldnodes, fieldbuffers, buf
return bufferoffset
end

# Return the reference codes of `x`: its stored indices shifted up by one,
# with the increment expressed in the index type `S`.
# NOTE(review): presumably this maps zero-based Arrow dictionary indices onto
# 1-based positions in `DataAPI.refpool(x)` — confirm against the encoder.
function DataAPI.refarray(x::DictEncoded{T, S}) where {T, S}
    shift = one(S)
    return x.indices .+ shift
end

# Return a copy of the dictionary values stored in `x`'s encoding.
# A copy is handed out (rather than the data itself) so that callers which
# mutate the result, such as `DataAPI.levels`, do not alter the encoding.
function DataAPI.refpool(x::DictEncoded)
    dictvalues = x.encoding.data
    return copy(dictvalues)
end

function writebuffer(io, col::DictEncoded, alignment)
@debug 1 "writebuffer: col = $(typeof(col))"
@debug 2 col
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

using Test, Arrow, Tables, Dates, PooledArrays, TimeZones, UUIDs, CategoricalArrays
using Test, Arrow, Tables, Dates, PooledArrays, TimeZones, UUIDs, CategoricalArrays, DataAPI

include(joinpath(dirname(pathof(Arrow)), "../test/testtables.jl"))
include(joinpath(dirname(pathof(Arrow)), "../test/integrationtest.jl"))
@@ -208,6 +208,13 @@ av = Arrow.toarrowvector(CategoricalArray(["a", "bb", "ccc"]))
@test length(av) == 3
@test eltype(av) == String

# 120: DataAPI methods on the Arrow vector built from a PooledArray
x = PooledArray(["hey", missing])
x2 = Arrow.toarrowvector(x)
# Assert on x2 (the Arrow vector), not x: querying x would only exercise
# PooledArrays' own DataAPI methods and leave Arrow's methods untested.
@test eltype(DataAPI.refpool(x2)) == Union{Missing, String}
@test eltype(DataAPI.levels(x2)) == String
@test DataAPI.refarray(x2) == [1, 2]

# 121
a = PooledArray(repeat(string.('S', 1:130), inner=5), compress=true)
@test eltype(a.refs) == UInt8

0 comments on commit df08c9c

Please sign in to comment.