Skip to content

Commit

Permalink
Use pool length in signed int conversion (#122)
Browse files Browse the repository at this point in the history
  • Loading branch information
dmbates committed Feb 4, 2021
1 parent 9eefced commit 9ae0a9f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/arraytypes/dictencoding.jl
Expand Up @@ -90,6 +90,11 @@ isdictencoded(d::DictEncoded) = true
# Fallback: a value with no more specific method is reported as not dictionary-encoded.
isdictencoded(x) = false
# A `Compressed` whose second type parameter is a `DictEncoded` subtype is reported as dictionary-encoded.
isdictencoded(c::Compressed{Z, A}) where {Z, A <: DictEncoded} = true

"""
    signedtype(n::Integer)

Return the narrowest of `Int8`, `Int16`, `Int32` and `Int64` whose `typemax` is at
least `n`. When `n` exceeds `typemax(Int64)`, `Int64` is returned as the fallback.
"""
function signedtype(n::Integer)
    # Try the narrower types in increasing width; the first one that fits wins.
    for T in (Int8, Int16, Int32)
        n ≤ typemax(T) && return T
    end
    # Either `n` needs 64 bits, or nothing fits and Int64 is the widest choice available.
    return Int64
end

# Map each unsigned integer type to the signed integer type of the same bit width.
signedtype(::Type{UInt8}) = Int8
signedtype(::Type{UInt16}) = Int16
signedtype(::Type{UInt32}) = Int32
Expand Down Expand Up @@ -122,7 +127,7 @@ function arrowvector(::DictEncodedType, x, i, nl, fi, de, ded, meta; dictencode:
else
pool = DataAPI.refpool(x)
refa = DataAPI.refarray(x)
inds = copyto!(similar(Vector{signedtype(eltype(refa))}, length(refa)), refa)
inds = copyto!(similar(Vector{signedtype(length(pool))}, length(refa)), refa)
end
# horrible hack? yes. better than taking CategoricalArrays dependency? also yes.
if typeof(pool).name.name == :CategoricalRefPool
Expand Down
7 changes: 7 additions & 0 deletions test/runtests.jl
Expand Up @@ -235,6 +235,13 @@ av = Arrow.toarrowvector(CategoricalArray(["a", "bb", "ccc"]))
@test length(av) == 3
@test eltype(av) == String

# 121: a pool of 130 values held with UInt8 refs must yield Int16 indices

a = PooledArray(repeat(string.('S', 1:130), inner=5), compress=true)
@test eltype(a.refs) == UInt8
av = Arrow.toarrowvector(a)
@test eltype(av.indices) == Int16

end # @testset "misc"

end

0 comments on commit 9ae0a9f

Please sign in to comment.