Skip to content

Commit

Permalink
feat: support reading Decimal types from Parquet files.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexpearce committed Jun 10, 2024
1 parent 1d69e69 commit 709aa67
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 2 deletions.
2 changes: 2 additions & 0 deletions lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ defmodule Explorer.Shared do
do: dtype

def normalise_dtype(dtype) when dtype in @scalar_types, do: dtype
def normalise_dtype({:decimal, _, _} = dtype), do: dtype
def normalise_dtype(dtype) when dtype in [:float, :f64], do: {:f, 64}
def normalise_dtype(dtype) when dtype in [:integer, :s64], do: {:s, 64}
def normalise_dtype(:f32), do: {:f, 32}
Expand Down Expand Up @@ -547,6 +548,7 @@ defmodule Explorer.Shared do
"""
def dtype_to_string({:naive_datetime, p}), do: "naive_datetime[#{precision_string(p)}]"
def dtype_to_string({:datetime, p, tz}), do: "datetime[#{precision_string(p)}, #{tz}]"
def dtype_to_string({:decimal, p, s}), do: "decimal[#{p}, #{s}]"
def dtype_to_string({:duration, p}), do: "duration[#{precision_string(p)}]"
def dtype_to_string({:list, dtype}), do: "list[" <> dtype_to_string(dtype) <> "]"
def dtype_to_string({:struct, fields}), do: "struct[#{length(fields)}]"
Expand Down
1 change: 1 addition & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ defmodule Explorer.MixProject do
{:table_rex, "~> 3.1.1 or ~> 4.0.0"},
{:castore, "~> 1.0", optional: true},
{:adbc, "~> 0.1", optional: true},
{:decimal, "~> 2.0"},

## Optional
{:rustler, "~> 0.32.0", optional: not (@dev? or @force_build?)},
Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"cowboy": {:hex, :cowboy, "2.10.0", "ff9ffeff91dae4ae270dd975642997afe2a1179d94b1887863e43f681a203e26", [:make, :rebar3], [{:cowlib, "2.12.1", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "3afdccb7183cc6f143cb14d3cf51fa00e53db9ec80cdcd525482f5e99bc41d6b"},
"cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"},
"cowlib": {:hex, :cowlib, "2.12.1", "a9fa9a625f1d2025fe6b462cb865881329b5caff8f1854d1cbc9f9533f00e1e1", [:make, :rebar3], [], "hexpm", "163b73f6367a7341b33c794c4e88e7dbfe6498ac42dcd69ef44c5bc5507c8db0"},
"decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
"deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
"dll_loader_helper": {:hex, :dll_loader_helper, "0.1.11", "f553988162a3e4a1f1fa31708922f812f8fe8b67fb6652b9532c2c1c846a68c8", [:make, :mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}], "hexpm", "51b3fbc32d8f76356367c68561051d3afc872b80f8c5b7bd566914878e7159b8"},
"dll_loader_helper_beam": {:hex, :dll_loader_helper_beam, "1.2.2", "b86f97ec8fc64770c87468e41969eb309d87b29dd5a439b667e5954f85f8f65a", [:rebar3], [], "hexpm", "0e6119edde0ef5e42b4fe22d7dc71b7462e08573cee977c01a26ec5d9cd94a9a"},
Expand Down
8 changes: 8 additions & 0 deletions native/explorer/src/datatypes/ex_dtypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ pub enum ExSeriesDtype {
Boolean,
Category,
Date,
Decimal(usize, usize),
F(u8),
S(u8),
U(u8),
Expand All @@ -79,6 +80,10 @@ impl TryFrom<&DataType> for ExSeriesDtype {
DataType::Boolean => Ok(ExSeriesDtype::Boolean),
DataType::Categorical(_, _) => Ok(ExSeriesDtype::Category),
DataType::Date => Ok(ExSeriesDtype::Date),
DataType::Decimal(precision, scale) => Ok(ExSeriesDtype::Decimal(
precision.expect("unexpected null decimal precision"),
scale.expect("unexpected null decimal scale"),
)),
DataType::Float64 => Ok(ExSeriesDtype::F(64)),
DataType::Float32 => Ok(ExSeriesDtype::F(32)),
DataType::Int8 => Ok(ExSeriesDtype::S(8)),
Expand Down Expand Up @@ -133,6 +138,9 @@ impl TryFrom<&ExSeriesDtype> for DataType {
Ok(DataType::Categorical(None, CategoricalOrdering::default()))
}
ExSeriesDtype::Date => Ok(DataType::Date),
ExSeriesDtype::Decimal(scale, precision) => {
Ok(DataType::Decimal(Some(*scale), Some(*precision)))
}
ExSeriesDtype::F(64) => Ok(DataType::Float64),
ExSeriesDtype::F(32) => Ok(DataType::Float32),
ExSeriesDtype::F(size) => Err(ExplorerError::Other(format!(
Expand Down
62 changes: 60 additions & 2 deletions native/explorer/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ use chrono_tz::OffsetName;
use chrono_tz::Tz;

use crate::atoms::{
self, calendar, day, hour, infinity, microsecond, millisecond, minute, month, nan, nanosecond,
neg_infinity, precision, second, std_offset, time_zone, utc_offset, value, year, zone_abbr,
self, calendar, coef, day, exp, hour, infinity, microsecond, millisecond, minute, month, nan,
nanosecond, neg_infinity, precision, second, sign, std_offset, time_zone, utc_offset, value,
year, zone_abbr,
};
use crate::datatypes::{
days_to_date, time64ns_to_time, timestamp_to_datetime, ExSeries, ExSeriesRef,
Expand Down Expand Up @@ -645,6 +646,60 @@ macro_rules! series_to_iovec {
}};
}

// Here we build the Decimal struct manually, as it's much faster than using Decimal NifStruct
// This is because we already have the keys (we know this at compile time), and the types,
// so we can build the struct directly.
#[inline]
fn decimal_struct_keys(env: Env) -> [NIF_TERM; 4] {
return [
atom::__struct__().encode(env).as_c_arg(),
coef().encode(env).as_c_arg(),
exp().encode(env).as_c_arg(),
sign().encode(env).as_c_arg(),
];
}

macro_rules! unsafe_encode_decimal {
($v: ident, $decimal_struct_keys: ident, $decimal_module: ident, $decimal_scale: ident, $env: ident) => {{
let coef = $v.abs();
let sign = $v.signum();
let exp = -($decimal_scale as isize);
unsafe {
Term::new(
$env,
map::make_map_from_arrays(
$env.as_c_arg(),
$decimal_struct_keys,
&[
$decimal_module,
coef.encode($env).as_c_arg(),
exp.encode($env).as_c_arg(),
sign.encode($env).as_c_arg(),
],
)
.unwrap(),
)
}
}};
}

#[inline]
fn decimal_series_to_list<'b>(
s: &Series,
scale: usize,
env: Env<'b>,
) -> Result<Term<'b>, ExplorerError> {
let decimal_struct_keys = &decimal_struct_keys(env);
let decimal_module = atoms::decimal_module().encode(env).as_c_arg();

Ok(unsafe_iterator_series_to_list!(
env,
s.decimal()?.into_iter().map(|option| option
.map(|v| unsafe_encode_decimal!(v, decimal_struct_keys, decimal_module, scale, env))
.encode(env))
))
}

// API

pub fn resource_term_from_value<'b>(
Expand Down Expand Up @@ -738,6 +793,9 @@ pub fn list_from_series(s: ExSeries, env: Env) -> Result<Term, ExplorerError> {
DataType::Datetime(time_unit, Some(time_zone)) => {
datetime_series_to_list(&s, *time_unit, time_zone.clone().to_string(), env)
}
DataType::Decimal(_precision, scale) => {
decimal_series_to_list(&s, scale.expect("unexpected null scale precision"), env)
}
DataType::Duration(time_unit) => duration_series_to_list(&s, *time_unit, env),
DataType::Binary => generic_binary_series_to_list(&s.resource, &s, env),
DataType::String => generic_string_series_to_list(&s, env),
Expand Down
4 changes: 4 additions & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ mod atoms {
calendar_iso_module = "Elixir.Calendar.ISO",
date_module = "Elixir.Date",
datetime_module = "Elixir.DateTime",
decimal_module = "Elixir.Decimal",
duration_module = "Elixir.Explorer.Duration",
naive_datetime_module = "Elixir.NaiveDateTime",
time_module = "Elixir.Time",
Expand All @@ -73,6 +74,9 @@ mod atoms {
time_zone,
utc_offset,
zone_abbr,
coef,
exp,
sign,
}
}

Expand Down

0 comments on commit 709aa67

Please sign in to comment.