Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove nan filtering, add null filtering #676

Merged
merged 2 commits into from
Aug 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions cpp/perspective/src/cpp/base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,11 @@ filter_op_to_str(t_filter_op op) {
case FILTER_OP_AND: {
return "and";
} break;
case FILTER_OP_IS_NAN: {
return "is_nan";
case FILTER_OP_IS_NULL: {
return "is null";
} break;
case FILTER_OP_IS_NOT_NAN: {
return "!is_nan";
case FILTER_OP_IS_NOT_NULL: {
return "is not null";
} break;
case FILTER_OP_IS_VALID: {
return "is not None";
Expand Down Expand Up @@ -340,10 +340,10 @@ str_to_filter_op(std::string str) {
return t_filter_op::FILTER_OP_AND;
} else if (str == "|") {
return t_filter_op::FILTER_OP_OR;
} else if (str == "is nan" || str == "is_nan") {
return t_filter_op::FILTER_OP_IS_NAN;
} else if (str == "is not nan" || str == "!is_nan") {
return t_filter_op::FILTER_OP_IS_NOT_NAN;
} else if (str == "is null") {
return t_filter_op::FILTER_OP_IS_NULL;
} else if (str == "is not null") {
return t_filter_op::FILTER_OP_IS_NOT_NULL;
} else if (str == "is not None") {
return t_filter_op::FILTER_OP_IS_VALID;
} else if (str == "is None") {
Expand Down
10 changes: 0 additions & 10 deletions cpp/perspective/src/cpp/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ t_config::t_config(const std::vector<std::string>& row_pivots,
, m_totals(TOTALS_BEFORE)
, m_combiner(combiner)
, m_fterms(fterms)
, m_handle_nan_sort(true)
, m_fmode(FMODE_SIMPLE_CLAUSES) {
for (const auto& p : row_pivots) {
m_row_pivots.push_back(t_pivot(p));
Expand All @@ -67,7 +66,6 @@ t_config::t_config(const std::vector<std::string>& row_pivots,
, m_totals(totals)
, m_combiner(combiner)
, m_fterms(fterms)
, m_handle_nan_sort(true)
, m_fmode(FMODE_SIMPLE_CLAUSES) {
for (const auto& p : row_pivots) {
m_row_pivots.push_back(t_pivot(p));
Expand All @@ -90,7 +88,6 @@ t_config::t_config(const std::vector<std::string>& row_pivots,
, m_totals(totals)
, m_combiner(combiner)
, m_fterms(fterms)
, m_handle_nan_sort(true)
, m_fmode(FMODE_SIMPLE_CLAUSES) {
for (const auto& p : row_pivots) {
m_row_pivots.push_back(t_pivot(p));
Expand All @@ -116,7 +113,6 @@ t_config::t_config(
: m_aggregates(aggregates)
, m_totals(TOTALS_BEFORE)
, m_combiner(FILTER_OP_AND)
, m_handle_nan_sort(true)
, m_fmode(FMODE_SIMPLE_CLAUSES) {
for (const auto& p : row_pivots) {
m_row_pivots.push_back(t_pivot(p));
Expand All @@ -129,7 +125,6 @@ t_config::t_config(const std::vector<std::string>& row_pivots, const t_aggspec&
: m_aggregates(std::vector<t_aggspec>{agg})
, m_totals(TOTALS_BEFORE)
, m_combiner(FILTER_OP_AND)
, m_handle_nan_sort(true)
, m_fmode(FMODE_SIMPLE_CLAUSES) {
for (const auto& p : row_pivots) {
m_row_pivots.push_back(t_pivot(p));
Expand Down Expand Up @@ -374,11 +369,6 @@ t_config::get_pivots() const {
return rval;
}

bool
t_config::handle_nan_sort() const {
return m_handle_nan_sort;
}

std::string
t_config::get_parent_pkey_column() const {
return m_parent_pkey_column;
Expand Down
9 changes: 3 additions & 6 deletions cpp/perspective/src/cpp/context_grouped_pkey.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ t_ctx_grouped_pkey::init() {
auto pivots = m_config.get_row_pivots();
m_tree = std::make_shared<t_stree>(pivots, m_config.get_aggregates(), m_schema, m_config);
m_tree->init();
m_traversal
= std::shared_ptr<t_traversal>(new t_traversal(m_tree, m_config.handle_nan_sort()));
m_traversal = std::shared_ptr<t_traversal>(new t_traversal(m_tree));
m_minmax = std::vector<t_minmax>(m_config.get_num_aggregates());
m_init = true;
}
Expand Down Expand Up @@ -445,8 +444,7 @@ t_ctx_grouped_pkey::reset() {
m_tree = std::make_shared<t_stree>(pivots, m_config.get_aggregates(), m_schema, m_config);
m_tree->init();
m_tree->set_deltas_enabled(get_feature_state(CTX_FEAT_DELTA));
m_traversal
= std::shared_ptr<t_traversal>(new t_traversal(m_tree, m_config.handle_nan_sort()));
m_traversal = std::shared_ptr<t_traversal>(new t_traversal(m_tree));
}

void
Expand Down Expand Up @@ -661,8 +659,7 @@ t_ctx_grouped_pkey::rebuild() {
);
#endif

m_traversal
= std::shared_ptr<t_traversal>(new t_traversal(m_tree, m_config.handle_nan_sort()));
m_traversal = std::shared_ptr<t_traversal>(new t_traversal(m_tree));

set_expansion_state(expansion_state);

Expand Down
6 changes: 2 additions & 4 deletions cpp/perspective/src/cpp/context_one.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ t_ctx1::init() {
auto pivots = m_config.get_row_pivots();
m_tree = std::make_shared<t_stree>(pivots, m_config.get_aggregates(), m_schema, m_config);
m_tree->init();
m_traversal
= std::shared_ptr<t_traversal>(new t_traversal(m_tree, m_config.handle_nan_sort()));
m_traversal = std::shared_ptr<t_traversal>(new t_traversal(m_tree));
m_minmax = std::vector<t_minmax>(m_config.get_num_aggregates());
m_init = true;
}
Expand Down Expand Up @@ -482,8 +481,7 @@ t_ctx1::reset() {
m_tree = std::make_shared<t_stree>(pivots, m_config.get_aggregates(), m_schema, m_config);
m_tree->init();
m_tree->set_deltas_enabled(get_feature_state(CTX_FEAT_DELTA));
m_traversal
= std::shared_ptr<t_traversal>(new t_traversal(m_tree, m_config.handle_nan_sort()));
m_traversal = std::shared_ptr<t_traversal>(new t_traversal(m_tree));
}

void
Expand Down
8 changes: 4 additions & 4 deletions cpp/perspective/src/cpp/context_two.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ t_ctx2::init() {
m_trees[treeidx]->init();
}

m_rtraversal = std::make_shared<t_traversal>(rtree(), m_config.handle_nan_sort());
m_rtraversal = std::make_shared<t_traversal>(rtree());

m_ctraversal = std::make_shared<t_traversal>(ctree(), m_config.handle_nan_sort());
m_ctraversal = std::make_shared<t_traversal>(ctree());
m_minmax = std::vector<t_minmax>(m_config.get_num_aggregates());
m_init = true;
}
Expand Down Expand Up @@ -806,8 +806,8 @@ t_ctx2::reset() {
m_trees[treeidx]->set_deltas_enabled(get_feature_state(CTX_FEAT_DELTA));
}

m_rtraversal = std::make_shared<t_traversal>(rtree(), m_config.handle_nan_sort());
m_ctraversal = std::make_shared<t_traversal>(ctree(), m_config.handle_nan_sort());
m_rtraversal = std::make_shared<t_traversal>(rtree());
m_ctraversal = std::make_shared<t_traversal>(ctree());
}

bool
Expand Down
2 changes: 1 addition & 1 deletion cpp/perspective/src/cpp/context_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ t_ctx0::get_column_name(t_index idx) {

void
t_ctx0::init() {
m_traversal = std::make_shared<t_ftrav>(m_config.handle_nan_sort());
m_traversal = std::make_shared<t_ftrav>();
m_deltas = std::make_shared<t_zcdeltas>();
m_init = true;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/perspective/src/cpp/data_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ t_data_table::filter_cpp(t_filter_op combiner, const std::vector<t_fterm>& fterm
tval = ft(cell_val);
}

if (!cell_val.is_valid() || !tval) {
if ((ft.m_op != FILTER_OP_IS_NULL && !cell_val.is_valid()) || !tval) {
pass = false;
break;
}
Expand Down
18 changes: 14 additions & 4 deletions cpp/perspective/src/cpp/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1300,8 +1300,14 @@ namespace binding {

template <>
bool
is_valid_filter(t_dtype type, t_val date_parser, t_val filter_term) {
if (type == DTYPE_DATE || type == DTYPE_TIME) {
is_valid_filter(t_dtype type, t_val date_parser, t_val filter_term, t_val filter_operand) {
std::string comp_str = filter_operand.as<std::string>();
t_filter_op comp = str_to_filter_op(comp_str);

if (comp == t_filter_op::FILTER_OP_IS_NULL
|| comp == t_filter_op::FILTER_OP_IS_NOT_NULL) {
return true;
} else if (type == DTYPE_DATE || type == DTYPE_TIME) {
t_val parsed_date = date_parser.call<t_val>("parse", filter_term);
return has_value(parsed_date);
} else {
Expand All @@ -1326,6 +1332,10 @@ namespace binding {
terms.push_back(mktscalar(get_interned_cstr(term.c_str())));
}
} break;
case FILTER_OP_IS_NULL:
case FILTER_OP_IS_NOT_NULL: {
terms.push_back(mktscalar(0));
} break;
default: {
switch (type) {
case DTYPE_INT32: {
Expand Down Expand Up @@ -1394,7 +1404,7 @@ namespace binding {
t_dtype type = schema.get_dtype(f[0].as<std::string>());

// validate the filter before it goes into the core engine
if (is_valid_filter(type, date_parser, f[2])) {
if (is_valid_filter(type, date_parser, f[2], f[1])) {
filter.push_back(make_filter_term(type, date_parser, f));
}
}
Expand Down Expand Up @@ -1926,4 +1936,4 @@ EMSCRIPTEN_BINDINGS(perspective) {
function("get_from_data_slice_one", &get_from_data_slice<t_ctx1>, allow_raw_pointers());
function("get_data_slice_two", &get_data_slice<t_ctx2>, allow_raw_pointers());
function("get_from_data_slice_two", &get_from_data_slice<t_ctx2>, allow_raw_pointers());
}
}
11 changes: 5 additions & 6 deletions cpp/perspective/src/cpp/flat_traversal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@

namespace perspective {

t_ftrav::t_ftrav(bool handle_nan_sort)
t_ftrav::t_ftrav()
: m_step_deletes(0)
, m_step_inserts(0)
, m_handle_nan_sort(handle_nan_sort) {
, m_step_inserts(0) {
m_index = std::make_shared<std::vector<t_mselem>>();
}

Expand Down Expand Up @@ -140,7 +139,7 @@ t_ftrav::sort_by(std::shared_ptr<const t_gstate> state, const t_config& config,
const std::vector<t_sortspec>& sortby) {
if (sortby.empty())
return;
t_multisorter sorter(get_sort_orders(sortby), m_handle_nan_sort);
t_multisorter sorter(get_sort_orders(sortby));
t_index size = m_index->size();
auto sort_elems = std::make_shared<std::vector<t_mselem>>(static_cast<size_t>(size));
m_sortby = sortby;
Expand Down Expand Up @@ -271,7 +270,7 @@ t_ftrav::step_end() {
}
}
std::swap(new_index, m_index);
t_multisorter sorter(get_sort_orders(m_sortby), m_handle_nan_sort);
t_multisorter sorter(get_sort_orders(m_sortby));
std::sort(m_index->begin(), m_index->end(), sorter);

m_pkeyidx.clear();
Expand Down Expand Up @@ -337,7 +336,7 @@ t_ftrav::reset_step_state() {
t_uindex
t_ftrav::lower_bound_row_idx(std::shared_ptr<const t_gstate> state, const t_config& config,
const std::vector<t_tscalar>& row) const {
t_multisorter sorter(get_sort_orders(m_sortby), m_handle_nan_sort);
t_multisorter sorter(get_sort_orders(m_sortby));
t_mselem target_val;

fill_sort_elem(state, config, row, target_val);
Expand Down
14 changes: 6 additions & 8 deletions cpp/perspective/src/cpp/multi_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,19 +200,17 @@ nan_compare(t_sorttype order, const t_tscalar& a, const t_tscalar& b) {
return rval;
}

t_multisorter::t_multisorter(const std::vector<t_sorttype>& order, bool handle_nans)
: m_sort_order(order)
, m_handle_nans(handle_nans) {}
t_multisorter::t_multisorter(const std::vector<t_sorttype>& order)
: m_sort_order(order) {}

t_multisorter::t_multisorter(std::shared_ptr<const std::vector<t_mselem>> elems,
const std::vector<t_sorttype>& order, bool handle_nans)
t_multisorter::t_multisorter(
std::shared_ptr<const std::vector<t_mselem>> elems, const std::vector<t_sorttype>& order)
: m_sort_order(order)
, m_elems(elems)
, m_handle_nans(handle_nans) {}
, m_elems(elems) {}

bool
t_multisorter::operator()(const t_mselem& a, const t_mselem& b) const {
return cmp_mselem(a, b, m_sort_order, m_handle_nans);
return cmp_mselem(a, b, m_sort_order);
}

bool
Expand Down
8 changes: 4 additions & 4 deletions cpp/perspective/src/cpp/scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1085,11 +1085,11 @@ t_tscalar::cmp(t_filter_op op, const t_tscalar& other) const {
case FILTER_OP_CONTAINS: {
return value.contains(other);
} break;
case FILTER_OP_IS_NAN: {
return std::isnan(to_double());
case FILTER_OP_IS_NULL: {
return m_status != STATUS_VALID;
} break;
case FILTER_OP_IS_NOT_NAN: {
return !std::isnan(to_double());
case FILTER_OP_IS_NOT_NULL: {
return m_status == STATUS_VALID;
} break;
case FILTER_OP_IS_VALID: {
return m_status == STATUS_VALID;
Expand Down
6 changes: 3 additions & 3 deletions cpp/perspective/src/cpp/sparse_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,6 @@ t_stree::gen_aggidx() {
void
t_stree::update_agg_table(t_uindex nidx, t_agg_update_info& info, t_uindex src_ridx,
t_uindex dst_ridx, t_index nstrands, const t_gstate& gstate) {
static bool const enable_sticky_nan_fix = true;
for (t_uindex idx : info.m_dst_topo_sorted) {
const t_column* src = info.m_src[idx];
t_column* dst = info.m_dst[idx];
Expand All @@ -870,8 +869,7 @@ t_stree::update_agg_table(t_uindex nidx, t_agg_update_info& info, t_uindex src_r
t_tscalar dst_scalar = dst->get_scalar(dst_ridx);
old_value.set(dst_scalar);
new_value.set(dst_scalar.add(src_scalar));
if (enable_sticky_nan_fix
&& old_value.is_nan()) // is_nan returns false for non-float types
if (old_value.is_nan()) // is_nan returns false for non-float types
{
// if we previously had a NaN, add can't make it finite again; recalculate
// entire sum in case it is now finite
Expand Down Expand Up @@ -1162,11 +1160,13 @@ t_stree::update_agg_table(t_uindex nidx, t_agg_update_info& info, t_uindex src_r
t_tscalar rval;
rval.set(std::uint64_t(0));
rval.m_type = values[0].m_type;

for (const auto& v : values) {
if (v.is_nan())
continue;
rval = rval.add(v);
}

return rval;
}));
dst->set_scalar(dst_ridx, new_value);
Expand Down
2 changes: 1 addition & 1 deletion cpp/perspective/src/cpp/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,4 @@ Table::process_index_column(t_data_table& data_table) {
}
}

} // namespace perspective
} // namespace perspective
7 changes: 3 additions & 4 deletions cpp/perspective/src/cpp/traversal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ t_vdnode::t_vdnode(bool expanded, bool has_children)
: m_expanded(expanded)
, m_has_children(has_children) {}

t_traversal::t_traversal(std::shared_ptr<const t_stree> tree, bool handle_nan_sort)
: m_tree(tree)
, m_handle_nan_sort(handle_nan_sort) {
t_traversal::t_traversal(std::shared_ptr<const t_stree> tree)
: m_tree(tree) {
t_stnode_vec rchildren;
tree->get_child_nodes(0, rchildren);
populate_root_children(rchildren);
Expand Down Expand Up @@ -153,7 +152,7 @@ t_traversal::expand_node(const std::vector<t_sortspec>& sortby, t_index exp_idx,
}

std::vector<t_sorttype> sort_orders = get_sort_orders(sortby);
t_multisorter sorter(sortelems, sort_orders, m_handle_nan_sort);
t_multisorter sorter(sortelems, sort_orders);
argsort(sorted_idx, sorter);
} else {
for (t_index i = 0, loop_end = sorted_idx.size(); i != loop_end; ++i)
Expand Down
4 changes: 2 additions & 2 deletions cpp/perspective/src/include/perspective/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ enum t_filter_op {
FILTER_OP_IN,
FILTER_OP_NOT_IN,
FILTER_OP_AND,
FILTER_OP_IS_NAN,
FILTER_OP_IS_NOT_NAN,
FILTER_OP_IS_NULL,
FILTER_OP_IS_NOT_NULL,
FILTER_OP_IS_VALID,
FILTER_OP_IS_NOT_VALID
};
Expand Down
Loading