Skip to content

Commit

Permalink
Refactor row* functions to use FExprs (#2872)
Browse files Browse the repository at this point in the history
Each row* function now supports vararg-style invocation, e.g. `rowsum(f.A, f.B, f.C)`, in addition to the regular single-argument style. On the other hand, applying row-function to a frame no longer produces a frame -- instead, it's an FExpr. This functionality previously existed but was undocumented. It would be better to return this functionality as a method on `Frame` class, instead of being a special case for the row* functions.

WIP for #2562
  • Loading branch information
st-pasha committed Mar 1, 2021
1 parent f39a53d commit 8a243f4
Show file tree
Hide file tree
Showing 34 changed files with 815 additions and 753 deletions.
4 changes: 2 additions & 2 deletions docs/_ext/xfunction.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
#-------------------------------------------------------------------------------
# Copyright 2019-2020 H2O.ai
# Copyright 2019-2021 H2O.ai
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -60,7 +60,7 @@
from sphinx.util.nodes import make_refnode
from . import xnodes

rx_cc_id = re.compile(r"(?:\w+::)*\w+")
rx_cc_id = re.compile(r"(?:\w+(?:<\w+>)?::)*\w+")
rx_py_id = re.compile(r"(?:\w+\.)*\w+")
rx_param = re.compile(r"""
(?:(\w+) # parameter name
Expand Down
2 changes: 1 addition & 1 deletion docs/api/dt/rowall.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowall
:src: src/core/expr/fnary/rowall.cc naryop_rowall
:src: src/core/expr/fnary/rowall.cc FExpr_RowAll::apply_function
:doc: src/core/expr/fnary/rowall.cc doc_rowall
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowany.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowany
:src: src/core/expr/fnary/rowany.cc naryop_rowany
:src: src/core/expr/fnary/rowany.cc FExpr_RowAny::apply_function
:doc: src/core/expr/fnary/rowany.cc doc_rowany
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowcount.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowcount
:src: src/core/expr/fnary/rowcount.cc naryop_rowcount
:src: src/core/expr/fnary/rowcount.cc FExpr_RowCount::apply_function
:doc: src/core/expr/fnary/rowcount.cc doc_rowcount
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowfirst.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowfirst
:src: src/core/expr/fnary/rowfirstlast.cc naryop_rowfirstlast
:src: src/core/expr/fnary/rowfirstlast.cc FExpr_RowFirstLast<FIRST>::apply_function
:doc: src/core/expr/fnary/rowfirstlast.cc doc_rowfirst
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowlast.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowlast
:src: src/core/expr/fnary/rowfirstlast.cc naryop_rowfirstlast
:src: src/core/expr/fnary/rowfirstlast.cc FExpr_RowFirstLast<FIRST>::apply_function
:doc: src/core/expr/fnary/rowfirstlast.cc doc_rowlast
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowmax.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowmax
:src: src/core/expr/fnary/rowminmax.cc naryop_rowminmax
:src: src/core/expr/fnary/rowminmax.cc FExpr_RowMinMax<MIN>::apply_function
:doc: src/core/expr/fnary/rowminmax.cc doc_rowmax
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowmean.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowmean
:src: src/core/expr/fnary/rowmean.cc naryop_rowmean
:src: src/core/expr/fnary/rowmean.cc FExpr_RowMean::apply_function
:doc: src/core/expr/fnary/rowmean.cc doc_rowmean
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowmin.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowmin
:src: src/core/expr/fnary/rowminmax.cc naryop_rowminmax
:src: src/core/expr/fnary/rowminmax.cc FExpr_RowMinMax<MIN>::apply_function
:doc: src/core/expr/fnary/rowminmax.cc doc_rowmin
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowsd.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowsd
:src: src/core/expr/fnary/rowsd.cc naryop_rowsd
:src: src/core/expr/fnary/rowsd.cc FExpr_RowSd::apply_function
:doc: src/core/expr/fnary/rowsd.cc doc_rowsd
:tests: tests/ijby/test-rowwise.py
2 changes: 1 addition & 1 deletion docs/api/dt/rowsum.rst
@@ -1,5 +1,5 @@

.. xfunction:: datatable.rowsum
:src: src/core/expr/fnary/rowsum.cc naryop_rowsum
:src: src/core/expr/fnary/rowsum.cc FExpr_RowSum::apply_function
:doc: src/core/expr/fnary/rowsum.cc doc_rowsum
:tests: tests/ijby/test-rowwise.py
1 change: 0 additions & 1 deletion src/core/datatablemodule.cc
Expand Up @@ -462,7 +462,6 @@ void py::DatatableModule::init_methods() {
init_methods_styles();

init_fbinary();
init_fnary();
init_funary();
init_fuzzy();

Expand Down
1 change: 0 additions & 1 deletion src/core/datatablemodule.h
Expand Up @@ -53,7 +53,6 @@ class DatatableModule : public ExtModule<DatatableModule> {
void init_fbinary(); // expr/fbinary/pyfn.cc
void init_fuzzy(); // utils/fuzzy.cc
void init_funary(); // expr/funary/pyfn.cc
void init_fnary(); // expr/fnary/pyfn.cc

#ifdef DTTEST
void init_tests();
Expand Down
6 changes: 6 additions & 0 deletions src/core/expr/fexpr_list.cc
Expand Up @@ -37,6 +37,9 @@ FExpr_List::FExpr_List(vecExpr&& args)
: args_(std::move(args))
{}

ptrExpr FExpr_List::empty() {
return ptrExpr(new FExpr_List());
}

ptrExpr FExpr_List::make(py::robj src) {
vecExpr args;
Expand All @@ -55,6 +58,9 @@ ptrExpr FExpr_List::make(py::robj src) {
return ptrExpr(new FExpr_List(std::move(args)));
}

void FExpr_List::add_expr(ptrExpr&& expr) {
args_.push_back(std::move(expr));
}



Expand Down
3 changes: 3 additions & 0 deletions src/core/expr/fexpr_list.h
Expand Up @@ -33,8 +33,11 @@ class FExpr_List : public FExpr {
vecExpr args_;

public:
FExpr_List() = default;
FExpr_List(vecExpr&& args);
static ptrExpr empty();
static ptrExpr make(py::robj);
void add_expr(ptrExpr&&);

Workframe evaluate_n(EvalContext&) const override;
Workframe evaluate_j(EvalContext&) const override;
Expand Down
104 changes: 78 additions & 26 deletions src/core/expr/fnary/fnary.cc
@@ -1,5 +1,5 @@
//------------------------------------------------------------------------------
// Copyright 2019 H2O.ai
// Copyright 2019-2021 H2O.ai
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
Expand All @@ -19,40 +19,50 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//------------------------------------------------------------------------------
#include "expr/fnary/fnary.h"
#include "utils/exceptions.h"
#include "column.h"
#include "expr/fexpr_list.h"
#include "expr/fnary/fnary.h"
#include "expr/workframe.h"
#include "stype.h"
#include "utils/exceptions.h"
#include "python/xargs.h"
namespace dt {
namespace expr {


FExpr_RowFn::FExpr_RowFn(ptrExpr&& args)
: args_(std::move(args))
{}

Column naryop(Op opcode, colvec&& columns) {
switch (opcode) {
case Op::ROWALL: return naryop_rowall(std::move(columns));
case Op::ROWANY: return naryop_rowany(std::move(columns));
case Op::ROWCOUNT: return naryop_rowcount(std::move(columns));
case Op::ROWFIRST: return naryop_rowfirstlast(std::move(columns), true);
case Op::ROWLAST: return naryop_rowfirstlast(std::move(columns), false);
case Op::ROWMAX: return naryop_rowminmax(std::move(columns), false);
case Op::ROWMEAN: return naryop_rowmean(std::move(columns));
case Op::ROWMIN: return naryop_rowminmax(std::move(columns), true);
case Op::ROWSD: return naryop_rowsd(std::move(columns));
case Op::ROWSUM: return naryop_rowsum(std::move(columns));
default:
throw TypeError() << "Unknown n-ary op " << static_cast<int>(opcode);
}

std::string FExpr_RowFn::repr() const {
std::string out = name();
out += "(";
out += args_->repr();
out += ")";
return out;
}


Workframe FExpr_RowFn::evaluate_n(EvalContext& ctx) const {
Workframe inputs = args_->evaluate_n(ctx);
Grouping gmode = inputs.get_grouping_mode();
std::vector<Column> columns;
columns.reserve(inputs.ncols());
for (size_t i = 0; i < inputs.ncols(); ++i) {
columns.emplace_back(inputs.retrieve_column(i));
}

Workframe out(ctx);
out.add_column(
apply_function(std::move(columns)),
"", gmode
);
return out;
}

//------------------------------------------------------------------------------
// Various helper functions
//------------------------------------------------------------------------------

SType detect_common_numeric_stype(const colvec& columns, const char* fnname)
{
SType FExpr_RowFn::common_numeric_stype(const colvec& columns) const {
SType common_stype = SType::INT32;
for (size_t i = 0; i < columns.size(); ++i) {
switch (columns[i].stype()) {
Expand All @@ -75,7 +85,7 @@ SType detect_common_numeric_stype(const colvec& columns, const char* fnname)
break;
}
default:
throw TypeError() << "Function `" << fnname << "` expects a sequence "
throw TypeError() << "Function `" << name() << "` expects a sequence "
"of numeric columns, however column " << i
<< " had type `" << columns[i].stype() << "`";
}
Expand All @@ -90,13 +100,55 @@ SType detect_common_numeric_stype(const colvec& columns, const char* fnname)
}



void promote_columns(colvec& columns, SType target_stype) {
void FExpr_RowFn::promote_columns(colvec& columns, SType target_stype) const {
for (auto& col : columns) {
col.cast_inplace(target_stype);
}
}



/**
* Python-facing function that implements the n-ary operator.
*
* All "rowwise" python functions are implemented using this
* function, differentiating themselves only with the `args.get_info()`.
*
* This function has two possible signatures: it can take either
* a single Frame argument, in which case the rowwise function will
* be immediately applied to the frame, and the resulting frame
* returned; or it can take an Expr or sequence of Exprs as the
* argument(s), and return a new Expr that encapsulates application
* of the rowwise function to the given arguments.
*
*/
py::oobj py_rowfn(const py::XArgs& args) {
ptrExpr a;
if (args.num_varargs() == 1) {
a = as_fexpr(args.vararg(0));
}
else {
a = FExpr_List::empty();
for (auto arg : args.varargs()) {
static_cast<FExpr_List*>(a.get())->add_expr(as_fexpr(arg));
}
}
switch (args.get_info()) {
case FN_ROWALL: return PyFExpr::make(new FExpr_RowAll(std::move(a)));
case FN_ROWANY: return PyFExpr::make(new FExpr_RowAny(std::move(a)));
case FN_ROWCOUNT: return PyFExpr::make(new FExpr_RowCount(std::move(a)));
case FN_ROWFIRST: return PyFExpr::make(new FExpr_RowFirstLast<true>(std::move(a)));
case FN_ROWLAST: return PyFExpr::make(new FExpr_RowFirstLast<false>(std::move(a)));
case FN_ROWSUM: return PyFExpr::make(new FExpr_RowSum(std::move(a)));
case FN_ROWMAX: return PyFExpr::make(new FExpr_RowMinMax<false>(std::move(a)));
case FN_ROWMEAN: return PyFExpr::make(new FExpr_RowMean(std::move(a)));
case FN_ROWMIN: return PyFExpr::make(new FExpr_RowMinMax<true>(std::move(a)));
case FN_ROWSD: return PyFExpr::make(new FExpr_RowSd(std::move(a)));
default: throw RuntimeError();
}
}




}} // namespace dt::expr

0 comments on commit 8a243f4

Please sign in to comment.