From 8a243f467a9a37e606b5e4f66b9ef73942576820 Mon Sep 17 00:00:00 2001 From: Pasha Stetsenko Date: Mon, 1 Mar 2021 12:32:01 -0800 Subject: [PATCH] Refactor row* functions to use FExprs (#2872) Each row* function now supports vararg-style invocation, e.g. `rowsum(f.A, f.B, f.C)`, in addition to the regular single-argument style. On the other hand, applying row-function to a frame no longer produces a frame -- instead, it's an FExpr. This functionality previously existed but was undocumented. It would be better to return this functionality as a method on `Frame` class, instead of being a special case for the row* functions. WIP for #2562 --- docs/_ext/xfunction.py | 4 +- docs/api/dt/rowall.rst | 2 +- docs/api/dt/rowany.rst | 2 +- docs/api/dt/rowcount.rst | 2 +- docs/api/dt/rowfirst.rst | 2 +- docs/api/dt/rowlast.rst | 2 +- docs/api/dt/rowmax.rst | 2 +- docs/api/dt/rowmean.rst | 2 +- docs/api/dt/rowmin.rst | 2 +- docs/api/dt/rowsd.rst | 2 +- docs/api/dt/rowsum.rst | 2 +- src/core/datatablemodule.cc | 1 - src/core/datatablemodule.h | 1 - src/core/expr/fexpr_list.cc | 6 + src/core/expr/fexpr_list.h | 3 + src/core/expr/fnary/fnary.cc | 104 ++++++++++++----- src/core/expr/fnary/fnary.h | 150 +++++++++++++++++------- src/core/expr/fnary/pyfn.cc | 133 ---------------------- src/core/expr/fnary/rowall.cc | 109 +++++++++--------- src/core/expr/fnary/rowany.cc | 102 +++++++++-------- src/core/expr/fnary/rowcount.cc | 96 +++++++++------- src/core/expr/fnary/rowfirstlast.cc | 170 +++++++++++++++------------- src/core/expr/fnary/rowmean.cc | 79 +++++++------ src/core/expr/fnary/rowminmax.cc | 167 +++++++++++++++------------ src/core/expr/fnary/rowsd.cc | 79 +++++++------ src/core/expr/fnary/rowsum.cc | 78 +++++++------ src/core/expr/head_func.cc | 9 +- src/core/expr/head_func.h | 11 -- src/core/expr/head_func_nary.cc | 59 ---------- src/core/expr/op.h | 16 +-- src/core/python/xargs.cc | 66 ++++++++++- src/core/python/xargs.h | 48 ++++++-- src/datatable/expr/expr.py | 12 -- 
tests/ijby/test-rowwise.py | 45 ++++---- 34 files changed, 815 insertions(+), 753 deletions(-) delete mode 100644 src/core/expr/fnary/pyfn.cc delete mode 100644 src/core/expr/head_func_nary.cc diff --git a/docs/_ext/xfunction.py b/docs/_ext/xfunction.py index 54cb529e2e..5ca84fd20a 100644 --- a/docs/_ext/xfunction.py +++ b/docs/_ext/xfunction.py @@ -1,6 +1,6 @@ #!/usr/bin/env python #------------------------------------------------------------------------------- -# Copyright 2019-2020 H2O.ai +# Copyright 2019-2021 H2O.ai # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -60,7 +60,7 @@ from sphinx.util.nodes import make_refnode from . import xnodes -rx_cc_id = re.compile(r"(?:\w+::)*\w+") +rx_cc_id = re.compile(r"(?:\w+(?:<\w+>)?::)*\w+") rx_py_id = re.compile(r"(?:\w+\.)*\w+") rx_param = re.compile(r""" (?:(\w+) # parameter name diff --git a/docs/api/dt/rowall.rst b/docs/api/dt/rowall.rst index a00a6b7d76..b3a16600ed 100644 --- a/docs/api/dt/rowall.rst +++ b/docs/api/dt/rowall.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowall - :src: src/core/expr/fnary/rowall.cc naryop_rowall + :src: src/core/expr/fnary/rowall.cc FExpr_RowAll::apply_function :doc: src/core/expr/fnary/rowall.cc doc_rowall :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowany.rst b/docs/api/dt/rowany.rst index a6549cecd5..12b3ba57a2 100644 --- a/docs/api/dt/rowany.rst +++ b/docs/api/dt/rowany.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowany - :src: src/core/expr/fnary/rowany.cc naryop_rowany + :src: src/core/expr/fnary/rowany.cc FExpr_RowAny::apply_function :doc: src/core/expr/fnary/rowany.cc doc_rowany :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowcount.rst b/docs/api/dt/rowcount.rst index 483d2bd7f2..8bd3fb6bcc 100644 --- a/docs/api/dt/rowcount.rst +++ b/docs/api/dt/rowcount.rst @@ -1,5 +1,5 @@ .. 
xfunction:: datatable.rowcount - :src: src/core/expr/fnary/rowcount.cc naryop_rowcount + :src: src/core/expr/fnary/rowcount.cc FExpr_RowCount::apply_function :doc: src/core/expr/fnary/rowcount.cc doc_rowcount :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowfirst.rst b/docs/api/dt/rowfirst.rst index 6c34ce0964..64d012eecb 100644 --- a/docs/api/dt/rowfirst.rst +++ b/docs/api/dt/rowfirst.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowfirst - :src: src/core/expr/fnary/rowfirstlast.cc naryop_rowfirstlast + :src: src/core/expr/fnary/rowfirstlast.cc FExpr_RowFirstLast::apply_function :doc: src/core/expr/fnary/rowfirstlast.cc doc_rowfirst :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowlast.rst b/docs/api/dt/rowlast.rst index b951bf1bc4..25c20f0df6 100644 --- a/docs/api/dt/rowlast.rst +++ b/docs/api/dt/rowlast.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowlast - :src: src/core/expr/fnary/rowfirstlast.cc naryop_rowfirstlast + :src: src/core/expr/fnary/rowfirstlast.cc FExpr_RowFirstLast::apply_function :doc: src/core/expr/fnary/rowfirstlast.cc doc_rowlast :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowmax.rst b/docs/api/dt/rowmax.rst index 7fd019e495..d7cb980f1f 100644 --- a/docs/api/dt/rowmax.rst +++ b/docs/api/dt/rowmax.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowmax - :src: src/core/expr/fnary/rowminmax.cc naryop_rowminmax + :src: src/core/expr/fnary/rowminmax.cc FExpr_RowMinMax::apply_function :doc: src/core/expr/fnary/rowminmax.cc doc_rowmax :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowmean.rst b/docs/api/dt/rowmean.rst index 1a7b523e10..6c0491524a 100644 --- a/docs/api/dt/rowmean.rst +++ b/docs/api/dt/rowmean.rst @@ -1,5 +1,5 @@ .. 
xfunction:: datatable.rowmean - :src: src/core/expr/fnary/rowmean.cc naryop_rowmean + :src: src/core/expr/fnary/rowmean.cc FExpr_RowMean::apply_function :doc: src/core/expr/fnary/rowmean.cc doc_rowmean :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowmin.rst b/docs/api/dt/rowmin.rst index 54c2e07581..15007a6fc1 100644 --- a/docs/api/dt/rowmin.rst +++ b/docs/api/dt/rowmin.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowmin - :src: src/core/expr/fnary/rowminmax.cc naryop_rowminmax + :src: src/core/expr/fnary/rowminmax.cc FExpr_RowMinMax::apply_function :doc: src/core/expr/fnary/rowminmax.cc doc_rowmin :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowsd.rst b/docs/api/dt/rowsd.rst index f787614c3d..7ce7269c3e 100644 --- a/docs/api/dt/rowsd.rst +++ b/docs/api/dt/rowsd.rst @@ -1,5 +1,5 @@ .. xfunction:: datatable.rowsd - :src: src/core/expr/fnary/rowsd.cc naryop_rowsd + :src: src/core/expr/fnary/rowsd.cc FExpr_RowSd::apply_function :doc: src/core/expr/fnary/rowsd.cc doc_rowsd :tests: tests/ijby/test-rowwise.py diff --git a/docs/api/dt/rowsum.rst b/docs/api/dt/rowsum.rst index e08a2279a7..83e62e9914 100644 --- a/docs/api/dt/rowsum.rst +++ b/docs/api/dt/rowsum.rst @@ -1,5 +1,5 @@ .. 
xfunction:: datatable.rowsum - :src: src/core/expr/fnary/rowsum.cc naryop_rowsum + :src: src/core/expr/fnary/rowsum.cc FExpr_RowSum::apply_function :doc: src/core/expr/fnary/rowsum.cc doc_rowsum :tests: tests/ijby/test-rowwise.py diff --git a/src/core/datatablemodule.cc b/src/core/datatablemodule.cc index 29761a0170..d8cfc31af0 100644 --- a/src/core/datatablemodule.cc +++ b/src/core/datatablemodule.cc @@ -462,7 +462,6 @@ void py::DatatableModule::init_methods() { init_methods_styles(); init_fbinary(); - init_fnary(); init_funary(); init_fuzzy(); diff --git a/src/core/datatablemodule.h b/src/core/datatablemodule.h index 4d719b286a..1862125ce4 100644 --- a/src/core/datatablemodule.h +++ b/src/core/datatablemodule.h @@ -53,7 +53,6 @@ class DatatableModule : public ExtModule { void init_fbinary(); // expr/fbinary/pyfn.cc void init_fuzzy(); // utils/fuzzy.cc void init_funary(); // expr/funary/pyfn.cc - void init_fnary(); // expr/fnary/pyfn.cc #ifdef DTTEST void init_tests(); diff --git a/src/core/expr/fexpr_list.cc b/src/core/expr/fexpr_list.cc index 9ddc1c4a5e..65081d2b62 100644 --- a/src/core/expr/fexpr_list.cc +++ b/src/core/expr/fexpr_list.cc @@ -37,6 +37,9 @@ FExpr_List::FExpr_List(vecExpr&& args) : args_(std::move(args)) {} +ptrExpr FExpr_List::empty() { + return ptrExpr(new FExpr_List()); +} ptrExpr FExpr_List::make(py::robj src) { vecExpr args; @@ -55,6 +58,9 @@ ptrExpr FExpr_List::make(py::robj src) { return ptrExpr(new FExpr_List(std::move(args))); } +void FExpr_List::add_expr(ptrExpr&& expr) { + args_.push_back(std::move(expr)); +} diff --git a/src/core/expr/fexpr_list.h b/src/core/expr/fexpr_list.h index 2ef74a0fd2..ef5113d5f0 100644 --- a/src/core/expr/fexpr_list.h +++ b/src/core/expr/fexpr_list.h @@ -33,8 +33,11 @@ class FExpr_List : public FExpr { vecExpr args_; public: + FExpr_List() = default; FExpr_List(vecExpr&& args); + static ptrExpr empty(); static ptrExpr make(py::robj); + void add_expr(ptrExpr&&); Workframe evaluate_n(EvalContext&) const 
override; Workframe evaluate_j(EvalContext&) const override; diff --git a/src/core/expr/fnary/fnary.cc b/src/core/expr/fnary/fnary.cc index 62b2c95f78..aff6e00374 100644 --- a/src/core/expr/fnary/fnary.cc +++ b/src/core/expr/fnary/fnary.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -19,40 +19,50 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS // IN THE SOFTWARE. //------------------------------------------------------------------------------ -#include "expr/fnary/fnary.h" -#include "utils/exceptions.h" #include "column.h" +#include "expr/fexpr_list.h" +#include "expr/fnary/fnary.h" +#include "expr/workframe.h" #include "stype.h" +#include "utils/exceptions.h" +#include "python/xargs.h" namespace dt { namespace expr { +FExpr_RowFn::FExpr_RowFn(ptrExpr&& args) + : args_(std::move(args)) +{} -Column naryop(Op opcode, colvec&& columns) { - switch (opcode) { - case Op::ROWALL: return naryop_rowall(std::move(columns)); - case Op::ROWANY: return naryop_rowany(std::move(columns)); - case Op::ROWCOUNT: return naryop_rowcount(std::move(columns)); - case Op::ROWFIRST: return naryop_rowfirstlast(std::move(columns), true); - case Op::ROWLAST: return naryop_rowfirstlast(std::move(columns), false); - case Op::ROWMAX: return naryop_rowminmax(std::move(columns), false); - case Op::ROWMEAN: return naryop_rowmean(std::move(columns)); - case Op::ROWMIN: return naryop_rowminmax(std::move(columns), true); - case Op::ROWSD: return naryop_rowsd(std::move(columns)); - case Op::ROWSUM: return naryop_rowsum(std::move(columns)); - default: - throw TypeError() << "Unknown n-ary op " << static_cast(opcode); - } + +std::string FExpr_RowFn::repr() const { + std::string out = name(); + out += "("; 
+ out += args_->repr(); + out += ")"; + return out; } +Workframe FExpr_RowFn::evaluate_n(EvalContext& ctx) const { + Workframe inputs = args_->evaluate_n(ctx); + Grouping gmode = inputs.get_grouping_mode(); + std::vector columns; + columns.reserve(inputs.ncols()); + for (size_t i = 0; i < inputs.ncols(); ++i) { + columns.emplace_back(inputs.retrieve_column(i)); + } + + Workframe out(ctx); + out.add_column( + apply_function(std::move(columns)), + "", gmode + ); + return out; +} -//------------------------------------------------------------------------------ -// Various helper functions -//------------------------------------------------------------------------------ -SType detect_common_numeric_stype(const colvec& columns, const char* fnname) -{ +SType FExpr_RowFn::common_numeric_stype(const colvec& columns) const { SType common_stype = SType::INT32; for (size_t i = 0; i < columns.size(); ++i) { switch (columns[i].stype()) { @@ -75,7 +85,7 @@ SType detect_common_numeric_stype(const colvec& columns, const char* fnname) break; } default: - throw TypeError() << "Function `" << fnname << "` expects a sequence " + throw TypeError() << "Function `" << name() << "` expects a sequence " "of numeric columns, however column " << i << " had type `" << columns[i].stype() << "`"; } @@ -90,8 +100,7 @@ SType detect_common_numeric_stype(const colvec& columns, const char* fnname) } - -void promote_columns(colvec& columns, SType target_stype) { +void FExpr_RowFn::promote_columns(colvec& columns, SType target_stype) const { for (auto& col : columns) { col.cast_inplace(target_stype); } @@ -99,4 +108,47 @@ void promote_columns(colvec& columns, SType target_stype) { +/** + * Python-facing function that implements the n-ary operator. + * + * All "rowwise" python functions are implemented using this + * function, differentiating themselves only with the `args.get_info()`. 
+ * + * This function supports two invocation styles: with a single + * argument, which is converted into an FExpr directly; or with + * several arguments (vararg-style), which are collected into an + * FExpr_List. In both cases the result is a new FExpr wrapping the + * rowwise function; a Frame argument is no longer evaluated + * immediately into a resulting frame. + * + */ +py::oobj py_rowfn(const py::XArgs& args) { + ptrExpr a; + if (args.num_varargs() == 1) { + a = as_fexpr(args.vararg(0)); + } + else { + a = FExpr_List::empty(); + for (auto arg : args.varargs()) { + static_cast<FExpr_List*>(a.get())->add_expr(as_fexpr(arg)); + } + } + switch (args.get_info()) { + case FN_ROWALL: return PyFExpr::make(new FExpr_RowAll(std::move(a))); + case FN_ROWANY: return PyFExpr::make(new FExpr_RowAny(std::move(a))); + case FN_ROWCOUNT: return PyFExpr::make(new FExpr_RowCount(std::move(a))); + case FN_ROWFIRST: return PyFExpr::make(new FExpr_RowFirstLast<true>(std::move(a))); + case FN_ROWLAST: return PyFExpr::make(new FExpr_RowFirstLast<false>(std::move(a))); + case FN_ROWSUM: return PyFExpr::make(new FExpr_RowSum(std::move(a))); + case FN_ROWMAX: return PyFExpr::make(new FExpr_RowMinMax<false>(std::move(a))); + case FN_ROWMEAN: return PyFExpr::make(new FExpr_RowMean(std::move(a))); + case FN_ROWMIN: return PyFExpr::make(new FExpr_RowMinMax<true>(std::move(a))); + case FN_ROWSD: return PyFExpr::make(new FExpr_RowSd(std::move(a))); + default: throw RuntimeError(); + } +} + + + + }} // namespace dt::expr diff --git a/src/core/expr/fnary/fnary.h b/src/core/expr/fnary/fnary.h index 62977bc753..2fdc8c95e1 100644 --- a/src/core/expr/fnary/fnary.h +++ b/src/core/expr/fnary/fnary.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -23,64
+23,132 @@ #define dt_EXPR_FNARY_FNARY_h #include #include "expr/declarations.h" +#include "expr/fexpr_func.h" #include "expr/op.h" #include "python/args.h" namespace dt { namespace expr { -/** - * Main method for computing n-ary (rowwise) functions. - * - */ -Column naryop(Op opcode, colvec&& columns); +enum RowFnKind : int { + FN_ROWALL, + FN_ROWANY, + FN_ROWCOUNT, + FN_ROWFIRST, + FN_ROWLAST, + FN_ROWMAX, + FN_ROWMEAN, + FN_ROWMIN, + FN_ROWSD, + FN_ROWSUM, +}; +py::oobj py_rowfn(const py::XArgs& args); -//------------------------------------------------------------------------------ -// Resolvers [private] -//------------------------------------------------------------------------------ +class FExpr_RowFn : public FExpr_Func { + private: + ptrExpr args_; -Column naryop_rowall(colvec&&); -Column naryop_rowany(colvec&&); -Column naryop_rowcount(colvec&&); -Column naryop_rowfirstlast(colvec&&, bool FIRST); -Column naryop_rowmean(colvec&&); -Column naryop_rowminmax(colvec&&, bool MIN); -Column naryop_rowsd(colvec&&); -Column naryop_rowsum(colvec&&); + public: + FExpr_RowFn(ptrExpr&& args); + std::string repr() const override; + Workframe evaluate_n(EvalContext& ctx) const override; -/** - * For a list of numeric columns, find the largest common stype. - * Possible return values are: INT32, INT64, FLOAT32 or FLOAT64. - * If any column in the list is not numeric, then an exception will - * be thrown. The error message will use `fnname`. - */ -SType detect_common_numeric_stype(const colvec&, const char* fnname); + virtual std::string name() const = 0; + virtual Column apply_function(std::vector&& columns) const = 0; + SType common_numeric_stype(const colvec&) const; + void promote_columns(colvec& columns, SType target_stype) const; +}; -/** - * Convert all columns in the list into a common stype. 
- */ -void promote_columns(colvec& columns, SType target_stype); +class FExpr_RowAll : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; -//------------------------------------------------------------------------------ -// Python interface -//------------------------------------------------------------------------------ -extern py::PKArgs args_rowall; -extern py::PKArgs args_rowany; -extern py::PKArgs args_rowcount; -extern py::PKArgs args_rowfirst; -extern py::PKArgs args_rowlast; -extern py::PKArgs args_rowmax; -extern py::PKArgs args_rowmean; -extern py::PKArgs args_rowmin; -extern py::PKArgs args_rowsd; -extern py::PKArgs args_rowsum; + +class FExpr_RowAny : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; + + + +class FExpr_RowCount : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; + + + +template +class FExpr_RowFirstLast : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; + +extern template class FExpr_RowFirstLast; +extern template class FExpr_RowFirstLast; + + + +template +class FExpr_RowMinMax : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; + +extern template class FExpr_RowMinMax; +extern template class FExpr_RowMinMax; + + + +class FExpr_RowMean : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; + + + +class FExpr_RowSd : public 
FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; + + + +class FExpr_RowSum : public FExpr_RowFn { + public: + using FExpr_RowFn::FExpr_RowFn; + + std::string name() const override; + Column apply_function(std::vector&& columns) const override; +}; diff --git a/src/core/expr/fnary/pyfn.cc b/src/core/expr/fnary/pyfn.cc deleted file mode 100644 index 900a7dadfa..0000000000 --- a/src/core/expr/fnary/pyfn.cc +++ /dev/null @@ -1,133 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. 
-//------------------------------------------------------------------------------ -#include -#include "expr/fexpr_column.h" -#include "expr/fnary/fnary.h" -#include "expr/op.h" -#include "frame/py_frame.h" -#include "utils/assert.h" -#include "datatablemodule.h" - - -//------------------------------------------------------------------------------ -// PKArgs -> Op -//------------------------------------------------------------------------------ - -static std::unordered_map args2opcodes; - -static void register_args(const py::PKArgs& args, dt::expr::Op opcode) { - xassert(args2opcodes.count(&args) == 0); - args2opcodes[&args] = opcode; -} - -static dt::expr::Op get_opcode_from_args(const py::PKArgs& args) { - xassert(args2opcodes.count(&args) == 1); - return args2opcodes.at(&args); -} - - - - -//------------------------------------------------------------------------------ -// Main pyfn() function -//------------------------------------------------------------------------------ - -static py::oobj make_pyexpr(dt::expr::Op opcode, py::otuple args_tuple) { - size_t op = static_cast(opcode); - return py::robj(py::Expr_Type).call({ py::oint(op), args_tuple }); -} - - -/** - * This helper function will apply `opcode` to an entire frame. - */ -static py::oobj apply_to_frame(dt::expr::Op opcode, py::robj arg) { - xassert(arg.is_frame()); - - auto slice_all = py::oslice(py::oslice::NA, py::oslice::NA, py::oslice::NA); - auto f_all = dt::expr::PyFExpr::make(new dt::expr::FExpr_ColumnAsArg(0, slice_all)); - auto rowfn = make_pyexpr(opcode, py::otuple{ f_all }); - - auto frame = static_cast(arg.to_borrowed_ref()); - return frame->m__getitem__(py::otuple{ slice_all, rowfn }); -} - - -/** - * Python-facing function that implements the n-ary operator. - * - * All "rowwise" python functions are implemented using this - * function, differentiating themselves only with the `args` - * parameter. 
- * - * This function has two possible signatures: it can take either - * a single Frame argument, in which case the rowwise function will - * be immediately applied to the frame, and the resulting frame - * returned; or it can take an Expr or sequence of Exprs as the - * argument(s), and return a new Expr that encapsulates application - * of the rowwise function to the given arguments. - * - */ -static py::oobj fnary_pyfn(const py::PKArgs& args) -{ - auto opcode = get_opcode_from_args(args); - size_t n = args.num_vararg_args(); - - py::otuple expr_args(n); - size_t i = 0; - for (py::robj arg : args.varargs()) { - if (n == 1 && arg.is_frame()) { - return apply_to_frame(opcode, arg); - } - expr_args.set(i++, arg); - } - return make_pyexpr(opcode, expr_args); -} - - - - -//------------------------------------------------------------------------------ -// Static initialization -//------------------------------------------------------------------------------ - -/** - * Register python-facing functions. This is called once during - * the initialization of `datatable` module. 
- */ -void py::DatatableModule::init_fnary() -{ - #define FNARY(ARGS, OP) \ - ADD_FN(&fnary_pyfn, dt::expr::ARGS); \ - register_args(dt::expr::ARGS, dt::expr::OP) - - FNARY(args_rowall, Op::ROWALL); - FNARY(args_rowany, Op::ROWANY); - FNARY(args_rowcount, Op::ROWCOUNT); - FNARY(args_rowfirst, Op::ROWFIRST); - FNARY(args_rowlast, Op::ROWLAST); - FNARY(args_rowmax, Op::ROWMAX); - FNARY(args_rowmean, Op::ROWMEAN); - FNARY(args_rowmin, Op::ROWMIN); - FNARY(args_rowsd, Op::ROWSD); - FNARY(args_rowsum, Op::ROWSUM); -} diff --git a/src/core/expr/fnary/rowall.cc b/src/core/expr/fnary/rowall.cc index b154b1f5f7..2e14a9288f 100644 --- a/src/core/expr/fnary/rowall.cc +++ b/src/core/expr/fnary/rowall.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,27 +20,67 @@ // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "python/xargs.h" namespace dt { namespace expr { +std::string FExpr_RowAll::name() const { + return "rowall"; +} + + + +static bool op_rowall(size_t i, int8_t* out, const colvec& columns) { + for (const auto& col : columns) { + int8_t x; + bool xvalid = col.get_element(i, &x); + if (!xvalid || x == 0) { + *out = 0; + return true; + } + } + *out = 1; + return true; +} + + +Column FExpr_RowAll::apply_function(colvec&& columns) const { + if (columns.empty()) { + return Const_ColumnImpl::make_bool_column(1, true); + } + size_t nrows = columns[0].nrows(); + for (size_t i = 0; i < columns.size(); ++i) { + xassert(columns[i].nrows() == nrows); + if (columns[i].stype() != SType::BOOL) { + throw TypeError() << "Function `rowall` requires a sequence of boolean " + "columns, however column " << i << " has type `" + << columns[i].stype() << "`"; + } + } + return Column(new FuncNary_ColumnImpl( + std::move(columns), op_rowall, nrows, SType::BOOL)); +} + + static const char* doc_rowall = -R"(rowall(cols) +R"(rowall(*cols) -- For each row in `cols` return `True` if all values in that row are `True`, or otherwise return `False`. + Parameters ---------- -cols: Expr +cols: FExpr[bool] Input boolean columns. -return: Expr +return: FExpr[bool] f-expression consisting of one boolean column that has the same number of rows as in `cols`. @@ -48,19 +88,15 @@ except: TypeError The exception is raised when one of the columns from `cols` has a non-boolean type. -See Also --------- -- :func:`rowany()` -- row-wise `any() `_ function. - -Example -------- +Examples +-------- :: >>> from datatable import dt, f - >>> DT = dt.Frame({"A":[True, True], - ... "B":[True, False], - ... "C":[True, True]}) + >>> DT = dt.Frame({"A": [True, True], + ... "B": [True, False], + ... 
"C": [True, True]}) >>> DT | A B C | bool8 bool8 bool8 @@ -69,7 +105,6 @@ Example 1 | 1 0 1 [2 rows x 3 columns] - :: >>> DT[:, dt.rowall(f[:])] @@ -81,44 +116,16 @@ Example [2 rows x 1 column] +See Also +-------- +- :func:`rowany()` -- row-wise `any() `_ function. )"; -py::PKArgs args_rowall(0, 0, 0, true, false, {}, "rowall", doc_rowall); - - - -static bool op_rowall(size_t i, int8_t* out, const colvec& columns) { - for (const auto& col : columns) { - int8_t x; - bool xvalid = col.get_element(i, &x); - if (!xvalid || x == 0) { - *out = 0; - return true; - } - } - *out = 1; - return true; -} - - - -Column naryop_rowall(colvec&& columns) { - if (columns.empty()) { - return Const_ColumnImpl::make_bool_column(1, true); - } - size_t nrows = columns[0].nrows(); - for (size_t i = 0; i < columns.size(); ++i) { - xassert(columns[i].nrows() == nrows); - if (columns[i].stype() != SType::BOOL) { - throw TypeError() << "Function `rowall` requires a sequence of boolean " - "columns, however column " << i << " has type `" - << columns[i].stype() << "`"; - } - } - - return Column(new FuncNary_ColumnImpl( - std::move(columns), op_rowall, nrows, SType::BOOL)); -} +DECLARE_PYFN(&py_rowfn) + ->name("rowall") + ->docs(doc_rowall) + ->allow_varargs() + ->add_info(FN_ROWALL); diff --git a/src/core/expr/fnary/rowany.cc b/src/core/expr/fnary/rowany.cc index 588a7952ec..ff29ba046a 100644 --- a/src/core/expr/fnary/rowany.cc +++ b/src/core/expr/fnary/rowany.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,16 +20,55 @@ // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "python/xargs.h" namespace dt { namespace expr { +std::string FExpr_RowAny::name() const { + return "rowany"; +} + + + +static bool op_rowany(size_t i, int8_t* out, const colvec& columns) { + for (const auto& col : columns) { + int8_t x; + bool xvalid = col.get_element(i, &x); + if (xvalid && x) { + *out = 1; + return true; + } + } + *out = 0; + return true; +} + + + +Column FExpr_RowAny::apply_function(colvec&& columns) const { + if (columns.empty()) { + return Const_ColumnImpl::make_bool_column(1, true); + } + size_t nrows = columns[0].nrows(); + for (size_t i = 0; i < columns.size(); ++i) { + xassert(columns[i].nrows() == nrows); + if (columns[i].stype() != SType::BOOL) { + throw TypeError() << "Function `rowany` requires a sequence of boolean " + "columns, however column " << i << " has type `" + << columns[i].stype() << "`"; + } + } + return Column(new FuncNary_ColumnImpl( + std::move(columns), op_rowany, nrows, SType::BOOL)); +} + static const char* doc_rowany = -R"(rowany(cols) +R"(rowany(*cols) -- For each row in `cols` return `True` if any of the values in that row @@ -37,12 +76,13 @@ are `True`, or otherwise return `False`. The function uses shortcut evaluation: if the `True` value is found in one of the columns, then the subsequent columns are skipped. + Parameters ---------- -cols: Expr +cols: FExpr[bool] Input boolean columns. -return: Expr +return: FExpr[bool] f-expression consisting of one boolean column that has the same number of rows as in `cols`. @@ -50,13 +90,9 @@ except: TypeError The exception is raised when one of the columns from `cols` has a non-boolean type. -See Also --------- - -- :func:`rowall()` -- row-wise `all() `_ function. 
-Example -------- +Examples +-------- :: >>> from datatable import dt, f @@ -71,7 +107,6 @@ Example 1 | 1 0 1 [2 rows x 3 columns] - :: >>> DT[:, dt.rowany(f[:])] @@ -83,44 +118,17 @@ Example [2 rows x 1 column] +See Also +-------- +- :func:`rowall()` -- row-wise `all() `_ function. )"; -py::PKArgs args_rowany(0, 0, 0, true, false, {}, "rowany", doc_rowany); - - - -static bool op_rowany(size_t i, int8_t* out, const colvec& columns) { - for (const auto& col : columns) { - int8_t x; - bool xvalid = col.get_element(i, &x); - if (xvalid && x) { - *out = 1; - return true; - } - } - *out = 0; - return true; -} - - - -Column naryop_rowany(colvec&& columns) { - if (columns.empty()) { - return Const_ColumnImpl::make_bool_column(1, true); - } - size_t nrows = columns[0].nrows(); - for (size_t i = 0; i < columns.size(); ++i) { - xassert(columns[i].nrows() == nrows); - if (columns[i].stype() != SType::BOOL) { - throw TypeError() << "Function `rowany` requires a sequence of boolean " - "columns, however column " << i << " has type `" - << columns[i].stype() << "`"; - } - } - return Column(new FuncNary_ColumnImpl( - std::move(columns), op_rowany, nrows, SType::BOOL)); -} +DECLARE_PYFN(&py_rowfn) + ->name("rowany") + ->docs(doc_rowany) + ->allow_varargs() + ->add_info(FN_ROWANY); diff --git a/src/core/expr/fnary/rowcount.cc b/src/core/expr/fnary/rowcount.cc index b73ece94fa..07963910c3 100644 --- a/src/core/expr/fnary/rowcount.cc +++ b/src/core/expr/fnary/rowcount.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,37 +20,69 @@ // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" -#include "expr/funary/umaker.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "expr/funary/umaker.h" +#include "python/xargs.h" namespace dt { namespace expr { +std::string FExpr_RowCount::name() const { + return "rowcount"; +} + + + +static bool op_rowcount(size_t i, int32_t* out, const colvec& columns) { + int32_t valid_count = static_cast(columns.size()); + for (const auto& col : columns) { + int8_t x; + // each column is ISNA(col), so the return value is 1 if + // the value in the original column is NA. + col.get_element(i, &x); + valid_count -= x; + } + *out = valid_count; + return true; +} + + +Column FExpr_RowCount::apply_function(colvec&& columns) const { + if (columns.empty()) { + return Const_ColumnImpl::make_int_column(1, 0, SType::INT32); + } + size_t nrows = columns[0].nrows(); + for (size_t i = 0; i < columns.size(); ++i) { + xassert(columns[i].nrows() == nrows); + columns[i] = unaryop(Op::ISNA, std::move(columns[i])); + } + return Column(new FuncNary_ColumnImpl( + std::move(columns), op_rowcount, nrows, SType::INT32)); +} + + static const char* doc_rowcount = -R"(rowcount(cols) +R"(rowcount(*cols) -- For each row, count the number of non-missing values in `cols`. + Parameters ---------- -cols: Expr +cols: FExpr Input columns. -return: Expr +return: FExpr f-expression consisting of one `int32` column and the same number of rows as in `cols`. -See Also --------- - -- :func:`rowsum()` -- sum of all values row-wise. 
-Example -------- +Examples +-------- :: >>> from datatable import dt, f @@ -83,39 +115,17 @@ Note the exclusion of null values in the count:: 4 | 2 [5 rows x 1 column] -)"; - -py::PKArgs args_rowcount(0, 0, 0, true, false, {}, "rowcount", doc_rowcount); - - - -static bool op_rowcount(size_t i, int32_t* out, const colvec& columns) { - int32_t valid_count = static_cast(columns.size()); - for (const auto& col : columns) { - int8_t x; - // each column is ISNA(col), so the return value is 1 if - // the value in the original column is NA. - col.get_element(i, &x); - valid_count -= x; - } - *out = valid_count; - return true; -} - +See Also +-------- +- :func:`rowsum()` -- sum of all values row-wise. +)"; -Column naryop_rowcount(colvec&& columns) { - if (columns.empty()) { - return Const_ColumnImpl::make_int_column(1, 0, SType::INT32); - } - size_t nrows = columns[0].nrows(); - for (size_t i = 0; i < columns.size(); ++i) { - xassert(columns[i].nrows() == nrows); - columns[i] = unaryop(Op::ISNA, std::move(columns[i])); - } - return Column(new FuncNary_ColumnImpl( - std::move(columns), op_rowcount, nrows, SType::INT32)); -} +DECLARE_PYFN(&py_rowfn) + ->name("rowcount") + ->docs(doc_rowcount) + ->allow_varargs() + ->add_info(FN_ROWCOUNT); diff --git a/src/core/expr/fnary/rowfirstlast.cc b/src/core/expr/fnary/rowfirstlast.cc index 77031f24e8..b40d781f46 100644 --- a/src/core/expr/fnary/rowfirstlast.cc +++ b/src/core/expr/fnary/rowfirstlast.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,37 +20,98 @@ // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "python/xargs.h" namespace dt { namespace expr { +template +std::string FExpr_RowFirstLast::name() const { + return FIRST? "rowfirst" : "rowlast"; +} + + + +template +static bool op_rowfirstlast(size_t i, T* out, const colvec& columns) { + size_t n = columns.size(); + for (size_t j = 0; j < n; ++j) { + bool xvalid = columns[FIRST? j : n - j - 1].get_element(i, out); + if (xvalid) return true; + } + return false; +} + + +template +static inline Column _rowfirstlast(colvec&& columns, SType outtype) { + auto fn = op_rowfirstlast; + size_t nrows = columns[0].nrows(); + return Column(new FuncNary_ColumnImpl( + std::move(columns), fn, nrows, outtype)); +} + + +template +Column FExpr_RowFirstLast::apply_function(colvec&& columns) const { + if (columns.empty()) { + return Const_ColumnImpl::make_na_column(1); + } + + // Detect common stype + SType stype0 = SType::VOID; + for (const auto& col : columns) { + stype0 = common_stype(stype0, col.stype()); + } + if (stype0 == SType::INVALID) { + throw TypeError() << "Incompatible column types in function `" << name() << "`"; + } + promote_columns(columns, stype0); + + switch (stype0) { + case SType::BOOL: return _rowfirstlast(std::move(columns), stype0); + case SType::INT8: return _rowfirstlast(std::move(columns), stype0); + case SType::INT16: return _rowfirstlast(std::move(columns), stype0); + case SType::INT32: return _rowfirstlast(std::move(columns), stype0); + case SType::INT64: return _rowfirstlast(std::move(columns), stype0); + case SType::FLOAT32: return _rowfirstlast(std::move(columns), stype0); + case SType::FLOAT64: return _rowfirstlast(std::move(columns), stype0); + case SType::STR32: + case SType::STR64: return _rowfirstlast(std::move(columns), stype0); + default: { + throw TypeError() << "Unknown type " 
<< stype0; + } + } +} + +template class FExpr_RowFirstLast; +template class FExpr_RowFirstLast; + + static const char* doc_rowfirst = -R"(rowfirst(cols) +R"(rowfirst(*cols) -- For each row, find the first non-missing value in `cols`. If all values in a row are missing, then this function will also produce a missing value. + Parameters ---------- -cols: Expr +cols: FExpr Input columns. -return: Expr +return: FExpr f-expression consisting of one column and the same number of rows as in `cols`. except: TypeError The exception is raised when input columns have incompatible types. -See Also --------- - -- :func:`rowlast()` -- find the last non-missing value row-wise. Examples -------- @@ -59,7 +120,7 @@ Examples >>> from datatable import dt, f >>> DT = dt.Frame({"A": [1, 1, 2, 1, 2], ... "B": [None, 2, 3, 4, None], - ... "C":[True, False, False, True, True]}) + ... "C": [True, False, False, True, True]}) >>> DT | A B C | int32 int32 bool8 @@ -84,7 +145,6 @@ Examples 4 | 2 [5 rows x 1 column] - :: >>> DT[:, dt.rowfirst(f['B', 'C'])] @@ -97,16 +157,22 @@ Examples 3 | 4 4 | 1 [5 rows x 1 column] + + +See Also +-------- +- :func:`rowlast()` -- find the last non-missing value row-wise. )"; static const char* doc_rowlast = -R"(rowlast(cols) +R"(rowlast(*cols) -- For each row, find the last non-missing value in `cols`. If all values in a row are missing, then this function will also produce a missing value. + Parameters ---------- cols: Expr @@ -119,11 +185,6 @@ return: Expr except: TypeError The exception is raised when input columns have incompatible types. -See Also --------- - -- :func:`rowfirst()` -- find the first non-missing value row-wise. 
- Examples -------- @@ -183,69 +244,24 @@ Examples 3 | 4 4 | 1 [5 rows x 1 column] -)"; - - -py::PKArgs args_rowfirst(0, 0, 0, true, false, {}, "rowfirst", doc_rowfirst); -py::PKArgs args_rowlast(0, 0, 0, true, false, {}, "rowlast", doc_rowlast); - - - -template -static bool op_rowfirstlast(size_t i, T* out, const colvec& columns) { - size_t n = columns.size(); - for (size_t j = 0; j < n; ++j) { - bool xvalid = columns[FIRST? j : n - j - 1].get_element(i, out); - if (xvalid) return true; - } - return false; -} - - -template -static inline Column _rowfirstlast(colvec&& columns, SType outtype, bool FIRST) -{ - auto fn = FIRST? op_rowfirstlast - : op_rowfirstlast; - size_t nrows = columns[0].nrows(); - return Column(new FuncNary_ColumnImpl( - std::move(columns), fn, nrows, outtype)); -} - - - -Column naryop_rowfirstlast(colvec&& columns, bool FIRST) { - if (columns.empty()) { - return Const_ColumnImpl::make_na_column(1); - } - const char* fnname = FIRST? "rowfirst" : "rowlast"; - // Detect common stype - SType stype0 = SType::VOID; - for (const auto& col : columns) { - stype0 = common_stype(stype0, col.stype()); - } - if (stype0 == SType::INVALID) { - throw TypeError() << "Incompatible column types in function `" << fnname << "`"; - } - promote_columns(columns, stype0); - switch (stype0) { - case SType::BOOL: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::INT8: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::INT16: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::INT32: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::INT64: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::FLOAT32: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::FLOAT64: return _rowfirstlast(std::move(columns), stype0, FIRST); - case SType::STR32: - case SType::STR64: return _rowfirstlast(std::move(columns), stype0, FIRST); - default: { - throw TypeError() << "Unknown 
type " << stype0; - } - } -} +See Also +-------- +- :func:`rowfirst()` -- find the first non-missing value row-wise. +)"; +DECLARE_PYFN(&py_rowfn) + ->name("rowfirst") + ->docs(doc_rowfirst) + ->allow_varargs() + ->add_info(FN_ROWFIRST); + +DECLARE_PYFN(&py_rowfn) + ->name("rowlast") + ->docs(doc_rowlast) + ->allow_varargs() + ->add_info(FN_ROWLAST); diff --git a/src/core/expr/fnary/rowmean.cc b/src/core/expr/fnary/rowmean.cc index 3e8fe797b5..b7dfcdcc00 100644 --- a/src/core/expr/fnary/rowmean.cc +++ b/src/core/expr/fnary/rowmean.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019-2020 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,44 +20,17 @@ // IN THE SOFTWARE. //------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "python/xargs.h" namespace dt { namespace expr { - -static const char* doc_rowmean = -R"(rowmean(cols) --- - -For each row, find the mean values among the columns from `cols` -skipping missing values. If a row contains only the missing values, -this function will produce a missing value too. - -Parameters ----------- -cols: Expr - Input columns. - -return: Expr - f-expression consisting of one column that has the same number of rows - as in `cols`. The column stype is `float32` when all the `cols` - are `float32`, and `float64` in all the other cases. - -except: TypeError - The exception is raised when `cols` has non-numeric columns. - -See Also --------- - -- :func:`rowsd()` -- calculate the standard deviation row-wise. 
- -)"; - -py::PKArgs args_rowmean(0, 0, 0, true, false, {}, "rowmean", doc_rowmean); - +std::string FExpr_RowMean::name() const { + return "rowmean"; +} template @@ -88,11 +61,11 @@ static inline Column _rowmean(colvec&& columns) { } -Column naryop_rowmean(colvec&& columns) { +Column FExpr_RowMean::apply_function(colvec&& columns) const { if (columns.empty()) { return Const_ColumnImpl::make_na_column(1); } - SType res_stype = detect_common_numeric_stype(columns, "rowmean"); + SType res_stype = common_numeric_stype(columns); if (res_stype == SType::INT32 || res_stype == SType::INT64) { res_stype = SType::FLOAT64; } @@ -109,5 +82,41 @@ Column naryop_rowmean(colvec&& columns) { +static const char* doc_rowmean = +R"(rowmean(*cols) +-- + +For each row, find the mean values among the columns from `cols` +skipping missing values. If a row contains only the missing values, +this function will produce a missing value too. + + +Parameters +---------- +cols: FExpr + Input columns. + +return: FExpr + f-expression consisting of one column that has the same number of rows + as in `cols`. The column stype is `float32` when all the `cols` + are `float32`, and `float64` in all the other cases. + +except: TypeError + The exception is raised when `cols` has non-numeric columns. + + +See Also +-------- +- :func:`rowsd()` -- calculate the standard deviation row-wise. 
+)"; + +DECLARE_PYFN(&py_rowfn) + ->name("rowmean") + ->docs(doc_rowmean) + ->allow_varargs() + ->add_info(FN_ROWMEAN); + + + }} // namespace dt::expr diff --git a/src/core/expr/fnary/rowminmax.cc b/src/core/expr/fnary/rowminmax.cc index 145845ac68..6f96377834 100644 --- a/src/core/expr/fnary/rowminmax.cc +++ b/src/core/expr/fnary/rowminmax.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,26 +20,89 @@ // IN THE SOFTWARE. //------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "python/xargs.h" namespace dt { namespace expr { +template +std::string FExpr_RowMinMax::name() const { + return MIN? 
"rowmin" : "rowmax"; +} + + + +template +static bool op_rowminmax(size_t i, T* out, const colvec& columns) { + bool minmax_valid = false; + T minmax = 0; + for (const auto& col : columns) { + T x; + bool xvalid = col.get_element(i, &x); + if (!xvalid) continue; + if (minmax_valid) { + if (MIN) { if (x < minmax) minmax = x; } + else { if (x > minmax) minmax = x; } + } else { + minmax = x; + minmax_valid = true; + } + } + *out = minmax; + return minmax_valid; +} + + +template +static inline Column _rowminmax(colvec&& columns) { + auto fn = op_rowminmax; + size_t nrows = columns[0].nrows(); + return Column(new FuncNary_ColumnImpl( + std::move(columns), fn, nrows, stype_from)); +} + + +template +Column FExpr_RowMinMax::apply_function(colvec&& columns) const { + if (columns.empty()) { + return Const_ColumnImpl::make_na_column(1); + } + SType res_stype = common_numeric_stype(columns); + promote_columns(columns, res_stype); + + switch (res_stype) { + case SType::INT32: return _rowminmax(std::move(columns)); + case SType::INT64: return _rowminmax(std::move(columns)); + case SType::FLOAT32: return _rowminmax(std::move(columns)); + case SType::FLOAT64: return _rowminmax(std::move(columns)); + default: throw RuntimeError() + << "Wrong `res_stype` in `naryop_rowminmax()`: " + << res_stype; // LCOV_EXCL_LINE + } +} + +template class FExpr_RowMinMax; +template class FExpr_RowMinMax; + + + static const char* doc_rowmin = -R"(rowmin(cols) +R"(rowmin(*cols) -- For each row, find the smallest value among the columns from `cols`, excluding missing values. + Parameters ---------- -cols: Expr +cols: FExpr Input columns. -return: Expr +return: FExpr f-expression consisting of one column that has the same number of rows as in `cols`. The column stype is the smallest common stype for `cols`, but not less than `int32`. @@ -47,13 +110,9 @@ return: Expr except: TypeError The exception is raised when `cols` has non-numeric columns. 
-See Also --------- - -- :func:`rowmax()` -- find the largest element row-wise. -Example -------- +Examples +-------- :: >>> from datatable import dt, f @@ -85,21 +144,25 @@ Example [5 rows x 1 column] +See Also +-------- +- :func:`rowmax()` -- find the largest element row-wise. )"; static const char* doc_rowmax = -R"(rowmax(cols) +R"(rowmax(*cols) -- For each row, find the largest value among the columns from `cols`. + Parameters ---------- -cols: Expr +cols: FExpr Input columns. -return: Expr +return: FExpr f-expression consisting of one column that has the same number of rows as in `cols`. The column stype is the smallest common stype for `cols`, but not less than `int32`. @@ -107,13 +170,9 @@ return: Expr except: TypeError The exception is raised when `cols` has non-numeric columns. -See Also --------- - -- :func:`rowmin()` -- find the smallest element row-wise. -Example -------- +Examples +-------- :: >>> from datatable import dt, f @@ -145,62 +204,22 @@ Example [5 rows x 1 column] +See Also +-------- +- :func:`rowmin()` -- find the smallest element row-wise. )"; -py::PKArgs args_rowmin(0, 0, 0, true, false, {}, "rowmin", doc_rowmin); -py::PKArgs args_rowmax(0, 0, 0, true, false, {}, "rowmax", doc_rowmax); - - - -template -static bool op_rowminmax(size_t i, T* out, const colvec& columns) { - bool minmax_valid = false; - T minmax = 0; - for (const auto& col : columns) { - T x; - bool xvalid = col.get_element(i, &x); - if (!xvalid) continue; - if (minmax_valid) { - if (MIN) { if (x < minmax) minmax = x; } - else { if (x > minmax) minmax = x; } - } else { - minmax = x; - minmax_valid = true; - } - } - *out = minmax; - return minmax_valid; -} - - -template -static inline Column _rowminmax(colvec&& columns, bool MIN) { - auto fn = MIN? 
op_rowminmax - : op_rowminmax; - size_t nrows = columns[0].nrows(); - return Column(new FuncNary_ColumnImpl( - std::move(columns), fn, nrows, stype_from)); -} - - -Column naryop_rowminmax(colvec&& columns, bool MIN) { - if (columns.empty()) { - return Const_ColumnImpl::make_na_column(1); - } - const char* fnname = MIN? "rowmin" : "rowmax"; - SType res_stype = detect_common_numeric_stype(columns, fnname); - promote_columns(columns, res_stype); - - switch (res_stype) { - case SType::INT32: return _rowminmax(std::move(columns), MIN); - case SType::INT64: return _rowminmax(std::move(columns), MIN); - case SType::FLOAT32: return _rowminmax(std::move(columns), MIN); - case SType::FLOAT64: return _rowminmax(std::move(columns), MIN); - default: throw RuntimeError() - << "Wrong `res_stype` in `naryop_rowminmax()`: " - << res_stype; // LCOV_EXCL_LINE - } -} +DECLARE_PYFN(&py_rowfn) + ->name("rowmin") + ->docs(doc_rowmin) + ->allow_varargs() + ->add_info(FN_ROWMIN); + +DECLARE_PYFN(&py_rowfn) + ->name("rowmax") + ->docs(doc_rowmax) + ->allow_varargs() + ->add_info(FN_ROWMAX); diff --git a/src/core/expr/fnary/rowsd.cc b/src/core/expr/fnary/rowsd.cc index 37ffc3e82b..097aafa8fa 100644 --- a/src/core/expr/fnary/rowsd.cc +++ b/src/core/expr/fnary/rowsd.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019-2020 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -20,44 +20,17 @@ // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ #include -#include "expr/fnary/fnary.h" #include "column/const.h" #include "column/func_nary.h" +#include "expr/fnary/fnary.h" +#include "python/xargs.h" namespace dt { namespace expr { - -static const char* doc_rowsd = -R"(rowsd(cols) --- - -For each row, find the standard deviation among the columns from `cols` -skipping missing values. If a row contains only the missing values, -this function will produce a missing value too. - -Parameters ----------- -cols: Expr - Input columns. - -return: Expr - f-expression consisting of one column that has the same number of rows - as in `cols`. The column stype is `float32` when all the `cols` - are `float32`, and `float64` in all the other cases. - -except: TypeError - The exception is raised when `cols` has non-numeric columns. - -See Also --------- - -- :func:`rowmean()` -- calculate the mean value row-wise. - -)"; - -py::PKArgs args_rowsd(0, 0, 0, true, false, {}, "rowsd", doc_rowsd); - +std::string FExpr_RowSd::name() const { + return "rowsd"; +} template @@ -92,11 +65,11 @@ static inline Column _rowsd(colvec&& columns) { } -Column naryop_rowsd(colvec&& columns) { +Column FExpr_RowSd::apply_function(colvec&& columns) const { if (columns.empty()) { return Const_ColumnImpl::make_na_column(1); } - SType res_stype = detect_common_numeric_stype(columns, "rowsd"); + SType res_stype = common_numeric_stype(columns); if (res_stype == SType::INT32 || res_stype == SType::INT64) { res_stype = SType::FLOAT64; } @@ -114,4 +87,40 @@ Column naryop_rowsd(colvec&& columns) { +static const char* doc_rowsd = +R"(rowsd(*cols) +-- + +For each row, find the standard deviation among the columns from `cols` +skipping missing values. If a row contains only the missing values, +this function will produce a missing value too. + + +Parameters +---------- +cols: FExpr + Input columns. 
+ +return: FExpr + f-expression consisting of one column that has the same number of rows + as in `cols`. The column stype is `float32` when all the `cols` + are `float32`, and `float64` in all the other cases. + +except: TypeError + The exception is raised when `cols` has non-numeric columns. + + +See Also +-------- +- :func:`rowmean()` -- calculate the mean value row-wise. +)"; + +DECLARE_PYFN(&py_rowfn) + ->name("rowsd") + ->docs(doc_rowsd) + ->allow_varargs() + ->add_info(FN_ROWSD); + + + }} // namespace dt::expr diff --git a/src/core/expr/fnary/rowsum.cc b/src/core/expr/fnary/rowsum.cc index e420d3e97c..24a409bbda 100644 --- a/src/core/expr/fnary/rowsum.cc +++ b/src/core/expr/fnary/rowsum.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -23,42 +23,15 @@ #include "column/func_nary.h" #include "expr/fnary/fnary.h" #include "models/utils.h" +#include "python/xargs.h" namespace dt { namespace expr { -static const char* doc_rowsum = -R"(rowsum(cols) --- - -For each row, calculate the sum of all values in `cols`. Missing values -are treated as if they are zeros and skipped during the calcultion. - -Parameters ----------- -cols: Expr - Input columns. - -return: Expr - f-expression consisting of one column and the same number - of rows as in `cols`. The stype of the resulting column - will be the smallest common stype calculated for `cols`, - but not less than `int32`. - -except: TypeError - The exception is raised when one of the columns from `cols` - has a non-numeric type. - -See Also --------- - -- :func:`rowcount()` -- count non-missing values row-wise. 
- -)"; - -py::PKArgs args_rowsum(0, 0, 0, true, false, {}, "rowsum", doc_rowsum); - +std::string FExpr_RowSum::name() const { + return "rowsum"; +} template @@ -84,11 +57,11 @@ static inline Column _rowsum(colvec&& columns) { } -Column naryop_rowsum(colvec&& columns) { +Column FExpr_RowSum::apply_function(colvec&& columns) const { if (columns.empty()) { return Const_ColumnImpl::make_int_column(1, 0, SType::INT32); } - SType res_stype = detect_common_numeric_stype(columns, "rowsum"); + SType res_stype = common_numeric_stype(columns); promote_columns(columns, res_stype); switch (res_stype) { @@ -104,5 +77,42 @@ Column naryop_rowsum(colvec&& columns) { +static const char* doc_rowsum = +R"(rowsum(*cols) +-- + +For each row, calculate the sum of all values in `cols`. Missing values +are treated as if they are zeros and skipped during the calcultion. + + +Parameters +---------- +cols: FExpr + Input columns. + +return: FExpr + f-expression consisting of one column and the same number + of rows as in `cols`. The stype of the resulting column + will be the smallest common stype calculated for `cols`, + but not less than `int32`. + +except: TypeError + The exception is raised when one of the columns from `cols` + has a non-numeric type. + + +See Also +-------- +- :func:`rowcount()` -- count non-missing values row-wise. 
+)"; + +DECLARE_PYFN(&py_rowfn) + ->name("rowsum") + ->docs(doc_rowsum) + ->allow_varargs() + ->add_info(FN_ROWSUM); + + + }} // namespace dt::expr diff --git a/src/core/expr/head_func.cc b/src/core/expr/head_func.cc index 7317068896..3a53f37f53 100644 --- a/src/core/expr/head_func.cc +++ b/src/core/expr/head_func.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2019-2020 H2O.ai +// Copyright 2019-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -142,12 +142,6 @@ static ptrHead make_reduce2(Op op, const py::otuple& params) { } -static ptrHead make_rowfn(Op op, const py::otuple& params) { - xassert(params.size() == 0); - (void) params; - return ptrHead(new Head_Func_Nary(op)); -} - std::unordered_map Head_Func::factory; @@ -157,7 +151,6 @@ void Head_Func::init() { for (size_t i = BINOP_FIRST; i <= BINOP_LAST; ++i) factory[i] = make_binop; for (size_t i = REDUCER_FIRST; i <= REDUCER_LAST; ++i) factory[i] = make_reduce1; for (size_t i = MATH_FIRST; i <= MATH_LAST; ++i) factory[i] = make_unop; - for (size_t i = ROWFNS_FIRST; i <= ROWFNS_LAST; ++i) factory[i] = make_rowfn; factory[static_cast(Op::SETPLUS)] = make_colsetop; factory[static_cast(Op::SETMINUS)] = make_colsetop; factory[static_cast(Op::SHIFTFN)] = &Head_Func_Shift::make; diff --git a/src/core/expr/head_func.h b/src/core/expr/head_func.h index 04a3457f4c..3b2cb7a81d 100644 --- a/src/core/expr/head_func.h +++ b/src/core/expr/head_func.h @@ -91,17 +91,6 @@ class Head_Func_Binary : public Head_Func { -class Head_Func_Nary : public Head_Func { - private: - Op op_; - - public: - explicit Head_Func_Nary(Op); - Workframe evaluate_n(const vecExpr&, EvalContext&) const override; -}; - - - class Head_Func_Shift : public Head_Func { private: int shift_; diff --git a/src/core/expr/head_func_nary.cc b/src/core/expr/head_func_nary.cc 
deleted file mode 100644 index eed665c0ac..0000000000 --- a/src/core/expr/head_func_nary.cc +++ /dev/null @@ -1,59 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2019-2020 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. 
-//------------------------------------------------------------------------------ -#include "expr/fnary/fnary.h" -#include "expr/expr.h" -#include "expr/head_func.h" -#include "expr/workframe.h" -#include "utils/assert.h" -#include "utils/exceptions.h" -namespace dt { -namespace expr { - - -Head_Func_Nary::Head_Func_Nary(Op op) : op_(op) {} - - -Workframe Head_Func_Nary::evaluate_n( - const vecExpr& args, EvalContext& ctx) const -{ - Workframe inputs(ctx); - for (const auto& arg : args) { - inputs.cbind(arg->evaluate_n(ctx)); - } - - Grouping gmode = inputs.get_grouping_mode(); - std::vector columns; - columns.reserve(inputs.ncols()); - for (size_t i = 0; i < inputs.ncols(); ++i) { - columns.emplace_back(inputs.retrieve_column(i)); - } - - Column res = naryop(op_, std::move(columns)); - Workframe out(ctx); - out.add_column(std::move(res), "", gmode); - return out; -} - - - - -}} // namespace dt::expr diff --git a/src/core/expr/op.h b/src/core/expr/op.h index 2db729f9e9..e5ba43aaa5 100644 --- a/src/core/expr/op.h +++ b/src/core/expr/op.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2018-2019 H2O.ai +// Copyright 2018-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -36,8 +36,6 @@ static constexpr size_t REDUCER_FIRST = 401; static constexpr size_t REDUCER_LAST = 412; static constexpr size_t MATH_FIRST = 501; static constexpr size_t MATH_LAST = 554; -static constexpr size_t ROWFNS_FIRST = 601; -static constexpr size_t ROWFNS_LAST = 610; static constexpr size_t UNOP_COUNT = UNOP_LAST - UNOP_FIRST + 1; static constexpr size_t BINOP_COUNT = BINOP_LAST - BINOP_FIRST + 1; static constexpr size_t REDUCER_COUNT = REDUCER_LAST - REDUCER_FIRST + 1; @@ -148,18 +146,6 @@ enum class Op : size_t { FMOD, MAXIMUM, MINIMUM = MATH_LAST, - - // Row-functions - ROWALL = ROWFNS_FIRST, // 
fnary/rowall.cc - ROWANY, // fnary/rowany.cc - ROWCOUNT, // fnary/rowcount.cc - ROWFIRST, // fnary/rowfirst.cc - ROWLAST, // fnary/rowlast.cc - ROWMAX, // fnary/rowmax.cc - ROWMEAN, // fnary/rowmean.cc - ROWMIN, // fnary/rowmin.cc - ROWSD, // fnary/rowsd.cc - ROWSUM = ROWFNS_LAST, // fnary/rowsum.cc }; diff --git a/src/core/python/xargs.cc b/src/core/python/xargs.cc index dba1b323e3..2de08961e4 100644 --- a/src/core/python/xargs.cc +++ b/src/core/python/xargs.cc @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2018-2020 H2O.ai +// Copyright 2018-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -137,6 +137,11 @@ XArgs* XArgs::docs(const char* str) { return this; } +XArgs* XArgs::add_info(int info) { + info_ = info; + return this; +} + size_t XArgs::n_positional_args() const { return nargs_posonly_; @@ -163,6 +168,9 @@ const char* XArgs::arg_name(size_t i) const { return arg_names_[i]; } +int XArgs::get_info() const { + return info_; +} @@ -357,15 +365,21 @@ const Arg& XArgs::operator[](size_t i) const { return bound_args_[i]; } -size_t XArgs::num_varargs() const noexcept { - return n_varargs_; -} - size_t XArgs::num_varkwds() const noexcept { return n_varkwds_; } + + +//------------------------------------------------------------------------------ +// varargs +//------------------------------------------------------------------------------ + +size_t XArgs::num_varargs() const noexcept { + return n_varargs_; +} + py::robj XArgs::vararg(size_t i) const { xassert(i < n_varargs_); auto j = static_cast(i + n_bound_args_); @@ -373,6 +387,48 @@ py::robj XArgs::vararg(size_t i) const { return py::robj(PyTuple_GET_ITEM(args_tuple_, j)); } +XArgs::VarArgsIterable XArgs::varargs() const noexcept { + return XArgs::VarArgsIterable(*this); +} + + + +XArgs::VarArgsIterable::VarArgsIterable(const 
XArgs& args) + : parent_(args) {} + +XArgs::VarArgsIterator XArgs::VarArgsIterable::begin() const { + size_t i0 = parent_.n_bound_args_; + return XArgs::VarArgsIterator(parent_, i0); +} + +XArgs::VarArgsIterator XArgs::VarArgsIterable::end() const { + size_t i1 = parent_.n_bound_args_ + parent_.n_varargs_; + return XArgs::VarArgsIterator(parent_, i1); +} + + + +XArgs::VarArgsIterator::VarArgsIterator(const XArgs& args, size_t i0) + : parent_(args), pos_(static_cast(i0)) {} + +XArgs::VarArgsIterator& XArgs::VarArgsIterator::operator++() { + ++pos_; + return *this; +} + +py::robj XArgs::VarArgsIterator::operator*() const { + return py::robj(PyTuple_GET_ITEM(parent_.args_tuple_, pos_)); +} + +bool XArgs::VarArgsIterator::operator==(const VarArgsIterator& other) const { + return (pos_ == other.pos_); +} + +bool XArgs::VarArgsIterator::operator!=(const VarArgsIterator& other) const { + return (pos_ != other.pos_); +} + + diff --git a/src/core/python/xargs.h b/src/core/python/xargs.h index 80d26c7e2c..b33e6792ad 100644 --- a/src/core/python/xargs.h +++ b/src/core/python/xargs.h @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// Copyright 2018-2020 H2O.ai +// Copyright 2018-2021 H2O.ai // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -53,7 +53,8 @@ class XArgs : public ArgParent { bool has_varargs_; bool has_varkwds_; bool has_renamed_args_; - size_t : 40; + int : 8; + int info_; // custom user info that can be stored inside XArgs // Runtime arguments std::vector bound_args_; @@ -79,6 +80,7 @@ class XArgs : public ArgParent { XArgs* allow_varargs(); XArgs* allow_varkwds(); XArgs* docs(const char*); + XArgs* add_info(int); size_t n_positional_args() const override; size_t n_positional_or_keyword_args() const override; @@ -132,9 +134,40 @@ class XArgs : public ArgParent { size_t num_varargs() const noexcept; size_t 
num_varkwds() const noexcept; py::robj vararg(size_t i) const; + int get_info() const; + + + class VarArgsIterator { + private: + const XArgs& parent_; + Py_ssize_t pos_; // position within parent_'s arg_tuple_ + + public: + using value_type = py::robj; + using category_type = std::input_iterator_tag; + VarArgsIterator(const XArgs& args, size_t i0); + VarArgsIterator(const VarArgsIterator&) = default; + VarArgsIterator& operator=(const VarArgsIterator&) = delete; + VarArgsIterator& operator++(); + value_type operator*() const; + bool operator==(const VarArgsIterator& other) const; + bool operator!=(const VarArgsIterator& other) const; + }; + + class VarArgsIterable { + private: + const XArgs& parent_; + public: + using iterator = VarArgsIterator; + VarArgsIterable(const XArgs&); + iterator begin() const; + iterator end() const; + }; + + VarArgsIterable varargs() const noexcept; + // VarKwdsIterable varkwds() const noexcept; - // VarArgsIterable varargs() const noexcept; // template T get(size_t i) const; // template T get(size_t i, T default_value) const; @@ -148,17 +181,18 @@ class XArgs : public ArgParent { void bind(PyObject* args, PyObject* kwds); size_t find_kwd(PyObject* kwd); - // friend class VarArgsIterable; - // friend class VarArgsIterator; // friend class VarKwdsIterator; }; +#define PASTE_TOKENS(x, y) x ## y +#define PASTE_TOKENS2(x, y) PASTE_TOKENS(x, y) +#define ARGS_NAME PASTE_TOKENS2(args_, __LINE__) #define DECLARE_PYFN(fn) \ - static py::XArgs* args_ ## __LINE__ = (new py::XArgs(fn)) \ + static py::XArgs* ARGS_NAME = (new py::XArgs(fn)) \ ->pyfunction( \ [](PyObject*, PyObject* args, PyObject* kwds) -> PyObject* { \ - return (args_ ## __LINE__)->exec_function(args, kwds); \ + return ARGS_NAME->exec_function(args, kwds); \ }) diff --git a/src/datatable/expr/expr.py b/src/datatable/expr/expr.py index b02a62f2a0..5df60fafc1 100644 --- a/src/datatable/expr/expr.py +++ b/src/datatable/expr/expr.py @@ -134,18 +134,6 @@ class OpCodes(enum.Enum): MAXIMUM 
= 553 MINIMUM = 554 - # Row-functions - ROWALL = 601 - ROWANY = 602 - ROWCOUNT = 603 - ROWFIRST = 604 - ROWLAST = 605 - ROWMAX = 606 - ROWMEAN = 607 - ROWMIN = 608 - ROWSD = 609 - ROWSUM = 610 - #------------------------------------------------------------------------------- diff --git a/tests/ijby/test-rowwise.py b/tests/ijby/test-rowwise.py index 52d559f40a..43900bd08d 100644 --- a/tests/ijby/test-rowwise.py +++ b/tests/ijby/test-rowwise.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- #------------------------------------------------------------------------------- -# Copyright 2019-2020 H2O.ai +# Copyright 2019-2021 H2O.ai # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -34,21 +34,16 @@ stypes_float = ltype.real.stypes stypes_str = ltype.str.stypes +rowfns = [rowall, rowany, rowsum, rowcount, rowmin, rowmax, + rowfirst, rowlast, rowmean, rowsd] -def test_reprs(): - # Check that row-expressions can be repr'd without errors - assert repr(rowall()) - assert repr(rowany()) - assert repr(rowsum()) - assert repr(rowcount()) - assert repr(rowmin()) - assert repr(rowmax()) - assert repr(rowfirst()) - assert repr(rowlast()) - assert repr(rowmean()) - assert repr(rowsd()) - +@pytest.mark.parametrize('rowfn', rowfns) +def test_reprs(rowfn): + name = rowfn.__name__ + assert repr(rowfn()) == f"FExpr<{name}([])>" + assert repr(rowfn(f[:])) == f"FExpr<{name}(f[:])>" + assert repr(rowfn(f.A, f.B, f.C)) == f"FExpr<{name}([f.A, f.B, f.C])>" @@ -66,7 +61,7 @@ def test_rowall_simple(): def test_rowall_single_column(): DT = dt.Frame([[True, False, None, True]]) - RES = rowall(DT) + RES = DT[:, rowall(f[:])] assert_equals(RES, dt.Frame([True, False, False, True])) @@ -233,7 +228,7 @@ def test_rowminmax_floats(): def test_rowmean_simple(): DT = dt.Frame(A=range(5)) - assert_equals(rowmean(DT), dt.Frame(range(5), stype=dt.float64)) + assert_equals(DT[:, 
rowmean(f[:])], dt.Frame(range(5), stype=dt.float64)) def test_rowmean_floats(): @@ -250,7 +245,7 @@ def test_rowmean_wrong_types(): DT = dt.Frame(A=[3, 5, 6], B=["a", "d", "e"]) with pytest.raises(TypeError, match="Function rowmean expects a sequence " "of numeric columns"): - assert rowmean(DT) + assert DT[:, rowmean(f[:])] @@ -261,12 +256,14 @@ def test_rowmean_wrong_types(): def test_rowsd_single_column(): DT = dt.Frame(A=range(5)) - assert_equals(rowsd(DT), dt.Frame([math.nan]*5)) + RES = DT[:, rowsd(f[:])] + assert_equals(RES, dt.Frame([math.nan]*5)) def test_rowsd_same_columns(): DT = dt.Frame([range(5)] * 10) - assert_equals(rowsd(DT), dt.Frame([0.0]*5)) + RES = DT[:, rowsd(f[:])] + assert_equals(RES, dt.Frame([0.0]*5)) def test_rowsd_floats(): @@ -284,7 +281,7 @@ def test_rowsd_wrong_types(): DT = dt.Frame(A=[3, 5, 6], B=["a", "d", "e"]) with pytest.raises(TypeError, match="Function rowsd expects a sequence " "of numeric columns"): - assert rowsd(DT) + assert DT[:, rowsd(f[:])] @@ -322,16 +319,12 @@ def test_rowsum_different_types(): def test_rowsum_promote_to_float32(): DT = dt.Frame([[2], [7], [11]], stypes=[dt.int32, dt.float32, dt.int64]) - assert_equals(rowsum(DT), + assert_equals(DT[:, rowsum(f[:])], dt.Frame([20], stype=dt.float32)) def test_rowsum_promote_to_float64(): DT = dt.Frame([[2], [3], [1], [5], [None]], stypes=[dt.int8, dt.float64, dt.int64, dt.float32, dt.int16]) - assert_equals(rowsum(DT), + assert_equals(DT[:, rowsum(f[:])], dt.Frame([11], stype=dt.float64)) - - - -