Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aggregator #1156

Merged
merged 20 commits into from
Jul 19, 2018
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ fast_objects = $(addprefix $(BUILDDIR)/, \
expr/py_expr.o \
expr/reduceop.o \
expr/unaryop.o \
extras/aggregator.o \
groupby.o \
jay/open_jay.o \
jay/save_jay.o \
Expand Down Expand Up @@ -654,6 +655,9 @@ $(BUILDDIR)/expr/py_expr.h: c/expr/py_expr.h $(BUILDDIR)/column.h $(BUILDDIR)/gr
@echo • Refreshing c/expr/py_expr.h
@cp c/expr/py_expr.h $@

$(BUILDDIR)/extras/aggregator.h: c/extras/aggregator.h $(BUILDDIR)/datatable.h $(BUILDDIR)/py_datatable.h $(BUILDDIR)/rowindex.h $(BUILDDIR)/types.h
@echo • Refreshing c/extras/aggregator.h
@cp c/extras/aggregator.h $@

$(BUILDDIR)/python/float.h: c/python/float.h $(BUILDDIR)/utils/pyobj.h
@echo • Refreshing c/python/float.h
Expand Down Expand Up @@ -802,7 +806,7 @@ $(BUILDDIR)/datatable_rbind.o : c/datatable_rbind.cc $(BUILDDIR)/column.h $(BUIL
@echo • Compiling $<
@$(CC) -c $< $(CCFLAGS) -o $@

$(BUILDDIR)/datatablemodule.o : c/datatablemodule.cc $(BUILDDIR)/capi.h $(BUILDDIR)/csv/py_csv.h $(BUILDDIR)/csv/writer.h $(BUILDDIR)/expr/py_expr.h $(BUILDDIR)/options.h $(BUILDDIR)/py_column.h $(BUILDDIR)/py_columnset.h $(BUILDDIR)/py_datatable.h $(BUILDDIR)/py_datawindow.h $(BUILDDIR)/py_encodings.h $(BUILDDIR)/py_groupby.h $(BUILDDIR)/py_rowindex.h $(BUILDDIR)/py_types.h $(BUILDDIR)/py_utils.h $(BUILDDIR)/utils/assert.h
$(BUILDDIR)/datatablemodule.o : c/datatablemodule.cc $(BUILDDIR)/capi.h $(BUILDDIR)/csv/py_csv.h $(BUILDDIR)/csv/writer.h $(BUILDDIR)/expr/py_expr.h $(BUILDDIR)/extras/aggregator.h $(BUILDDIR)/options.h $(BUILDDIR)/py_column.h $(BUILDDIR)/py_columnset.h $(BUILDDIR)/py_datatable.h $(BUILDDIR)/py_datawindow.h $(BUILDDIR)/py_encodings.h $(BUILDDIR)/py_groupby.h $(BUILDDIR)/py_rowindex.h $(BUILDDIR)/py_types.h $(BUILDDIR)/py_utils.h $(BUILDDIR)/utils/assert.h
@echo • Compiling $<
@$(CC) -c $< $(CCFLAGS) -o $@

Expand All @@ -825,6 +829,10 @@ $(BUILDDIR)/expr/reduceop.o : c/expr/reduceop.cc $(BUILDDIR)/expr/py_expr.h $(BU
$(BUILDDIR)/expr/unaryop.o : c/expr/unaryop.cc $(BUILDDIR)/expr/py_expr.h $(BUILDDIR)/types.h
@echo • Compiling $<
@$(CC) -c $< $(CCFLAGS) -o $@

$(BUILDDIR)/extras/aggregator.o : c/extras/aggregator.cc $(BUILDDIR)/extras/aggregator.h $(BUILDDIR)/py_utils.h $(BUILDDIR)/rowindex.h $(BUILDDIR)/types.h $(BUILDDIR)/utils/omp.h $(BUILDDIR)/utils/pyobj.h
@echo • Compiling $<
@$(CC) -c $< $(CCFLAGS) -o $@

$(BUILDDIR)/groupby.o : c/groupby.cc $(BUILDDIR)/utils/exceptions.h $(BUILDDIR)/groupby.h
@echo • Compiling $<
Expand Down
2 changes: 2 additions & 0 deletions c/datatablemodule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "py_types.h"
#include "py_utils.h"
#include "utils/assert.h"
#include "extras/aggregator.h"

extern void init_jay();

Expand Down Expand Up @@ -191,6 +192,7 @@ static PyMethodDef DatatableModuleMethods[] = {
METHODv(expr_unaryop),
METHOD0(is_debug_mode),
METHOD0(has_omp_support),
METHODv(aggregate),

{nullptr, nullptr, 0, nullptr} /* Sentinel */
};
Expand Down
41 changes: 41 additions & 0 deletions c/expr/reduceop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ enum OpCode {
Stdev = 4,
First = 5,
Sum = 6,
Count = 7,
};

template<typename T>
Expand Down Expand Up @@ -77,6 +78,24 @@ static void sum_skipna(const int32_t* groups, int32_t grp, void** params) {




//------------------------------------------------------------------------------
// Count calculation
//------------------------------------------------------------------------------

template<typename IT, typename OT>
static void count_skipna(const int32_t* groups, int32_t grp, void** params) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Traditionally, count(x) function returns the number of non-NA values in x.
In your implementation, however, the function doesn't count anything, but merely returns the number of elements in each group. It's a valid function, just not a suitable name...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reimplemented this function to match the existing name.

Column* col1 = static_cast<Column*>(params[1]);
OT* outputs = static_cast<OT*>(col1->data_w());

int32_t row0 = groups[grp];
int32_t row1 = groups[grp + 1];

outputs[grp] = static_cast<OT>(row1 - row0);
}



//------------------------------------------------------------------------------
// Mean calculation
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -200,6 +219,7 @@ static gmapperfn resolve1(int opcode) {
case OpCode::Max: return max_skipna<T1>;
case OpCode::Stdev: return stdev_skipna<T1, T2>;
case OpCode::Sum: return sum_skipna<T1, T2>;
case OpCode::Count: return count_skipna<T1, T2>;
default: return nullptr;
}
}
Expand All @@ -218,6 +238,22 @@ static gmapperfn resolve0(int opcode, SType stype) {
default: return nullptr;
}
}

if (opcode == OpCode::Count) {
switch (stype) {
case SType::BOOL:
case SType::INT8: return count_skipna<int8_t, uint64_t>;
case SType::INT16: return count_skipna<int16_t, uint64_t>;
case SType::INT32: return count_skipna<int32_t, uint64_t>;
case SType::INT64: return count_skipna<int64_t, uint64_t>;
case SType::FLOAT32: return count_skipna<float, uint64_t>;
case SType::FLOAT64: return count_skipna<double, uint64_t>;
case SType::STR32: return count_skipna<int32_t, uint64_t>;
case SType::STR64: return count_skipna<int64_t, uint64_t>;
default: return nullptr;
}
}

switch (stype) {
case SType::BOOL:
case SType::INT8: return resolve1<int8_t, double>(opcode);
Expand Down Expand Up @@ -251,6 +287,11 @@ Column* reduceop(int opcode, Column* arg, const Groupby& groupby)
res_type = SType::INT64;
}
}

if (opcode == OpCode::Count) {
res_type = SType::INT64;
}

int32_t ngrps = static_cast<int32_t>(groupby.ngroups());
if (ngrps == 0) ngrps = 1;

Expand Down