-
Notifications
You must be signed in to change notification settings - Fork 154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Aggregator #1156
Merged
Merged
Aggregator #1156
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
d2df55d
Initial commit of the aggregator (#1077)
fbd98cf
Categorical aggregations, except for the 2D case.
b50cbd7
Initial commit of ND aggregator.
81b76c9
Initial commit of 2D categorical aggregator and some improvements.
1c17235
Tests for 1D and 2D aggregators.
1f4d7f5
Extracting aggregator as a separate class.
10cf165
Merge branch 'master' into aggregator
oleksiyskononenko d025bee
Changes to make the aggregator branch be consistent with the master one.
e85f74e
Various fixes: editor problems, warnings, etc.
ea708af
Moving the Python aggregator code from `py_datatable` to
56be087
Adding __init__.py
d932738
Emptying the `__init__.py` file
57d7232
Merge remote-tracking branch 'origin/master' into aggregator.
b231984
Move machine precision `epsilon` to the `Aggregator` class.
27de79d
Generate exemplars and members dataframes
f1695dc
Proper implementation of count_skipna reducer
90a0c5a
Adding the list of parameters to the aggregate docstring
3b7fbf9
Removing trailing whitespace
b76e964
Minor count_skipna modification to avoid branching
e6979aa
Setting up correct column names for exemplars and members
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ enum OpCode { | |
Stdev = 4, | ||
First = 5, | ||
Sum = 6, | ||
Count = 7, | ||
}; | ||
|
||
template<typename T> | ||
|
@@ -77,6 +78,31 @@ static void sum_skipna(const int32_t* groups, int32_t grp, void** params) { | |
|
||
|
||
|
||
|
||
//------------------------------------------------------------------------------ | ||
// Count calculation | ||
//------------------------------------------------------------------------------ | ||
|
||
template<typename IT, typename OT> | ||
static void count_skipna(const int32_t* groups, int32_t grp, void** params) { | ||
Column* col0 = static_cast<Column*>(params[0]); | ||
Column* col1 = static_cast<Column*>(params[1]); | ||
const IT* inputs = static_cast<const IT*>(col0->data()); | ||
OT* outputs = static_cast<OT*>(col1->data_w()); | ||
OT count = 0; | ||
int32_t row0 = groups[grp]; | ||
int32_t row1 = groups[grp + 1]; | ||
col0->rowindex().strided_loop(row0, row1, 1, | ||
[&](int64_t i) { | ||
IT x = inputs[i]; | ||
if (!ISNA<IT>(x)) | ||
++count; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right, fixed. |
||
}); | ||
outputs[grp] = count; | ||
} | ||
|
||
|
||
|
||
//------------------------------------------------------------------------------ | ||
// Mean calculation | ||
//------------------------------------------------------------------------------ | ||
|
@@ -200,6 +226,7 @@ static gmapperfn resolve1(int opcode) { | |
case OpCode::Max: return max_skipna<T1>; | ||
case OpCode::Stdev: return stdev_skipna<T1, T2>; | ||
case OpCode::Sum: return sum_skipna<T1, T2>; | ||
case OpCode::Count: return count_skipna<T1, T2>; | ||
default: return nullptr; | ||
} | ||
} | ||
|
@@ -218,6 +245,22 @@ static gmapperfn resolve0(int opcode, SType stype) { | |
default: return nullptr; | ||
} | ||
} | ||
|
||
if (opcode == OpCode::Count) { | ||
switch (stype) { | ||
case SType::BOOL: | ||
case SType::INT8: return count_skipna<int8_t, uint64_t>; | ||
case SType::INT16: return count_skipna<int16_t, uint64_t>; | ||
case SType::INT32: return count_skipna<int32_t, uint64_t>; | ||
case SType::INT64: return count_skipna<int64_t, uint64_t>; | ||
case SType::FLOAT32: return count_skipna<float, uint64_t>; | ||
case SType::FLOAT64: return count_skipna<double, uint64_t>; | ||
case SType::STR32: return count_skipna<int32_t, uint64_t>; | ||
case SType::STR64: return count_skipna<int64_t, uint64_t>; | ||
default: return nullptr; | ||
} | ||
} | ||
|
||
switch (stype) { | ||
case SType::BOOL: | ||
case SType::INT8: return resolve1<int8_t, double>(opcode); | ||
|
@@ -251,6 +294,11 @@ Column* reduceop(int opcode, Column* arg, const Groupby& groupby) | |
res_type = SType::INT64; | ||
} | ||
} | ||
|
||
if (opcode == OpCode::Count) { | ||
res_type = SType::INT64; | ||
} | ||
|
||
int32_t ngrps = static_cast<int32_t>(groupby.ngroups()); | ||
if (ngrps == 0) ngrps = 1; | ||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Traditionally, the `count(x)` function returns the number of non-NA values in `x`.
In your implementation, however, the function doesn't count anything, but merely returns the number of elements in each group. It's a valid function, just not a suitable name...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reimplemented this function to match the existing name.