Permalink
Browse files

Added a new "distinct" operation to groupBy.

  • Loading branch information...
arq5x committed Dec 30, 2011
1 parent a93f95b commit 3c9fb01843d797c64be578ecbcd1be683cfe245a
Showing with 22 additions and 4 deletions.
  1. +22 −4 src/groupBy/groupBy.cpp
View
@@ -11,6 +11,7 @@ Licenced under the MIT license.
#include <vector>
#include <map>
#include <numeric>
+#include <algorithm>
#include <iterator>
#include <iostream>
#include <iomanip>
@@ -184,8 +185,8 @@ int main(int argc, char* argv[]) {
for( size_t i = 0; i < ops.size(); i++ ) {
if ((ops[i] != "sum") && (ops[i] != "max") && (ops[i] != "min") && (ops[i] != "mean") &&
(ops[i] != "mode") && (ops[i] != "median") && (ops[i] != "antimode") && (ops[i] != "stdev") &&
- (ops[i] != "sstdev") && (ops[i] != "count") && (ops[i] != "collapse") && (ops[i] != "concat") &&
- (ops[i] != "freqdesc") && (ops[i] != "freqasc"))
+ (ops[i] != "sstdev") && (ops[i] != "count") && (ops[i] != "collapse") && (ops[i] != "distinct") &&
+ (ops[i] != "concat") && (ops[i] != "freqdesc") && (ops[i] != "freqasc"))
{
cerr << endl << "*****" << endl << "*****ERROR: Invalid operation selection \"" << ops[i] << endl << "\" *****" << endl;
showHelp = true;
@@ -262,7 +263,8 @@ void ShowHelp(void) {
cerr << "\t\t\t sum, count, min, max," << endl;
cerr << "\t\t\t mean, median, mode, antimode," << endl;
cerr << "\t\t\t stdev, sstdev (sample standard dev.)," << endl;
- cerr << "\t\t\t collapse (i.e., print a comma separated list), " << endl;
+ cerr << "\t\t\t collapse (i.e., print a comma separated list (duplicates allowed)), " << endl;
+ cerr << "\t\t\t distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl;
cerr << "\t\t\t concat (i.e., merge values into a single, non-delimited string), " << endl;
cerr << "\t\t\t freqdesc (i.e., print desc. list of values:freq)" << endl;
cerr << "\t\t\t freqasc (i.e., print asc. list of values:freq)" << endl;
@@ -316,7 +318,7 @@ void GroupBy (const string &inFile,
const bool printOriginalLine,
const bool printHeaderLine,
const bool InputHaveHeaderLine,
-const bool ignoreCase) {
+ const bool ignoreCase) {
// current line number
int lineNum = 0;
@@ -431,6 +433,22 @@ void ReportSummary(const vector<string> &group, const vector<vector<string> > &d
}
result.push_back(collapse);
}
+ else if (op == "distinct") {
+ string distinct;
+ // get the current column's data
+ vector<string> col_data = data[i];
+ // remove duplicate entries from the vector
+ // http://stackoverflow.com/questions/1041620/most-efficient-way-to-erase-duplicates-and-sort-a-c-vector
+ sort( col_data.begin(), col_data.end() );
+ col_data.erase( unique( col_data.begin(), col_data.end() ), col_data.end() );
+
+ for( size_t j = 0; j < col_data.size(); j++ ) {//Ugly, but cannot use back_inserter
+ if (j>0)
+ distinct.append(",");
+ distinct.append(col_data[j]);
+ }
+ result.push_back(distinct);
+ }
else if (op == "concat") {
string concat;
for( size_t j = 0; j < data[i].size(); j++ ) {//Ugly, but cannot use back_inserter

0 comments on commit 3c9fb01

Please sign in to comment.