Skip to content

Commit

Permalink
new feature generator is connected to gd.h - it uses new interactions…
Browse files Browse the repository at this point in the history
… preprocessing and FTRL-like hash
  • Loading branch information
trufanov-nok committed May 4, 2015
1 parent 72ceea8 commit ee1fc3c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 44 deletions.
30 changes: 2 additions & 28 deletions vowpalwabbit/gd.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "parse_regressor.h"
#include "constant.h"
#include "interactions.h"

namespace GD{
LEARNER::base_learner* setup(vw& all);
Expand Down Expand Up @@ -69,34 +70,7 @@ namespace GD{
for (unsigned char* i = ec.indices.begin; i != ec.indices.end; i++)
foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[*i].begin, ec.atomics[*i].end, dat, offset);

for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++) {
if (ec.atomics[(unsigned char)(*i)[0]].size() > 0) {
v_array<feature> temp = ec.atomics[(unsigned char)(*i)[0]];
for (; temp.begin != temp.end; temp.begin++)
{
uint32_t halfhash = quadratic_constant * (temp.begin->weight_index) + offset;

foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(unsigned char)(*i)[1]].begin, ec.atomics[(unsigned char)(*i)[1]].end, dat,
halfhash, temp.begin->x);
}
}
}

for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++) {
if ((ec.atomics[(unsigned char)(*i)[0]].size() == 0) || (ec.atomics[(unsigned char)(*i)[1]].size() == 0) || (ec.atomics[(unsigned char)(*i)[2]].size() == 0)) { continue; }
v_array<feature> temp1 = ec.atomics[(unsigned char)(*i)[0]];
for (; temp1.begin != temp1.end; temp1.begin++) {
v_array<feature> temp2 = ec.atomics[(unsigned char)(*i)[1]];
for (; temp2.begin != temp2.end; temp2.begin++) {

uint32_t a = temp1.begin->weight_index;
uint32_t b = temp2.begin->weight_index;
uint32_t halfhash = cubic_constant2 * (cubic_constant * a + b) + offset;
float mult = temp1.begin->x * temp2.begin->x;
foreach_feature<R,T>(all.reg.weight_vector, all.reg.weight_mask, ec.atomics[(unsigned char)(*i)[2]].begin, ec.atomics[(unsigned char)(*i)[2]].end, dat, halfhash, mult);
}
}
}
INTERACTIONS::generate_interactions<R,S,T>(all, ec, dat);
}

// iterate through all namespaces and quadratic&cubic features, callback function T(some_data_R, feature_value_x, feature_weight)
Expand Down
24 changes: 8 additions & 16 deletions vowpalwabbit/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace po = boost::program_options;
#include "unique_sort.h"
#include "constant.h"
#include "vw.h"
#include "interactions.h"

using namespace std;

Expand Down Expand Up @@ -841,22 +842,13 @@ void setup_example(vw& all, example* ae)
ae->total_sum_feat_sq += ae->sum_feat_sq[*i];
}

for (vector<string>::iterator i = all.pairs.begin(); i != all.pairs.end();i++)
{
ae->num_features
+= ae->atomics[(int)(*i)[0]].size()
*ae->atomics[(int)(*i)[1]].size();
ae->total_sum_feat_sq += ae->sum_feat_sq[(int)(*i)[0]]*ae->sum_feat_sq[(int)(*i)[1]];
}

for (vector<string>::iterator i = all.triples.begin(); i != all.triples.end();i++)
{
ae->num_features
+= ae->atomics[(int)(*i)[0]].size()
*ae->atomics[(int)(*i)[1]].size()
*ae->atomics[(int)(*i)[2]].size();
ae->total_sum_feat_sq += ae->sum_feat_sq[(int)(*i)[0]] * ae->sum_feat_sq[(int)(*i)[1]] * ae->sum_feat_sq[(int)(*i)[2]];
}
// add the count and the sum of squared values of the features generated by all interactions
size_t new_features_cnt;
float new_features_sum_feat_sq;
INTERACTIONS::eval_count_of_generated_ft(all, *ae, new_features_cnt, new_features_sum_feat_sq);
ae->num_features += new_features_cnt;
ae->total_sum_feat_sq += new_features_sum_feat_sq;

}
}

Expand Down

0 comments on commit ee1fc3c

Please sign in to comment.