Skip to content

Commit

Permalink
Implemented instance weighting. Currently, only L-BFGS can support in…
Browse files Browse the repository at this point in the history
…stance weighting. Debugging later.
  • Loading branch information
chokkan committed May 2, 2012
1 parent d284755 commit a6f144b
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 10 deletions.
10 changes: 9 additions & 1 deletion frontend/reader.c
Expand Up @@ -34,6 +34,7 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <crfsuite.h>
#include "iwa.h"
Expand Down Expand Up @@ -106,7 +107,13 @@ int read_data(FILE *fpi, FILE *fpo, crfsuite_data_t* data, int group)
break;
case IWA_ITEM:
if (lid == -1) {
lid = labels->get(labels, token->attr);
if (strncmp(token->attr, "@weight:", 8) == 0) {
/* Instance weighting. */
inst.weight = atof(token->attr+8);
} else {
/* Label. */
lid = labels->get(labels, token->attr);
}
} else {
crfsuite_attribute_init(&cont);
cont.aid = attrs->get(attrs, token->attr);
Expand All @@ -124,6 +131,7 @@ int read_data(FILE *fpi, FILE *fpo, crfsuite_data_t* data, int group)
crfsuite_data_append(data, &inst);
crfsuite_instance_finish(&inst);
inst.group = group;
inst.weight = 1.;
++n;
break;
}
Expand Down
4 changes: 3 additions & 1 deletion include/crfsuite.h
Expand Up @@ -54,7 +54,7 @@ extern "C" {
*/

/** Version number of CRFSuite library. */
#define CRFSUITE_VERSION "0.12"
#define CRFSUITE_VERSION "0.12.1"

/** Copyright string of CRFSuite library. */
#define CRFSUITE_COPYRIGHT "Copyright (c) 2007-2011 Naoaki Okazaki"
Expand Down Expand Up @@ -160,6 +160,8 @@ typedef struct {
crfsuite_item_t *items;
/** Array of the label sequence. */
int *labels;
/** Instance weight. */
floatval_t weight;
/** Group ID of the instance. */
int group;
} crfsuite_instance_t;
Expand Down
2 changes: 1 addition & 1 deletion lib/crf/src/crf1d_encode.c
Expand Up @@ -834,7 +834,7 @@ static int encoder_objective_and_gradients_batch(encoder_t *self, dataset_t *ds,
logl += logp;

/* Update the model expectations of features. */
crf1de_model_expectation(crf1de, seq, g, 1.);
crf1de_model_expectation(crf1de, seq, g, seq->weight);
}

*f = -logl;
Expand Down
4 changes: 2 additions & 2 deletions lib/crf/src/crf1d_feature.c
Expand Up @@ -199,7 +199,7 @@ crf1df_feature_t* crf1df_generate(
f.type = FT_TRANS;
f.src = prev;
f.dst = cur;
f.freq = 1;
f.freq = seq->weight;
featureset_add(set, &f);
}

Expand All @@ -208,7 +208,7 @@ crf1df_feature_t* crf1df_generate(
f.type = FT_STATE;
f.src = item->contents[c].aid;
f.dst = cur;
f.freq = item->contents[c].value;
f.freq = seq->weight * item->contents[c].value;
featureset_add(set, &f);

/* Generate state features connecting attributes with all
Expand Down
4 changes: 4 additions & 0 deletions lib/crf/src/crfsuite.c
Expand Up @@ -155,6 +155,7 @@ int crfsuite_item_empty(crfsuite_item_t* item)
/**
 * Initializes an instance structure.
 *
 * Fills the whole structure with zero bytes and then sets the instance
 * weight to 1.
 *
 *  @param  inst    The instance to initialize. It is dereferenced
 *                  unconditionally, so it must not be NULL.
 */
void crfsuite_instance_init(crfsuite_instance_t* inst)
{
/* Zero-fill every byte of the structure. */
memset(inst, 0, sizeof(*inst));
/* Set the weight to 1 after the zero fill. */
inst->weight = 1.;
}

void crfsuite_instance_init_n(crfsuite_instance_t* inst, int num_items)
Expand Down Expand Up @@ -186,6 +187,7 @@ void crfsuite_instance_copy(crfsuite_instance_t* dst, const crfsuite_instance_t*
dst->cap_items = src->cap_items;
dst->items = (crfsuite_item_t*)calloc(dst->num_items, sizeof(crfsuite_item_t));
dst->labels = (int*)calloc(dst->num_items, sizeof(int));
dst->weight = src->weight;
dst->group = src->group;
for (i = 0;i < dst->num_items;++i) {
crfsuite_item_copy(&dst->items[i], &src->items[i]);
Expand All @@ -200,11 +202,13 @@ void crfsuite_instance_swap(crfsuite_instance_t* x, crfsuite_instance_t* y)
x->cap_items = y->cap_items;
x->items = y->items;
x->labels = y->labels;
x->weight = y->weight;
x->group = y->group;
y->num_items = tmp.num_items;
y->cap_items = tmp.cap_items;
y->items = tmp.items;
y->labels = tmp.labels;
y->weight = tmp.weight;
y->group = tmp.group;
}

Expand Down
4 changes: 2 additions & 2 deletions lib/crf/src/train_arow.c
Expand Up @@ -308,8 +308,8 @@ int crfsuite_train_arow(
d = diff(inst->labels, viterbi, inst->num_items);
if (0 < d) {
floatval_t alpha, frac;
floatval_t sc, norm2;
floatval_t tau, cost;
floatval_t sc;
floatval_t cost;

/*
Compute the cost of this instance.
Expand Down
2 changes: 1 addition & 1 deletion lib/crf/src/train_l2sgd.c
Expand Up @@ -289,7 +289,7 @@ l2sgd_calibration(
const training_option_t* opt
)
{
int i, s;
int i;
int dec = 0, ok, trials = 1;
int num = opt->calibration_candidates;
clock_t clk_begin = clock();
Expand Down
13 changes: 11 additions & 2 deletions win32/liblbfgs/lbfgs.h
Expand Up @@ -573,7 +573,7 @@ Among the various ports of L-BFGS, this library provides several features:
The library is thread-safe, which is the secondary gain from the callback
interface.
- <b>Cross platform.</b> The source code can be compiled on Microsoft Visual
Studio 2005, GNU C Compiler (gcc), etc.
Studio 2010, GNU C Compiler (gcc), etc.
- <b>Configurable precision</b>: A user can choose single-precision (float)
or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro.
- <b>SSE/SSE2 optimization</b>:
Expand All @@ -592,12 +592,20 @@ This library is used by:
@section download Download
- <a href="http://www.chokkan.org/software/dist/liblbfgs-1.9.tar.gz">Source code</a>
- <a href="https://github.com/downloads/chokkan/liblbfgs/liblbfgs-1.10.tar.gz">Source code</a>
- <a href="https://github.com/chokkan/liblbfgs">GitHub repository</a>
libLBFGS is distributed under the terms of the
<a href="http://opensource.org/licenses/mit-license.php">MIT license</a>.
@section changelog History
- Version 1.10 (2010-12-22):
- Fixed compiling errors on Mac OS X; this patch was kindly submitted by
Nic Schraudolph.
- Reduced compiling warnings on Mac OS X; this patch was kindly submitted
by Tamas Nepusz.
- Replaced memalign() with posix_memalign().
- Updated solution and project files for Microsoft Visual Studio 2010.
- Version 1.9 (2010-01-29):
- Fixed a mistake in checking the validity of the parameters "ftol" and
"wolfe"; this was discovered by Kevin S. Van Horn.
Expand Down Expand Up @@ -718,6 +726,7 @@ Special thanks go to:
- Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about
OWL-QN
- Takashi Imamichi for the useful enhancements of the backtracking method
- Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes
Finally I would like to thank the original author, Jorge Nocedal, who has been
distributing the efficient and explanatory implementation in an open source
Expand Down
Binary file modified win32/liblbfgs/lbfgs.lib
Binary file not shown.
Binary file removed win32/liblbfgs/lbfgs_debug.lib
Binary file not shown.

0 comments on commit a6f144b

Please sign in to comment.