warm start parameter search supported
 -add functions for parameter search
 -add init_sol in struct parameter
 -modify function train to support initial solution for L2R_LR and L2R_L2LOSS_SVC
 -interface and train.c update: option -C is added
boyu committed Jun 29, 2015
1 parent 25fb1ce commit 621b867
Showing 12 changed files with 422 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -2,7 +2,7 @@ CXX ?= g++
CC ?= gcc
CFLAGS = -Wall -Wconversion -O3 -fPIC
LIBS = blas/blas.a
SHVER = 2
SHVER = 3
OS = $(shell uname)
#LIBS = -lblas

27 changes: 27 additions & 0 deletions README
@@ -131,11 +131,16 @@ options:
-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
-wi weight: weights adjust the parameter C of different classes (see README for details)
-v n: n-fold cross validation mode
-C : find parameter C (only for -s 0 and 2)
-q : quiet mode (no outputs)

Option -v randomly splits the data into n parts and calculates cross
validation accuracy on them.

Option -C conducts cross validation under different C values and finds
the best one. This option is supported only by -s 0 and -s 2. If
the solver is not specified, -s 2 is used.

Formulations:

For L2-regularized logistic regression (-s 0), we solve
@@ -245,6 +250,12 @@ Do five-fold cross-validation using L2-loss svm.
Use a smaller stopping tolerance 0.001 than the default
0.1 if you want more accurate solutions.

> train -C -s 0 data_file

Conduct cross validation many times with logistic regression
and find the parameter C that achieves the best cross
validation accuracy.
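
After the search finishes, the best C reported on screen can be used
for a final training run on the same data. A minimal sketch (the value
2.0 below is only a placeholder for whatever value -C reports):

> train -c 2.0 -s 0 data_file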

> train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file

Train four classifiers:
@@ -407,6 +418,22 @@ Library Usage

The format of prob is the same as that for train().

- Function: void find_parameter_C(const struct problem *prob,
const struct parameter *param, int nr_fold, double start_C,
double max_C, double *best_C, double *best_rate);

This function is similar to cross_validation. However, instead of
conducting cross validation under a specified parameter C, it
conducts cross validation many times under parameters C = start_C,
2*start_C, 4*start_C, 8*start_C, ..., and finds the one with
the highest cross validation accuracy.

If start_C <= 0, then this procedure calculates a small enough C
for prob as the start_C. The procedure stops when the models of
all folds become stable or C reaches max_C. The best C and the
corresponding accuracy are assigned to *best_C and *best_rate,
respectively.
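
A minimal C usage sketch (assuming prob has already been read in, as
for train(); the solver, tolerance, fold count, and max_C values below
are illustrative choices, not defaults taken from this function):

    struct parameter param;
    param.solver_type = L2R_LR;
    param.eps = 0.01;
    param.C = 1;              /* overwritten during the search */
    param.nr_weight = 0;
    param.weight_label = NULL;
    param.weight = NULL;
    param.p = 0.1;
    param.init_sol = NULL;    /* the search manages warm starts itself */

    double best_C, best_rate;
    /* start_C <= 0 lets the routine pick a small enough starting C */
    find_parameter_C(&prob, &param, 5, -1, 1024, &best_C, &best_rate);

    param.C = best_C;         /* retrain on the full data with the chosen C */
    struct model *model_ = train(&prob, &param);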

- Function: double predict(const model *model_, const feature_node *x);

For a classification model, the predicted class for x is returned.
209 changes: 203 additions & 6 deletions linear.cpp
@@ -27,6 +27,7 @@ static void print_string_stdout(const char *s)
fputs(s,stdout);
fflush(stdout);
}
static void print_null(const char *s) {}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

@@ -2180,14 +2181,18 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
double eps=param->eps;
//inner and outer tolerances for TRON
double eps = param->eps;
double eps_cg = 0.1;
if(param->init_sol != NULL)
eps_cg = 0.5;

int pos = 0;
int neg = 0;
for(int i=0;i<prob->l;i++)
if(prob->y[i] > 0)
pos++;
neg = prob->l - pos;

double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;

function *fun_obj=NULL;
@@ -2204,7 +2209,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
C[i] = Cn;
}
fun_obj=new l2r_lr_fun(prob, C);
TRON tron_obj(fun_obj, primal_solver_tol);
TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
tron_obj.set_print_string(liblinear_print_string);
tron_obj.tron(w);
delete fun_obj;
@@ -2222,7 +2227,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
C[i] = Cn;
}
fun_obj=new l2r_l2_svc_fun(prob, C);
TRON tron_obj(fun_obj, primal_solver_tol);
TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
tron_obj.set_print_string(liblinear_print_string);
tron_obj.tron(w);
delete fun_obj;
@@ -2287,6 +2292,36 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
}
}

// Calculate the initial C for parameter selection
static double calc_start_C(const problem *prob, const parameter *param)
{
int i;
double xTx,max_xTx;
max_xTx = 0;
for(i=0; i<prob->l; i++)
{
xTx = 0;
feature_node *xi=prob->x[i];
while(xi->index != -1)
{
double val = xi->value;
xTx += val*val;
xi++;
}
if(xTx > max_xTx)
max_xTx = xTx;
}

double min_C = 1.0;
if(param->solver_type == L2R_LR)
min_C = 1.0 / (prob->l * max_xTx);
else if(param->solver_type == L2R_L2LOSS_SVC)
min_C = 1.0 / (2 * prob->l * max_xTx);

return pow( 2, floor(log(min_C) / log(2.0)) );
}


//
// Interface functions
//
@@ -2310,7 +2345,7 @@ model* train(const problem *prob, const parameter *param)
model_->w = Malloc(double, w_size);
model_->nr_class = 2;
model_->label = NULL;
train_one(prob, param, &model_->w[0], 0, 0);
train_one(prob, param, model_->w, 0, 0);
}
else
{
@@ -2380,8 +2415,15 @@ model* train(const problem *prob, const parameter *param)
sub_prob.y[k] = +1;
for(; k<sub_prob.l; k++)
sub_prob.y[k] = -1;

if(param->init_sol != NULL)
for(i=0;i<w_size;i++)
model_->w[i] = param->init_sol[i];
else
for(i=0;i<w_size;i++)
model_->w[i] = 0;

train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
}
else
{
@@ -2400,6 +2442,13 @@
for(; k<sub_prob.l; k++)
sub_prob.y[k] = -1;

if(param->init_sol != NULL)
for(j=0;j<w_size;j++)
w[j] = param->init_sol[j*nr_class+i];
else
for(j=0;j<w_size;j++)
w[j] = 0;

train_one(&sub_prob, param, w, weighted_C[i], param->C);

for(int j=0;j<w_size;j++)
@@ -2480,6 +2529,148 @@ void cross_validation(const problem *prob, const parameter *param, int nr_fold,
free(perm);
}

void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
{
// variables for CV
int i;
int *fold_start;
int l = prob->l;
int *perm = Malloc(int, l);
double *target = Malloc(double, prob->l);
struct problem *subprob = Malloc(problem,nr_fold);

// variables for warm start
double ratio = 2;
double **prev_w = Malloc(double*, nr_fold);
for(i = 0; i < nr_fold; i++)
prev_w[i] = NULL;
int num_unchanged_w = 0;
struct parameter param1 = *param;
void (*default_print_string) (const char *) = liblinear_print_string;

if (nr_fold > l)
{
nr_fold = l;
fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
}
fold_start = Malloc(int,nr_fold+1);
for(i=0;i<l;i++) perm[i]=i;
for(i=0;i<l;i++)
{
int j = i+rand()%(l-i);
swap(perm[i],perm[j]);
}
for(i=0;i<=nr_fold;i++)
fold_start[i]=i*l/nr_fold;

for(i=0;i<nr_fold;i++)
{
int begin = fold_start[i];
int end = fold_start[i+1];
int j,k;

subprob[i].bias = prob->bias;
subprob[i].n = prob->n;
subprob[i].l = l-(end-begin);
subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
subprob[i].y = Malloc(double,subprob[i].l);

k=0;
for(j=0;j<begin;j++)
{
subprob[i].x[k] = prob->x[perm[j]];
subprob[i].y[k] = prob->y[perm[j]];
++k;
}
for(j=end;j<l;j++)
{
subprob[i].x[k] = prob->x[perm[j]];
subprob[i].y[k] = prob->y[perm[j]];
++k;
}

}

*best_rate = 0;
if(start_C <= 0)
start_C = calc_start_C(prob,param);
param1.C = start_C;

while(param1.C <= max_C)
{
//Output disabled for running CV at a particular C
set_print_string_function(&print_null);

for(i=0; i<nr_fold; i++)
{
int j;
int begin = fold_start[i];
int end = fold_start[i+1];

param1.init_sol = prev_w[i];
struct model *submodel = train(&subprob[i],&param1);

int total_w_size;
if(submodel->nr_class == 2)
total_w_size = subprob[i].n;
else
total_w_size = subprob[i].n * submodel->nr_class;

if(prev_w[i] != NULL && num_unchanged_w >= 0)
{
double norm_w_diff = 0;
for(j=0; j<total_w_size; j++)
{
norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
prev_w[i][j] = submodel->w[j];
}
norm_w_diff = sqrt(norm_w_diff);

if(norm_w_diff > 1e-15)
num_unchanged_w = -1;
}
else
{
prev_w[i] = Malloc(double, total_w_size);
for(j=0; j<total_w_size; j++)
prev_w[i][j] = submodel->w[j];
}

for(j=begin; j<end; j++)
target[perm[j]] = predict(submodel,prob->x[perm[j]]);

free_and_destroy_model(&submodel);
}
set_print_string_function(default_print_string);

int total_correct = 0;
for(i=0; i<prob->l; i++)
if(target[i] == prob->y[i])
++total_correct;
double current_rate = (double)total_correct/prob->l;
if(current_rate > *best_rate)
{
*best_C = param1.C;
*best_rate = current_rate;
}

info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
num_unchanged_w++;
if(num_unchanged_w == 3)
break;
param1.C = param1.C*ratio;
}

if(param1.C > max_C && max_C > start_C)
info("warning: maximum C reached.\n");
free(fold_start);
free(perm);
free(target);
for(i=0; i<nr_fold; i++)
free(prev_w[i]);
free(prev_w);
}

double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
int idx;
@@ -2839,6 +3030,8 @@ void destroy_param(parameter* param)
free(param->weight_label);
if(param->weight != NULL)
free(param->weight);
if(param->init_sol != NULL)
free(param->init_sol);
}

const char *check_parameter(const problem *prob, const parameter *param)
@@ -2865,6 +3058,10 @@ const char *check_parameter(const problem *prob, const parameter *param)
&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
return "unknown solver type";

if(param->init_sol != NULL
&& param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";

return NULL;
}

2 changes: 2 additions & 0 deletions linear.h
@@ -32,6 +32,7 @@ struct parameter
int *weight_label;
double* weight;
double p;
double *init_sol;
};

struct model
@@ -46,6 +47,7 @@

struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);

double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
12 changes: 11 additions & 1 deletion matlab/README
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:

If the '-v' option is specified, cross validation is conducted and the
returned model is just a scalar: cross-validation accuracy for
classification and mean-squared error for regression.
classification and mean-squared error for regression. If the '-C' option
is specified, the best parameter C is found by cross validation. The
returned model is a two-dimensional vector, where the first value is
the best C and the second value is the corresponding cross-validation
accuracy. The parameter selection utility is supported only by -s 0
and -s 2.

Result of Prediction
====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:

matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');

Use the best parameter to train (only supported by -s 0 and -s 2):

matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0

Additional Information
======================

