From 621b867f3329abff59da69bbe52a9153752035be Mon Sep 17 00:00:00 2001
From: boyu
Date: Mon, 29 Jun 2015 04:56:32 +0800
Subject: [PATCH] warm start parameter search supported

-add functions for parameter search
-add init_sol in struct parameter
-modify function train to support initial solution for L2R_LR and L2R_L2LOSS_SVC
-interface and train.c update: option -C is added
---
 Makefile | 2 +-
 README | 27 ++
 linear.cpp | 209 ++++++++++++++++++++++++++++++++++++++--
 linear.h | 2 +
 matlab/README | 12 ++-
 matlab/train.c | 73 ++++++++++++--
 python/README | 7 ++
 python/liblinear.py | 35 +++++--
 python/liblinearutil.py | 16 ++-
 train.c | 51 +++++++++-
 tron.cpp | 20 ++--
 tron.h | 3 +-
 12 files changed, 422 insertions(+), 35 deletions(-)

diff --git a/Makefile b/Makefile
index ac6a32a..0534f2b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CXX ?= g++
 CC ?= gcc
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 LIBS = blas/blas.a
-SHVER = 2
+SHVER = 3
 OS = $(shell uname)
 #LIBS = -lblas

diff --git a/README b/README
index 1b7fca5..6f9e783 100644
--- a/README
+++ b/README
@@ -131,11 +131,16 @@ options:
 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
 -wi weight: weights adjust the parameter C of different classes (see README for details)
 -v n: n-fold cross validation mode
+-C : find parameter C (only for -s 0 and 2)
 -q : quiet mode (no outputs)
 
 Option -v randomly splits the data into n parts and calculates cross
 validation accuracy on them.
 
+Option -C conducts cross validation under different C values and finds
+the best one. This option is supported only by -s 0 and -s 2. If
+the solver is not specified, -s 2 is used.
+
 Formulations:
 
 For L2-regularized logistic regression (-s 0), we solve
@@ -245,6 +250,12 @@ Do five-fold cross-validation using L2-loss svm.
 Use a smaller stopping tolerance 0.001 than the default
 0.1 if you want more accurate solutions.
 
+> train -C -s 0 data_file
+
+Conduct cross validation many times using logistic regression and
+find the parameter C that achieves the best cross-validation
+accuracy.
+
 > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file
 
 Train four classifiers:
@@ -407,6 +418,22 @@ Library Usage
 
     The format of prob is the same as that for train().
 
+- Function: void find_parameter_C(const struct problem *prob,
+            const struct parameter *param, int nr_fold, double start_C,
+            double max_C, double *best_C, double *best_rate);
+
+    This function is similar to cross_validation. However, instead of
+    conducting cross validation under a specified parameter C, it
+    conducts cross validation many times under parameters C = start_C,
+    2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
+    the highest cross-validation accuracy.
+
+    If start_C <= 0, this procedure calculates a small enough start_C
+    for prob. The procedure stops when the models of all folds become
+    stable or when C reaches max_C. The best C and the corresponding
+    accuracy are assigned to *best_C and *best_rate, respectively.
+
 - Function: double predict(const model *model_, const feature_node *x);
 
     For a classification model, the predicted class for x is returned.
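For C users, the whole search can be driven through this one call. The
sketch below is illustrative only: the driver function is ours, it
assumes `prob` has already been populated (e.g., by the same reading
code train.c uses), and it mirrors the defaults hard-coded in train.c
(5 folds, max_C = 1024, start_C <= 0 so the library computes the
starting value):

#include <stdio.h>
#include "linear.h"

/* Illustrative driver (the function name is ours, not part of the
 * library). Assumes `prob` was already filled in. */
void example_find_C(const struct problem *prob)
{
	struct parameter param;
	double best_C, best_rate;

	param.solver_type = L2R_L2LOSS_SVC;	/* -s 2; -C supports only -s 0 and -s 2 */
	param.eps = 0.01;
	param.C = 1;
	param.p = 0.1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.init_sol = NULL;

	/* start_C <= 0 asks the library to compute a small enough start;
	 * 5 folds and max_C = 1024 mirror the defaults in train.c. */
	find_parameter_C(prob, &param, 5, -1.0, 1024, &best_C, &best_rate);
	printf("Best C = %g  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
}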
diff --git a/linear.cpp b/linear.cpp
index 230948c..7ad136f 100644
--- a/linear.cpp
+++ b/linear.cpp
@@ -27,6 +27,7 @@ static void print_string_stdout(const char *s)
 	fputs(s,stdout);
 	fflush(stdout);
 }
+static void print_null(const char *s) {}
 
 static void (*liblinear_print_string) (const char *) = &print_string_stdout;
 
@@ -2180,14 +2181,18 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re
 
 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
 {
-	double eps=param->eps;
+	//inner and outer tolerances for TRON
+	double eps = param->eps;
+	double eps_cg = 0.1;
+	if(param->init_sol != NULL)
+		eps_cg = 0.5;
+
 	int pos = 0;
 	int neg = 0;
 	for(int i=0;i<prob->l;i++)
 		if(prob->y[i] > 0)
 			pos++;
 	neg = prob->l - pos;
-
 	double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
 
 	function *fun_obj=NULL;
@@ -2204,7 +2209,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 				C[i] = Cn;
 			}
 			fun_obj=new l2r_lr_fun(prob, C);
-			TRON tron_obj(fun_obj, primal_solver_tol);
+			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
 			tron_obj.set_print_string(liblinear_print_string);
 			tron_obj.tron(w);
 			delete fun_obj;
@@ -2222,7 +2227,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 				C[i] = Cn;
 			}
 			fun_obj=new l2r_l2_svc_fun(prob, C);
-			TRON tron_obj(fun_obj, primal_solver_tol);
+			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
 			tron_obj.set_print_string(liblinear_print_string);
 			tron_obj.tron(w);
 			delete fun_obj;
@@ -2287,6 +2292,36 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 	}
 }
 
+// Calculate the initial C for parameter selection
+static double calc_start_C(const problem *prob, const parameter *param)
+{
+	int i;
+	double xTx,max_xTx;
+	max_xTx = 0;
+	for(i=0; i<prob->l; i++)
+	{
+		xTx = 0;
+		feature_node *xi=prob->x[i];
+		while(xi->index != -1)
+		{
+			double val = xi->value;
+			xTx += val*val;
+			xi++;
+		}
+		if(xTx > max_xTx)
+			max_xTx = xTx;
+	}
+
+	double min_C = 1.0;
+	if(param->solver_type == L2R_LR)
+		min_C = 1.0 / (prob->l * max_xTx);
+	else if(param->solver_type == L2R_L2LOSS_SVC)
+		min_C = 1.0 / (2 * prob->l * max_xTx);
+
+	return pow( 2, floor(log(min_C) / log(2.0)) );
+}
+
+
 //
 // Interface functions
 //
@@ -2310,7 +2345,7 @@ model* train(const problem *prob, const parameter *param)
 		model_->w = Malloc(double, w_size);
 		model_->nr_class = 2;
 		model_->label = NULL;
-		train_one(prob, param, &model_->w[0], 0, 0);
+		train_one(prob, param, model_->w, 0, 0);
 	}
 	else
 	{
@@ -2380,8 +2415,15 @@ model* train(const problem *prob, const parameter *param)
 				sub_prob.y[k] = +1;
 			for(; k<sub_prob.l; k++)
 				sub_prob.y[k] = -1;
 
+			if(param->init_sol != NULL)
+				for(i=0;i<w_size;i++)
+					model_->w[i] = param->init_sol[i];
+			else
+				for(i=0;i<w_size;i++)
+					model_->w[i] = 0;
+
-			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+			train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
 		}
 		else
 		{
@@ -2400,6 +2442,13 @@ model* train(const problem *prob, const parameter *param)
 				for(; k<sub_prob.l; k++)
 					sub_prob.y[k] = -1;
 
+				if(param->init_sol != NULL)
+					for(j=0;j<w_size;j++)
+						w[j] = param->init_sol[j*nr_class+i];
+				else
+					for(j=0;j<w_size;j++)
+						w[j] = 0;
+
 				train_one(&sub_prob, param, w, weighted_C[i], param->C);
 
 				for(int j=0;j<w_size;j++)
 					model_->w[j*nr_class+i] = w[j];
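In equation form, the starting value computed by calc_start_C above is

    C_{start} = 2^{\lfloor \log_2 C_{min} \rfloor}, \qquad
    C_{min} =
    \begin{cases}
    \dfrac{1}{l \,\max_i \|x_i\|^2} & \text{for L2R\_LR (-s 0)} \\[1ex]
    \dfrac{1}{2\,l \,\max_i \|x_i\|^2} & \text{for L2R\_L2LOSS\_SVC (-s 2)}
    \end{cases}

where l is the number of training instances and x_i is the
(bias-augmented) feature vector of instance i. Rounding down to a power
of two only aligns the doubling search on round values of log2(C).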
+void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+{
+	// variables for CV
+	int i;
+	int *fold_start;
+	int l = prob->l;
+	int *perm = Malloc(int, l);
+	double *target = Malloc(double, prob->l);
+	struct problem *subprob = Malloc(problem,nr_fold);
+
+	// variables for warm start
+	double ratio = 2;
+	double **prev_w = Malloc(double*, nr_fold);
+	for(i = 0; i < nr_fold; i++)
+		prev_w[i] = NULL;
+	int num_unchanged_w = 0;
+	struct parameter param1 = *param;
+	void (*default_print_string) (const char *) = liblinear_print_string;
+
+	if (nr_fold > l)
+	{
+		nr_fold = l;
+		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
+	}
+	fold_start = Malloc(int,nr_fold+1);
+	for(i=0;i<l;i++) perm[i]=i;
+	for(i=0;i<l;i++)
+	{
+		int j = i+rand()%(l-i);
+		swap(perm[i],perm[j]);
+	}
+	for(i=0;i<=nr_fold;i++)
+		fold_start[i]=i*l/nr_fold;
+
+	for(i=0;i<nr_fold;i++)
+	{
+		int begin = fold_start[i];
+		int end = fold_start[i+1];
+		int j,k;
+
+		subprob[i].bias = prob->bias;
+		subprob[i].n = prob->n;
+		subprob[i].l = l-(end-begin);
+		subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
+		subprob[i].y = Malloc(double,subprob[i].l);
+
+		k=0;
+		for(j=0;j<begin;j++)
+		{
+			subprob[i].x[k] = prob->x[perm[j]];
+			subprob[i].y[k] = prob->y[perm[j]];
+			++k;
+		}
+		for(j=end;j<l;j++)
+		{
+			subprob[i].x[k] = prob->x[perm[j]];
+			subprob[i].y[k] = prob->y[perm[j]];
+			++k;
+		}
+
+	}
+
+	*best_rate = 0;
+	if(start_C <= 0)
+		start_C = calc_start_C(prob,param);
+	param1.C = start_C;
+
+	while(param1.C <= max_C)
+	{
+		//Output disabled for running CV at a particular C
+		set_print_string_function(&print_null);
+
+		for(i=0; i<nr_fold; i++)
+		{
+			int j;
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+
+			param1.init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],&param1);
+
+			int total_w_size;
+			if(submodel->nr_class == 2)
+				total_w_size = subprob[i].n;
+			else
+				total_w_size = subprob[i].n * submodel->nr_class;
+
+			if(prev_w[i] != NULL && num_unchanged_w >= 0)
+			{
+				double norm_w_diff = 0;
+				for(j=0; j<total_w_size; j++)
+				{
+					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+					prev_w[i][j] = submodel->w[j];
+				}
+				norm_w_diff = sqrt(norm_w_diff);
+
+				if(norm_w_diff > 1e-15)
+					num_unchanged_w = -1;
+			}
+			else
+			{
+				if(prev_w[i] == NULL)	// allocate once per fold; avoids leaking when num_unchanged_w < 0
+					prev_w[i] = Malloc(double, total_w_size);
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+
+			for(j=begin; j<end; j++)
+				target[perm[j]] = predict(submodel, prob->x[perm[j]]);
+
+			free_and_destroy_model(&submodel);
+		}
+		set_print_string_function(default_print_string);
+
+		int total_correct = 0;
+		for(i=0; i<prob->l; i++)
+			if(target[i] == prob->y[i])
+				++total_correct;
+		double current_rate = (double)total_correct/prob->l;
+		if(current_rate > *best_rate)
+		{
+			*best_C = param1.C;
+			*best_rate = current_rate;
+		}
+
+		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
+		num_unchanged_w++;
+		if(num_unchanged_w == 3)
+			break;
+		param1.C = param1.C*ratio;
+	}
+
+	if(param1.C > max_C && max_C > start_C)
+		info("warning: maximum C reached.\n");
+	free(fold_start);
+	free(perm);
+	free(target);
+	for(i=0; i<nr_fold; i++)
+	{
+		free(subprob[i].x);
+		free(subprob[i].y);
+		free(prev_w[i]);
+	}
+	free(prev_w);
+	free(subprob);
+}
+
 void destroy_param(parameter* param)
 {
 	if(param->weight_label != NULL)
 		free(param->weight_label);
 	if(param->weight != NULL)
 		free(param->weight);
+	if(param->init_sol != NULL)
+		free(param->init_sol);
 }
 
 const char *check_parameter(const problem *prob, const parameter *param)
@@ -2865,6 +3058,10 @@
 		&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
 		return "unknown solver type";
 
+	if(param->init_sol != NULL
+		&& param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
+		return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
+
 	return NULL;
 }
 
diff --git a/linear.h b/linear.h
index 6b07b47..bc6aaf8 100644
--- a/linear.h
+++ b/linear.h
@@ -32,6 +32,7 @@ struct parameter
 	int *weight_label;
 	double* weight;
 	double p;
+	double *init_sol;
 };
 
 struct model
@@ -46,6 +47,7 @@ struct model
 
 struct model* train(const struct problem *prob, const struct parameter *param);
 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
 
 double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
 double predict(const struct model *model_, const struct feature_node *x);
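The init_sol field can also be used on its own to warm-start train(),
which is what the search loop above does through prev_w. A minimal
sketch under stated assumptions: the helper is hypothetical, the
problem is two-class (so the model stores prob->n weights), and the
solver is one of the two that check_parameter accepts for init_sol
(-s 0 or -s 2):

#include <stdlib.h>
#include <string.h>
#include "linear.h"

/* Hypothetical helper: solve at param->C, then warm-start 2*param->C. */
struct model *train_then_double_C(const struct problem *prob, struct parameter *param)
{
	int w_size = prob->n;	/* two-class case: one weight per feature */
	double *w0 = (double *) malloc(sizeof(double)*w_size);
	struct model *m;

	param->init_sol = NULL;
	m = train(prob, param);			/* cold start at C */
	memcpy(w0, m->w, sizeof(double)*w_size);
	free_and_destroy_model(&m);

	param->C *= 2;
	param->init_sol = w0;			/* train() copies this into w before TRON runs */
	m = train(prob, param);			/* warm start at 2*C */

	param->init_sol = NULL;			/* so a later destroy_param() won't free w0 again */
	free(w0);
	return m;
}

Resetting init_sol before freeing matters because destroy_param() now
frees a non-NULL init_sol.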
diff --git a/matlab/README b/matlab/README
index f2b02b7..f53f435 100644
--- a/matlab/README
+++ b/matlab/README
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
 
 If the '-v' option is specified, cross validation is conducted and the
 returned model is just a scalar: cross-validation accuracy for
-classification and mean-squared error for regression.
+classification and mean-squared error for regression. If the '-C' option
+is specified, the best parameter C is found by cross validation. The
+returned model is a two-element vector, where the first value is the
+best C and the second value is the corresponding cross-validation
+accuracy. The parameter selection utility is supported only by -s 0
+and -s 2.
 
 Result of Prediction
 ====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
 
 matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
 
+Use the best parameter to train (only supported by -s 0 and -s 2):
+
+matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
+matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
+
 Additional Information
 ======================
 
diff --git a/matlab/train.c b/matlab/train.c
index 93e3eb8..5c3ef4a 100644
--- a/matlab/train.c
+++ b/matlab/train.c
@@ -1,4 +1,3 @@
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
@@ -60,6 +59,7 @@ void exit_with_help()
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
+	"-C : find parameter C (only for -s 0 and 2)\n"
 	"-q : quiet mode (no outputs)\n"
 	"col:\n"
 	"	if 'col' is set, training_instance_matrix is parsed in column format, otherwise is in row format\n"
@@ -71,11 +71,28 @@ struct parameter param;		// set by parse_command_line
 struct problem prob;		// set by read_problem
 struct model *model_;
 struct feature_node *x_space;
-int cross_validation_flag;
+int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int col_format_flag;
 int nr_fold;
 double bias;
 
+void do_find_parameter_C(double *best_C, double *best_rate)
+{
+	double start_C;
+	double max_C = 1024;
+	if (flag_C_specified)
+		start_C = param.C;
+	else
+		start_C = -1.0;
+	find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
+	mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0 * *best_rate);
+}
+
+
 double do_cross_validation()
 {
 	int i;
@@ -101,8 +118,8 @@ double do_cross_validation()
 			sumyy += y*y;
 			sumvy += v*y;
 		}
-		printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
-		printf("Cross Validation Squared correlation coefficient = %g\n",
+		mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+		mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
 			((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
 			((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
 			);
@@ -113,7 +130,7 @@ double do_cross_validation()
 		for(i=0;i<prob.l;i++)
 			if(target[i] == prob.y[i])
 				++total_correct;
-		printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+		mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
 		retval = 100.0*total_correct/prob.l;
 	}
 
-		if(i>=argc && argv[i-1][1] != 'q')	// since option -q has no parameter
+		if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C')	// since options -q and -C have no parameter
 			return 1;
 		switch(argv[i-1][1])
 		{
 			case 's':
 				param.solver_type = atoi(argv[i]);
+				flag_solver_specified = 1;
 				break;
 			case 'c':
 				param.C = atof(argv[i]);
+				flag_C_specified = 1;
 				break;
 			case 'p':
 				param.p = atof(argv[i]);
 				break;
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 				bias = atof(argv[i]);
 				break;
 			case 'v':
-				cross_validation_flag = 1;
+				flag_cross_validation = 1;
 				nr_fold = atoi(argv[i]);
 				if(nr_fold < 2)
 				{
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 				print_func = &print_null;
 				i--;
 				break;
+			case 'C':
+				flag_find_C = 1;
+				i--;
+				break;
 			default:
 				mexPrintf("unknown option\n");
 				return 1;
 		}
 	}
 
 	set_print_string_function(print_func);
 
+	// default solver for parameter selection is L2R_L2LOSS_SVC
+	if(flag_find_C)
+	{
+		if(!flag_cross_validation)
+			nr_fold = 5;
+		if(!flag_solver_specified)
+		{
+			mexPrintf("Solver not specified. Using -s 2\n");
+			param.solver_type = L2R_L2LOSS_SVC;
+		}
+		else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+		{
+			mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
+			return 1;
+		}
+	}
+
 	if(param.eps == INF)
 	{
 		switch(param.solver_type)
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
 		return;
 	}
 
-	if(cross_validation_flag)
+	if (flag_find_C)
+	{
+		double best_C, best_rate, *ptr;
+
+		do_find_parameter_C(&best_C, &best_rate);
+
+		plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
+		ptr = mxGetPr(plhs[0]);
+		ptr[0] = best_C;
+		ptr[1] = best_rate;
+	}
+	else if(flag_cross_validation)
 	{
 		double *ptr;
 		plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
diff --git a/python/README b/python/README
index e6349cf..47e0b4a 100644
--- a/python/README
+++ b/python/README
@@ -277,6 +277,11 @@ The above command loads
     structure. If '-v' is specified, cross validation is
     conducted and the returned model is just a scalar: cross-validation
     accuracy for classification and mean-squared error for regression.
+    If the '-C' option is specified, the best parameter C is found
+    by cross validation. The returned model is a tuple of the best C
+    and the corresponding cross-validation accuracy. The parameter
+    selection utility is supported only by -s 0 and -s 2.
+
     To train the same data many times with different
     parameters, the second and the third ways should be faster.
 
@@ -290,6 +295,8 @@ The above command loads
     >>> m = train(prob, '-w1 5 -c 5')
     >>> m = train(prob, param)
     >>> CV_ACC = train(y, x, '-v 3')
+    >>> best_C, best_rate = train(y, x, '-C -s 0')
+    >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
 
 - Function: predict
 
diff --git a/python/liblinear.py b/python/liblinear.py
index 9587718..d650062 100644
--- a/python/liblinear.py
+++ b/python/liblinear.py
@@ -16,7 +16,7 @@ if sys.platform == 'win32':
 		liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
 	else:
-		liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
+		liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
 except:
 # For unix the prefix 'lib' is not considered.
 	if find_library('linear'):
@@ -127,8 +127,8 @@ def set_bias(self, bias):
 
 class parameter(Structure):
-	_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
-	_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
+	_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
+	_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
 	_fields_ = genFields(_names, _types)
 
 	def __init__(self, options = None):
@@ -152,10 +152,14 @@ def set_to_default_values(self):
 		self.C = 1
 		self.p = 0.1
 		self.nr_weight = 0
-		self.weight_label = (c_int * 0)()
-		self.weight = (c_double * 0)()
+		self.weight_label = None
+		self.weight = None
+		self.init_sol = None
 		self.bias = -1
-		self.cross_validation = False
+		self.flag_cross_validation = False
+		self.flag_C_specified = False
+		self.flag_solver_specified = False
+		self.flag_find_C = False
 		self.nr_fold = 0
 		self.print_func = cast(None, PRINT_STRING_FUN)
 
@@ -176,9 +180,11 @@ def parse_options(self, options):
 			if argv[i] == "-s":
 				i = i + 1
 				self.solver_type = int(argv[i])
+				self.flag_solver_specified = True
 			elif argv[i] == "-c":
 				i = i + 1
 				self.C = float(argv[i])
+				self.flag_C_specified = True
 			elif argv[i] == "-p":
 				i = i + 1
 				self.p = float(argv[i])
@@ -190,18 +196,20 @@ def parse_options(self, options):
 				self.bias = float(argv[i])
 			elif argv[i] == "-v":
 				i = i + 1
-				self.cross_validation = 1
+				self.flag_cross_validation = 1
 				self.nr_fold = int(argv[i])
 				if self.nr_fold < 2 :
 					raise ValueError("n-fold cross validation: n must >= 2")
 			elif argv[i].startswith("-w"):
 				i = i + 1
 				self.nr_weight += 1
-				nr_weight = self.nr_weight
 				weight_label += [int(argv[i-1][2:])]
 				weight += [float(argv[i])]
 			elif argv[i] == "-q":
 				self.print_func = PRINT_STRING_FUN(print_null)
+			elif argv[i] == "-C":
+				self.flag_find_C = True
+
 			else :
 				raise ValueError("Wrong options")
 			i += 1
@@ -213,6 +221,16 @@ def parse_options(self, options):
 			self.weight[i] = weight[i]
 			self.weight_label[i] = weight_label[i]
 
+		# default solver for parameter selection is L2R_L2LOSS_SVC
+		if self.flag_find_C:
+			if not self.flag_cross_validation:
+				self.nr_fold = 5
+			if not self.flag_solver_specified:
+				self.solver_type = L2R_L2LOSS_SVC
+				self.flag_solver_specified = True
+			elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
+				raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
+
 		if self.eps == float('inf'):
 			if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
 				self.eps = 0.01
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
 	return m
 
 fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
+fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
 fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
 
 fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
 
diff --git a/python/liblinearutil.py b/python/liblinearutil.py
index 40de52a..5ba5efa 100644
--- a/python/liblinearutil.py
+++ b/python/liblinearutil.py
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
 	if err_msg :
 		raise ValueError('Error: %s' % err_msg)
 
-	if param.cross_validation:
+	if param.flag_find_C:
+		nr_fold = param.nr_fold
+		best_C = c_double()
+		best_rate = c_double()
+		max_C = 1024
+		if param.flag_C_specified:
+			start_C = param.C
+		else:
+			start_C = -1.0
+		liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
+		print("Best C = %lf CV accuracy = %g%%\n" % (best_C.value, 100.0*best_rate.value))
+		return best_C.value, best_rate.value
+
+	elif param.flag_cross_validation:
 		l, nr_fold = prob.l, param.nr_fold
 		target = (c_double * l)()
 		liblinear.cross_validation(prob, param, nr_fold, target)
diff --git a/train.c b/train.c
index 80d9810..4df8594 100644
--- a/train.c
+++ b/train.c
@@ -49,6 +49,7 @@ void exit_with_help()
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
+	"-C : find parameter C (only for -s 0 and 2)\n"
 	"-q : quiet mode (no outputs)\n"
 	);
 	exit(1);
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
 void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
 void read_problem(const char *filename);
 void do_cross_validation();
+void do_find_parameter_C();
 
 struct feature_node *x_space;
 struct parameter param;
 struct problem prob;
 struct model* model_;
 int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int nr_fold;
 double bias;
 
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	if(flag_cross_validation)
+	if (flag_find_C)
+	{
+		do_find_parameter_C();
+	}
+	else if(flag_cross_validation)
 	{
 		do_cross_validation();
 	}
@@ -132,6 +141,18 @@ int main(int argc, char **argv)
 	return 0;
 }
 
+void do_find_parameter_C()
+{
+	double start_C, best_C, best_rate;
+	double max_C = 1024;
+	if (flag_C_specified)
+		start_C = param.C;
+	else
+		start_C = -1.0;
+	find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
+	printf("Best C = %lf CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+}
+
 void do_cross_validation()
 {
 	int i;
@@ -186,7 +207,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 	param.nr_weight = 0;
 	param.weight_label = NULL;
 	param.weight = NULL;
+	param.init_sol = NULL;
 	flag_cross_validation = 0;
+	flag_C_specified = 0;
+	flag_solver_specified = 0;
+	flag_find_C = 0;
 	bias = -1;
 
 	// parse options
@@ -199,10 +224,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 		{
 			case 's':
 				param.solver_type = atoi(argv[i]);
+				flag_solver_specified = 1;
 				break;
 
 			case 'c':
 				param.C = atof(argv[i]);
+				flag_C_specified = 1;
 				break;
 
 			case 'p':
@@ -240,6 +267,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 				i--;
 				break;
 
+			case 'C':
+				flag_find_C = 1;
+				i--;
+				break;
+
 			default:
 				fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
 				exit_with_help();
@@ -267,6 +299,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 		sprintf(model_file_name,"%s.model",p);
 	}
 
+	// default solver for parameter selection is L2R_L2LOSS_SVC
+	if(flag_find_C)
+	{
+		if(!flag_cross_validation)
+			nr_fold = 5;
+		if(!flag_solver_specified)
+		{
+			fprintf(stderr, "Solver not specified. Using -s 2\n");
+			param.solver_type = L2R_L2LOSS_SVC;
+		}
+		else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+		{
+			fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
+			exit_with_help();
+		}
+	}
+
 	if(param.eps == INF)
 	{
 		switch(param.solver_type)
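Two more command lines that follow from the option handling above
(combinations of the existing flags, not separate features):

> train -C -v 10 data_file

Search for C using 10-fold instead of the default 5-fold cross
validation.

> train -C -s 0 -c 0.5 data_file

Start the search from C = 0.5 (a given -c value is used as start_C)
rather than from the automatically computed starting value.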
diff --git a/tron.cpp b/tron.cpp
index 7d1fd6e..2cd2834 100644
--- a/tron.cpp
+++ b/tron.cpp
@@ -41,10 +41,11 @@ void TRON::info(const char *fmt,...)
 	(*tron_print_string)(buf);
 }
 
-TRON::TRON(const function *fun_obj, double eps, int max_iter)
+TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
 {
 	this->fun_obj=const_cast<function *>(fun_obj);
 	this->eps=eps;
+	this->eps_cg=eps_cg;
 	this->max_iter=max_iter;
 	tron_print_string = default_print;
 }
@@ -71,16 +72,21 @@ void TRON::tron(double *w)
 	double *w_new = new double[n];
 	double *g = new double[n];
 
+	// calculate gradient norm at w=0 for stopping condition.
+	double *w0 = new double[n];
 	for (i=0; i<n; i++)
-		w[i] = 0;
+		w0[i] = 0;
+	fun_obj->fun(w0);
+	fun_obj->grad(w0, g);
+	double gnorm0 = dnrm2_(&n, g, &inc);
+	delete [] w0;
 
 	f = fun_obj->fun(w);
 	fun_obj->grad(w, g);
 	delta = dnrm2_(&n, g, &inc);
-	double gnorm1 = delta;
-	double gnorm = gnorm1;
+	double gnorm = delta;
 
-	if (gnorm <= eps*gnorm1)
+	if (gnorm <= eps*gnorm0)
 		search = 0;
 
 	iter = 1;
@@ -130,7 +136,7 @@ void TRON::tron(double *w)
 			fun_obj->grad(w, g);
 
 			gnorm = dnrm2_(&n, g, &inc);
-			if (gnorm <= eps*gnorm1)
+			if (gnorm <= eps*gnorm0)
 				break;
 		}
 		if (f < -1.0e+32)
@@ -172,7 +178,7 @@ int TRON::trcg(double delta, double *g, double *s, double *r)
 		r[i] = -g[i];
 		d[i] = r[i];
 	}
-	cgtol = 0.1*dnrm2_(&n, g, &inc);
+	cgtol = eps_cg*dnrm2_(&n, g, &inc);
 
 	int cg_iter = 0;
 	rTr = ddot_(&n, r, &inc, r, &inc);
diff --git a/tron.h b/tron.h
index 3045c2e..56002dc 100644
--- a/tron.h
+++ b/tron.h
@@ -15,7 +15,7 @@ class function
 class TRON
 {
 public:
-	TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000);
+	TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
 	~TRON();
 
 	void tron(double *w);
@@ -26,6 +26,7 @@ class TRON
 	double norm_inf(int n, double *x);
 
 	double eps;
+	double eps_cg;
 	int max_iter;
 	function *fun_obj;
 	void info(const char *fmt,...);
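Taken together, the TRON changes serve the warm start in two ways. The
outer stopping test is now measured against the gradient at the zero
vector rather than at the (possibly warm) starting point, and the inner
conjugate-gradient tolerance becomes a parameter instead of the fixed
0.1:

    \|\nabla f(w_k)\| \le \varepsilon \,\|\nabla f(0)\|
    \quad\text{(outer)}, \qquad
    \|r\| \le \varepsilon_{cg} \,\|\nabla f(w_k)\|
    \quad\text{(inner CG residual)}

train_one passes eps_cg = 0.5 instead of 0.1 whenever an initial
solution is supplied; the rationale (our reading, not stated in the
patch) is that a warm-started run begins near the optimum, so its
trust-region subproblems need not be solved as tightly.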