From 621b867f3329abff59da69bbe52a9153752035be Mon Sep 17 00:00:00 2001
From: boyu
Date: Mon, 29 Jun 2015 04:56:32 +0800
Subject: [PATCH] warm start parameter search supported

-add functions for parameter search
-add init_sol in struct parameter
-modify function train to support initial solution for L2R_LR and L2R_L2LOSS_SVC
-interface and train.c update: option -C is added
---
 Makefile | 2 +-
 README | 27 ++
 linear.cpp | 209 ++++++++++++++++++++++++++++++++++++++--
 linear.h | 2 +
 matlab/README | 12 ++-
 matlab/train.c | 73 ++++++++++++--
 python/README | 7 ++
 python/liblinear.py | 35 +++++--
 python/liblinearutil.py | 16 ++-
 train.c | 51 +++++++++-
 tron.cpp | 20 ++--
 tron.h | 3 +-
 12 files changed, 422 insertions(+), 35 deletions(-)

diff --git a/Makefile b/Makefile
index ac6a32a..0534f2b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CXX ?= g++
 CC ?= gcc
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 LIBS = blas/blas.a
-SHVER = 2
+SHVER = 3
 OS = $(shell uname)
 #LIBS = -lblas

diff --git a/README b/README
index 1b7fca5..6f9e783 100644
--- a/README
+++ b/README
@@ -131,11 +131,16 @@ options:
 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
 -wi weight: weights adjust the parameter C of different classes (see README for details)
 -v n: n-fold cross validation mode
+-C : find parameter C (only for -s 0 and 2)
 -q : quiet mode (no outputs)
 
 Option -v randomly splits the data into n parts and calculates cross
 validation accuracy on them.
 
+Option -C conducts cross validation under different C values and finds
+the best one. This option is supported only by -s 0 and -s 2. If
+the solver is not specified, -s 2 is used.
+
 Formulations:
 
 For L2-regularized logistic regression (-s 0), we solve
@@ -245,6 +250,12 @@ Do five-fold cross-validation using L2-loss svm.
 Use a smaller stopping tolerance 0.001 than the default
 0.1 if you want more accurate solutions.
 
+> train -C -s 0 data_file
+
+Conduct cross validation many times using logistic regression and
+find the parameter C that achieves the best cross-validation
+accuracy.
+
 > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file
 
 Train four classifiers:
@@ -407,6 +418,22 @@ Library Usage
 
     The format of prob is the same as that for train().
 
+- Function: void find_parameter_C(const struct problem *prob,
+            const struct parameter *param, int nr_fold, double start_C,
+            double max_C, double *best_C, double *best_rate);
+
+    This function is similar to cross_validation. However, instead of
+    conducting cross validation under a specified parameter C, it
+    conducts cross validation many times under parameters C = start_C,
+    2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
+    the highest cross-validation accuracy.
+
+    If start_C <= 0, this procedure calculates a small enough start_C
+    for prob. The procedure stops when the models of all folds become
+    stable or when C reaches max_C. The best C and the corresponding
+    accuracy are assigned to *best_C and *best_rate, respectively.
+
 - Function: double predict(const model *model_, const feature_node *x);
 
     For a classification model, the predicted class for x is returned.
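For C users, the whole search can be driven through this one call. The
sketch below is illustrative only: the driver function is ours, it
assumes `prob` has already been populated (e.g., by the same reading
code train.c uses), and it mirrors the defaults hard-coded in train.c
(5 folds, max_C = 1024, start_C <= 0 so the library computes the
starting value):

#include <stdio.h>
#include "linear.h"

/* Illustrative driver (the function name is ours, not part of the
 * library). Assumes `prob` was already filled in. */
void example_find_C(const struct problem *prob)
{
	struct parameter param;
	double best_C, best_rate;

	param.solver_type = L2R_L2LOSS_SVC;	/* -s 2; -C supports only -s 0 and -s 2 */
	param.eps = 0.01;
	param.C = 1;
	param.p = 0.1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.init_sol = NULL;

	/* start_C <= 0 asks the library to compute a small enough start;
	 * 5 folds and max_C = 1024 mirror the defaults in train.c. */
	find_parameter_C(prob, &param, 5, -1.0, 1024, &best_C, &best_rate);
	printf("Best C = %g  CV accuracy = %g%%\n", best_C, 100.0*best_rate);
}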
diff --git a/linear.cpp b/linear.cpp
index 230948c..7ad136f 100644
--- a/linear.cpp
+++ b/linear.cpp
@@ -27,6 +27,7 @@ static void print_string_stdout(const char *s)
 	fputs(s,stdout);
 	fflush(stdout);
 }
+static void print_null(const char *s) {}
 
 static void (*liblinear_print_string) (const char *) = &print_string_stdout;
 
@@ -2180,14 +2181,18 @@ static void group_classes(const problem *prob, int *nr_class_ret, int **label_re
 
 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
 {
-	double eps=param->eps;
+	//inner and outer tolerances for TRON
+	double eps = param->eps;
+	double eps_cg = 0.1;
+	if(param->init_sol != NULL)
+		eps_cg = 0.5;
+
 	int pos = 0;
 	int neg = 0;
 	for(int i=0;i<prob->l;i++)
 		if(prob->y[i] > 0)
 			pos++;
 	neg = prob->l - pos;
-
 	double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
 
 	function *fun_obj=NULL;
@@ -2204,7 +2209,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 				C[i] = Cn;
 			}
 			fun_obj=new l2r_lr_fun(prob, C);
-			TRON tron_obj(fun_obj, primal_solver_tol);
+			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
 			tron_obj.set_print_string(liblinear_print_string);
 			tron_obj.tron(w);
 			delete fun_obj;
@@ -2222,7 +2227,7 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 				C[i] = Cn;
 			}
 			fun_obj=new l2r_l2_svc_fun(prob, C);
-			TRON tron_obj(fun_obj, primal_solver_tol);
+			TRON tron_obj(fun_obj, primal_solver_tol, eps_cg);
 			tron_obj.set_print_string(liblinear_print_string);
 			tron_obj.tron(w);
 			delete fun_obj;
@@ -2287,6 +2292,36 @@ static void train_one(const problem *prob, const parameter *param, double *w, do
 	}
 }
 
+// Calculate the initial C for parameter selection
+static double calc_start_C(const problem *prob, const parameter *param)
+{
+	int i;
+	double xTx,max_xTx;
+	max_xTx = 0;
+	for(i=0; i<prob->l; i++)
+	{
+		xTx = 0;
+		feature_node *xi=prob->x[i];
+		while(xi->index != -1)
+		{
+			double val = xi->value;
+			xTx += val*val;
+			xi++;
+		}
+		if(xTx > max_xTx)
+			max_xTx = xTx;
+	}
+
+	double min_C = 1.0;
+	if(param->solver_type == L2R_LR)
+		min_C = 1.0 / (prob->l * max_xTx);
+	else if(param->solver_type == L2R_L2LOSS_SVC)
+		min_C = 1.0 / (2 * prob->l * max_xTx);
+
+	return pow( 2, floor(log(min_C) / log(2.0)) );
+}
+
+
 //
 // Interface functions
 //
@@ -2310,7 +2345,7 @@ model* train(const problem *prob, const parameter *param)
 		model_->w = Malloc(double, w_size);
 		model_->nr_class = 2;
 		model_->label = NULL;
-		train_one(prob, param, &model_->w[0], 0, 0);
+		train_one(prob, param, model_->w, 0, 0);
 	}
 	else
 	{
@@ -2380,8 +2415,15 @@ model* train(const problem *prob, const parameter *param)
 				sub_prob.y[k] = +1;
 			for(; k<sub_prob.l; k++)
 				sub_prob.y[k] = -1;
 
+			if(param->init_sol != NULL)
+				for(i=0;i<w_size;i++)
+					model_->w[i] = param->init_sol[i];
+			else
+				for(i=0;i<w_size;i++)
+					model_->w[i] = 0;
+
-			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
+			train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]);
 		}
 		else
 		{
@@ -2400,6 +2442,13 @@ model* train(const problem *prob, const parameter *param)
 				for(; k<sub_prob.l; k++)
 					sub_prob.y[k] = -1;
 
+				if(param->init_sol != NULL)
+					for(j=0;j<w_size;j++)
+						w[j] = param->init_sol[j*nr_class+i];
+				else
+					for(j=0;j<w_size;j++)
+						w[j] = 0;
+
 				train_one(&sub_prob, param, w, weighted_C[i], param->C);
 
 				for(int j=0;j<w_size;j++)
 					model_->w[j*nr_class+i] = w[j];
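In equation form, the starting value computed by calc_start_C above is

    C_{start} = 2^{\lfloor \log_2 C_{min} \rfloor}, \qquad
    C_{min} =
    \begin{cases}
    \dfrac{1}{l \,\max_i \|x_i\|^2} & \text{for L2R\_LR (-s 0)} \\[1ex]
    \dfrac{1}{2\,l \,\max_i \|x_i\|^2} & \text{for L2R\_L2LOSS\_SVC (-s 2)}
    \end{cases}

where l is the number of training instances and x_i is the
(bias-augmented) feature vector of instance i. Rounding down to a power
of two only aligns the doubling search on round values of log2(C).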
+void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate)
+{
+	// variables for CV
+	int i;
+	int *fold_start;
+	int l = prob->l;
+	int *perm = Malloc(int, l);
+	double *target = Malloc(double, prob->l);
+	struct problem *subprob = Malloc(problem,nr_fold);
+
+	// variables for warm start
+	double ratio = 2;
+	double **prev_w = Malloc(double*, nr_fold);
+	for(i = 0; i < nr_fold; i++)
+		prev_w[i] = NULL;
+	int num_unchanged_w = 0;
+	struct parameter param1 = *param;
+	void (*default_print_string) (const char *) = liblinear_print_string;
+
+	if (nr_fold > l)
+	{
+		nr_fold = l;
+		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
+	}
+	fold_start = Malloc(int,nr_fold+1);
+	for(i=0;i<l;i++) perm[i]=i;
+	for(i=0;i<l;i++)
+	{
+		int j = i+rand()%(l-i);
+		swap(perm[i],perm[j]);
+	}
+	for(i=0;i<=nr_fold;i++)
+		fold_start[i]=i*l/nr_fold;
+
+	for(i=0;i<nr_fold;i++)
+	{
+		int begin = fold_start[i];
+		int end = fold_start[i+1];
+		int j,k;
+
+		subprob[i].bias = prob->bias;
+		subprob[i].n = prob->n;
+		subprob[i].l = l-(end-begin);
+		subprob[i].x = Malloc(struct feature_node*,subprob[i].l);
+		subprob[i].y = Malloc(double,subprob[i].l);
+
+		k=0;
+		for(j=0;j<begin;j++)
+		{
+			subprob[i].x[k] = prob->x[perm[j]];
+			subprob[i].y[k] = prob->y[perm[j]];
+			++k;
+		}
+		for(j=end;j<l;j++)
+		{
+			subprob[i].x[k] = prob->x[perm[j]];
+			subprob[i].y[k] = prob->y[perm[j]];
+			++k;
+		}
+
+	}
+
+	*best_rate = 0;
+	if(start_C <= 0)
+		start_C = calc_start_C(prob,param);
+	param1.C = start_C;
+
+	while(param1.C <= max_C)
+	{
+		//Output disabled for running CV at a particular C
+		set_print_string_function(&print_null);
+
+		for(i=0; i<nr_fold; i++)
+		{
+			int j;
+			int begin = fold_start[i];
+			int end = fold_start[i+1];
+
+			param1.init_sol = prev_w[i];
+			struct model *submodel = train(&subprob[i],&param1);
+
+			int total_w_size;
+			if(submodel->nr_class == 2)
+				total_w_size = subprob[i].n;
+			else
+				total_w_size = subprob[i].n * submodel->nr_class;
+
+			if(prev_w[i] != NULL && num_unchanged_w >= 0)
+			{
+				double norm_w_diff = 0;
+				for(j=0; j<total_w_size; j++)
+				{
+					norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]);
+					prev_w[i][j] = submodel->w[j];
+				}
+				norm_w_diff = sqrt(norm_w_diff);
+
+				if(norm_w_diff > 1e-15)
+					num_unchanged_w = -1;
+			}
+			else
+			{
+				if(prev_w[i] == NULL)	// allocate once per fold; avoids leaking when num_unchanged_w < 0
+					prev_w[i] = Malloc(double, total_w_size);
+				for(j=0; j<total_w_size; j++)
+					prev_w[i][j] = submodel->w[j];
+			}
+
+			for(j=begin; j<end; j++)
+				target[perm[j]] = predict(submodel, prob->x[perm[j]]);
+
+			free_and_destroy_model(&submodel);
+		}
+		set_print_string_function(default_print_string);
+
+		int total_correct = 0;
+		for(i=0; i<prob->l; i++)
+			if(target[i] == prob->y[i])
+				++total_correct;
+		double current_rate = (double)total_correct/prob->l;
+		if(current_rate > *best_rate)
+		{
+			*best_C = param1.C;
+			*best_rate = current_rate;
+		}
+
+		info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate);
+		num_unchanged_w++;
+		if(num_unchanged_w == 3)
+			break;
+		param1.C = param1.C*ratio;
+	}
+
+	if(param1.C > max_C && max_C > start_C)
+		info("warning: maximum C reached.\n");
+	free(fold_start);
+	free(perm);
+	free(target);
+	for(i=0; i<nr_fold; i++)
+	{
+		free(subprob[i].x);
+		free(subprob[i].y);
+		free(prev_w[i]);
+	}
+	free(prev_w);
+	free(subprob);
+}
+
 void destroy_param(parameter* param)
 {
 	if(param->weight_label != NULL)
 		free(param->weight_label);
 	if(param->weight != NULL)
 		free(param->weight);
+	if(param->init_sol != NULL)
+		free(param->init_sol);
 }
 
 const char *check_parameter(const problem *prob, const parameter *param)
@@ -2865,6 +3058,10 @@
 		&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
 		return "unknown solver type";
 
+	if(param->init_sol != NULL
+		&& param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
+		return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";
+
 	return NULL;
 }
 
diff --git a/linear.h b/linear.h
index 6b07b47..bc6aaf8 100644
--- a/linear.h
+++ b/linear.h
@@ -32,6 +32,7 @@ struct parameter
 	int *weight_label;
 	double* weight;
 	double p;
+	double *init_sol;
 };
 
 struct model
@@ -46,6 +47,7 @@ struct model
 
 struct model* train(const struct problem *prob, const struct parameter *param);
 void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate);
 
 double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
 double predict(const struct model *model_, const struct feature_node *x);
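The init_sol field can also be used on its own to warm-start train(),
which is what the search loop above does through prev_w. A minimal
sketch under stated assumptions: the helper is hypothetical, the
problem is two-class (so the model stores prob->n weights), and the
solver is one of the two that check_parameter accepts for init_sol
(-s 0 or -s 2):

#include <stdlib.h>
#include <string.h>
#include "linear.h"

/* Hypothetical helper: solve at param->C, then warm-start 2*param->C. */
struct model *train_then_double_C(const struct problem *prob, struct parameter *param)
{
	int w_size = prob->n;	/* two-class case: one weight per feature */
	double *w0 = (double *) malloc(sizeof(double)*w_size);
	struct model *m;

	param->init_sol = NULL;
	m = train(prob, param);			/* cold start at C */
	memcpy(w0, m->w, sizeof(double)*w_size);
	free_and_destroy_model(&m);

	param->C *= 2;
	param->init_sol = w0;			/* train() copies this into w before TRON runs */
	m = train(prob, param);			/* warm start at 2*C */

	param->init_sol = NULL;			/* so a later destroy_param() won't free w0 again */
	free(w0);
	return m;
}

Resetting init_sol before freeing matters because destroy_param() now
frees a non-NULL init_sol.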
diff --git a/matlab/README b/matlab/README
index f2b02b7..f53f435 100644
--- a/matlab/README
+++ b/matlab/README
@@ -131,7 +131,12 @@ nr_feature, bias, Label, w]:
 
 If the '-v' option is specified, cross validation is conducted and the
 returned model is just a scalar: cross-validation accuracy for
-classification and mean-squared error for regression.
+classification and mean-squared error for regression. If the '-C' option
+is specified, the best parameter C is found by cross validation. The
+returned model is a two-element vector, where the first value is the
+best C and the second value is the corresponding cross-validation
+accuracy. The parameter selection utility is supported only by -s 0
+and -s 2.
 
 Result of Prediction
 ====================
@@ -184,6 +189,11 @@ For probability estimates, you need '-b 1' only in the testing phase:
 
 matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
 
+Use the best parameter to train (only supported by -s 0 and -s 2):
+
+matlab> best = train(heart_scale_label, heart_scale_inst, '-C -s 0');
+matlab> model = train(heart_scale_label, heart_scale_inst, sprintf('-c %f -s 0', best(1))); % use the same solver: -s 0
+
 Additional Information
 ======================
 
diff --git a/matlab/train.c b/matlab/train.c
index 93e3eb8..5c3ef4a 100644
--- a/matlab/train.c
+++ b/matlab/train.c
@@ -1,4 +1,3 @@
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
@@ -60,6 +59,7 @@ void exit_with_help()
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
+	"-C : find parameter C (only for -s 0 and 2)\n"
 	"-q : quiet mode (no outputs)\n"
 	"col:\n"
 	"	if 'col' is set, training_instance_matrix is parsed in column format, otherwise is in row format\n"
@@ -71,11 +71,28 @@ struct parameter param;		// set by parse_command_line
 struct problem prob;		// set by read_problem
 struct model *model_;
 struct feature_node *x_space;
-int cross_validation_flag;
+int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int col_format_flag;
 int nr_fold;
 double bias;
 
+void do_find_parameter_C(double *best_C, double *best_rate)
+{
+	double start_C;
+	double max_C = 1024;
+	if (flag_C_specified)
+		start_C = param.C;
+	else
+		start_C = -1.0;
+	find_parameter_C(&prob, &param, nr_fold, start_C, max_C, best_C, best_rate);
+	mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0 * *best_rate);
+}
+
+
 double do_cross_validation()
 {
 	int i;
@@ -101,8 +118,8 @@ double do_cross_validation()
 			sumyy += y*y;
 			sumvy += v*y;
 		}
-		printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
-		printf("Cross Validation Squared correlation coefficient = %g\n",
+		mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
+		mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
 			((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
 			((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
 			);
@@ -113,7 +130,7 @@ double do_cross_validation()
 		for(i=0;i<prob.l;i++)
 			if(target[i] == prob.y[i])
 				++total_correct;
-		printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
+		mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
 		retval = 100.0*total_correct/prob.l;
 	}
 
-		if(i>=argc && argv[i-1][1] != 'q')	// since option -q has no parameter
+		if(i>=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C')	// since options -q and -C have no parameter
 			return 1;
 		switch(argv[i-1][1])
 		{
 			case 's':
 				param.solver_type = atoi(argv[i]);
+				flag_solver_specified = 1;
 				break;
 			case 'c':
 				param.C = atof(argv[i]);
+				flag_C_specified = 1;
 				break;
 			case 'p':
 				param.p = atof(argv[i]);
 				break;
@@ -186,7 +209,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 				bias = atof(argv[i]);
 				break;
 			case 'v':
-				cross_validation_flag = 1;
+				flag_cross_validation = 1;
 				nr_fold = atoi(argv[i]);
 				if(nr_fold < 2)
 				{
@@ -205,6 +228,10 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
 				print_func = &print_null;
 				i--;
 				break;
+			case 'C':
+				flag_find_C = 1;
+				i--;
+				break;
 			default:
 				mexPrintf("unknown option\n");
 				return 1;
 		}
 	}
 
 	set_print_string_function(print_func);
 
+	// default solver for parameter selection is L2R_L2LOSS_SVC
+	if(flag_find_C)
+	{
+		if(!flag_cross_validation)
+			nr_fold = 5;
+		if(!flag_solver_specified)
+		{
+			mexPrintf("Solver not specified. Using -s 2\n");
+			param.solver_type = L2R_L2LOSS_SVC;
+		}
+		else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+		{
+			mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n");
+			return 1;
+		}
+	}
+
 	if(param.eps == INF)
 	{
 		switch(param.solver_type)
@@ -406,7 +450,18 @@ void mexFunction( int nlhs, mxArray *plhs[],
 		return;
 	}
 
-	if(cross_validation_flag)
+	if (flag_find_C)
+	{
+		double best_C, best_rate, *ptr;
+
+		do_find_parameter_C(&best_C, &best_rate);
+
+		plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL);
+		ptr = mxGetPr(plhs[0]);
+		ptr[0] = best_C;
+		ptr[1] = best_rate;
+	}
+	else if(flag_cross_validation)
 	{
 		double *ptr;
 		plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
diff --git a/python/README b/python/README
index e6349cf..47e0b4a 100644
--- a/python/README
+++ b/python/README
@@ -277,6 +277,11 @@ The above command loads
     structure. If '-v' is specified, cross validation is
     conducted and the returned model is just a scalar: cross-validation
     accuracy for classification and mean-squared error for regression.
+    If the '-C' option is specified, the best parameter C is found
+    by cross validation. The returned model is a tuple of the best C
+    and the corresponding cross-validation accuracy. The parameter
+    selection utility is supported only by -s 0 and -s 2.
+
     To train the same data many times with different
     parameters, the second and the third ways should be faster.
 
@@ -290,6 +295,8 @@ The above command loads
     >>> m = train(prob, '-w1 5 -c 5')
     >>> m = train(prob, param)
     >>> CV_ACC = train(y, x, '-v 3')
+    >>> best_C, best_rate = train(y, x, '-C -s 0')
+    >>> m = train(y, x, '-c {0} -s 0'.format(best_C)) # use the same solver: -s 0
 
 - Function: predict
 
diff --git a/python/liblinear.py b/python/liblinear.py
index 9587718..d650062 100644
--- a/python/liblinear.py
+++ b/python/liblinear.py
@@ -16,7 +16,7 @@ if sys.platform == 'win32':
 		liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
 	else:
-		liblinear = CDLL(path.join(dirname, '../liblinear.so.2'))
+		liblinear = CDLL(path.join(dirname, '../liblinear.so.3'))
 except:
 # For unix the prefix 'lib' is not considered.
 	if find_library('linear'):
@@ -127,8 +127,8 @@ def set_bias(self, bias):
 
 class parameter(Structure):
-	_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"]
-	_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double]
+	_names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "init_sol"]
+	_types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
 	_fields_ = genFields(_names, _types)
 
 	def __init__(self, options = None):
@@ -152,10 +152,14 @@ def set_to_default_values(self):
 		self.C = 1
 		self.p = 0.1
 		self.nr_weight = 0
-		self.weight_label = (c_int * 0)()
-		self.weight = (c_double * 0)()
+		self.weight_label = None
+		self.weight = None
+		self.init_sol = None
 		self.bias = -1
-		self.cross_validation = False
+		self.flag_cross_validation = False
+		self.flag_C_specified = False
+		self.flag_solver_specified = False
+		self.flag_find_C = False
 		self.nr_fold = 0
 		self.print_func = cast(None, PRINT_STRING_FUN)
 
@@ -176,9 +180,11 @@ def parse_options(self, options):
 			if argv[i] == "-s":
 				i = i + 1
 				self.solver_type = int(argv[i])
+				self.flag_solver_specified = True
 			elif argv[i] == "-c":
 				i = i + 1
 				self.C = float(argv[i])
+				self.flag_C_specified = True
 			elif argv[i] == "-p":
 				i = i + 1
 				self.p = float(argv[i])
@@ -190,18 +196,20 @@ def parse_options(self, options):
 				self.bias = float(argv[i])
 			elif argv[i] == "-v":
 				i = i + 1
-				self.cross_validation = 1
+				self.flag_cross_validation = 1
 				self.nr_fold = int(argv[i])
 				if self.nr_fold < 2 :
 					raise ValueError("n-fold cross validation: n must >= 2")
 			elif argv[i].startswith("-w"):
 				i = i + 1
 				self.nr_weight += 1
-				nr_weight = self.nr_weight
 				weight_label += [int(argv[i-1][2:])]
 				weight += [float(argv[i])]
 			elif argv[i] == "-q":
 				self.print_func = PRINT_STRING_FUN(print_null)
+			elif argv[i] == "-C":
+				self.flag_find_C = True
+
 			else :
 				raise ValueError("Wrong options")
 			i += 1
@@ -213,6 +221,16 @@ def parse_options(self, options):
 			self.weight[i] = weight[i]
 			self.weight_label[i] = weight_label[i]
 
+		# default solver for parameter selection is L2R_L2LOSS_SVC
+		if self.flag_find_C:
+			if not self.flag_cross_validation:
+				self.nr_fold = 5
+			if not self.flag_solver_specified:
+				self.solver_type = L2R_L2LOSS_SVC
+				self.flag_solver_specified = True
+			elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
+				raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
+
 		if self.eps == float('inf'):
 			if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
 				self.eps = 0.01
@@ -280,6 +298,7 @@ def toPyModel(model_ptr):
 	return m
 
 fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
+fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
 fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])
 
 fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
 
diff --git a/python/liblinearutil.py b/python/liblinearutil.py
index 40de52a..5ba5efa 100644
--- a/python/liblinearutil.py
+++ b/python/liblinearutil.py
@@ -150,7 +150,21 @@ def train(arg1, arg2=None, arg3=None):
 	if err_msg :
 		raise ValueError('Error: %s' % err_msg)
 
-	if param.cross_validation:
+	if param.flag_find_C:
+		nr_fold = param.nr_fold
+		best_C = c_double()
+		best_rate = c_double()
+		max_C = 1024
+		if param.flag_C_specified:
+			start_C = param.C
+		else:
+			start_C = -1.0
+		liblinear.find_parameter_C(prob, param, nr_fold, start_C, max_C, best_C, best_rate)
+		print("Best C = %lf CV accuracy = %g%%\n" % (best_C.value, 100.0*best_rate.value))
+		return best_C.value, best_rate.value
+
+	elif param.flag_cross_validation:
 		l, nr_fold = prob.l, param.nr_fold
 		target = (c_double * l)()
 		liblinear.cross_validation(prob, param, nr_fold, target)
diff --git a/train.c b/train.c
index 80d9810..4df8594 100644
--- a/train.c
+++ b/train.c
@@ -49,6 +49,7 @@ void exit_with_help()
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
+	"-C : find parameter C (only for -s 0 and 2)\n"
 	"-q : quiet mode (no outputs)\n"
 	);
 	exit(1);
@@ -84,12 +85,16 @@ static char* readline(FILE *input)
 void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
 void read_problem(const char *filename);
 void do_cross_validation();
+void do_find_parameter_C();
 
 struct feature_node *x_space;
 struct parameter param;
 struct problem prob;
 struct model* model_;
 int flag_cross_validation;
+int flag_find_C;
+int flag_C_specified;
+int flag_solver_specified;
 int nr_fold;
 double bias;
 
@@ -109,7 +114,11 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	if(flag_cross_validation)
+	if (flag_find_C)
+	{
+		do_find_parameter_C();
+	}
+	else if(flag_cross_validation)
 	{
 		do_cross_validation();
 	}
@@ -132,6 +141,18 @@ int main(int argc, char **argv)
 	return 0;
 }
 
+void do_find_parameter_C()
+{
+	double start_C, best_C, best_rate;
+	double max_C = 1024;
+	if (flag_C_specified)
+		start_C = param.C;
+	else
+		start_C = -1.0;
+	find_parameter_C(&prob, &param, nr_fold, start_C, max_C, &best_C, &best_rate);
+	printf("Best C = %lf CV accuracy = %g%%\n", best_C, 100.0*best_rate);
+}
+
 void do_cross_validation()
 {
 	int i;
@@ -186,7 +207,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 	param.nr_weight = 0;
 	param.weight_label = NULL;
 	param.weight = NULL;
+	param.init_sol = NULL;
 	flag_cross_validation = 0;
+	flag_C_specified = 0;
+	flag_solver_specified = 0;
+	flag_find_C = 0;
 	bias = -1;
 
 	// parse options
@@ -199,10 +224,12 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 		{
 			case 's':
 				param.solver_type = atoi(argv[i]);
+				flag_solver_specified = 1;
 				break;
 
 			case 'c':
 				param.C = atof(argv[i]);
+				flag_C_specified = 1;
 				break;
 
 			case 'p':
@@ -240,6 +267,11 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 				i--;
 				break;
 
+			case 'C':
+				flag_find_C = 1;
+				i--;
+				break;
+
 			default:
 				fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
 				exit_with_help();
@@ -267,6 +299,23 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 		sprintf(model_file_name,"%s.model",p);
 	}
 
+	// default solver for parameter selection is L2R_L2LOSS_SVC
+	if(flag_find_C)
+	{
+		if(!flag_cross_validation)
+			nr_fold = 5;
+		if(!flag_solver_specified)
+		{
+			fprintf(stderr, "Solver not specified. Using -s 2\n");
+			param.solver_type = L2R_L2LOSS_SVC;
+		}
+		else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC)
+		{
+			fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n");
+			exit_with_help();
+		}
+	}
+
 	if(param.eps == INF)
 	{
 		switch(param.solver_type)
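Two more command lines that follow from the option handling above
(combinations of the existing flags, not separate features):

> train -C -v 10 data_file

Search for C using 10-fold instead of the default 5-fold cross
validation.

> train -C -s 0 -c 0.5 data_file

Start the search from C = 0.5 (a given -c value is used as start_C)
rather than from the automatically computed starting value.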
diff --git a/tron.cpp b/tron.cpp
index 7d1fd6e..2cd2834 100644
--- a/tron.cpp
+++ b/tron.cpp
@@ -41,10 +41,11 @@ void TRON::info(const char *fmt,...)
 	(*tron_print_string)(buf);
 }
 
-TRON::TRON(const function *fun_obj, double eps, int max_iter)
+TRON::TRON(const function *fun_obj, double eps, double eps_cg, int max_iter)
 {
 	this->fun_obj=const_cast<function *>(fun_obj);
 	this->eps=eps;
+	this->eps_cg=eps_cg;
 	this->max_iter=max_iter;
 	tron_print_string = default_print;
 }
@@ -71,16 +72,21 @@ void TRON::tron(double *w)
 	double *w_new = new double[n];
 	double *g = new double[n];
 
+	// calculate gradient norm at w=0 for stopping condition.
+	double *w0 = new double[n];
 	for (i=0; i<n; i++)
-		w[i] = 0;
+		w0[i] = 0;
+	fun_obj->fun(w0);
+	fun_obj->grad(w0, g);
+	double gnorm0 = dnrm2_(&n, g, &inc);
+	delete [] w0;
 
 	f = fun_obj->fun(w);
 	fun_obj->grad(w, g);
 	delta = dnrm2_(&n, g, &inc);
-	double gnorm1 = delta;
-	double gnorm = gnorm1;
+	double gnorm = delta;
 
-	if (gnorm <= eps*gnorm1)
+	if (gnorm <= eps*gnorm0)
 		search = 0;
 
 	iter = 1;
@@ -130,7 +136,7 @@ void TRON::tron(double *w)
 			fun_obj->grad(w, g);
 
 			gnorm = dnrm2_(&n, g, &inc);
-			if (gnorm <= eps*gnorm1)
+			if (gnorm <= eps*gnorm0)
 				break;
 		}
 		if (f < -1.0e+32)
@@ -172,7 +178,7 @@ int TRON::trcg(double delta, double *g, double *s, double *r)
 		r[i] = -g[i];
 		d[i] = r[i];
 	}
-	cgtol = 0.1*dnrm2_(&n, g, &inc);
+	cgtol = eps_cg*dnrm2_(&n, g, &inc);
 
 	int cg_iter = 0;
 	rTr = ddot_(&n, r, &inc, r, &inc);
diff --git a/tron.h b/tron.h
index 3045c2e..56002dc 100644
--- a/tron.h
+++ b/tron.h
@@ -15,7 +15,7 @@ class function
 class TRON
 {
 public:
-	TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000);
+	TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000);
 	~TRON();
 
 	void tron(double *w);
@@ -26,6 +26,7 @@ class TRON
 	double norm_inf(int n, double *x);
 
 	double eps;
+	double eps_cg;
 	int max_iter;
 	function *fun_obj;
 	void info(const char *fmt,...);
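Taken together, the TRON changes serve the warm start in two ways. The
outer stopping test is now measured against the gradient at the zero
vector rather than at the (possibly warm) starting point, and the inner
conjugate-gradient tolerance becomes a parameter instead of the fixed
0.1:

    \|\nabla f(w_k)\| \le \varepsilon \,\|\nabla f(0)\|
    \quad\text{(outer)}, \qquad
    \|r\| \le \varepsilon_{cg} \,\|\nabla f(w_k)\|
    \quad\text{(inner CG residual)}

train_one passes eps_cg = 0.5 instead of 0.1 whenever an initial
solution is supplied; the rationale (our reading, not stated in the
patch) is that a warm-started run begins near the optimum, so its
trust-region subproblems need not be solved as tightly.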