From a5daec48140a23580e09c469933cffd5ad1b5d94 Mon Sep 17 00:00:00 2001 From: Anshu Avinash Date: Wed, 23 Jul 2014 15:48:40 +0530 Subject: [PATCH] Added clsqr for solving least squares --- CMakeLists.txt | 1 + clsqr/CMakeLists.txt | 6 + clsqr/lsqr.c | 904 +++++++++++++++++++++++++++++++++++++++++++ clsqr/lsqr.h | 400 +++++++++++++++++++ clsqr/lsqr_c.README | 64 +++ clsqr/test_lsqr.c | 240 ++++++++++++ clsqr/test_lsqr.h | 26 ++ clsqr/test_lstp.c | 119 ++++++ clsqr/test_mult.c | 100 +++++ clsqr/test_prog.c | 36 ++ sql/CMakeLists.txt | 4 +- sql/opt_costmodel.h | 1 + sql/sp_head.cc | 1 + sql/sql_class.cc | 83 +++- sql/sql_class.h | 6 +- sql/sql_parse.cc | 2 +- sql/sql_select.cc | 1 + 17 files changed, 1975 insertions(+), 19 deletions(-) create mode 100644 clsqr/CMakeLists.txt create mode 100644 clsqr/lsqr.c create mode 100644 clsqr/lsqr.h create mode 100644 clsqr/lsqr_c.README create mode 100644 clsqr/test_lsqr.c create mode 100644 clsqr/test_lsqr.h create mode 100644 clsqr/test_lstp.c create mode 100644 clsqr/test_mult.c create mode 100644 clsqr/test_prog.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 849da49d82a00..ac83a175a5b8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -412,6 +412,7 @@ ADD_SUBDIRECTORY(libservices) ADD_SUBDIRECTORY(scripts) ADD_SUBDIRECTORY(sql/share) ADD_SUBDIRECTORY(support-files) +ADD_SUBDIRECTORY(clsqr) IF(NOT WITHOUT_SERVER) ADD_SUBDIRECTORY(tests) diff --git a/clsqr/CMakeLists.txt b/clsqr/CMakeLists.txt new file mode 100644 index 0000000000000..c2dbbb6f040ee --- /dev/null +++ b/clsqr/CMakeLists.txt @@ -0,0 +1,6 @@ +PROJECT(CLSQR C) + +SET(CLSQR_HEADERS ${PROJECT_BINARY_DIR}/lsqr.h) + +SET(CLSQR_SOURCES lsqr.c) +ADD_CONVENIENCE_LIBRARY(clsqr ${CLSQR_HEADERS} ${CLSQR_SOURCES}) diff --git a/clsqr/lsqr.c b/clsqr/lsqr.c new file mode 100644 index 0000000000000..4de1ff3029801 --- /dev/null +++ b/clsqr/lsqr.c @@ -0,0 +1,904 @@ +/* +* lsqr.c +* This is a C version of LSQR derived by James W. Howse +* from the Fortran 77 implementation of C. C. Paige and M. A. Saunders. +* +* This file defines functions for data type allocation and deallocation, +* lsqr itself (the main algorithm), +* and functions that scale, copy, and compute the Euclidean norm of a vector. +* +* +* The following history comments are maintained by Michael Saunders. +* +* 08 Sep 1999: First version of lsqr.c (this file) provided by +* James W. Howse . +* +* 16 Aug 2005: Forrester H Cole reports "too many" +"* iterations" when a good solution x0 is supplied in +* input->sol_vec. Note that the f77 and Matlab versions of LSQR +* intentionally don't allow x0 to be input. They require +* the user to define r0 = b - A*x0 and solve A dx = r0, +* then add x = x0 + dx. Note that nothing is gained unless +* larger stopping tolerances are set in the solve for dx +* (because dx doesn't need to be computed accurately if +* x0 is very good). The same is true here. BEWARE! +* +* 14 Feb 2006: Marc Grunberg reports successful +* use on a sparse system of size 1422805 x 246588 with +* 76993294 nonzero entries. Also reports that the solution +* vector isn't initialized(!). Added +* output->sol_vec->elements[indx] = 0.0; +* to the first for loop. +* +* Presumably you are supposed to provide x0 even if it is zero. +* Again, BEWARE! I feel it's another reason not to allow x0. +* +* 18 Mar 2007: Dima Sorkin reports long strings +* split across lines (fixed later by Krishna Mohan Gundu's +* script -- see 30 Aug 2007 below). +* Also recommends changing %i to %li for long ints. +* +* 05 Jul 2007: Joel Erickson reports bug in +* resid_tol and resid_tol_mach used in the stopping rules: +* output->mat_resid_norm should be +* output->frob_mat_norm in both. +* +* 30 Aug 2007: Krishna Mohan Gundu also reports +* long strings split across lines, and provides perl script +* fix_newlines.pl to change to the form "..." "...". +* The script is included in the sol/software/lsqr/c/ directory. +* It has been used to create this version of lsqr.c. +* +* Krishna also questioned the fact that the test program +* seems to fail on the last few rectangular cases. +* Don't worry! ||A'r|| is in fact quite small. +* atol and btol have been reduced from 1e-10 to 1e-15 +* to allow more iterations and hence "success". +* +* 02 Sep 2007 Macro "sqr" now defined to be "lsqr_sqr". +*/ + +#include "lsqr.h" + +/*-------------------------------------------------------------------------*/ +/* */ +/* Define the data type allocation and deallocation functions. */ +/* */ +/*-------------------------------------------------------------------------*/ + + /*---------------------------------------------------------------*/ + /* */ + /* Define the error handling function for LSQR and its */ + /* associated functions. */ + /* */ + /*---------------------------------------------------------------*/ + +void lsqr_error( char *msg, + int code ) +{ + fprintf(stderr, "\t%s\n", msg); + exit(code); +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for a long vector with */ + /* subscript range x[0..n-1]. */ + /* */ + /*---------------------------------------------------------------*/ + +lvec *alloc_lvec( long lvec_size ) +{ + lvec *lng_vec; + + lng_vec = (lvec *) malloc( sizeof(lvec) ); + if (!lng_vec) lsqr_error("lsqr: vector allocation failure in function " +"alloc_lvec()", -1); + + lng_vec->elements = (long *) malloc( (lvec_size) * sizeof(long) ); + if (!lng_vec->elements) lsqr_error("lsqr: element allocation failure in " +"function alloc_lvec()", -1); + + lng_vec->length = lvec_size; + + return lng_vec; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the vector */ + /* created with the function 'alloc_lvec()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_lvec( lvec *lng_vec ) +{ + free((double *) (lng_vec->elements)); + free((lvec *) (lng_vec)); + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for a double vector with */ + /* subscript range x[0..n-1]. */ + /* */ + /*---------------------------------------------------------------*/ + +dvec *alloc_dvec( long dvec_size ) +{ + dvec *dbl_vec; + + dbl_vec = (dvec *) malloc( sizeof(dvec) ); + if (!dbl_vec) lsqr_error("lsqr: vector allocation failure in function " +"alloc_dvec()", -1); + + dbl_vec->elements = (double *) malloc( (dvec_size) * sizeof(double) ); + if (!dbl_vec->elements) lsqr_error("lsqr: element allocation failure in " +"function alloc_dvec()", -1); + + dbl_vec->length = dvec_size; + + return dbl_vec; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the vector */ + /* created with the function 'alloc_dvec()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_dvec( dvec *dbl_vec ) +{ + free((double *) (dbl_vec->elements)); + free((dvec *) (dbl_vec)); + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for all of the structures */ + /* used by the function 'lsqr()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void alloc_lsqr_mem( lsqr_input **in_struct, + lsqr_output **out_struct, + lsqr_work **wrk_struct, + lsqr_func **fnc_struct, + long max_num_rows, + long max_num_cols ) +{ + *in_struct = (lsqr_input *) alloc_lsqr_in( max_num_rows, max_num_cols ); + if (!in_struct) lsqr_error("lsqr: input structure allocation failure in " +"function alloc_lsqr_in()", -1); + + *out_struct = (lsqr_output *) alloc_lsqr_out( max_num_rows, max_num_cols ); + if (!out_struct) lsqr_error("lsqr: output structure allocation failure in " +"function alloc_lsqr_out()", -1); + + (*out_struct)->sol_vec = (dvec *) (*in_struct)->sol_vec; + + *wrk_struct = (lsqr_work *) alloc_lsqr_wrk( max_num_rows, max_num_cols ); + if (!wrk_struct) lsqr_error("lsqr: work structure allocation failure in " +"function alloc_lsqr_wrk()", -1); + + *fnc_struct = (lsqr_func *) alloc_lsqr_fnc( ); + if (!fnc_struct) lsqr_error("lsqr: work structure allocation failure in " +"function alloc_lsqr_fnc()", -1); + + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the structure */ + /* created with the function 'alloc_lsqr_mem()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_lsqr_mem( lsqr_input *in_struct, + lsqr_output *out_struct, + lsqr_work *wrk_struct, + lsqr_func *fnc_struct ) +{ + free_lsqr_in(in_struct); + free_lsqr_out(out_struct); + free_lsqr_wrk(wrk_struct); + free_lsqr_fnc(fnc_struct); + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for the structure of */ + /* type 'lsqr_input'. */ + /* */ + /*---------------------------------------------------------------*/ + +lsqr_input *alloc_lsqr_in( long max_num_rows, + long max_num_cols ) +{ + lsqr_input *in_struct; + + in_struct = (lsqr_input *) malloc( sizeof(lsqr_input) ); + if (!in_struct) lsqr_error("lsqr: input structure allocation failure in " +"function alloc_lsqr_in()", -1); + + in_struct->rhs_vec = (dvec *) alloc_dvec( max_num_rows ); + if (!in_struct->rhs_vec) lsqr_error("lsqr: right hand side vector \'b\' " +"allocation failure in function alloc_lsqr_in()", -1); + + in_struct->sol_vec = (dvec *) alloc_dvec( max_num_cols ); + if (!in_struct->sol_vec) lsqr_error("lsqr: solution vector \'x\' allocation " +"failure in function alloc_lsqr_in()", -1); + + return in_struct; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the structure */ + /* created with the function 'alloc_lsqr_in()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_lsqr_in( lsqr_input *in_struct ) +{ + free_dvec(in_struct->rhs_vec); + free_dvec(in_struct->sol_vec); + free((lsqr_input *) (in_struct)); + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for the structure of */ + /* type 'lsqr_output'. */ + /* */ + /*---------------------------------------------------------------*/ + +lsqr_output *alloc_lsqr_out( long max_num_rows, + long max_num_cols ) +{ + lsqr_output *out_struct; + + out_struct = (lsqr_output *) malloc( sizeof(lsqr_output) ); + if (!out_struct) lsqr_error("lsqr: output structure allocation failure in " +"function alloc_lsqr_out()", -1); + + out_struct->std_err_vec = (dvec *) alloc_dvec( max_num_cols ); + if (!out_struct->std_err_vec) lsqr_error("lsqr: standard error vector \'e\' " +"allocation failure in function alloc_lsqr_out()", -1); + + return out_struct; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the structure */ + /* created with the function 'alloc_lsqr_out()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_lsqr_out( lsqr_output *out_struct ) +{ + free_dvec(out_struct->std_err_vec); + free((lsqr_output *) (out_struct)); + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for the structure of */ + /* type 'lsqr_work'. */ + /* */ + /*---------------------------------------------------------------*/ + +lsqr_work *alloc_lsqr_wrk( long max_num_rows, + long max_num_cols ) +{ + lsqr_work *wrk_struct; + + wrk_struct = (lsqr_work *) malloc( sizeof(lsqr_work) ); + if (!wrk_struct) lsqr_error("lsqr: work structure allocation failure in " +"function alloc_lsqr_wrk()", -1); + + wrk_struct->bidiag_wrk_vec = (dvec *) alloc_dvec( max_num_cols ); + if (!wrk_struct->bidiag_wrk_vec) lsqr_error("lsqr: bidiagonalization work " +"vector \'v\' allocation failure in function alloc_lsqr_wrk()", -1); + + wrk_struct->srch_dir_vec = (dvec *) alloc_dvec( max_num_cols ); + if (!wrk_struct->srch_dir_vec) + lsqr_error("lsqr: search direction vector \'w\' " +"allocation failure in function alloc_lsqr_wrk()", -1); + + return wrk_struct; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the structure */ + /* created with the function 'alloc_lsqr_wrk()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_lsqr_wrk( lsqr_work *wrk_struct ) +{ + free_dvec(wrk_struct->bidiag_wrk_vec); + free_dvec(wrk_struct->srch_dir_vec); + free((lsqr_work *) (wrk_struct)); + return; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the allocation function for the structure of */ + /* type 'lsqr_func'. */ + /* */ + /*---------------------------------------------------------------*/ + +lsqr_func *alloc_lsqr_fnc( ) +{ + lsqr_func *fnc_struct; + + fnc_struct = (lsqr_func *) malloc( sizeof(lsqr_func) ); + if (!fnc_struct) lsqr_error("lsqr: function structure allocation failure in " +"function alloc_lsqr_fnc()", -1); + + return fnc_struct; +} + + /*---------------------------------------------------------------*/ + /* */ + /* Define the deallocation function for the structure */ + /* created with the function 'alloc_lsqr_fnc()'. */ + /* */ + /*---------------------------------------------------------------*/ + +void free_lsqr_fnc( lsqr_func *fnc_struct ) +{ + free((lsqr_func *) (fnc_struct)); + return; +} + +/*-------------------------------------------------------------------------*/ +/* */ +/* Define the LSQR function. */ +/* */ +/*-------------------------------------------------------------------------*/ + +/* +*------------------------------------------------------------------------------ +* +* LSQR finds a solution x to the following problems: +* +* 1. Unsymmetric equations -- solve A*x = b +* +* 2. Linear least squares -- solve A*x = b +* in the least-squares sense +* +* 3. Damped least squares -- solve ( A )*x = ( b ) +* ( damp*I ) ( 0 ) +* in the least-squares sense +* +* where 'A' is a matrix with 'm' rows and 'n' columns, 'b' is an +* 'm'-vector, and 'damp' is a scalar. (All quantities are real.) +* The matrix 'A' is intended to be large and sparse. +* +* +* Notation +* -------- +* +* The following quantities are used in discussing the subroutine +* parameters: +* +* 'Abar' = ( A ), 'bbar' = ( b ) +* ( damp*I ) ( 0 ) +* +* 'r' = b - A*x, 'rbar' = bbar - Abar*x +* +* 'rnorm' = sqrt( norm(r)**2 + damp**2 * norm(x)**2 ) +* = norm( rbar ) +* +* 'rel_prec' = the relative precision of floating-point arithmetic +* on the machine being used. Typically 2.22e-16 +* with 64-bit arithmetic. +* +* LSQR minimizes the function 'rnorm' with respect to 'x'. +* +* +* References +* ---------- +* +* C.C. Paige and M.A. Saunders, LSQR: An algorithm for sparse +* linear equations and sparse least squares, +* ACM Transactions on Mathematical Software 8, 1 (March 1982), +* pp. 43-71. +* +* C.C. Paige and M.A. Saunders, Algorithm 583, LSQR: Sparse +* linear equations and least-squares problems, +* ACM Transactions on Mathematical Software 8, 2 (June 1982), +* pp. 195-209. +* +* C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh, +* Basic linear algebra subprograms for Fortran usage, +* ACM Transactions on Mathematical Software 5, 3 (Sept 1979), +* pp. 308-323 and 324-325. +* +*------------------------------------------------------------------------------ +*/ +void lsqr( lsqr_input *input, lsqr_output *output, lsqr_work *work, + lsqr_func *func, + void *prod ) +{ + double dvec_norm2( dvec * ); + + long indx, + term_iter, + term_iter_max; + + double alpha, + beta, + rhobar, + phibar, + bnorm, + bbnorm, + cs1, + sn1, + psi, + rho, + cs, + sn, + theta, + phi, + tau, + ddnorm, + delta, + gammabar, + zetabar, + gamma, + cs2, + sn2, + zeta, + xxnorm, + res, + resid_tol, + cond_tol, + resid_tol_mach, + temp, + stop_crit_1, + stop_crit_2, + stop_crit_3; + + static char term_msg[8][80] = + { + "The exact solution is x = x0", + "The residual Ax - b is small enough, given ATOL and BTOL", + "The least squares error is small enough, given ATOL", + "The estimated condition number has exceeded CONLIM", + "The residual Ax - b is small enough, given machine precision", + "The least squares error is small enough, given machine precision", + "The estimated condition number has exceeded machine precision", + "The iteration limit has been reached" + }; + + if( input->lsqr_fp_out != NULL ) + fprintf( input->lsqr_fp_out, " Least Squares Solution of A*x = b\n" +" The matrix A has %7li rows and %7li columns\n" +" The damping parameter is\tDAMP = %10.2e\n" +" ATOL = %10.2e\t\tCONDLIM = %10.2e\n" +" BTOL = %10.2e\t\tITERLIM = %10li\n\n", + input->num_rows, input->num_cols, input->damp_val, input->rel_mat_err, + input->cond_lim, input->rel_rhs_err, input->max_iter ); + + output->term_flag = 0; + term_iter = 0; + + output->num_iters = 0; + + output->frob_mat_norm = 0.0; + output->mat_cond_num = 0.0; + output->sol_norm = 0.0; + + for(indx = 0; indx < input->num_cols; indx++) + { + work->bidiag_wrk_vec->elements[indx] = 0.0; + work->srch_dir_vec->elements[indx] = 0.0; + output->std_err_vec->elements[indx] = 0.0; + output->sol_vec->elements[indx] = 0.0; + } + + bbnorm = 0.0; + ddnorm = 0.0; + xxnorm = 0.0; + + cs2 = -1.0; + sn2 = 0.0; + zeta = 0.0; + res = 0.0; + + if( input->cond_lim > 0.0 ) + cond_tol = 1.0 / input->cond_lim; + else + cond_tol = DBL_EPSILON; + + alpha = 0.0; + beta = 0.0; +/* +* Set up the initial vectors u and v for bidiagonalization. These satisfy +* the relations +* BETA*u = b - A*x0 +* ALPHA*v = A^T*u +*/ + /* Compute b - A*x0 and store in vector u which initially held vector b */ + dvec_scale( (-1.0), input->rhs_vec ); + func->mat_vec_prod( 0, input->sol_vec, input->rhs_vec, prod ); + dvec_scale( (-1.0), input->rhs_vec ); + + /* compute Euclidean length of u and store as BETA */ + beta = dvec_norm2( input->rhs_vec ); + + if( beta > 0.0 ) + { + /* scale vector u by the inverse of BETA */ + dvec_scale( (1.0 / beta), input->rhs_vec ); + + /* Compute matrix-vector product A^T*u and store it in vector v */ + func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod ); + + /* compute Euclidean length of v and store as ALPHA */ + alpha = dvec_norm2( work->bidiag_wrk_vec ); + } + + if( alpha > 0.0 ) + { + /* scale vector v by the inverse of ALPHA */ + dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec ); + + /* copy vector v to vector w */ + dvec_copy( work->bidiag_wrk_vec, work->srch_dir_vec ); + } + + output->mat_resid_norm = alpha * beta; + output->resid_norm = beta; + bnorm = beta; +/* +* If the norm || A^T r || is zero, then the initial guess is the exact +* solution. Exit and report this. +*/ + if( (output->mat_resid_norm == 0.0) && (input->lsqr_fp_out != NULL) ) + { + fprintf( input->lsqr_fp_out, "\tISTOP = %3li\t\t\tITER = %9li\n" +" || A ||_F = %13.5e\tcond( A ) = %13.5e\n" +" || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n" +" || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n", + output->term_flag, output->num_iters, output->frob_mat_norm, + output->mat_cond_num, output->resid_norm, output->mat_resid_norm, + bnorm, output->sol_norm ); + + fprintf( input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]); + + return; + } + + rhobar = alpha; + phibar = beta; +/* +* If statistics are printed at each iteration, print a header and the initial +* values for each quantity. +*/ + if( input->lsqr_fp_out != NULL ) + { + fprintf( input->lsqr_fp_out, +" ITER || r || Compatible " +"||A^T r|| / ||A|| ||r|| || A || cond( A )\n\n" ); + + stop_crit_1 = 1.0; + stop_crit_2 = alpha / beta; + + fprintf( input->lsqr_fp_out, + "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n", + output->num_iters, output->resid_norm, stop_crit_1, stop_crit_2, + output->frob_mat_norm, output->mat_cond_num); + } + +/* +* The main iteration loop is continued as long as no stopping criteria +* are satisfied and the number of total iterations is less than some upper +* bound. +*/ + while( output->term_flag == 0 ) + { + output->num_iters++; +/* +* Perform the next step of the bidiagonalization to obtain +* the next vectors u and v, and the scalars ALPHA and BETA. +* These satisfy the relations +* BETA*u = A*v - ALPHA*u, +* ALFA*v = A^T*u - BETA*v. +*/ + /* scale vector u by the negative of ALPHA */ + dvec_scale( (-alpha), input->rhs_vec ); + + /* compute A*v - ALPHA*u and store in vector u */ + func->mat_vec_prod( 0, work->bidiag_wrk_vec, input->rhs_vec, prod ); + + /* compute Euclidean length of u and store as BETA */ + beta = dvec_norm2( input->rhs_vec ); + + /* accumulate this quantity to estimate Frobenius norm of matrix A */ + /* bbnorm += sqr(alpha) + sqr(beta) + sqr(input->damp_val);*/ + bbnorm += alpha*alpha + beta*beta + + input->damp_val*input->damp_val; + + if( beta > 0.0 ) + { + /* scale vector u by the inverse of BETA */ + dvec_scale( (1.0 / beta), input->rhs_vec ); + + /* scale vector v by the negative of BETA */ + dvec_scale( (-beta), work->bidiag_wrk_vec ); + + /* compute A^T*u - BETA*v and store in vector v */ + func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod ); + + /* compute Euclidean length of v and store as ALPHA */ + alpha = dvec_norm2( work->bidiag_wrk_vec ); + + if( alpha > 0.0 ) + /* scale vector v by the inverse of ALPHA */ + dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec ); + } +/* +* Use a plane rotation to eliminate the damping parameter. +* This alters the diagonal (RHOBAR) of the lower-bidiagonal matrix. +*/ + cs1 = rhobar / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ); + sn1 = input->damp_val + / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ); + + psi = sn1 * phibar; + phibar = cs1 * phibar; +/* +* Use a plane rotation to eliminate the subdiagonal element (BETA) +* of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix. +*/ + rho = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) + + lsqr_sqr(beta) ); + cs = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ) / rho; + sn = beta / rho; + + theta = sn * alpha; + rhobar = -cs * alpha; + phi = cs * phibar; + phibar = sn * phibar; + tau = sn * phi; +/* +* Update the solution vector x, the search direction vector w, and the +* standard error estimates vector se. +*/ + for(indx = 0; indx < input->num_cols; indx++) + { + /* update the solution vector x */ + output->sol_vec->elements[indx] += (phi / rho) * + work->srch_dir_vec->elements[indx]; + + /* update the standard error estimates vector se */ + output->std_err_vec->elements[indx] += lsqr_sqr( (1.0 / rho) * + work->srch_dir_vec->elements[indx] ); + + /* accumulate this quantity to estimate condition number of A +*/ + ddnorm += lsqr_sqr( (1.0 / rho) * + work->srch_dir_vec->elements[indx] ); + + /* update the search direction vector w */ + work->srch_dir_vec->elements[indx] = + work->bidiag_wrk_vec->elements[indx] - + (theta / rho) * work->srch_dir_vec->elements[indx]; + } +/* +* Use a plane rotation on the right to eliminate the super-diagonal element +* (THETA) of the upper-bidiagonal matrix. Then use the result to estimate +* the solution norm || x ||. +*/ + delta = sn2 * rho; + gammabar = -cs2 * rho; + zetabar = (phi - delta * zeta) / gammabar; + + /* compute an estimate of the solution norm || x || */ + output->sol_norm = sqrt( xxnorm + lsqr_sqr(zetabar) ); + + gamma = sqrt( lsqr_sqr(gammabar) + lsqr_sqr(theta) ); + cs2 = gammabar / gamma; + sn2 = theta / gamma; + zeta = (phi - delta * zeta) / gamma; + + /* accumulate this quantity to estimate solution norm || x || */ + xxnorm += lsqr_sqr(zeta); +/* +* Estimate the Frobenius norm and condition of the matrix A, and the +* Euclidean norms of the vectors r and A^T*r. +*/ + output->frob_mat_norm = sqrt( bbnorm ); + output->mat_cond_num = output->frob_mat_norm * sqrt( ddnorm ); + + res += lsqr_sqr(psi); + output->resid_norm = sqrt( lsqr_sqr(phibar) + res ); + + output->mat_resid_norm = alpha * fabs( tau ); +/* +* Use these norms to estimate the values of the three stopping criteria. +*/ + stop_crit_1 = output->resid_norm / bnorm; + + stop_crit_2 = 0.0; + if( output->resid_norm > 0.0 ) + stop_crit_2 = output->mat_resid_norm / ( output->frob_mat_norm * + output->resid_norm ); + + stop_crit_3 = 1.0 / output->mat_cond_num; + +/* 05 Jul 2007: Bug reported by Joel Erickson . +*/ + resid_tol = input->rel_rhs_err + input->rel_mat_err * + output->frob_mat_norm * /* (not output->mat_resid_norm *) */ + output->sol_norm / bnorm; + + resid_tol_mach = DBL_EPSILON + DBL_EPSILON * + output->frob_mat_norm * /* (not output->mat_resid_norm *) */ + output->sol_norm / bnorm; +/* +* Check to see if any of the stopping criteria are satisfied. +* First compare the computed criteria to the machine precision. +* Second compare the computed criteria to the the user specified precision. +*/ + /* iteration limit reached */ + if( output->num_iters >= input->max_iter ) + output->term_flag = 7; + + /* condition number greater than machine precision */ + if( stop_crit_3 <= DBL_EPSILON ) + output->term_flag = 6; + /* least squares error less than machine precision */ + if( stop_crit_2 <= DBL_EPSILON ) + output->term_flag = 5; + /* residual less than a function of machine precision */ + if( stop_crit_1 <= resid_tol_mach ) + output->term_flag = 4; + + /* condition number greater than CONLIM */ + if( stop_crit_3 <= cond_tol ) + output->term_flag = 3; + /* least squares error less than ATOL */ + if( stop_crit_2 <= input->rel_mat_err ) + output->term_flag = 2; + /* residual less than a function of ATOL and BTOL */ + if( stop_crit_1 <= resid_tol ) + output->term_flag = 1; +/* +* If statistics are printed at each iteration, print a header and the initial +* values for each quantity. +*/ + if( input->lsqr_fp_out != NULL ) + { + fprintf( input->lsqr_fp_out, + "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n", + output->num_iters, output->resid_norm, stop_crit_1, + stop_crit_2, + output->frob_mat_norm, output->mat_cond_num); + } +/* +* The convergence criteria are required to be met on NCONV consecutive +* iterations, where NCONV is set below. Suggested values are 1, 2, or 3. +*/ + if( output->term_flag == 0 ) + term_iter = -1; + + term_iter_max = 1; + term_iter++; + + if( (term_iter < term_iter_max) && + (output->num_iters < input->max_iter) ) + output->term_flag = 0; + } /* end while loop */ +/* +* Finish computing the standard error estimates vector se. +*/ + temp = 1.0; + + if( input->num_rows > input->num_cols ) + temp = ( double ) ( input->num_rows - input->num_cols ); + + if( lsqr_sqr(input->damp_val) > 0.0 ) + temp = ( double ) ( input->num_rows ); + + temp = output->resid_norm / sqrt( temp ); + + for(indx = 0; indx < input->num_cols; indx++) + /* update the standard error estimates vector se */ + output->std_err_vec->elements[indx] = temp * + sqrt( output->std_err_vec->elements[indx] ); +/* +* If statistics are printed at each iteration, print the statistics for the +* stopping condition. +*/ + if( input->lsqr_fp_out != NULL ) + { + fprintf( input->lsqr_fp_out, "\n\tISTOP = %3li\t\t\tITER = %9li\n" +" || A ||_F = %13.5e\tcond( A ) = %13.5e\n" +" || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n" +" || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n", + output->term_flag, output->num_iters, output->frob_mat_norm, + output->mat_cond_num, output->resid_norm, output->mat_resid_norm, + bnorm, output->sol_norm ); + + fprintf( input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]); + + } + + return; +} + +/*-------------------------------------------------------------------------*/ +/* */ +/* Define the function 'dvec_norm2()'. This function takes a vector */ +/* arguement and computes the Euclidean or L2 norm of this vector. Note */ +/* that this is a version of the BLAS function 'dnrm2()' rewritten to */ +/* use the current data structures. */ +/* */ +/*-------------------------------------------------------------------------*/ + +double dvec_norm2(dvec *vec) +{ + long indx; + double norm; + + norm = 0.0; + + for(indx = 0; indx < vec->length; indx++) + norm += lsqr_sqr(vec->elements[indx]); + + return sqrt(norm); +} + + +/*-------------------------------------------------------------------------*/ +/* */ +/* Define the function 'dvec_scale()'. This function takes a vector */ +/* and a scalar as arguments and multiplies each element of the vector */ +/* by the scalar. Note that this is a version of the BLAS function */ +/* 'dscal()' rewritten to use the current data structures. */ +/* */ +/*-------------------------------------------------------------------------*/ + +void dvec_scale(double scal, dvec *vec) +{ + long indx; + + for(indx = 0; indx < vec->length; indx++) + vec->elements[indx] *= scal; + + return; +} + +/*-------------------------------------------------------------------------*/ +/* */ +/* Define the function 'dvec_copy()'. This function takes two vectors */ +/* as arguements and copies the contents of the first into the second. */ +/* Note that this is a version of the BLAS function 'dcopy()' rewritten */ +/* to use the current data structures. */ +/* */ +/*-------------------------------------------------------------------------*/ + +void dvec_copy(dvec *orig, dvec *copy) +{ + long indx; + + for(indx = 0; indx < orig->length; indx++) + copy->elements[indx] = orig->elements[indx]; + + return; +} diff --git a/clsqr/lsqr.h b/clsqr/lsqr.h new file mode 100644 index 0000000000000..1f1d59a10ff88 --- /dev/null +++ b/clsqr/lsqr.h @@ -0,0 +1,400 @@ +/* +* lsqr.h +* Contains auxiliary functions, data type definitions, and function +* prototypes for the iterative linear solver LSQR. +* +* 08 Sep 1999: First version from James W. Howse +* 02 Sep 2007: Dima Sorkin advises that +* in C++ the use of macros is strongly deprecated. +* Originally, sqr, max, min, round, TRUE, FALSE, PI +* were defined here. Now, +* min, round, TRUE, FALSE are gone (never used); +* PI is defined explicitly in test_lstp.c; +* max is changed to lsqr_max in test_lsqr.c; +*/ + +/* +*------------------------------------------------------------------------------ +* +* LSQR finds a solution x to the following problems: +* +* 1. Unsymmetric equations -- solve A*x = b +* +* 2. Linear least squares -- solve A*x = b +* in the least-squares sense +* +* 3. Damped least squares -- solve ( A )*x = ( b ) +* ( damp*I ) ( 0 ) +* in the least-squares sense +* +* where 'A' is a matrix with 'm' rows and 'n' columns, 'b' is an +* 'm'-vector, and 'damp' is a scalar. (All quantities are real.) +* The matrix 'A' is intended to be large and sparse. +* +* +* Notation +* -------- +* +* The following quantities are used in discussing the subroutine +* parameters: +* +* 'Abar' = ( A ), 'bbar' = ( b ) +* ( damp*I ) ( 0 ) +* +* 'r' = b - A*x, 'rbar' = bbar - Abar*x +* +* 'rnorm' = sqrt( norm(r)**2 + damp**2 * norm(x)**2 ) +* = norm( rbar ) +* +* 'rel_prec' = the relative precision of floating-point arithmetic +* on the machine being used. For example, on the IBM 370, +* 'rel_prec' is about 1.0E-6 and 1.0D-16 in single and double +* precision respectively. +* +* LSQR minimizes the function 'rnorm' with respect to 'x'. +* +*------------------------------------------------------------------------------ +*/ + +/*---------------*/ +/* Include files */ +/*---------------*/ + +#include +#include +#include +#include +#include + +/* 02 Sep 2007: The following 7 macros + sqr, max, min, round, TRUE, FALSE, PI + are no longer defined here. + (min, round, TRUE, FALSE were never used anyway.) + "sqr" has been changed to lsqr_sqr. +*/ + +/*------------------------*/ +/* User-defined functions */ +/*------------------------*/ + +#define lsqr_sqr(a) ( (a) * (a) ) +/* +#define max(a,b) ( (a) < (b) ? (b) : (a) ) +#define min(a,b) ( (a) < (b) ? (a) : (b) ) +#define round(a) ( (a) > 0.0 ? (int)((a) + 0.5) : (int)((a) - 0.5) ) +*/ + +/*----------------------*/ +/* Default declarations */ +/*----------------------*/ + +/* +#define TRUE (1) +#define FALSE (0) +#define PI (4.0 * atan(1.0)) +*/ + +/*------------------*/ +/* Type definitions */ +/*------------------*/ + +typedef struct LONG_VECTOR { + long length; + long *elements; +} lvec; + +typedef struct DOUBLE_VECTOR { + long length; + double *elements; +} dvec; + +/* +*------------------------------------------------------------------------------ +* +* Input Quantities +* ---------------- +* +* num_rows input The number of rows (e.g., 'm') in the matrix A. +* +* num_cols input The number of columns (e.g., 'n') in the matrix A. +* +* damp_val input The damping parameter for problem 3 above. +* ('damp_val' should be 0.0 for problems 1 and 2.) +* If the system A*x = b is incompatible, values +* of 'damp_val' in the range +* 0 to sqrt('rel_prec')*norm(A) +* will probably have a negligible effect. +* Larger values of 'damp_val' will tend to decrease +* the norm of x and reduce the number of +* iterations required by LSQR. +* +* The work per iteration and the storage needed +* by LSQR are the same for all values of 'damp_val'. +* +* rel_mat_err input An estimate of the relative error in the data +* defining the matrix 'A'. For example, +* if 'A' is accurate to about 6 digits, set +* rel_mat_err = 1.0e-6 . +* +* rel_rhs_err input An extimate of the relative error in the data +* defining the right hand side (rhs) vector 'b'. For +* example, if 'b' is accurate to about 6 digits, set +* rel_rhs_err = 1.0e-6 . +* +* cond_lim input An upper limit on cond('Abar'), the apparent +* condition number of the matrix 'Abar'. +* Iterations will be terminated if a computed +* estimate of cond('Abar') exceeds 'cond_lim'. +* This is intended to prevent certain small or +* zero singular values of 'A' or 'Abar' from +* coming into effect and causing unwanted growth +* in the computed solution. +* +* 'cond_lim' and 'damp_val' may be used separately or +* together to regularize ill-conditioned systems. +* +* Normally, 'cond_lim' should be in the range +* 1000 to 1/rel_prec. +* Suggested value: +* cond_lim = 1/(100*rel_prec) for compatible systems, +* cond_lim = 1/(10*sqrt(rel_prec)) for least squares. +* +* Note: If the user is not concerned about the parameters +* 'rel_mat_err', 'rel_rhs_err' and 'cond_lim', any or all of them +* may be set to zero. The effect will be the same as the values +* 'rel_prec', 'rel_prec' and 1/rel_prec respectively. +* +* max_iter input An upper limit on the number of iterations. +* Suggested value: +* max_iter = n/2 for well-conditioned systems +* with clustered singular values, +* max_iter = 4*n otherwise. +* +* lsqr_fp_out input Pointer to the file for sending printed output. If +* not NULL, a summary will be printed to the file that +* 'lsqr_fp_out' points to. +* +* rhs_vec input The right hand side (rhs) vector 'b'. This vector +* has a length of 'm' (i.e., 'num_rows'). The routine +* LSQR is designed to over-write 'rhs_vec'. +* +* sol_vec input The initial guess for the solution vector 'x'. This +* vector has a length of 'n' (i.e., 'num_cols'). The +* routine LSQR is designed to over-write 'sol_vec'. +* +*------------------------------------------------------------------------------ +*/ + +typedef struct LSQR_INPUTS { + long num_rows; + long num_cols; + double damp_val; + double rel_mat_err; + double rel_rhs_err; + double cond_lim; + long max_iter; + FILE *lsqr_fp_out; + dvec *rhs_vec; + dvec *sol_vec; +} lsqr_input; + +/* +*------------------------------------------------------------------------------ +* +* Output Quantities +* ----------------- +* +* term_flag output An integer giving the reason for termination: +* +* 0 x = x0 is the exact solution. +* No iterations were performed. +* +* 1 The equations A*x = b are probably compatible. +* Norm(A*x - b) is sufficiently small, given the +* values of 'rel_mat_err' and 'rel_rhs_err'. +* +* 2 The system A*x = b is probably not +* compatible. A least-squares solution has +* been obtained that is sufficiently accurate, +* given the value of 'rel_mat_err'. +* +* 3 An estimate of cond('Abar') has exceeded +* 'cond_lim'. The system A*x = b appears to be +* ill-conditioned. Otherwise, there could be an +* error in subroutine APROD. +* +* 4 The equations A*x = b are probably +* compatible. Norm(A*x - b) is as small as +* seems reasonable on this machine. +* +* 5 The system A*x = b is probably not +* compatible. A least-squares solution has +* been obtained that is as accurate as seems +* reasonable on this machine. +* +* 6 Cond('Abar') seems to be so large that there is +* no point in doing further iterations, +* given the precision of this machine. +* There could be an error in subroutine APROD. +* +* 7 The iteration limit 'max_iter' was reached. +* +* num_iters output The number of iterations performed. +* +* frob_mat_norm output An estimate of the Frobenius norm of 'Abar'. +* This is the square-root of the sum of squares +* of the elements of 'Abar'. +* If 'damp_val' is small and if the columns of 'A' +* have all been scaled to have length 1.0, +* 'frob_mat_norm' should increase to roughly +* sqrt('n'). +* A radically different value for 'frob_mat_norm' +* may indicate an error in subroutine APROD (there +* may be an inconsistency between modes 1 and 2). +* +* mat_cond_num output An estimate of cond('Abar'), the condition +* number of 'Abar'. A very high value of +* 'mat_cond_num' +* may again indicate an error in APROD. +* +* resid_norm output An estimate of the final value of norm('rbar'), +* the function being minimized (see notation +* above). This will be small if A*x = b has +* a solution. +* +* mat_resid_norm output An estimate of the final value of +* norm( Abar(transpose)*rbar ), the norm of +* the residual for the usual normal equations. +* This should be small in all cases. +* ('mat_resid_norm' +* will often be smaller than the true value +* computed from the output vector 'x'.) +* +* sol_norm output An estimate of the norm of the final +* solution vector 'x'. +* +* sol_vec output The vector which returns the computed solution +* 'x'. +* This vector has a length of 'n' (i.e., +* 'num_cols'). +* +* std_err_vec output The vector which returns the standard error +* estimates for the components of 'x'. +* This vector has a length of 'n' +* (i.e., 'num_cols'). For each i, std_err_vec(i) +* is set to the value +* 'resid_norm' * sqrt( sigma(i,i) / T ), +* where sigma(i,i) is an estimate of the i-th +* diagonal of the inverse of Abar(transpose)*Abar +* and T = 1 if m <= n, +* T = m - n if m > n and damp_val = 0, +* T = m if damp_val != 0. +* +*------------------------------------------------------------------------------ +*/ + +typedef struct LSQR_OUTPUTS { + long term_flag; + long num_iters; + double frob_mat_norm; + double mat_cond_num; + double resid_norm; + double mat_resid_norm; + double sol_norm; + dvec *sol_vec; + dvec *std_err_vec; +} lsqr_output; + +/* +*------------------------------------------------------------------------------ +* +* Workspace Quantities +* -------------------- +* +* bidiag_wrk_vec workspace This float vector is a workspace for the +* current iteration of the +* Lanczos bidiagonalization. +* This vector has length 'n' (i.e., 'num_cols'). +* +* srch_dir_vec workspace This float vector contains the search direction +* at the current iteration. This vector has a +* length of 'n' (i.e., 'num_cols'). +* +*------------------------------------------------------------------------------ +*/ + +typedef struct LSQR_WORK { + dvec *bidiag_wrk_vec; + dvec *srch_dir_vec; +} lsqr_work; + +/* +*------------------------------------------------------------------------------ +* +* Functions Called +* ---------------- +* +* mat_vec_prod functions A pointer to a function that performs the +* matrix-vector multiplication. The function +* has the calling parameters: +* +* APROD ( mode, x, y, prod_data ), +* +* and it must perform the following functions: +* +* If MODE = 0, compute y = y + A*x, +* If MODE = 1, compute x = x + A^T*y. +* +* The vectors x and y are input parameters in +* both cases. +* If mode = 0, y should be altered without +* changing x. +* If mode = 2, x should be altered without +* changing y. +* The argument prod_data is a pointer to a +* user-defined structure that contains +* any data need by the function APROD that is +* not used by LSQR. If no additional data is +* needed by APROD, then prod_data should be +* the NULL pointer. +*------------------------------------------------------------------------------ +*/ + +typedef struct LSQR_FUNC { + void (*mat_vec_prod) (long, dvec *, dvec *, void *); +} lsqr_func; + +/*---------------------*/ +/* Function prototypes */ +/*---------------------*/ + +void lsqr_error( char *, int ); + +lvec *alloc_lvec( long ); +void free_lvec( lvec * ); + +dvec *alloc_dvec( long ); +void free_dvec( dvec * ); + +void alloc_lsqr_mem( lsqr_input **, lsqr_output **, lsqr_work **, lsqr_func **, + long, long ); +void free_lsqr_mem( lsqr_input *, lsqr_output *, lsqr_work *, lsqr_func * ); + +lsqr_input *alloc_lsqr_in( long, long ); +void free_lsqr_in( lsqr_input * ); + +lsqr_output *alloc_lsqr_out( long, long ); +void free_lsqr_out( lsqr_output * ); + +lsqr_work *alloc_lsqr_wrk( long, long ); +void free_lsqr_wrk( lsqr_work * ); + +lsqr_func *alloc_lsqr_fnc( ); +void free_lsqr_fnc( lsqr_func * ); + +void lsqr( lsqr_input *, lsqr_output *, lsqr_work *, lsqr_func *, void * ); + +double dvec_norm2( dvec * ); +void dvec_scale( double, dvec * ); +void dvec_copy( dvec *, dvec * ); + diff --git a/clsqr/lsqr_c.README b/clsqr/lsqr_c.README new file mode 100644 index 0000000000000..40e57b7589219 --- /dev/null +++ b/clsqr/lsqr_c.README @@ -0,0 +1,64 @@ +lsqr_c.README + +The software for LSQR (C version) is provided by SOL, Stanford University +under the terms of the OSI Common Public License (CPL): +http://www.opensource.org/licenses/cpl1.0.php + + +08 Sep 1999: First set of files provided by James W. Howse . + +11 Oct 2000: Files unpacked by Michael Saunders. + Lines longer than 80 characters had extraneous carriage returns. + Tried to repair them, but not sure if I/O formats will be correct. + +09 Dec 2005: Beware that this C version of LSQR allows x0 to be input. + I RECOMMEND THAT YOU ALWAYS USE x0 = 0 (an n-vector of zeros). + + Otherwise, you must be conscious of the fact that LSQR will be + solving A dx = r0 = b - A x0 and adding parts of dx to x0 + AS IT GOES. If x0 is very close to a solution (that is, if dx + is very small compared to x0), many digits of accuracy in dx + will be lost. It is safer to solve A dx = r0 (with x0 = 0) + and then compute x = x0 + dx yourself at the end. + + In either case, providing x0 will generally not save work + (will not reduce the number of LSQR iterations) unless you + use larger tolerances atol and btol than you would for Ax = b. + +31 Aug 2007: Some bugs fixed (see lsqr.c). + Note that some of the rectangular test problems + at the end of test_prog.c are really too ill-conditioned + for any least-squares method to give an accurate solution. + Ignore the "LSQR seems to have failed" messages if ||A'r|| + is quite small. + +31 Aug 2007: Simplified C version of LSQR contributed by + Michael Friedlander. Added as clsqr2 to + http://www.stanford.edu/group/SOL/lsqr.html + +02 Aug 2007: Dima Sorkin recommends changes + to improve portability. Also the following compile command: + gcc -ansi -pedantic -Wall -c lsqr.c + +Please send comments to Michael Saunders, SOL, Stanford University + saunders@stanford.edu 650-723-1875 +----------------------------------------------------------------------------- + +The C version of LSQR involves the following files: + + lsqr.c + lsqr.h + makefile + test_lsqr.c + test_lsqr.h + test_lstp.c + test_mult.c + test_prog.c + +test_lstp.c contains lstp, a least-square test problem generator. + It constructs data for a matrix A and a vector b. +test_mult.c computes products Ax and A'y. +test_lsqr.c calls lstp to generate a problem, + then calls lsqr to solve it. +test_prog.c calls test_lsqr several times to generate and solve + various problems. diff --git a/clsqr/test_lsqr.c b/clsqr/test_lsqr.c new file mode 100644 index 0000000000000..4b3e8b090f0f8 --- /dev/null +++ b/clsqr/test_lsqr.c @@ -0,0 +1,240 @@ +/* +* test_lsqr.c +* For testing C version of LSQR. +* +* 08 Sep 1999: First version from James W. Howse +* 02 Sep 2007: "max" redefined here to be "lsqr_max". +* test_in->rel_mat_err changed from 1e-10 to 1e-15 and +* test_in->rel_rhs_err changed from 1e-10 to 1e-15 +* to ensure "success" on the ill-conditioned rectangular +* test cases. +*/ + +#include "lsqr.h" + +#include "test_lsqr.h" + +# define lsqr_max(a,b) ( (a) < (b) ? (b) : (a) ) + +void test_lsqr( long num_rows, + long num_cols, + long duplicate_param, + long power_param, + double damp_param ) +/* +* ------------------------------------------------------------------ +* This is an example driver routine for running LSQR. +* It generates a test problem, solves it, and examines the results. +* Note that subroutine APROD must be declared 'extern' if it is +* used only in the call to LSQR. +* ------------------------------------------------------------------ +*/ +{ + double dvec_norm2( dvec * ); + + long col_indx; + + double unorm, + enorm, + etol, + act_mat_cond_num, + act_resid_norm; + + dvec *act_rhs_vec, + *act_sol_vec; + + lsqr_input *test_in; + lsqr_output *test_out; + lsqr_work *test_work; + lsqr_func *test_func; + prod_data *test_prod; + + extern void test_mult( long, dvec *, dvec *, void * ); + extern void lstp_mult( dvec *, dvec *, dvec * ); + + alloc_lsqr_mem( &test_in, &test_out, &test_work, &test_func, num_rows, + num_cols ); + + test_func->mat_vec_prod = test_mult; + test_in->lsqr_fp_out = stdout; +/* +* Allocate the the data structure of type 'prod_data'. +*/ + test_prod = (prod_data *) malloc( sizeof(prod_data) ); + + test_prod->d_vec = (dvec *) alloc_dvec( num_cols ); + if (!test_prod->d_vec) lsqr_error("test_prog: vector \'d\' allocation failure " +"in function lsqr_test()", -1); + + test_prod->hy_vec = (dvec *) alloc_dvec( num_rows ); + if (!test_prod->hy_vec) lsqr_error("test_prog: vector \'hy\' allocation " +"failure in function lsqr_test()", -1); + + test_prod->hz_vec = (dvec *) alloc_dvec( num_cols ); + if (!test_prod->hz_vec) lsqr_error("test_prog: vector \'hz\' allocation " +"failure in function lsqr_test()", -1); + + test_prod->work_vec = (dvec *) alloc_dvec( lsqr_max( num_rows, num_cols ) ); + if (!test_prod->work_vec) lsqr_error("test_prog: vector \'work\' allocation " +"failure in function lsqr_test()", -1); +/* +* Allocate the other needed vectors. +*/ + act_rhs_vec = (dvec *) alloc_dvec( num_rows ); + if (!act_rhs_vec) lsqr_error("test_prog: vector \'b\' allocation failure in " +"function lsqr_test()", -1); + + act_sol_vec = (dvec *) alloc_dvec( num_cols ); + if (!act_sol_vec) lsqr_error("test_prog: vector \'x_ans\' allocation failure " +"in function lsqr_test()", -1); +/* +* Set the true solution for the test problem. +*/ + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + act_sol_vec->elements[col_indx] = (double)( num_cols - (col_indx + 1) ); +/* +* Call the routine LSTP which generates the least squares test problem. +*/ + lstp( duplicate_param, power_param, damp_param, act_sol_vec, act_rhs_vec, + test_prod, &act_mat_cond_num, &act_resid_norm ); +/* +* Copy the right-hand side vector generated for the test problem by LSTP +* into the right-hand side vector for LSQR. +*/ + dvec_copy( act_rhs_vec, test_in->rhs_vec ); +/* +* Set the initial guess for LSQR. +*/ + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + test_in->sol_vec->elements[col_indx] = 0.0; +/* +* Print information about the test problem. +*/ + fprintf( test_in->lsqr_fp_out, + +"--------------------------------------------------------------------\n" ); + fprintf( test_in->lsqr_fp_out, + "Least-Squares Test Problem P( %5li %5li %5li " + "%5li %12.2e )\n\n", + num_rows, num_cols, duplicate_param, power_param, damp_param ); + fprintf( test_in->lsqr_fp_out, + "Condition No. = %12.4e Residual Function = " + "%17.9e\n", + act_mat_cond_num, act_resid_norm ); + fprintf( test_in->lsqr_fp_out, + +"--------------------------------------------------------------------\n\n" ); +/* +* Set the input parameters for LSQR. +*/ + test_in->num_rows = num_rows; + test_in->num_cols = num_cols; + test_in->damp_val = damp_param; + test_in->rel_mat_err = 1.0e-15; /* Previously 1.0e-10; */ + test_in->rel_rhs_err = 1.0e-15; /* Previously 1.0e-10; */ + test_in->cond_lim = 10.0 * act_mat_cond_num; + test_in->max_iter = num_rows + num_cols + 50; +/* +* Solve the test problem generated by LTSP by calling the routine LSQR. +*/ + lsqr( test_in, test_out, test_work, test_func, test_prod ); + +/* +* Examine the results. +* Print the residual norms RNORM and ARNORM given by LSQR, and then compute +* their true values in terms of the solution X obtained by LSQR. +* At least one of these norms should be small. +*/ + fprintf( test_in->lsqr_fp_out, "\t\t\tResidual Norm Residual Norm " +"Solution Norm\n" ); + fprintf( test_in->lsqr_fp_out, "\t\t\t||A x - b||_2 ||A^T A x - A^T b||_2 " +"||x - x0||_2\n\n" ); + fprintf( test_in->lsqr_fp_out, "Estimated by LSQR %17.5e %17.5e " +"%17.5e\n", + test_out->resid_norm, test_out->mat_resid_norm, test_out->sol_norm ); +/* +* Compute U = A*x - b. +* This is the negative of the usual residual vector. It will be close to +* zero only if b is a compatible rhs and x is close to a solution. +*/ + dvec_copy( act_rhs_vec, test_in->rhs_vec ); + dvec_scale( (-1.0), test_in->rhs_vec ); + test_mult( 0, test_out->sol_vec, test_in->rhs_vec, test_prod ); +/* +* Compute v = A^T*u + DAMP**2 * x. +* This will be close to zero in all cases if x is close to a solution. +*/ + dvec_copy( test_out->sol_vec, test_work->bidiag_wrk_vec ); + dvec_scale( ( lsqr_sqr(damp_param) ), test_work->bidiag_wrk_vec ); + test_mult( 1, test_work->bidiag_wrk_vec, test_in->rhs_vec, test_prod ); +/* +* Compute the norms associated with x, u, v. +*/ + test_out->sol_norm = dvec_norm2( test_out->sol_vec ); + unorm = dvec_norm2( test_in->rhs_vec ); + test_out->resid_norm = sqrt( lsqr_sqr( unorm ) + lsqr_sqr( damp_param ) * + lsqr_sqr( test_out->sol_norm ) ); + test_out->mat_resid_norm = dvec_norm2( test_work->bidiag_wrk_vec ); + fprintf( test_in->lsqr_fp_out, "Computed from X %17.5e %17.5e " +"%17.5e\n\n", + test_out->resid_norm, test_out->mat_resid_norm, test_out->sol_norm ); +/* +* Print the solution and standard error estimates from LSQR. +*/ + fprintf( test_in->lsqr_fp_out, "Solution X\n" ); + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + { + fprintf( test_in->lsqr_fp_out, "%6li %14.6g", col_indx, + test_out->sol_vec->elements[col_indx] ); + if( ( (col_indx + 1) % 4 ) == 0 ) + fprintf( test_in->lsqr_fp_out, "\n" ); + } + + fprintf( test_in->lsqr_fp_out, "\n\n" ); + + fprintf( test_in->lsqr_fp_out, "Standard Errors SE\n" ); + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + { + fprintf( test_in->lsqr_fp_out, "%6li %14.6g", col_indx, + test_out->std_err_vec->elements[col_indx] ); + if( ( (col_indx + 1) % 4 ) == 0 ) + fprintf( test_in->lsqr_fp_out, "\n" ); + } + fprintf( test_in->lsqr_fp_out, "\n\n" ); +/* +* Print information about the accuracy of the LSQR solution. +*/ + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + test_work->srch_dir_vec->elements[col_indx] = +test_out->sol_vec->elements[col_indx] + - act_sol_vec->elements[col_indx]; + + enorm = dvec_norm2( test_work->srch_dir_vec ) / + ( 1.0 + dvec_norm2( act_sol_vec ) ); + etol = 1.0e-5; + + if( enorm <= etol ) + fprintf( test_in->lsqr_fp_out, "LSQR appears to be successful. Relative " +"error in X = %10.2e\n\n", + enorm ); + if( enorm > etol ) + fprintf( test_in->lsqr_fp_out, "LSQR appears to have failed. Relative " +"error in X = %10.2e\n\n", + enorm ); +/* +* Free the memory allocated for LSQR. +*/ + free_lsqr_mem( test_in, test_out, test_work, test_func ); + + free_dvec( test_prod->d_vec ); + free_dvec( test_prod->hy_vec ); + free_dvec( test_prod->hz_vec ); + free_dvec( test_prod->work_vec ); + free( (prod_data *) (test_prod) ); + + free_dvec( act_rhs_vec ); + free_dvec( act_sol_vec ); + + return; +} + diff --git a/clsqr/test_lsqr.h b/clsqr/test_lsqr.h new file mode 100644 index 0000000000000..684e8bf5f2972 --- /dev/null +++ b/clsqr/test_lsqr.h @@ -0,0 +1,26 @@ +/* +* test_lsqr.h +* +* 08 Sep 1999: First version from James W. Howse +*/ + +/*------------------*/ +/* Type definitions */ +/*------------------*/ + +typedef struct PROD_DATA { + dvec *d_vec; + dvec *hy_vec; + dvec *hz_vec; + dvec *work_vec; +} prod_data; + +/*---------------------*/ +/* Function prototypes */ +/*---------------------*/ + +void test_lsqr( long, long, long, long, double ); +void lstp( long, long, double, dvec *, dvec *, prod_data *, + double *, double * ); +void test_mult( long, dvec *, dvec *, void * ); +void lstp_mult( dvec *, dvec *, dvec * ); diff --git a/clsqr/test_lstp.c b/clsqr/test_lstp.c new file mode 100644 index 0000000000000..d8a585b4a292b --- /dev/null +++ b/clsqr/test_lstp.c @@ -0,0 +1,119 @@ +/* +* test_lstp.c +* For testing lstp, the LSQR test-problem generator. +* +* 08 Sep 1999: First version from James W. Howse +* 02 Sep 2007: Changed 4.0 * PI to 12.566370614359172. +*/ + +#include "lsqr.h" + +#include "test_lsqr.h" + +void lstp( long duplicate_param, + long power_param, + double damp_param, + dvec *act_sol_vec, + dvec *act_rhs_vec, + prod_data *test_prod, + double *act_mat_cond_num, + double *act_resid_norm ) +/* +* ------------------------------------------------------------------ +* LSTP generates a sparse least-squares test problem of the form +* +* ( A )*X = ( B ) +* ( DAMP*I ) ( 0 ) +* +* having a specified solution X. The matrix A is constructed +* in the form A = HY*D*HZ, where D is an 'num_rows' by 'num_cols' +* diagonal matrix, and HY and HZ are Householder transformations. +* +* The first 4 parameters are input to LSTP. The remainder are +* output. LSTP is intended for use with 'num_rows' >= 'num_cols'. +* ------------------------------------------------------------------ +*/ +{ + double dvec_norm2( dvec * ); + + long num_rows, + num_cols, + row_indx, + col_indx, + lwork; + + double mnorm, + nnorm, + dwork; + + num_rows = act_rhs_vec->length; + num_cols = act_sol_vec->length; +/* +* Make two vectors of norm 1.0 for the Householder transformations. +* 02 Sep 2007: Changed 4.0 * PI to 12.566370614359172. +*/ + for( row_indx = 0; row_indx < num_rows; row_indx++ ) + test_prod->hy_vec->elements[row_indx] = + sin( (double)(row_indx + 1) * 12.566370614359172 + / ( (double)(num_rows) ) ); + + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + test_prod->hz_vec->elements[col_indx] = + cos( (double)(col_indx + 1) * 12.566370614359172 + / ( (double)(num_cols) ) ); + + mnorm = dvec_norm2( test_prod->hy_vec ); + nnorm = dvec_norm2( test_prod->hz_vec ); + dvec_scale( (-1.0 / mnorm), test_prod->hy_vec ); + dvec_scale( (-1.0 / nnorm), test_prod->hz_vec ); +/* +* Set the diagonal matrix D. These are the singular values of A. +*/ + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + { + lwork = ( col_indx + duplicate_param ) / duplicate_param; + dwork = (double) lwork * (double) duplicate_param; + dwork /= (double) num_cols; + test_prod->d_vec->elements[col_indx] = pow( dwork, ( (double) power_param +) ); + } + + (*act_mat_cond_num) = + sqrt((lsqr_sqr(test_prod->d_vec->elements[num_cols - 1]) + + lsqr_sqr(damp_param)) / + (lsqr_sqr(test_prod->d_vec->elements[0]) + + lsqr_sqr(damp_param))); +/* +* Compute the residual vector, storing it in B. +* It takes the form HY*( S ) +* ( T ) +* where S is obtained from D*S = DAMP**2 * HZ * X and T can be anything. +*/ + lstp_mult( test_prod->hz_vec, act_sol_vec, act_rhs_vec ); + + for( col_indx = 0; col_indx < num_cols; col_indx++ ) + act_rhs_vec->elements[col_indx] *= lsqr_sqr( damp_param ) / + test_prod->d_vec->elements[col_indx]; + + dwork = 1.0; + for( row_indx = num_cols; row_indx < num_rows; row_indx++ ) + { + lwork = row_indx - (num_cols - 1); + act_rhs_vec->elements[row_indx] = ( dwork * (double)(lwork) ) / + ( (double)(num_rows) ); + dwork = -dwork; + } + + lstp_mult( test_prod->hy_vec, act_rhs_vec, act_rhs_vec ); +/* +* Now compute the true B = RESIDUAL + A*X. +*/ + mnorm = dvec_norm2( act_rhs_vec ); + nnorm = dvec_norm2( act_sol_vec ); + (*act_resid_norm) = sqrt( lsqr_sqr( mnorm ) + + lsqr_sqr( damp_param ) * lsqr_sqr( nnorm ) ); + + test_mult( 0, act_sol_vec, act_rhs_vec, test_prod ); + + return; +} diff --git a/clsqr/test_mult.c b/clsqr/test_mult.c new file mode 100644 index 0000000000000..a2cb6371ed9fc --- /dev/null +++ b/clsqr/test_mult.c @@ -0,0 +1,100 @@ +/* +* test_mult.c +* For testing the matrix-vector product routine for the LSQR test problems. +* +* 08 Sep 1999: First version from James W. Howse +*/ + +#include "lsqr.h" + +#include "test_lsqr.h" + +void test_mult( long mode, + dvec *x, + dvec *y, + void *prod ) +/* +* ------------------------------------------------------------------ +* This is the matrix-vector product routine required by LSQR +* for a test matrix of the form A = HY*D*HZ. The quantities +* defining D, HY, HZ and the work array W are in the structure data. +* ------------------------------------------------------------------ +*/ +{ + long vec_indx, + num_rows, + num_cols; + + prod_data *data; + + data = (prod_data *) prod; + + num_cols = x->length; + num_rows = y->length; +/* +* Compute Y = Y + A*X +*/ + if( mode == 0 ) + { + lstp_mult( data->hz_vec, x, data->work_vec ); + + for( vec_indx = 0; vec_indx < num_cols; vec_indx++ ) + data->work_vec->elements[vec_indx] *= data->d_vec->elements[vec_indx]; + + for( vec_indx = num_cols; vec_indx < num_rows; vec_indx++ ) + data->work_vec->elements[vec_indx] = 0.0; + + lstp_mult( data->hy_vec, data->work_vec, data->work_vec ); + + for( vec_indx = 0; vec_indx < num_rows; vec_indx++ ) + y->elements[vec_indx] += data->work_vec->elements[vec_indx]; + } +/* +* Compute X = X + A^T*Y +*/ + if( mode == 1 ) + { + lstp_mult( data->hy_vec, y, data->work_vec ); + + for( vec_indx = 0; vec_indx < num_cols; vec_indx++ ) + data->work_vec->elements[vec_indx] *= data->d_vec->elements[vec_indx]; + + lstp_mult( data->hz_vec, data->work_vec, data->work_vec ); + + for( vec_indx = 0; vec_indx < num_cols; vec_indx++ ) + x->elements[vec_indx] += data->work_vec->elements[vec_indx]; + } + + return; +} + + +void lstp_mult( dvec *h, + dvec *x, + dvec *y ) +/* +* ------------------------------------------------------------------ +* HPROD applies a Householder transformation stored in H to get +* Y = ( I - 2*HZ*HZ(transpose) ) * X. +* ------------------------------------------------------------------ +*/ +{ + long vec_indx, + vec_length; + + double dwork; + + vec_length = h->length; + dwork = 0.0; + + for( vec_indx = 0; vec_indx < vec_length; vec_indx++ ) + dwork += h->elements[vec_indx] * x->elements[vec_indx]; + + dwork += dwork; + + for( vec_indx = 0; vec_indx < vec_length; vec_indx++ ) + y->elements[vec_indx] = x->elements[vec_indx] - dwork * +h->elements[vec_indx]; + + return; +} diff --git a/clsqr/test_prog.c b/clsqr/test_prog.c new file mode 100644 index 0000000000000..d3aa32cee51fb --- /dev/null +++ b/clsqr/test_prog.c @@ -0,0 +1,36 @@ +/* +* test_prog.c +* Tests C version of LSQR. +* +* 08 Sep 1999: First version from James W. Howse +*/ + +#include "lsqr.h" + +#include "test_lsqr.h" + +void main() +{ + double damp1, + damp2, + damp3, + damp4; + + damp1 = 0.1; + damp2 = 0.01; + damp3 = 0.001; + damp4 = 0.0001; + + test_lsqr( 1, 1, 1, 1, 0.0 ); + test_lsqr( 2, 1, 1, 1, 0.0 ); + test_lsqr( 40, 40, 4, 4, 0.0 ); + test_lsqr( 40, 40, 4, 4, damp2 ); + test_lsqr( 80, 40, 4, 4, damp2 ); + + test_lsqr( 10, 10, 1, 8, 0.0 ); + test_lsqr( 40, 40, 4, 7, 0.0 ); + test_lsqr( 80, 40, 4, 6, 0.0 ); + test_lsqr( 20, 10, 1, 6, 0.0 ); + test_lsqr( 20, 10, 1, 8, 0.0 ); + +} /* end main */ diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 380442bd3a2bc..a164eb9fa7620 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -20,6 +20,8 @@ ${PCRE_INCLUDES} ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/sql +${CMAKE_SOURCE_DIR}/clsqr +${CMAKE_BINARY_DIR}/clsqr ) SET(GEN_SOURCES @@ -110,7 +112,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE}) ADD_DEPENDENCIES(sql GenServerSource) DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} - mysys mysys_ssl dbug strings vio pcre ${LIBJEMALLOC} + mysys mysys_ssl dbug strings vio pcre clsqr ${LIBJEMALLOC} ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} ${SSL_LIBRARIES}) diff --git a/sql/opt_costmodel.h b/sql/opt_costmodel.h index b855ca28b3930..ced39771cbccb 100644 --- a/sql/opt_costmodel.h +++ b/sql/opt_costmodel.h @@ -5,6 +5,7 @@ class handler; +#define MAX_EQUATIONS 20 #define MAX_CONSTANTS 130 #define MAX_GLOBAL_CONSTANTS 2 #define TIME_FOR_COMPARE 0 diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 8a9e8ddc816f2..1ac9efb49656b 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -1322,6 +1322,7 @@ sp_head::execute(THD *thd, bool merge_da_on_success) thd->profiling.finish_current_query(); thd->profiling.start_new_query("continuing inside routine"); #endif + thd->utime_before_query= 0; /* get_instr returns NULL when we're done. */ i = get_instr(ip); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 924605f18ed95..a8df98bdb340d 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -68,6 +68,7 @@ #include +#include "lsqr.h" /* The following is used to initialise Table_ident with a internal table name @@ -899,7 +900,8 @@ THD::THD() main_da(0, false, false), m_stmt_da(&main_da), thd_cost_factors(cost_factors), - total_time(0) + equation_no(0), + utime_before_query(0) { ulong tmp; @@ -6434,21 +6436,74 @@ bool Discrete_intervals_list::append(Discrete_interval *new_interval) void THD::build_equation() { - total_time= utime_after_query - utime_before_query; - solve_equation(); + if(utime_before_query != 0) + { + coefficients[equation_no][MAX_CONSTANTS].value= utime_after_query - utime_before_query; + equation_no++; + } + if(equation_no == MAX_EQUATIONS) + { + solve_equation(); + equation_no= 0; + } } -void THD::solve_equation() +// Helper function, TODO:move it in lsqr +void mult(long mode, dvec *x, dvec *y, void *prod) { + long num_cols = x->length; + long num_rows = y->length; + long i, j; + dvec *A = (dvec *)prod; /* - Currently just dump all the coefficients to a file - */ - std::ofstream datafile; - char file_name[100]; - my_snprintf(file_name, 100, "/tmp/mariadb_cost_coefficients_%lu.txt", thread_id); - datafile.open(file_name, std::ios::app); - for(int i=0; i < MAX_CONSTANTS; i++) - datafile << coefficients[i].value << " "; - datafile << total_time << "\n"; - datafile.close(); + * Compute Y = Y + A*X + */ + if(mode == 0) + { + for(i=0; i < num_rows; i++) + { + for(j=0; j < num_cols; j++) + y->elements[i] += A->elements[i*num_cols+j]*x->elements[j]; + } + } + /* + * Compute X = X + A^T*Y + */ + if(mode == 1) + { + for(i=0; i < num_cols; i++) + { + for(j=0; j < num_rows; j++) + x->elements[i] += A->elements[j*num_cols+i]*y->elements[j]; + } + } +} + +void THD::solve_equation() +{ + lsqr_input *in; + lsqr_output *out; + lsqr_work *work; + lsqr_func *func; + dvec *prod; + long num_rows= equation_no; + long num_cols= MAX_CONSTANTS; + long row_indx, col_indx; + alloc_lsqr_mem( &in, &out, &work, &func, num_rows, num_cols ); + func->mat_vec_prod= mult; + prod= (dvec *) alloc_dvec(num_rows*num_cols); + for(row_indx= 0; row_indx < num_rows; row_indx++) + for(col_indx= 0; col_indx < num_cols; col_indx++) + prod->elements[row_indx*num_cols+col_indx]= coefficients[row_indx][col_indx].value; + for(row_indx=0; row_indx < num_rows; row_indx++) + in->rhs_vec->elements[row_indx]= coefficients[row_indx][MAX_CONSTANTS].value; + in->num_rows = num_rows; + in->num_cols = num_cols; + in->rel_mat_err = 1.0e-15; + in->rel_rhs_err = 1.0e-15; + in->max_iter = num_rows + num_cols + 50; + lsqr(in, out, work, func, prod); + // Solution is in out->sol_vec->elements + free_lsqr_mem(in, out, work, func); + free_dvec(prod); } diff --git a/sql/sql_class.h b/sql/sql_class.h index e8d6491c89163..1523f72681080 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3731,13 +3731,13 @@ class THD :public Statement, } private: Cost_factors thd_cost_factors; - eq_coefficient coefficients[MAX_CONSTANTS]; - ulonglong total_time; + eq_coefficient coefficients[MAX_EQUATIONS][MAX_CONSTANTS+1]; + uint equation_no; public: ulonglong utime_before_query; inline void inc_coefficient(uint factor, int engine_index= -1) { - coefficients[get_factor_index(factor, engine_index)].value++; + coefficients[equation_no][get_factor_index(factor, engine_index)].value++; } void build_equation(); void solve_equation(); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 71c01afffc375..ccbf47930fcca 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -757,7 +757,7 @@ static void handle_bootstrap_impl(THD *thd) thd->profiling.start_new_query(); thd->profiling.set_query_source(thd->query(), length); #endif - + thd->utime_before_query= 0; /* We don't need to obtain LOCK_thread_count here because in bootstrap mode we have only one thread. diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8832083ceccd9..33dfde54df6b0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -2356,6 +2356,7 @@ void JOIN::save_explain_data(Explain_query *output, bool can_overwrite, void JOIN::exec() { + //DBUG_ASSERT(thd->utime_before_query == 0); thd->utime_before_query= microsecond_interval_timer(); DBUG_EXECUTE_IF("show_explain_probe_join_exec_start", if (dbug_user_var_equals_int(thd,