diff --git a/python/pytest/test_all.py b/python/pytest/test_all.py index 5333cf101..5d534ff89 100644 --- a/python/pytest/test_all.py +++ b/python/pytest/test_all.py @@ -150,7 +150,7 @@ def test_poisson(self): # group = np.linspace(1, p, p) # model.fit(data.x, data.y, group=group) - model2 = abessPoisson(path_type="seq", sequence=sequence, ic_type='ebic', is_screening=False, screening_size=30, + model2 = abessPoisson(path_type="seq", sequence=sequence, ic_type='ebic', is_screening=True, screening_size=100, K_max=10, epsilon=10, powell_path=2, s_min=1, s_max=p, lambda_min=0.01, lambda_max=100, is_cv=False, K=5, exchange_num=2, tau=0.1 * np.log(n*p) / n, primary_model_fit_max_iter=80, primary_model_fit_epsilon=1e-6, early_stop=False, approximate_Newton=False, ic_coef=1., thread=1) diff --git a/python/src/model_fit.cpp b/python/src/model_fit.cpp index e11b82c3f..8b1378917 100644 --- a/python/src/model_fit.cpp +++ b/python/src/model_fit.cpp @@ -1,565 +1 @@ -// -// Created by jk on 2020/3/18. -// -#ifndef SRC_MODELFIT_H -#define SRC_MODELFIT_H -#endif - -#ifdef R_BUILD - -#include -#include -//[[Rcpp::depends(RcppEigen)]] -using namespace Rcpp; - -#else - -#include -#include "List.h" - -#endif - -#include -#include "utilities.h" -#include -#include "model_fit.h" -using namespace std; - -void multinomial_fit(Eigen::MatrixXd &x, Eigen::MatrixXd &y, Eigen::VectorXd &weights, Eigen::MatrixXd &beta, Eigen::VectorXd &coef0, double loss0, bool approximate_Newton, int primary_model_fit_max_iter, double primary_model_fit_epsilon, double tau, double lambda) -{ -#ifdef TEST - clock_t t1 = clock(); -#endif - // cout << "primary_fit-----------" << endl; - // if (X.cols() == 0) - // { - // coef0 = -log(y.colwise().sum().eval() - 1.0); - // return; - // } - -#ifdef TEST - std::cout << "primary_model_fit 1" << endl; -#endif - int n = x.rows(); - int p = x.cols(); - int M = y.cols(); - Eigen::MatrixXd X = Eigen::MatrixXd::Ones(n, p + 1); - X.rightCols(p) = x; - // Eigen::MatrixXd X_new = Eigen::MatrixXd::Zero(n, p + 1); - // Eigen::MatrixXd X_new_transpose = Eigen::MatrixXd::Zero(p + 1, n); - Eigen::MatrixXd beta0 = Eigen::MatrixXd::Zero(p + 1, M); -#ifdef TEST - std::cout << "primary_model_fit 2" << endl; -#endif - beta0.row(0) = coef0; - beta0.block(1, 0, p, M) = beta; -#ifdef TEST - std::cout << "primary_model_fit 3" << endl; -#endif - // Eigen::VectorXd one = Eigen::VectorXd::Ones(n); - Eigen::MatrixXd Pi; - pi(X, y, beta0, Pi); - Eigen::MatrixXd log_Pi = Pi.array().log(); - array_product(log_Pi, weights, 1); - double loglik1 = DBL_MAX, loglik0 = (log_Pi.array() * y.array()).sum(); -#ifdef TEST - std::cout << "primary_model_fit 4" << endl; -#endif - - int j; - if (approximate_Newton) - { - Eigen::MatrixXd one = Eigen::MatrixXd::Ones(n, M); - double t = 2 * (Pi.array() * (one - Pi).array()).maxCoeff(); - Eigen::MatrixXd res = X.transpose() * (y - Pi) / t; - // ConjugateGradient cg; - // cg.compute(X.adjoint() * X); - Eigen::MatrixXd XTX = X.transpose() * X; - Eigen::MatrixXd invXTX = XTX.ldlt().solve(Eigen::MatrixXd::Identity(p + 1, p + 1)); - - Eigen::MatrixXd beta1; - for (j = 0; j < primary_model_fit_max_iter; j++) - { -#ifdef TEST - std::cout << "primary_model_fit 3: " << j << endl; -#endif - - // beta1 = beta0 + cg.solve(res); - beta1 = beta0 + invXTX * res; - // cout << "beta1: " << beta1 << endl; - - // double app_loss0, app_loss1, app_loss2; - // app_loss0 = ((y - Pi) / t).squaredNorm(); - // app_loss1 = (-X * beta0 - (y - Pi) / t).squaredNorm(); - // app_loss2 = (X * (beta1 - beta0) - (y - Pi) / t).squaredNorm(); - // cout << "app_loss0: " << app_loss0 << endl; - // cout << "app_loss1: " << app_loss1 << endl; - // cout << "app_loss2: " << app_loss2 << endl; - - pi(X, y, beta1, Pi); - log_Pi = Pi.array().log(); - array_product(log_Pi, weights, 1); - loglik1 = (log_Pi.array() * y.array()).sum(); - // cout << "loglik1: " << loglik1 << endl; - // cout << "loglik0: " << loglik0 << endl; - - // cout << "j=" << j << " loglik: " << loglik1 << endl; - // cout << "j=" << j << " loglik diff: " << loglik0 - loglik1 << endl; - bool condition1 = -(loglik1 + (primary_model_fit_max_iter - j - 1) * (loglik1 - loglik0)) + tau > loss0; - // bool condition1 = false; - bool condition2 = abs(loglik0 - loglik1) / (0.1 + abs(loglik1)) < primary_model_fit_epsilon; - bool condition3 = abs(loglik1) < min(1e-3, tau); - bool condition4 = loglik1 < loglik0; - // bool condition4 = false; - if (condition1 || condition2 || condition3 || condition4) - { - break; - } - loglik0 = loglik1; - for (int m1 = 0; m1 < M; m1++) - { - beta0.col(m1) = beta1.col(m1) - beta1.col(M - 1); - } - // beta0 = beta1; - t = 2 * (Pi.array() * (one - Pi).array()).maxCoeff(); - res = X.transpose() * (y - Pi) / t; - } - } - else - { - /* Newton */ - Eigen::MatrixXd W(M * n, M * n); - Eigen::VectorXd one = Eigen::VectorXd::Ones(n); - for (int m1 = 0; m1 < M; m1++) - { - for (int m2 = m1; m2 < M; m2++) - { - if (m1 == m2) - { - W.block(m1 * n, m2 * n, n, n) = Eigen::MatrixXd::Zero(n, n); -#ifdef TEST - std::cout << "primary_model_fit 5" << endl; - -#endif - Eigen::VectorXd PiPj = Pi.col(m1).array() * (one - Pi.col(m1).eval()).array(); - // cout << "PiPj: " << PiPj << endl; - for (int i = 0; i < PiPj.size(); i++) - { - if (PiPj(i) < 0.001) - { - PiPj(i) = 0.001; - } - } - W.block(m1 * n, m2 * n, n, n).diagonal() = PiPj; - -#ifdef TEST - std::cout << "primary_model_fit 6" << endl; - cout << "W m1 m2: " << W.block(m1 * n, m2 * n, n, n) << endl; -#endif - } - else - { - W.block(m1 * n, m2 * n, n, n) = Eigen::MatrixXd::Zero(n, n); -#ifdef TEST - std::cout << "primary_model_fit 5" << endl; - -#endif - Eigen::VectorXd PiPj = Pi.col(m1).array() * Pi.col(m2).array(); - // cout << "PiPj: " << PiPj << endl; - for (int i = 0; i < PiPj.size(); i++) - { - if (PiPj(i) < 0.001) - { - PiPj(i) = 0.001; - } - } - W.block(m1 * n, m2 * n, n, n).diagonal() = -PiPj; - W.block(m2 * n, m1 * n, n, n) = W.block(m1 * n, m2 * n, n, n); - - // cout << "W m1 m2: " << W.block(m1 * n, m2 * n, n, n) << endl; - } - } - } - -#ifdef TEST - std::cout << "primary_model_fit 7" << endl; -#endif - // cout << "W: " << W << endl; - - Eigen::MatrixXd XTWX(M * (p + 1), M * (p + 1)); - Eigen::MatrixXd XTW(M * (p + 1), M * n); - for (int m1 = 0; m1 < M; m1++) - { - for (int m2 = m1; m2 < M; m2++) - { - XTW.block(m1 * (p + 1), m2 * n, (p + 1), n) = X.transpose() * W.block(m1 * n, m2 * n, n, n); - XTWX.block(m1 * (p + 1), m2 * (p + 1), (p + 1), (p + 1)) = XTW.block(m1 * (p + 1), m2 * n, (p + 1), n) * X; - XTW.block(m2 * (p + 1), m1 * n, (p + 1), n) = XTW.block(m1 * (p + 1), m2 * n, (p + 1), n); - XTWX.block(m2 * (p + 1), m1 * (p + 1), (p + 1), (p + 1)) = XTWX.block(m1 * (p + 1), m2 * (p + 1), (p + 1), (p + 1)); - } - } - -#ifdef TEST - std::cout << "primary_model_fit 8" << endl; -#endif - - // Eigen::Matrix res(M, 1); - Eigen::VectorXd res(M * n); - for (int m1 = 0; m1 < M; m1++) - { - res.segment(m1 * n, n) = y.col(m1).eval() - Pi.col(m1).eval(); - } - -#ifdef TEST - std::cout << "primary_model_fit 9" << endl; - cout << "res: " << res << endl; -#endif - - Eigen::VectorXd Xbeta(M * n); - for (int m1 = 0; m1 < M; m1++) - { - Xbeta.segment(m1 * n, n) = X * beta0.col(m1).eval(); - } - -#ifdef TEST - std::cout << "primary_model_fit 10" << endl; - cout << "Xbeta: " << Xbeta << endl; -#endif - - Eigen::VectorXd Z = Xbeta + W.ldlt().solve(res); -#ifdef TEST - std::cout << "primary_model_fit 11" << endl; -#endif - -#ifdef TEST - std::cout << "primary_model_fit 2" << endl; -#endif - - Eigen::MatrixXd beta1; - Eigen::VectorXd beta0_tmp; - for (j = 0; j < primary_model_fit_max_iter; j++) - { -#ifdef TEST - std::cout << "primary_model_fit 3: " << j << endl; -#endif - beta0_tmp = XTWX.ldlt().solve(XTW * Z); - for (int m1 = 0; m1 < M; m1++) - { - beta0.col(m1) = beta0_tmp.segment(m1 * (p + 1), (p + 1)) - beta0_tmp.segment((M - 1) * (p + 1), (p + 1)); - } - // cout << "beta0" << beta0 << endl; - - pi(X, y, beta0, Pi); - log_Pi = Pi.array().log(); - array_product(log_Pi, weights, 1); - loglik1 = (log_Pi.array() * y.array()).sum(); - // cout << "loss" << loglik1 << endl; - // cout << "j=" << j << " loglik: " << loglik1 << endl; - // cout << "j=" << j << " loglik diff: " << loglik0 - loglik1 << endl; - bool condition1 = -(loglik1 + (primary_model_fit_max_iter - j - 1) * (loglik1 - loglik0)) + tau > loss0; - // bool condition1 = false; - bool condition2 = abs(loglik0 - loglik1) / (0.1 + abs(loglik1)) < primary_model_fit_epsilon; - bool condition3 = abs(loglik1) < min(1e-3, tau); - bool condition4 = loglik1 < loglik0; - if (condition1 || condition2 || condition3 || condition4) - { - // cout << "condition1:" << condition1 << endl; - // cout << "condition2:" << condition2 << endl; - // cout << "condition3:" << condition3 << endl; - break; - } - loglik0 = loglik1; - - for (int m1 = 0; m1 < M; m1++) - { - for (int m2 = m1; m2 < M; m2++) - { - if (m1 == m2) - { - // W(m1, m2) = Eigen::MatrixXd::Zero(n, n); - // W(m1, m2).diagonal() = Pi.col(m1).array() * (one - Pi.col(m1).eval()).array(); - - W.block(m1 * n, m2 * n, n, n) = Eigen::MatrixXd::Zero(n, n); - Eigen::VectorXd PiPj = Pi.col(m1).array() * (one - Pi.col(m1).eval()).array(); - for (int i = 0; i < PiPj.size(); i++) - { - if (PiPj(i) < 0.001) - { - PiPj(i) = 0.001; - } - } - W.block(m1 * n, m2 * n, n, n).diagonal() = PiPj; - } - else - { - W.block(m1 * n, m2 * n, n, n) = Eigen::MatrixXd::Zero(n, n); - Eigen::VectorXd PiPj = Pi.col(m1).array() * Pi.col(m2).array(); - for (int i = 0; i < PiPj.size(); i++) - { - if (PiPj(i) < 0.001) - { - PiPj(i) = 0.001; - } - } - W.block(m1 * n, m2 * n, n, n).diagonal() = -PiPj; - W.block(m2 * n, m1 * n, n, n) = W.block(m1 * n, m2 * n, n, n); - } - } - } - - for (int m1 = 0; m1 < M; m1++) - { - for (int m2 = m1; m2 < M; m2++) - { - XTW.block(m1 * (p + 1), m2 * n, (p + 1), n) = X.transpose() * W.block(m1 * n, m2 * n, n, n); - XTWX.block(m1 * (p + 1), m2 * (p + 1), (p + 1), (p + 1)) = XTW.block(m1 * (p + 1), m2 * n, (p + 1), n) * X; - XTW.block(m2 * (p + 1), m1 * n, (p + 1), n) = XTW.block(m1 * (p + 1), m2 * n, (p + 1), n); - XTWX.block(m2 * (p + 1), m1 * (p + 1), (p + 1), (p + 1)) = XTWX.block(m1 * (p + 1), m2 * (p + 1), (p + 1), (p + 1)); - } - } - - for (int m1 = 0; m1 < M; m1++) - { - res.segment(m1 * n, n) = y.col(m1).eval() - Pi.col(m1).eval(); - } - - for (int m1 = 0; m1 < M; m1++) - { - Xbeta.segment(m1 * n, n) = X * beta0.col(m1).eval(); - } - - Z = Xbeta + W.ldlt().solve(res); - } - } - -#ifdef TEST - clock_t t2 = clock(); - std::cout << "primary fit time: " << ((double)(t2 - t1) / CLOCKS_PER_SEC) << endl; - cout << "primary fit iter : " << j << endl; -#endif - - beta = beta0.block(1, 0, p, M); - coef0 = beta0.row(0).eval(); -}; - -void multigaussian_fit(Eigen::MatrixXd &x, Eigen::MatrixXd &y, Eigen::VectorXd &weights, Eigen::MatrixXd &beta, Eigen::VectorXd &coef0, double loss0, bool approximate_Newton, int primary_model_fit_max_iter, double primary_model_fit_epsilon, double tau, double lambda) -{ - Eigen::ConjugateGradient cg; - cg.compute(x.adjoint() * x + lambda * Eigen::MatrixXd::Identity(x.cols(), x.cols())); - beta = cg.solveWithGuess(x.adjoint() * y, beta); -} - -void lm_fit(Eigen::MatrixXd &x, Eigen::VectorXd &y, Eigen::VectorXd &weights, Eigen::VectorXd &beta, double &coef0, double loss0, bool approximate_Newton, int primary_model_fit_max_iter, double primary_model_fit_epsilon, double tau, double lambda) -{ - // beta = (X.adjoint() * X + this->lambda_level * Eigen::MatrixXd::Identity(X.cols(), X.cols())).colPivHouseholderQr().solve(X.adjoint() * y); - - // beta = (X.adjoint() * X + this->lambda_level * Eigen::MatrixXd::Identity(X.cols(), X.cols())).ldlt().solve(X.adjoint() * y); - - // if (X.cols() == 0) - // { - // coef0 = y.mean(); - // return; - // } - - // CG - Eigen::ConjugateGradient cg; - cg.compute(x.adjoint() * x + lambda * Eigen::MatrixXd::Identity(x.cols(), x.cols())); - beta = cg.solveWithGuess(x.adjoint() * y, beta); -#ifdef TEST - cout << "xx: " << x.adjoint() * x << endl; - cout << "xy: " << x.adjoint() * y << endl; -#endif -}; - -void cox_fit(Eigen::MatrixXd &x, Eigen::VectorXd &y, Eigen::VectorXd &weight, Eigen::VectorXd &beta, double &coef0, double loss0, bool approximate_Newton, int primary_model_fit_max_iter, double primary_model_fit_epsilon, double tau, double lambda) -{ -#ifdef TEST - clock_t t1 = clock(); -#endif - if (x.cols() == 0) - { - coef0 = 0.; - return; - } - - // cout << "primary_fit-----------" << endl; - int n = x.rows(); - int p = x.cols(); - Eigen::VectorXd theta(n); - Eigen::MatrixXd one = (Eigen::MatrixXd::Ones(n, n)).triangularView(); - Eigen::MatrixXd x_theta(n, p); - Eigen::VectorXd xij_theta(n); - Eigen::VectorXd cum_theta(n); - Eigen::VectorXd g(p); - Eigen::VectorXd beta0 = beta, beta1; - Eigen::VectorXd cum_eta(n); - Eigen::VectorXd cum_eta2(n); - Eigen::VectorXd cum_eta3(n); - Eigen::MatrixXd h(p, p); - Eigen::VectorXd eta; - - Eigen::VectorXd d(p); - double loglik1, loglik0 = loglik_cox(x, y, weight, beta0); - // beta = Eigen::VectorXd::Zero(p); - - double step = 1.0; - int l; - for (l = 1; l <= primary_model_fit_max_iter; l++) - { - - eta = x * beta0; - for (int i = 0; i <= n - 1; i++) - { - if (eta(i) < -30.0) - eta(i) = -30.0; - if (eta(i) > 30.0) - eta(i) = 30.0; - } - eta = weight.array() * eta.array().exp(); - cum_eta(n - 1) = eta(n - 1); - for (int k = n - 2; k >= 0; k--) - { - cum_eta(k) = cum_eta(k + 1) + eta(k); - } - cum_eta2(0) = (y(0) * weight(0)) / cum_eta(0); - for (int k = 1; k <= n - 1; k++) - { - cum_eta2(k) = (y(k) * weight(k)) / cum_eta(k) + cum_eta2(k - 1); - } - cum_eta3(0) = (y(0) * weight(0)) / pow(cum_eta(0), 2); - for (int k = 1; k <= n - 1; k++) - { - cum_eta3(k) = (y(k) * weight(k)) / pow(cum_eta(k), 2) + cum_eta3(k - 1); - } - h = -cum_eta3.replicate(1, n); - h = h.cwiseProduct(eta.replicate(1, n)); - h = h.cwiseProduct(eta.replicate(1, n).transpose()); - for (int i = 0; i < n; i++) - { - for (int j = i + 1; j < n; j++) - { - h(j, i) = h(i, j); - } - } - h.diagonal() = cum_eta2.cwiseProduct(eta) + h.diagonal(); - g = weight.cwiseProduct(y) - cum_eta2.cwiseProduct(eta); - - if (approximate_Newton) - { - d = (x.transpose() * g).cwiseQuotient((x.transpose() * h * x).diagonal()); - } - else - { - d = (x.transpose() * h * x).ldlt().solve(x.transpose() * g); - } - - beta1 = beta0 + step * d; - - loglik1 = loglik_cox(x, y, weight, beta1); - - while (loglik1 < loglik0 && step > primary_model_fit_epsilon) - { - step = step / 2; - beta1 = beta0 + step * d; - loglik1 = loglik_cox(x, y, weight, beta1); - } - - bool condition1 = -(loglik1 + (primary_model_fit_max_iter - l - 1) * (loglik1 - loglik0)) + tau > loss0; - if (condition1) - { - loss0 = -loglik0; - // beta = beta0; - break; - } - - if (loglik1 > loglik0) - { - beta0 = beta1; - loglik0 = loglik1; - } - - if (step < primary_model_fit_epsilon) - { - loss0 = -loglik0; - // beta = beta0; - // cout << "condition2" << endl; - break; - } - } -#ifdef TEST - clock_t t2 = clock(); - std::cout << "primary fit time: " << ((double)(t2 - t1) / CLOCKS_PER_SEC) << endl; - cout << "primary fit iter : " << l << endl; -#endif - - beta = beta0; -}; - -void poisson_fit(Eigen::MatrixXd &x, Eigen::VectorXd &y, Eigen::VectorXd &weights, Eigen::VectorXd &beta, double &coef0, double loss0, bool approximate_Newton, int primary_model_fit_max_iter, double primary_model_fit_epsilon, double tau, double lambda) -{ -#ifdef TEST - clock_t t1 = clock(); - std::cout << "primary_fit-----------" << endl; -#endif - if (x.cols() == 0) - { - coef0 = log(y.mean()); - return; - } - - int n = x.rows(); - int p = x.cols(); - Eigen::MatrixXd X = Eigen::MatrixXd::Ones(n, p + 1); - X.rightCols(p) = x; - Eigen::MatrixXd X_trans = X.transpose(); - Eigen::MatrixXd temp = Eigen::MatrixXd::Zero(p + 1, n); - Eigen::VectorXd beta0 = Eigen::VectorXd::Zero(p + 1); - beta0.tail(p) = beta; - beta0(0) = coef0; - Eigen::VectorXd eta = X * beta0; - Eigen::VectorXd expeta = eta.array().exp(); - Eigen::VectorXd z = Eigen::VectorXd::Zero(n); - double loglik0 = (y.cwiseProduct(eta) - expeta).dot(weights); - double loglik1; - - int j; - for (j = 0; j < primary_model_fit_max_iter; j++) - { - for (int i = 0; i < n; i++) - { - temp.col(i) = X_trans.col(i) * expeta(i) * weights(i); - } - z = eta + (y - expeta).cwiseQuotient(expeta); - beta0 = (temp * X).ldlt().solve(temp * z); - eta = X * beta0; - for (int i = 0; i < n; i++) - { - if (eta(i) < -30.0) - eta(i) = -30.0; - if (eta(i) > 30.0) - eta(i) = 30.0; - } - expeta = eta.array().exp(); - loglik1 = (y.cwiseProduct(eta) - expeta).dot(weights); - bool condition1 = -(loglik1 + (primary_model_fit_max_iter - j - 1) * (loglik1 - loglik0)) + tau > loss0; - // bool condition1 = false; - bool condition2 = abs(loglik0 - loglik1) / (0.1 + abs(loglik1)) < primary_model_fit_epsilon; - bool condition3 = abs(loglik1) < min(1e-3, tau); - if (condition1 || condition2 || condition3) - { - // cout << "condition1:" << condition1 << endl; - // cout << "condition2:" << condition2 << endl; - // cout << "condition3:" << condition3 << endl; - break; - } - loglik0 = loglik1; - } -#ifdef TEST - clock_t t2 = clock(); - std::cout << "primary fit time: " << ((double)(t2 - t1) / CLOCKS_PER_SEC) << endl; - cout << "primary fit iter : " << j << endl; -#endif - beta = beta0.tail(p).eval(); - coef0 = beta0(0); -};