
Commit 3b65e68

Merge 0568cc4 into a5b46d7
jburos committed Aug 8, 2016
2 parents a5b46d7 + 0568cc4 commit 3b65e68
Showing 9 changed files with 531 additions and 100 deletions.
6 changes: 3 additions & 3 deletions .coveragerc
@@ -1,6 +1,6 @@
# .coveragerc to control coverage.py
[run]
omit =
test/*
versioneer.py
survivalstan/_version.py
test/*
versioneer.py
survivalstan/_version.py
4 changes: 1 addition & 3 deletions .travis.yml
@@ -36,9 +36,7 @@ install:
- pip install coveralls
script:
- ./lint.sh
- travis_wait 120 nosetests -w test -s --with-coverage --cover-package=survivalstan
# travis_wait 120 nosetests -w test -s test_survival_models.py:test_pem_varcoef_model --with-coverage --cover-package=survivalstan
# travis_wait 120 nosetests -w test -s test_survival_models.py:test_pem_model --with-coverage --cover-package=survivalstan
- nosetests -w test -sv --with-coverage --cover-package=survivalstan
after_success:
coveralls
deploy:
30 changes: 18 additions & 12 deletions survivalstan/models.py
@@ -2,22 +2,28 @@

resource_package = __name__ ## Could be any module/package name.

_pem_survival_path = os.path.join('stan', 'pem_survival_model.stan')
_pem_survival_varcoef_path = os.path.join('stan', 'pem_survival_model_varying_coefs.stan')
_pem_survival_varcoef_path2 = os.path.join('stan', 'pem_survival_model_varying_coefs2.stan')
_pem_survival_varcoef_path4 = os.path.join('stan', 'pem_survival_model_varying_coefs4.stan')
_pem_survival_unstructured_path = os.path.join('stan', 'pem_survival_model_unstructured.stan')
_pem_survival_randomwalk_path = os.path.join('stan', 'pem_survival_model_randomwalk.stan')
_weibull_survival_path = os.path.join('stan', 'weibull_survival_model.stan')
_exp_survival_path = os.path.join('stan', 'exp_survival_model.stan')

# varying-coefs models
_weibull_survival_varcoef_path = os.path.join('stan', 'weibull_survival_model_varying_coefs.stan')
_pem_survival_varcoef_path = os.path.join('stan', 'pem_survival_model_varying_coefs.stan')

pem_survival_model = pkg_resources.resource_string(
resource_package, _pem_survival_path).decode("utf-8")
pem_survival_model_varying_coefs = pkg_resources.resource_string(
resource_package, _pem_survival_varcoef_path).decode("utf-8")
pem_survival_model_varying_coefs2 = pkg_resources.resource_string(
resource_package, _pem_survival_varcoef_path2).decode("utf-8")
pem_survival_model_varying_coefs4 = pkg_resources.resource_string(
resource_package, _pem_survival_varcoef_path4).decode("utf-8")
resource_package, _pem_survival_unstructured_path).decode("utf-8")
pem_survival_model_unstructured = pkg_resources.resource_string(
resource_package, _pem_survival_unstructured_path).decode("utf-8")
pem_survival_model_randomwalk = pkg_resources.resource_string(
resource_package, _pem_survival_randomwalk_path).decode("utf-8")
weibull_survival_model = pkg_resources.resource_string(
resource_package, _weibull_survival_path).decode("utf-8")
exp_survival_model = pkg_resources.resource_string(
resource_package, _exp_survival_path).decode("utf-8")

# varying-coefs models
pem_survival_model_varying_coefs = pkg_resources.resource_string(
resource_package, _pem_survival_varcoef_path).decode("utf-8")
weibull_survival_model_varying_coefs = pkg_resources.resource_string(
resource_package, _weibull_survival_varcoef_path).decode("utf-8")
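
Note on usage: each of the module-level attributes above holds the raw Stan program text, read from the package's stan/ directory via pkg_resources. A minimal sketch of compiling one of these strings directly, assuming pystan 2.x is installed (illustrative only, not part of this commit):

# Minimal sketch, assuming pystan 2.x; illustrative, not part of this commit.
import pystan
from survivalstan import models

model_code = models.exp_survival_model               # plain Stan source, decoded as UTF-8 above
stan_model = pystan.StanModel(model_code=model_code)  # compile the Stan program
# stan_model.sampling(data=...) would then draw from the posterior given suitable data.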
69 changes: 69 additions & 0 deletions survivalstan/stan/exp_survival_model.stan
@@ -0,0 +1,69 @@
functions {
vector sqrt_vec(vector x) {
vector[dims(x)[1]] res;

for (m in 1:dims(x)[1]){
res[m] = sqrt(x[m]);
}

return res;
}

vector bg_prior_lp(real r_global, vector r_local) {
r_global ~ normal(0.0, 10.0);
r_local ~ inv_chi_square(1.0);

return r_global * sqrt_vec(r_local);
}
}
data {
// dimensions
int<lower=0> N; // number of observations
int<lower=1> M; // number of predictors

// observations
matrix[N, M] x; // predictors for observation n
vector[N] y; // time for observation n
vector[N] event; // event status (1:event, 0:censor) for obs n
}
parameters {
real<lower=0> tau_s_raw;
vector<lower=0>[M] tau_raw;
vector[M] beta_raw;
real alpha;
}
transformed parameters {
vector[M] beta;
vector[N] lp;

beta = bg_prior_lp(tau_s_raw, tau_raw) .* beta_raw;
for (n in 1:N) {
lp[n] = exp(dot_product(x[n], beta));
}
}
model {
// priors
target += normal_lpdf(beta_raw | 0.0, 1.0);
target += normal_lpdf(alpha | 0.0, 1.0);

// likelihood
for (n in 1:N) {
if (event[n]==1)
target += exponential_lpdf(y[n] | (lp[n] * alpha));
else
target += exponential_lccdf(y[n] | (lp[n] * alpha));
}
}
generated quantities {
vector[N] yhat_uncens;
vector[N] log_lik;

for (n in 1:N) {
yhat_uncens[n] = exponential_rng((lp[n] * alpha));
if (event[n]==1) {
log_lik[n] = exponential_log(y[n], (lp[n] * alpha));
} else {
log_lik[n] = exponential_ccdf_log(y[n], (lp[n] * alpha));
}
}
}
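
The model block above is the standard right-censored exponential likelihood: a row with an observed event contributes the log density at rate lp[n] * alpha, and a censored row contributes the log survival function (the lccdf). A small numpy/scipy sketch of the same quantity, useful for sanity-checking fitted draws (the function and argument names are illustrative):

# Sketch of the censored-exponential log-likelihood used above; illustrative only.
import numpy as np
from scipy import stats

def exp_surv_loglik(y, event, X, beta, alpha):
    """Total log-likelihood of the exponential survival model above."""
    rate = np.exp(X @ beta) * alpha                    # lp[n] * alpha in the Stan code
    logpdf = stats.expon.logpdf(y, scale=1.0 / rate)   # rows with an observed event
    logsf = stats.expon.logsf(y, scale=1.0 / rate)     # right-censored rows
    return np.where(event == 1, logpdf, logsf).sum()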
73 changes: 35 additions & 38 deletions survivalstan/stan/pem_survival_model_randomwalk.stan
@@ -5,81 +5,78 @@
T = max timepoint (number of timepoint ids)
M = number of covariates
// data
// main data matrix (per observed timepoint*record)
s = sample id for each obs
t = timepoint id for each obs
event = integer indicating if there was an event at time t for sample s
x = matrix of real-valued covariates at time t for sample n [N, X]
obs_t = observed end time for interval for timepoint for that obs
// timepoint-specific data (per timepoint, ordered by timepoint id)
t_obs = observed time since origin for each timepoint id (end of period)
t_dur = duration of each timepoint period (first diff of t_obs)
*/
// Jacqueline Buros Novik <jackinovik@gmail.com>


data {
// dimensions
int<lower=1> N;
int<lower=1> S;
int<lower=1> T;
int<lower=0> M;

// data matrix
int<lower=1, upper=N> s[N]; // sample id
int<lower=1, upper=T> t[N]; // timepoint id
int<lower=0, upper=1> event[N]; // 1: event, 0:censor
matrix[N, M] x; // explanatory vars
real<lower=0> obs_t[N]; // observed end time for each obs

// timepoint data
vector<lower=0>[T] t_obs;
vector<lower=0>[T] t_dur;
}
transformed data {
real<lower=0> t_dur[T]; // duration for each timepoint
real<lower=0> t_obs[T]; // observed end time for each timepoint
real c;
real r;

// baseline hazard params (fixed)
c <- 0.001;
r <- 0.1;

// capture observation time for each timepoint id t
for (i in 1:N) {
// assume these are constant per id across samples
t_obs[t[i]] <- obs_t[i];
}
vector[T] log_t_dur; // log-duration for each timepoint

// duration of each timepoint
// duration at first timepoint = t_obs[1] ( implicit t0 = 0 )
t_dur[1] <- t_obs[1];
for (i in 2:T) {
t_dur[i] <- t_obs[i] - t_obs[i-1];
}
log_t_dur = log(t_obs);
}
parameters {
vector<lower=0>[T] baseline_raw; // unstructured baseline hazard for each timepoint t
vector[M] beta; // beta for each covariate
vector[T] log_baseline_raw; // unstructured baseline hazard for each timepoint t
vector[M] beta; // beta for each covariate
real<lower=0> baseline_sigma;
real<lower=0> baseline_loc;
real log_baseline_mu;
}
transformed parameters {
vector<lower=0>[N] hazard;
vector<lower=0>[T] baseline;
vector[N] log_hazard;
vector[T] log_baseline;

for (i in 1:T) {
baseline[i] <- baseline_raw[i]*t_dur[i];
}
log_baseline = log_baseline_raw + log_t_dur;

for (n in 1:N) {
hazard[n] <- exp(x[n,]*beta)*baseline[t[n]];
log_hazard[n] = log_baseline_mu + log_baseline[t[n]] + x[n,]*beta;
}
}
model {
baseline_loc ~ normal(0, 1);
baseline_raw[1] ~ normal(baseline_loc, 1);
beta ~ cauchy(0, 2);
event ~ poisson_log(log_hazard);
log_baseline_mu ~ normal(0, 1);
baseline_sigma ~ normal(0, 1);
log_baseline_raw[1] ~ normal(0, 1);
for (i in 2:T) {
baseline_raw[i] ~ normal(baseline_raw[i-1], baseline_sigma);
log_baseline_raw[i] ~ normal(log_baseline_raw[i-1], baseline_sigma);
}
beta ~ cauchy(0, 2);
event ~ poisson(hazard);
}
generated quantities {
real log_lik[N];
//int yhat_uncens[N];
vector[T] baseline_raw;

// compute raw baseline hazard, for summary/plotting
baseline_raw = exp(log_baseline_raw);

for (n in 1:N) {
log_lik[n] <- poisson_log(event[n], hazard[n]);
//yhat_uncens[n] = poisson_log_rng(log_hazard[n]);
log_lik[n] <- poisson_log_log(event[n], log_hazard[n]);
}
}
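
For context, the header comment describes a long, person-period layout: one row per subject per timepoint at risk, with event = 1 only in the interval where that subject's event falls, plus per-timepoint vectors t_obs and t_dur. A rough pandas sketch of that expansion (survivalstan ships its own data-prep helpers; the function and column names here are assumptions for illustration):

# Rough sketch of the person-period expansion described in the header; illustrative only.
import numpy as np
import pandas as pd

def to_long_format(df, time_col="time", event_col="event"):
    """Expand one-row-per-subject survival data into one row per subject per timepoint."""
    t_obs = np.sort(df[time_col].unique())   # end of each timepoint period
    t_dur = np.diff(t_obs, prepend=0.0)      # first diff of t_obs (implicit t0 = 0)
    rows = []
    for s_id, (time, event) in enumerate(zip(df[time_col], df[event_col]), start=1):
        for t_id, end in enumerate(t_obs, start=1):
            if end > time:
                break                        # subject no longer at risk past its own end time
            rows.append({"s": s_id, "t": t_id, "event": int(event and end == time)})
    return pd.DataFrame(rows), t_obs, t_dur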
79 changes: 79 additions & 0 deletions survivalstan/stan/pem_survival_model_unstructured.stan
@@ -0,0 +1,79 @@
/* Variable naming:
// dimensions
N = total number of observations (length of data)
S = number of sample ids
T = max timepoint (number of timepoint ids)
M = number of covariates
// main data matrix (per observed timepoint*record)
s = sample id for each obs
t = timepoint id for each obs
event = integer indicating if there was an event at time t for sample s
x = matrix of real-valued covariates at time t for sample n [N, X]
// timepoint-specific data (per timepoint, ordered by timepoint id)
t_obs = observed time since origin for each timepoint id (end of period)
t_dur = duration of each timepoint period (first diff of t_obs)
*/
// Jacqueline Buros Novik <jackinovik@gmail.com>

data {
// dimensions
int<lower=1> N;
int<lower=1> S;
int<lower=1> T;
int<lower=0> M;

// data matrix
int<lower=1, upper=N> s[N]; // sample id
int<lower=1, upper=T> t[N]; // timepoint id
int<lower=0, upper=1> event[N]; // 1: event, 0:censor
matrix[N, M] x; // explanatory vars

// timepoint data
vector<lower=0>[T] t_obs;
vector<lower=0>[T] t_dur;
}
transformed data {
vector[T] log_t_dur; // log-duration for each timepoint

log_t_dur = log(t_obs);
}
parameters {
vector[T] log_baseline_raw; // unstructured baseline hazard for each timepoint t
vector[M] beta; // beta for each covariate
real<lower=0> baseline_sigma;
real log_baseline_mu;
}
transformed parameters {
vector[N] log_hazard;
vector[T] log_baseline; // unstructured baseline hazard for each timepoint t

log_baseline = log_baseline_raw + log_t_dur;

for (n in 1:N) {
log_hazard[n] = log_baseline_mu + log_baseline[t[n]] + x[n,]*beta;
}
}
model {
beta ~ cauchy(0, 2);
event ~ poisson_log(log_hazard);
log_baseline_mu ~ normal(0, 1);
baseline_sigma ~ normal(0, 1);
log_baseline_raw ~ normal(0, baseline_sigma);
}
generated quantities {
real log_lik[N];
//int yhat_uncens[N];
vector[T] baseline_raw;

// compute raw baseline hazard, for summary/plotting
baseline_raw = exp(log_baseline_raw);

// prepare yhat_uncens & log_lik
for (n in 1:N) {
//yhat_uncens[n] = poisson_log_rng(log_hazard[n]);
log_lik[n] = poisson_log_log(event[n], log_hazard[n]);
}
}
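
Both new PEM variants score each row in generated quantities with poisson_log_log(event | log_hazard), i.e. a Poisson log-pmf on the 0/1 event indicator with the log hazard as the log rate, which is the usual Poisson-likelihood trick for piecewise-exponential models. A tiny numpy/scipy sketch of that per-row quantity (illustrative only):

# Per-row log-likelihood computed in generated quantities above; illustrative sketch.
import numpy as np
from scipy import stats

def pem_log_lik(event, log_hazard):
    """Poisson-trick log-likelihood: event[n] ~ Poisson(exp(log_hazard[n]))."""
    return stats.poisson.logpmf(event, mu=np.exp(log_hazard))

Per-row values like these are what downstream model-comparison tools (e.g. WAIC or LOO estimators) typically consume.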
