-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
221 additions
and
94 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Nelson-Aalen Estimator | ||
|
||
The [Nelson-Aalen estimator](https://en.wikipedia.org/wiki/Nelson%E2%80%93Aalen_estimator) | ||
is a nonparametric estimator of the cumulative hazard function. | ||
|
||
The estimate is given by | ||
|
||
```math | ||
\hat{H}(t) = \sum_{i: t_i \le t} \frac{d_i}{n_i} | ||
``` | ||
|
||
where ``d_i`` is the number of observed events at time ``t_i`` and ``n_i`` is the | ||
number of subjects at risk for the event just before time ``t_i``. | ||
|
||
The pointwise standard error of the cumulative hazard (the negative log of the | ||
survivor function) can be computed directly as the standard error of a Bernoulli | ||
random variable with ``d_i`` successes from ``n_i`` samples: | ||
|
||
```math | ||
\text{SE}(\hat{H}(t)) = \sqrt{\sum_{i: t_i \le t} \frac{d_i(n_i-d_i)}{n_i^3}} | ||
``` | ||
|
||
## API | ||
|
||
```@docs | ||
Survival.NelsonAalen | ||
StatsBase.fit(::Type{S}, | ||
times::AbstractVector{T}, | ||
status::AbstractVector{<:Integer}) where {S<:NonparametricEstimator, T} | ||
StatsBase.confint(na::NelsonAalen, α::Float64=0.05) | ||
``` | ||
|
||
## References | ||
|
||
* Nelson, W. (1969). *Hazard plotting for incomplete failure data*. | ||
Journal of Quality Technology 1, 27–52. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# Estimating functions with the following assumptions: | ||
# * The input is nonempty | ||
# * Time 0 is not included | ||
|
||
# Shared kernel for the nonparametric estimators. Walks the observations in time
# order, pools tied times, and records one row per distinct time.
#
# Arguments:
# * `S`: estimator type; it supplies the rules through `estimator_start`,
#   `stderr_start`, `estimator_update` and `stderr_update`, and is used to
#   construct the result
# * `tte`: times to event, assumed sorted in ascending order (`fit` sorts them
#   before calling)
# * `status`: `true` for an observed event, `false` for a censored observation
#
# Returns `S{T}(times, nevents, ncensor, natrisk, estimator, stderr)`.
#
# Assumes a nonempty input with no observations at time zero: `zero(T)` doubles
# as the "no time seen yet" sentinel, so observations at time zero that are
# followed by later times leave the risk set without getting a row of their own.
function _estimator(::Type{S}, tte::AbstractVector{T}, status::BitVector) where {S, T}
    nobs = length(tte)
    dᵢ = 0                    # Number of observed events at time t
    cᵢ = 0                    # Number of censored events at time t
    nᵢ = nobs                 # Number remaining at risk at time t
    es = estimator_start(S)   # Estimator starting point
    gw = stderr_start(S)      # Standard Error starting point

    times = T[]               # The set of unique event times
    nevents = Int[]           # Total observed events at each time
    ncensor = Int[]           # Total censored events at each time
    natrisk = Int[]           # Number at risk at each time
    estimator = Float64[]     # Estimates
    stderr = Float64[]        # Pointwise standard errors

    t_prev = zero(T)

    # Iterating the pairs directly avoids unchecked indexing into an arbitrary
    # AbstractVector.
    for (t, s) in zip(tte, status)
        # Aggregate over tied times
        if t == t_prev
            dᵢ += s
            cᵢ += !s
            continue
        elseif !iszero(t_prev)
            # A new time has started, so the counts for `t_prev` are complete:
            # fold them into the running estimate and record the row.
            es = estimator_update(S, es, dᵢ, nᵢ)
            gw = stderr_update(S, gw, dᵢ, nᵢ)
            push!(times, t_prev)
            push!(nevents, dᵢ)
            push!(ncensor, cᵢ)
            push!(natrisk, nᵢ)
            push!(estimator, es)
            push!(stderr, sqrt(gw))
        end
        # Everyone observed at the previous time leaves the risk set
        nᵢ -= dᵢ + cᵢ
        # Keep the counters as Int so their type does not change inside the loop
        dᵢ = Int(s)
        cᵢ = Int(!s)
        t_prev = t
    end

    # Everything in the loop is lagged by one distinct time, so the last time
    # still has to be folded into the estimate and recorded. Skipping the update
    # here would drop the events observed at the final time from the estimate.
    es = estimator_update(S, es, dᵢ, nᵢ)
    gw = stderr_update(S, gw, dᵢ, nᵢ)
    push!(times, t_prev)
    push!(nevents, dᵢ)
    push!(ncensor, cᵢ)
    push!(natrisk, nᵢ)
    push!(estimator, es)
    push!(stderr, sqrt(gw))

    return S{T}(times, nevents, ncensor, natrisk, estimator, stderr)
end
|
||
""" | ||
fit(::Type{S}, times, status) where S<:NonparametricEstimator | ||
Given a vector of times to events and a corresponding vector of indicators that | ||
dictate whether each time is an observed event or is right censored, compute the | ||
model of type `S`. Return an object of type `S`: [`KaplanMeier`](@ref) and | ||
[`NelsonAalen`](@ref) are supported so far. | ||
""" | ||
function StatsBase.fit(::Type{S}, | ||
times::AbstractVector{T}, | ||
status::AbstractVector{<:Integer}) where {S<:NonparametricEstimator, T} | ||
nobs = length(times) | ||
if length(status) != nobs | ||
throw(DimensionMismatch("there must be as many event statuses as times")) | ||
end | ||
if nobs == 0 | ||
throw(ArgumentError("the sample must be nonempty")) | ||
end | ||
p = sortperm(times) | ||
t = times[p] | ||
s = BitVector(status[p]) | ||
return _estimator(S, t, s) | ||
end | ||
|
||
# Fit the estimator `S` to a vector of `EventTime` values by splitting it into
# separate time and status vectors and delegating to the method above.
# Throws an `ArgumentError` when the sample is empty.
function StatsBase.fit(::Type{S}, ets::AbstractVector{<:EventTime}) where S<:NonparametricEstimator
    isempty(ets) && throw(ArgumentError("the sample must be nonempty"))
    ordered = sort(ets)
    # TODO: Refactor, since iterating over the EventTime objects directly in
    #       the estimator loop may actually be easier/more efficient than
    #       working with the times and statuses as separate vectors. Plus it
    #       might be nice to make this method the One True Method™ so that
    #       folks are encouraged to use EventTimes instead of raw values.
    event_times = map(ordered) do et
        et.time
    end
    event_status = map(ordered) do et
        et.status
    end
    return fit(S, event_times, BitVector(event_status))
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
""" | ||
NelsonAalen | ||
An immutable type containing cumulative hazard function estimates computed | ||
using the Nelson-Aalen method. | ||
The type has the following fields: | ||
* `times`: Distinct event times | ||
* `nevents`: Number of observed events at each time | ||
* `ncensor`: Number of right censored events at each time | ||
* `natrisk`: Size of the risk set at each time | ||
* `chaz`: Estimate of the cumulative hazard at each time | ||
* `stderr`: Standard error of the cumulative hazard | ||
Use `fit(NelsonAalen, ...)` to compute the estimates and construct | ||
this type. | ||
""" | ||
struct NelsonAalen{T<:Real} <: NonparametricEstimator | ||
times::Vector{T} | ||
nevents::Vector{Int} | ||
ncensor::Vector{Int} | ||
natrisk::Vector{Int} | ||
chaz::Vector{Float64} | ||
stderr::Vector{Float64} | ||
end | ||
|
||
# Estimator starting point: the value the running Nelson-Aalen estimate holds
# before any time has been folded in.
function estimator_start(::Type{NelsonAalen})
    return 0.0
end
# StdErr starting point: the value of the running sum whose square root is
# reported as the standard error.
function stderr_start(::Type{NelsonAalen})
    return 0.0
end
|
||
# Estimator update rule: add the increment dᵢ / nᵢ contributed by the current
# time to the running estimate `es`.
function estimator_update(::Type{NelsonAalen}, es, dᵢ, nᵢ)
    increment = dᵢ / nᵢ
    return es + increment
end
# StdErr update rule: add the term dᵢ(nᵢ - dᵢ) / nᵢ³ contributed by the current
# time to the running sum `gw`. The cube is taken in floating point because
# `nᵢ^3` on an `Int64` silently wraps around once nᵢ reaches 2_097_152 (2^21),
# which would corrupt the standard errors for large samples.
stderr_update(::Type{NelsonAalen}, gw, dᵢ, nᵢ) = gw + dᵢ * (nᵢ - dᵢ) / float(nᵢ)^3
|
||
""" | ||
confint(na::NelsonAalen, α=0.05) | ||
Compute the pointwise confidence intervals for the cumulative hazard | ||
function as a vector of tuples. | ||
""" | ||
function StatsBase.confint(na::NelsonAalen, α::Float64=0.05) | ||
q = quantile(Normal(), 1 - α/2) | ||
return map(na.chaz, na.stderr) do srv, se | ||
srv - q * se, srv + q * se | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters