Skip to content

Commit

Permalink
* split files
Browse files Browse the repository at this point in the history
* try getting coverage
  • Loading branch information
jlapeyre committed Oct 2, 2016
1 parent e7eb3a3 commit be051ec
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 279 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ julia:
- nightly
notifications:
email: false
after_success:
- julia -e 'Pkg.add("Coverage"); cd(Pkg.dir("EmpiricalCDFs")); using Coverage; Coveralls.submit(process_folder())'

280 changes: 1 addition & 279 deletions src/EmpiricalCDFs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,285 +5,7 @@ import Compat: String
export EmpiricalCDF, EmpiricalCDFHi, CDFfile, linprint, logprint, getinverse, getcdfindex, readcdf
export getheader, getversion, getcdf

"""
AbstractEmpiricalCDF
Concrete types are `EmpiricalCDF` and `EmpiricalCDFHi`.
"""
abstract AbstractEmpiricalCDF

immutable EmpiricalCDF{T <: Real} <: AbstractEmpiricalCDF
xdata::Array{T,1} # death times
end

doc"""
*EmpiricalCDF()*
Return an empirical CDF. The CDF must be sorted after inserting elements with `push!` or
`append!`, and before being accessed using any of the functions below.
`EmpiricalCDF(xmin::Real)`
When using `push!(cdf,x)` and `append!(cdf,a)`, points `x` for `x<xmin` will be rejected.
The CDF will be properly normalized, but will be lower-truncated.
This can be useful when generating too many points to store.
`print(io,cdf)` or `logprint(io,cdf,n=2000)` print (not more than) `n` log spaced points after sorting the data.
`print(io,cdf,a::AbstractArray)`, `print(cdf,a::AbstractArray, fname::String)` print cdf at points near those in a.
`push!(cdf,x)` add a point to `cdf`
`append!(cdf,a)` add points to `cdf`.
`sort!(cdf)` The data must be sorted before calling `cdf[x]`
`cdf[x::Real]` return the value of `cdf` at the point `x`.
`cdf[a::AbstractArray]` return the values of `cdf` at the points in `a`.
`length(cdf)` return the number of data points in `cdf`.
`rand(cdf)` return a sample from the probability distribution associated with `cdf`.
# Documented functions and objects
`getinverse(cdf,x)`, `linprint(io,cdf,n=2000)`, `logprint(io,cdf,n=2000)`,
`getcdfindex(cdf,x)`
"""
EmpiricalCDF() = EmpiricalCDF(Array(Float64,0))

"""
EmpiricalCDFHi{T <: Real} <: AbstractEmpiricalCDF
Empirical CDF with lower cutoff. That is, keep only the tail.
"""
immutable EmpiricalCDFHi{T <: Real} <: AbstractEmpiricalCDF
lowreject::T # reject counts smaller than this
rejectcounts::Array{Int,1} # how many rejections have we done
xdata::Array{T,1} # death times
end

function EmpiricalCDFHi(lowreject::Real)
cdf = EmpiricalCDFHi(lowreject, Array(Int,1), Array(Float64,0))
cdf.rejectcounts[1] = 0
cdf
end

EmpiricalCDF(lowreject::Real) = EmpiricalCDFHi(lowreject)

# Number of points accepted plus number of points rejected
_total_counts(cdf::EmpiricalCDF) = length(cdf.xdata)
_total_counts(cdf::EmpiricalCDFHi) = length(cdf.xdata) + _rejectcounts(cdf)

# Value of the cdf corresponding to index i
_val_at_index(cdf::EmpiricalCDF, i) = i/_total_counts(cdf)
_val_at_index(cdf::EmpiricalCDFHi, i) = (i+ _rejectcounts(cdf) )/_total_counts(cdf)

_increment_rejectcounts(cdf::EmpiricalCDFHi) = cdf.rejectcounts[1] += 1

_rejectcounts(cdf::EmpiricalCDFHi) = cdf.rejectcounts[1]


Base.push!(cdf::EmpiricalCDF,x) = (push!(cdf.xdata,x) ; cdf)

function _push!(cdf::EmpiricalCDFHi,x)
if x >= cdf.lowreject
push!(cdf.xdata,x)
else
_increment_rejectcounts(cdf)
end
end

function Base.push!(cdf::EmpiricalCDFHi, x)
if isinf(cdf.lowreject)
push!(cdf.xdata,x)
else
_push!(cdf,x)
end
cdf
end

Base.append!(cdf::EmpiricalCDF, times) = (append!(cdf.xdata,times); cdf)

function Base.append!(cdf::EmpiricalCDFHi, times)
if isinf(cdf.lowreject)
append!(cdf.xdata,times)
else
for x in times
_push!(cdf,x)
end
end
cdf
end

for f in ( :length, :sort!, :minimum, :maximum, :mean, :std, :quantile )
@eval begin
Base.$(f)(cdf::AbstractEmpiricalCDF,args...) = $(f)(cdf.xdata,args...)
end
end

for f in (:mle, :KSstatistic, :mleKS, :scanmle)
@eval begin
$(f)(cdf::AbstractEmpiricalCDF,args...) = $(f)(cdf.xdata,args...)
end
end

function _inverse(cdf::EmpiricalCDF, x)
ind = floor(Int, _total_counts(cdf)*x) + 1
cdf.xdata[ind]
end

function _inverse(cdf::EmpiricalCDFHi, x)
ind = floor(Int, _total_counts(cdf)*x) + 1 - _rejectcounts(cdf)
cdf.xdata[ind]
end

Base.rand(cdf::EmpiricalCDF) = _inverse(cdf,rand())

"`getinverse(cdf::EmpiricalCDF,x)` return the value of the functional inverse of `cdf` at the point `x`."
function getinverse(cdf::EmpiricalCDF,x)
(x < 0 || x >= 1) && throw(DomainError())
_inverse(cdf,x)
end

function getinverse(cdf::EmpiricalCDFHi,x)
(x < cdf.lowreject || x >= 1) && throw(DomainError())
_inverse(cdf,x)
end

getcdfindex(cdf::AbstractEmpiricalCDF, x::Real) = searchsortedlast(cdf.xdata, x)

#Base.getindex(cdf::EmpiricalCDF, x::Real) = searchsortedlast(cdf.xdata, x) / length(cdf.xdata)
Base.getindex(cdf::AbstractEmpiricalCDF, x::Real) = _val_at_index(cdf, getcdfindex(cdf, x))

# With several tests, this is about the same speed or faster than the routine borrowed from StatsBase
function Base.getindex{T <: Real}(cdf::AbstractEmpiricalCDF, v::AbstractArray{T})in
r = Array(eltype(cdf.xdata), size(v)...)
for (i,x) in enumerate(v)
r[i] = cdf[x]
end
r
end

function Base.show(io::IO, cdf::EmpiricalCDF)
print(io, string(typeof(cdf)))
print(io, "(n=")
print(io,length(cdf))
print(io,')')
end

function Base.show(io::IO, cdf::EmpiricalCDFHi)
print(io, string(typeof(cdf)))
print(io, "(n=")
print(io,length(cdf))
print(io,",lowreject=")
print(io,cdf.lowreject)
print(io,')')
end

function Base.print(cdf::AbstractEmpiricalCDF,fn::String)
io = open(fn,"w")
print(io,cdf)
close(io)
end

Base.print(io::IOStream, cdf::AbstractEmpiricalCDF) = logprint(io,cdf)

"`linprint(io::IO ,cdf::EmpiricalCDF, n=2000)` print (not more than) `n` linearly spaced points after sorting the data."
linprint(io::IOStream, cdf::AbstractEmpiricalCDF) = _printcdf(io,cdf,false, 2000)
linprint(io::IOStream, cdf::AbstractEmpiricalCDF, nprint_pts::Int) = _printcdf(io,cdf,false, nprint_pts)

"`logprint(io::IO, cdf::EmpiricalCDF, n=2000)` print (not more than) `n` log spaced points after sorting the data."
logprint(io::IOStream, cdf::AbstractEmpiricalCDF, nprint_pts::Int) = _printcdf(io,cdf,true, nprint_pts)
logprint(io::IOStream, cdf::AbstractEmpiricalCDF) = _printcdf(io,cdf,true, 2000)

Base.print(io::IOStream, cdf::AbstractEmpiricalCDF, nprint_pts::Int) = logprint(io,cdf,nprint_pts)

Base.print(io::IOStream, cdf::AbstractEmpiricalCDF, prpts::AbstractArray) = _printcdf(io,cdf,prpts)

function Base.print(cdf::AbstractEmpiricalCDF, prpts::AbstractArray, fname::String)
io = open(fname,"w")
print(io,cdf,prpts)
close(io)
end

function linprint(cdf::AbstractEmpiricalCDF,fn::String)
io = open(fn,"w")
linprint(io,cdf)
close(io)
end

function logprint(cdf::AbstractEmpiricalCDF,fn::String)
io = open(fn,"w")
logprint(io,cdf)
close(io)
end

# print many fields, prpts is iterable
function _printfull(io, cdf::AbstractEmpiricalCDF, prpts)
n = length(cdf)
println(io, "# log10(t) log10(P(t)) log10(1-P(t)) t 1-P(t) P(t)")
state = start(prpts)
(p,state) = next(prpts,state)
for i in 1:n-1 # don't print ordinate value of 1
xp = cdf.xdata[i]
if done(prpts,state)
break
end
if xp >= p
(p,state) = next(prpts,state)
cdf_val = _val_at_index(cdf,i)
@printf(io,"%e\t%e\t%e\t%e\t%e\t%e\n", log10(abs(xp)), log10(abs(1-cdf_val)), log10(abs(cdf_val)), xp, cdf_val, 1-cdf_val)
end
end
end

function _printextraheader(io,cdf::AbstractEmpiricalCDF)
end
function _printextraheader(io,cdf::EmpiricalCDFHi)
nreject = _rejectcounts(cdf)
n = length(cdf)
println(io, "# cdf: lowreject = ", cdf.lowreject)
println(io, "# cdf: lowreject counts = ", nreject)
@printf(io, "# cdf: fraction kept = %f\n", n/(n+nreject))
end

# Note that we sort in place
function _printcdf(io::IOStream, cdf::AbstractEmpiricalCDF, logprint::Bool, nprint_pts::Integer)
if length(cdf) == 0
error("Trying to print empty cdf")
end
x = cdf.xdata
sort!(x)
xmin = x[1]
xmax = x[end]
local prpts
if logprint && xmin > 0
println(io, "# cdf: log spacing of coordinate")
prpts = logspace(log10(xmin),log10(xmax), nprint_pts)
else
println(io, "# cdf: linear spacing of coordinate")
prpts = linspace(xmin,xmax, nprint_pts)
end
_printcdf(io, cdf, prpts)
end

# FIXME We need to avoid resorting
function _printcdf(io::IOStream, cdf::AbstractEmpiricalCDF, prpts::AbstractArray)
x = cdf.xdata
sort!(x)
n = length(x)
if n == 0
error("Trying to print empty cdf")
end
println(io, "# cdf of survival times")
println(io, "# cdf: total counts = ", _total_counts(cdf))
_printextraheader(io,cdf)
println(io, "# cdf: number of points in cdf: $n")
_printfull(io,cdf,prpts)
end

include("cdfs.jl")
include("io.jl")
include("mle.jl")

Expand Down

0 comments on commit be051ec

Please sign in to comment.