/
bayesian_gp.jl
127 lines (91 loc) · 4.28 KB
/
bayesian_gp.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
gpfitbayes(..., niter::Int=5000, warmup::Int=2000)
Generate a sample from the GP parameters' posterior distribution.
Data provided must be the exceedances above the threshold, *i.e.* the data above the threshold minus
the threshold.
# Arguments
- `niter::Int = 5000`: The total number of MCMC iterations
- `warmup::Int = 2000`: The number of warmup iterations (burn-in).
# Implementation
The function uses the No-U-Turn Sampler (NUTS; [Hoffman and Gelman, 2014](http://jmlr.org/papers/v15/hoffman14a.html))
implemented in the [Mamba.jl](https://mambajl.readthedocs.io/en/latest/index.html)
package to generate a random sample from the posterior distribution.
Currently, only the improper uniform prior is implemented, *i.e.*
```math
f_{(β₂,β₃)}(β₂,β₃) ∝ 1,
```
where
```math
ϕ = X₂ × β₂,
```
```math
ξ = X₃ × β₃.
```
In the stationary case, this improper prior yields to a proper posterior if the
sample size is larger than 2 ([Northrop and Attalides, 2016](https://www.jstor.org/stable/24721296?seq=1)).
The covariates are standardized before estimating the parameters to help fit the
model. They are transformed back on their original scales before returning the
fitted model.
See also [`gpfitbayes`](@ref) for the other methods, [`gpfitpwm`](@ref), [`gpfit`](@ref) and [`ThresholdExceedance`](@ref).
# Reference
Hoffman M. D. and Gelman A. (2014). The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo. *Journal of Machine Learning Research*, 15:1593–1623.
Paul J. Northrop P. J. and Attalides N. (2016). Posterior propriety in Bayesian extreme value analyses using reference priors. *Statistica Sinica*, 26:721-743.
"""
function gpfitbayes end
"""
gpfitbayes(y,
logscalecov = Vector{Variable}(),
shapecov = Vector{Variable}(),
niter::Int=5000,
warmup::Int=2000
)
Generate a sample from the GP parameters' posterior distribution.
# Arguments
- `y::Vector{<:Real}`: The vector of exceedances.
- `logscalecov::Vector{<:DataItem} = Vector{Variable}()`: The covariates of the log-scale parameter.
- `shapecov::Vector{<:DataItem} = Vector{Variable}()`: The covariates of the shape parameter.
"""
function gpfitbayes(y::Vector{<:Real};
logscalecov::Vector{<:DataItem} = Vector{Variable}(),
shapecov::Vector{<:DataItem} = Vector{Variable}(),
niter::Int=5000, warmup::Int=2000)::BayesianAbstractExtremeValueModel
logscalecovstd = standardize.(logscalecov)
shapecovstd = standardize.(shapecov)
model = ThresholdExceedance(Variable("y", y), logscalecov = logscalecovstd, shapecov = shapecovstd)
fittedmodel = fitbayes(model, niter=niter, warmup=warmup)
return transform(fittedmodel)
end
"""
gpfitbayes(df::DataFrame,
datacol::Symbol,
logscalecovid = Vector{Symbol}(),
shapecovid = Vector{Symbol}(),
niter::Int=5000,
warmup::Int=2000)
Generate a sample from the GP parameters' posterior distribution.
# Arguments
- `df::DataFrame`: The dataframe containing the data.
- `datacol::Symbol`: The symbol of the column of `df` containing the exceedances.
- `logscalecovid::Vector{Symbol} = Vector{Symbol}()`: The symbols of the columns of `df` containing the covariates of the log-scale parameter.
- `shapecovid::Vector{Symbol} = Vector{Symbol}()`: The symbols of the columns of `df` containing the covariates of the shape parameter.
"""
function gpfitbayes(df::DataFrame, datacol::Symbol;
logscalecovid::Vector{Symbol}=Symbol[],
shapecovid::Vector{Symbol}=Symbol[],
niter::Int=5000, warmup::Int=2000)::BayesianAbstractExtremeValueModel
logscalecovstd = standardize.(buildVariables(df, logscalecovid))
shapecovstd = standardize.(buildVariables(df, shapecovid))
model = ThresholdExceedance(Variable(string(datacol), df[:, datacol]), logscalecov = logscalecovstd, shapecov = shapecovstd)
fittedmodel = fitbayes(model, niter=niter, warmup=warmup)
return transform(fittedmodel)
return fm
end
"""
gpfitbayes(model::ThresholdExceedance;
niter::Int=5000,
warmup::Int=2000)
Generate a sample from the GP parameters' posterior distribution.
"""
function gpfitbayes(model::ThresholdExceedance; niter::Int=5000, warmup::Int=2000)::BayesianAbstractExtremeValueModel
return fitbayes(model, niter=niter, warmup=warmup)
end