# BAT.jl: Background + Signal Analysis 

In [None]:
using Random, LinearAlgebra, Statistics, Distributions, StatsBase
using EponymTuples
using BAT, IntervalSets
using ValueShapes
using Plots
pyplot()

## Generate data:

In [None]:
# Histogram bin edges: 0 to 30 in steps of 0.5.
bins = 0:0.5:30
# Number of background-only events to generate.
N_background = 1500
# Parameter passed to `Exponential` for the background shape.
λ = 10

# Draw the background-only sample from an exponential truncated to [0, 30].
# The sample size is taken from `N_background` (previously a duplicated literal)
# so the declared and the generated event counts cannot drift apart.
data_background = rand(Truncated(Exponential(λ), 0, 30), N_background)
hist_background = append!(Histogram(bins), data_background);

In [None]:
# Composition of the "signal" data set: a Gaussian peak on top of the same
# truncated-exponential background used for the background-only sample.
N_signal = 100
N_s_background = 1400
μ_true = 15.0
σ_true = 0.7

# Background events are drawn first, then the peak events, and the two samples
# are concatenated in that order.
data_signal = vcat(
    rand(Truncated(Exponential(λ), 0, 30), N_s_background),
    rand(Normal(μ_true, σ_true), N_signal),
)

# Bin the combined sample with the shared binning.
hist_signal = Histogram(bins)
append!(hist_signal, data_signal);

In [None]:
# Signal data as a filled, density-normalized histogram.
plot(
    normalize(hist_signal, mode=:density);
    label="Signal Data", color=:red, alpha=0.5
)
# Background-only data drawn on top as a step line.
plot!(
    normalize(hist_background, mode=:density);
    label="Background Data", color=:black, alpha=0.9, lw=2.5, seriestype=:steps
)
plot!(title="Data for Analysis", xlabel="x", ylabel="P(x)")


## Define model functions: 

In [None]:
# Quadratic background model: A[1] + A[2]*x + A[3]*x^2.
# The first argument is destructured by `@eponymargs` so that `A` is bound to
# the parameter entry of the same name; `x` is the position to evaluate at.
function background_model_1(@eponymargs(A), x::Real)
    constant, linear, quadratic = A[1], A[2], A[3]
    return constant + linear * x + quadratic * x^2
end

# Exponential background model: A[1] + (1/A[2]) * exp(-x/A[3]).
# A[1] is a constant offset, A[2] enters as an inverse amplitude and A[3] is
# the decay constant of the exponential.
function background_model_2(@eponymargs(A), x::Real)
    offset = A[1]
    amplitude = 1 / A[2]
    falloff = exp(-x / A[3])
    return offset + amplitude * falloff
end

# Background-plus-signal model: the exponential background
# A[1] + (1/A[2]) * exp(-x/A[3]) with an added peak D[1] / ((x - x_0[1])^2 + G[1]^2)
# centred at x_0[1], whose width is governed by G[1] and height by D[1].
function background_signal_model(@eponymargs(A, D, x_0, G), x::Real)
    background = A[1] + (1 / A[2]) * exp(-x / A[3])
    peak = D[1] / ((x - x_0[1])^2 + G[1]^2)
    return background + peak
end

## Define a binned likelihood function with Poisson-distributed bin counts:

In [None]:
# Binned likelihood: bundles an observed histogram with a model function.
#   histogram - observed data; its `weights` and `edges` are read by the
#               `BAT.density_logval` method defined for this type.
#   fitfunc   - model function, called there as `fitfunc(params, bin_center)`.
struct HistogramLikelihood{H<:Histogram,F<:Function} <: AbstractDensity
    histogram::H
    fitfunc::F
end

In [None]:
# Log-likelihood of the histogram in `likelihood` given the parameters `params`.
#
# For every bin the expected count is `bin_width * fitfunc(params, bin_center)`
# and the observed count is scored with a Poisson log-pdf; the per-bin values
# are summed.
#
# Arguments:
#   likelihood - the `HistogramLikelihood` holding the histogram and fit function.
#   params     - parameter values, forwarded unchanged to the fit function.
#
# Returns the summed log-likelihood as a `Float64`, or `-Inf` as soon as one bin
# has an expected count that is not strictly positive (this includes NaN).
function BAT.density_logval(
    likelihood::HistogramLikelihood,
    params::Union{NamedTuple,AbstractVector{<:Real}}
)
    # Histogram counts for each bin as an array:
    counts = likelihood.histogram.weights

    # Histogram binning, has length (length(counts) + 1):
    binning = likelihood.histogram.edges[1]

    log_likelihood::Float64 = 0.0
    for i in eachindex(counts)
        bin_left, bin_right = binning[i], binning[i+1]
        bin_width = bin_right - bin_left
        bin_center = (bin_right + bin_left) / 2
        expected_counts = bin_width * likelihood.fitfunc(params, bin_center)

        # A Poisson distribution needs a positive mean. Once one bin fails this,
        # the total is -Inf regardless of the remaining bins, so stop here
        # instead of evaluating the fit function for every remaining bin.
        expected_counts > 0 || return -Inf

        log_likelihood += logpdf(Poisson(expected_counts), counts[i])
    end

    return log_likelihood
end

In [None]:
# Sampling algorithm and RNG seed shared by every fit in this notebook.
algorithm = MetropolisHastings(MvTDistProposalSpec(1.0))
rngseed = BAT.Philox4xSeed()

# Requested number of samples, step limit and number of chains.
nsamples = 500_000
max_nsteps = 500_000
nchains = 8

# Settings for the proposal tuner.
tuner_config = ProposalCovTunerConfig(c = 1e-4..1e2, β = 1.5, α = 0.15..0.35, λ = 0.5)

# Convergence criterion applied to the chains.
convergence_test = BGConvergence(corrected = false, threshold = 1.1)

# Limits for chain initialisation.
init_strategy = MCMCInitStrategy(
    max_time_pretune = Inf,
    max_nsteps_pretune = 250,
    max_nsamples_pretune = 25,
    ninit_tries_per_chain = 8..128
)

# Limits for the burn-in cycles.
burnin_strategy = MCMCBurninStrategy(
    max_ncycles = 100,
    max_time_per_cycle = Inf,
    max_nsteps_per_cycle = 20000,
    max_nsamples_per_cycle = 10000
);

# NOTE(review): stock Julia logging reads `JULIA_DEBUG`; confirm that
# `JULIA_INFO` is actually consulted by the BAT version in use.
ENV["JULIA_INFO"] = "BAT"

## Background model #1:  

In [None]:
# Pair the background-only histogram with the quadratic background model.
likelihood_bm1 = HistogramLikelihood(hist_background, background_model_1);

In [None]:
# Every coefficient of the quadratic model gets the same interval prior,
# [-1000, 1000], so the three-element vector is built by repetition.
prior_bm1 = NamedPrior(A = fill(-1000.0 .. 1000.0, 3));

In [None]:
# Parameter shapes derived from the prior; applied later to the mode to recover
# named parameters for the model function.
parshapes_bm1 = VarShapes(prior_bm1)
# Posterior built from the histogram likelihood and the prior.
posterior_bm1 = PosteriorDensity(likelihood_bm1, prior_bm1);
# Chain specification: sampling algorithm, target posterior and RNG seed.
chainspec_bm1 = MCMCSpec(algorithm, posterior_bm1, rngseed);

In [None]:
# Sample the posterior of background model 1 using the shared sampler settings.
# The result is destructured into the samples, their ids, summary statistics
# and the chain objects.
samples_bm1, sampleids_bm1, stats_bm1, chains_bm1 = rand(
    chainspec_bm1,
    nsamples,
    nchains,
    tuner_config = tuner_config,
    convergence_test = convergence_test,
    init_strategy = init_strategy,
    burnin_strategy = burnin_strategy,
    max_nsteps = max_nsteps,
    max_time = Inf,
    granularity = 1
);

In [None]:
# Summary of the background-model-1 posterior: mode, mean and covariance.
println("Mode: ", stats_bm1.mode)
println("Mean: ", stats_bm1.param_stats.mean)
println("Covariance: ", stats_bm1.param_stats.cov)

In [None]:
# Apply the parameter shapes to the mode so it can be passed as the parameter
# argument of `background_model_1` when plotting.
mode_parms_bm1 = parshapes_bm1(stats_bm1.mode)

In [None]:
# Plot the samples obtained for background model 1.
plot(samples_bm1)

In [None]:
# Background data as a density-normalized step histogram.
plot(
    normalize(hist_background, mode=:density);
    label="Background Data", color=:black, alpha=0.9, lw=2.5, seriestype=:steps
)
plot!(title="Background Analysis", xlabel="x", ylabel="P(x)")

# Overlay background model 1, evaluated with the mode parameters on the bin edges.
plot!(
    bins, xi -> background_model_1(mode_parms_bm1, xi);
    color=2, lw=1.5, label="Background Model 1"
)


In [None]:
# Wrap the samples for integration and run `hm_integrate!` on them; the integral
# estimate is read from `data_bm1.integralestimates` in the model comparison.
data_bm1 = HMIData(samples_bm1)
        
hm_integrate!(data_bm1)

## Background model #2:  

In [None]:
# Pair the background-only histogram with the exponential background model.
likelihood_bm2 = HistogramLikelihood(hist_background, background_model_2)

In [None]:
# Interval priors for the three components of `A` in `background_model_2`:
# A[1] (constant offset), A[2] (inverse amplitude), A[3] (decay constant).
prior_bm2 = NamedPrior(
    A = [-25.0 .. 10.0, -1.0 .. 1.0, 0.0 .. 20.0],
);


In [None]:
# Parameter shapes derived from the prior; applied later to the mode to recover
# named parameters for the model function.
parshapes_bm2 = VarShapes(prior_bm2)
# Posterior built from the histogram likelihood and the prior.
posterior_bm2 = PosteriorDensity(likelihood_bm2, prior_bm2)
# Chain specification: sampling algorithm, target posterior and RNG seed.
chainspec_bm2 = MCMCSpec(algorithm, posterior_bm2, rngseed);

In [None]:
# Sample the posterior of background model 2 using the shared sampler settings.
# The result is destructured into the samples, their ids, summary statistics
# and the chain objects.
samples_bm2, sampleids_bm2, stats_bm2, chains_bm2 = rand(
    chainspec_bm2,
    nsamples,
    nchains,
    tuner_config = tuner_config,
    convergence_test = convergence_test,
    init_strategy = init_strategy,
    burnin_strategy = burnin_strategy,
    max_nsteps = max_nsteps,
    max_time = Inf,
    granularity = 1
);

In [None]:
# Summary of the background-model-2 posterior: mode, mean and covariance.
println("Mode: ", stats_bm2.mode)
println("Mean: ", stats_bm2.param_stats.mean)
println("Covariance: ", stats_bm2.param_stats.cov)

In [None]:
# Apply the parameter shapes to the mode so it can be passed as the parameter
# argument of `background_model_2` when plotting.
mode_parms_bm2 = parshapes_bm2(stats_bm2.mode)

In [None]:
# Plot the samples of background model 2 for parameters 1 to 3
# (presumably the three components of `A` — confirm).
plot(samples_bm2, params=[1,2,3])

In [None]:
# Background data as a density-normalized step histogram.
plot(
    normalize(hist_background, mode=:density);
    label="Background Data", color=:black, alpha=0.9, lw=2.5, seriestype=:steps
)
plot!(title="Background Analysis", xlabel="x", ylabel="P(x)")

# Overlay background model 2, evaluated with the mode parameters on the bin edges.
plot!(
    bins, xi -> background_model_2(mode_parms_bm2, xi);
    color=1, lw=1.5, label="Background Model 2"
)


In [None]:
# Wrap the samples for integration and run `hm_integrate!` on them; the integral
# estimate is read from `data_bm2.integralestimates` in the model comparison.
data_bm2 = HMIData(samples_bm2)
        
hm_integrate!(data_bm2)

## Comparison of background models: 

In [None]:
# Final integral estimates stored under the "analytic result" key, one per
# background model.
tot_vol_bm1 = data_bm1.integralestimates["analytic result"].final.estimate # Z 
tot_vol_bm2 = data_bm2.integralestimates["analytic result"].final.estimate;

In [None]:
# Ratio of the two integral estimates, model 2 over model 1
# (presumably meant as the Bayes factor between the models — confirm).
@show tot_vol_bm2/tot_vol_bm1;

In [None]:
# Background data as a density-normalized step histogram.
plot(
    normalize(hist_background, mode=:density);
    label="Background Data", color=:black, alpha=0.9, lw=2.5, seriestype=:steps
)
plot!(title="Background Analysis", xlabel="x", ylabel="P(x)")

# Both background models at their mode parameters, drawn on the same axes.
plot!(
    bins, xi -> background_model_1(mode_parms_bm1, xi);
    color=2, lw=1.5, label="Background Model 1"
)
plot!(
    bins, xi -> background_model_2(mode_parms_bm2, xi);
    color=1, lw=1.5, label="Background Model 2"
)


## Build histogram priors from the background-model-2 posterior samples:

In [None]:
# Stack the posterior samples of background model 2 into a matrix with one
# column per sample. `reduce(hcat, ...)` is used instead of `hcat(xs...)` so
# that a very large number of sample vectors is not splatted into one call.
samples = reduce(hcat, samples_bm2.params);

# One histogram per component of `A`; they are turned into the priors of the
# signal fit further down.
# NOTE(review): the binning ranges are hard-coded; samples outside a range are
# presumably dropped by `append!` — confirm the ranges cover the posterior.
A_1 = Histogram(-25.0:0.5:10.0)
A_2 = Histogram(0.005:0.00005:0.0075)
A_3 = Histogram(5:0.3:17)

# Fill each histogram from the matching row; views avoid copying the rows.
# NOTE(review): the samples are binned unweighted; if `samples_bm2` carries
# per-sample weights they are not applied here — confirm this is intended.
append!(A_1, view(samples, 1, :))
append!(A_2, view(samples, 2, :))
append!(A_3, view(samples, 3, :))

## Signal and background: 

In [None]:
# Pair the signal histogram with the background-plus-signal model.
likelihood_signal = HistogramLikelihood(hist_signal, background_signal_model);

# Priors of the signal fit: the background parameters `A` use the histograms
# A_1..A_3 wrapped as distributions, while the peak parameters D, x_0 and G
# use interval priors.
prior_signal = NamedPrior(
    A = [BAT.HistogramAsUvDistribution(A_1), BAT.HistogramAsUvDistribution(A_2), BAT.HistogramAsUvDistribution(A_3)],
    D = [0.0..200], 
    x_0 = [13 .. 17], 
    G = [0.0 .. 5.0]
);


# Parameter shapes, posterior and chain specification for the signal fit.
parshapes_signal = VarShapes(prior_signal)
posterior_signal = PosteriorDensity(likelihood_signal, prior_signal);
chainspec_signal = MCMCSpec(algorithm, posterior_signal, rngseed);

In [None]:
# Sample the posterior of the background-plus-signal model using the shared
# sampler settings. The result is destructured into the samples, their ids,
# summary statistics and the chain objects.
samples_signal, sampleids_signal, stats_signal, chains_signal = rand(
    chainspec_signal,
    nsamples,
    nchains,
    tuner_config = tuner_config,
    convergence_test = convergence_test,
    init_strategy = init_strategy,
    burnin_strategy = burnin_strategy,
    max_nsteps = max_nsteps,
    max_time = Inf,
    granularity = 1
);

In [None]:
# Plot the signal-fit samples for parameters 1 to 3
# (presumably the three components of `A` — confirm).
plot(samples_signal, params=[1,2,3])

In [None]:
# Plot the signal-fit samples for parameters 4 to 6
# (presumably D, x_0 and G — confirm).
plot(samples_signal, params=[4,5,6])

In [None]:
# Apply the parameter shapes to the mode so it can be passed as the parameter
# argument of `background_signal_model` when plotting.
mode_parms_signal = parshapes_signal(stats_signal.mode)

In [None]:
# Signal data (filled) and background-only data (step line), density-normalized.
plot(
    normalize(hist_signal, mode=:density);
    label="Signal Data", color=:red, alpha=0.5
)
plot!(
    normalize(hist_background, mode=:density);
    label="Background Data", color=:black, alpha=0.5, lw=2.5, seriestype=:steps
)
plot!(xlabel="x", ylabel="P(x)")

# Background-only fit and background-plus-signal fit at their mode parameters.
plot!(
    bins, xi -> background_model_2(mode_parms_bm2, xi);
    color=1, lw=2, label="Background Model 2"
)
plot!(
    bins, xi -> background_signal_model(mode_parms_signal, xi);
    color=6, lw=2, label="Signal Model"
)



In [None]:
# Wrap the signal-fit samples for integration and run `hm_integrate!` on them;
# the integral estimate is read from `data_sig.integralestimates` at the end.
data_sig = HMIData(samples_signal)

hm_integrate!(data_sig)

## Signal vs. no signal: 

In [None]:
# Fit the signal histogram with the background-only exponential model, as the
# "no signal" hypothesis.
likelihood_nosignal = HistogramLikelihood(hist_signal, background_model_2);

# Interval priors for the three components of `A`.
prior_nosignal = NamedPrior(
    A = [-10.0 .. 10.0, -1.0 .. 1.0, 0.0 .. 30.0],
);


# Parameter shapes, posterior and chain specification for the no-signal fit.
parshapes_nosignal = VarShapes(prior_nosignal)
posterior_nosignal = PosteriorDensity(likelihood_nosignal, prior_nosignal);
chainspec_nosignal = MCMCSpec(algorithm, posterior_nosignal, rngseed);

In [None]:
# Sample the posterior of the no-signal hypothesis using the shared sampler
# settings. The result is destructured into the samples, their ids, summary
# statistics and the chain objects.
samples_nosignal, sampleids_nosignal, stats_nosignal, chains_nosignal = rand(
    chainspec_nosignal,
    nsamples,
    nchains,
    tuner_config = tuner_config,
    convergence_test = convergence_test,
    init_strategy = init_strategy,
    burnin_strategy = burnin_strategy,
    max_nsteps = max_nsteps,
    max_time = Inf,
    granularity = 1
);

In [None]:
# Apply the parameter shapes to the mode so it can be passed as the parameter
# argument of `background_model_2` when plotting.
mode_parms_nosignal = parshapes_nosignal(stats_nosignal.mode)

In [None]:
# Signal data as a filled, density-normalized histogram.
plot(
    normalize(hist_signal, mode=:density);
    label="Signal Data", color=:red, alpha=0.5
)
plot!(xlabel="x", ylabel="P(x)")

# Both hypotheses at their mode parameters: background only vs. background + peak.
plot!(
    bins, xi -> background_model_2(mode_parms_nosignal, xi);
    color=3, lw=2, label="No signal assumption"
)
plot!(
    bins, xi -> background_signal_model(mode_parms_signal, xi);
    color=6, lw=2, label="Signal assumption"
)



In [None]:
# Wrap the no-signal samples for integration and run `hm_integrate!` on them;
# the integral estimate is read from `data_nosign.integralestimates` below.
data_nosign = HMIData(samples_nosignal)
        
hm_integrate!(data_nosign)

In [None]:
# Final integral estimates stored under the "analytic result" key for the
# signal and the no-signal hypothesis.
post_signal = data_sig.integralestimates["analytic result"].final.estimate
post_nosign = data_nosign.integralestimates["analytic result"].final.estimate;

# Ratio of the two estimates, signal over no signal
# (presumably meant as the Bayes factor in favour of a signal — confirm).
@show post_signal/post_nosign;