# Сопряженное распределение к нормальному

Сопряженное априорное распределение для нормальной функции правдоподобия:

$$
P(model | data) = \frac{ P(data | model) P(model) }{P(data)} \propto P(data | model) P(model)
$$

$$
P(data | model) = P(x | \mu, \sigma_x) = 
\frac{1}{\sqrt{2 \pi \sigma_x^2}} e^{-\dfrac{(x - \mu)^2}{2 \sigma_x^2}}
$$

Один параметр: $\mu$ меняется, $\sigma_x$ фиксировано.  
Сопряженное априорное распределение - нормальное распределение.

$$
P(model) = Norm(\mu | \mu_0, \sigma_0) = 
\frac{1}{\sqrt{2 \pi \sigma_{0}^2}} e^{- \dfrac{(\mu-\mu_0)^2}{2 \sigma_{0}^2}} 
$$

$$
\begin{split}
P(model | data) 
& \propto
\frac{1}{\sqrt{2 \pi \sigma_x^2}} e^{-\dfrac{(x - \mu)^2}{2 \sigma_x^2}}
\frac{1}{\sqrt{2 \pi \sigma_{0}^2}} e^{- \dfrac{(\mu-\mu_0)^2}{2 \sigma_{0}^2}} 
\\
& \propto
e^{-\dfrac{(\mu - \mu_1)^2}{2 \sigma_1^2}}
= Norm(\mu | \mu_1, \sigma_1)
\end{split}
$$

$$
\dfrac{x^2 - 2 \mu x + \mu^2}{2 \sigma_x^2} + \dfrac{\mu^2 - 2\mu \mu_0 + \mu_0^2}{2 \sigma_{0}^2}
=
\mu^2 \left[ \frac{1}{2 \sigma_x^2} + \frac{1}{2 \sigma_0^2} \right] -
2 \mu \left[ \frac{x}{2 \sigma_x^2} + \frac{\mu_0}{2 \sigma_{0}^2} \right]  + C
=
\dfrac{\mu^2 - 2 \mu \mu_1 + \mu_1^2}{2 \sigma_1^2} - \frac{\mu_1^2}{2 \sigma_1^2} + C
$$

$$
\frac{1}{2 \sigma_1^2} = \left[ \frac{1}{2 \sigma_x^2} + \frac{1}{2 \sigma_0^2} \right],
\quad
\frac{\mu_1}{2 \sigma_1^2} = \left[ \frac{x}{2 \sigma_x^2} + \frac{\mu_0}{2 \sigma_{0}^2} \right]
\\
\sigma_1^2 = \frac{\sigma_0^2 \sigma_x^2}{\sigma_0^2 + \sigma_x^2},
\quad
\mu_1 = \frac{x \sigma_0^2 + \mu_0 \sigma_x^2}{\sigma_0^2 + \sigma_x^2}
$$

Для $N$ точек:

$$
\begin{split}
P(model | data) 
& \propto
\left[ \prod_{i=1}^N
\frac{1}{\sqrt{2 \pi \sigma_x^2}} e^{-\dfrac{(x_i - \mu)^2}{2 \sigma_x^2}} \right]
\frac{1}{\sqrt{2 \pi \sigma_{0}^2}} e^{- \dfrac{(\mu-\mu_0)^2}{2 \sigma_{0}^2}} 
\\
& \propto
e^{-\dfrac{(\mu - \mu_N)^2}{2 \sigma_N^2}}
= Norm(\mu | \mu_N, \sigma_N)
\end{split}
$$

$$
\dfrac{\mu^2 - 2\mu \mu_0 + \mu_0^2}{2 \sigma_{0}^2} + \sum_i^N \dfrac{x_i^2 - 2 \mu x_i + \mu^2}{2 \sigma_x^2}
=
\mu^2 \left[ \frac{1}{2 \sigma_0^2} + \frac{N}{2 \sigma_x^2} \right] -
2 \mu \left[ \frac{\mu_0}{2 \sigma_{0}^2} + \sum_i^N \frac{x_i}{2 \sigma_x^2} \right]  + C
=
\dfrac{\mu^2 - 2 \mu \mu_N + \mu_N^2}{2 \sigma_N^2} - \frac{\mu_N^2}{2 \sigma_N^2} + C
$$

$$
\frac{1}{2 \sigma_N^2} = \left[ \frac{1}{2 \sigma_0^2} + \frac{N}{2 \sigma_x^2} \right],
\quad
\frac{\mu_N}{2 \sigma_N^2} = \left[ \frac{\mu_0}{2 \sigma_{0}^2} + \sum_i^N \frac{x_i}{2 \sigma_x^2} \right]
\\
\sigma_N^2 = \frac{\sigma_0^2 \sigma_x^2}{\sigma_x^2 + N \sigma_0^2},
\quad
\mu_N = \frac{\mu_0 \sigma_x^2 + \sigma_0^2 \sum_i^N x_i}{\sigma_x^2 + N \sigma_0^2 }
= \mu_0 \frac{\sigma_N^2}{\sigma_0^2} + \frac{\sigma_N^2}{\sigma_x^2} \sum_i^N x_i
$$

In [None]:
# Parameters of the Normal model with a fixed observation spread:
#   mu    - mean of the Normal distribution over the unknown mean,
#   sigma - standard deviation of that distribution,
#   sx    - standard deviation of a single observation.
ConjugateNormalParams = namedtuple('ConjugateNormalParams', ['mu', 'sigma', 'sx'])


def initial_params_normal(mu, sigma, sx):
    """Pack the prior hyperparameters into a ``ConjugateNormalParams`` tuple."""
    prior = ConjugateNormalParams(mu, sigma, sx)
    return prior

def posterior_params_normal(data, initial_pars):
    """Update the Normal prior on the mean using observations of fixed spread.

    Closed-form conjugate update:
        sigma_N^2 = sigma_0^2 * sigma_x^2 / (sigma_x^2 + N * sigma_0^2)
        mu_N      = mu_0 * sigma_N^2 / sigma_0^2 + sum(x_i) * sigma_N^2 / sigma_x^2

    Parameters
    ----------
    data : observations x_i; only ``len(data)`` and ``np.sum(data)`` are used.
    initial_pars : prior parameters exposing ``mu``, ``sigma`` and ``sx``.

    Returns
    -------
    ConjugateNormalParams with the posterior mean, the posterior standard
    deviation and the unchanged observation spread ``sx``.
    """
    n_obs = len(data)
    prior_var = initial_pars.sigma**2
    obs_var = initial_pars.sx**2

    post_var = (prior_var * obs_var) / (obs_var + n_obs * prior_var)
    post_mean = initial_pars.mu * post_var / prior_var + np.sum(data) * post_var / obs_var
    return ConjugateNormalParams(mu=post_mean, sigma=np.sqrt(post_var), sx=initial_pars.sx)

def posterior_mu_dist(params):
    """Return the frozen Normal distribution of the mean described by *params*.

    The distribution is centred at ``params.mu`` and has standard deviation
    ``params.sigma``.
    """
    center, spread = params.mu, params.sigma
    return stats.norm(loc=center, scale=spread)

def posterior_rvs(params, nsamp):
    """Draw *nsamp* values from the posterior predictive distribution.

    Sampling is two-stage: a mean is drawn from Normal(params.mu, params.sigma)
    for every sample, then one observation is drawn around each of those means
    with standard deviation ``params.sx``.
    """
    normal = stats.norm
    sampled_means = normal.rvs(loc=params.mu, scale=params.sigma, size=nsamp)
    observations = normal.rvs(loc=sampled_means, scale=params.sx, size=nsamp)
    return observations
    
# def posterior_binom_approx_95pdi(post_dist):
#     lower = post_dist.ppf(0.025)
#     upper = post_dist.ppf(0.975)
#     return lower, upper

# def prob_pb_gt_pa(post_dist_A, post_dist_B, post_samp=100_000):
#     sa = post_dist_A.rvs(size=post_samp)
#     sb = post_dist_B.rvs(size=post_samp)
#     b_gt_a = np.sum(sb > sa)
#     return b_gt_a / post_samp

# Demo: estimate the mean of a Normal sample with the observation spread held fixed.
mu = 3
sigma = 1
nsample = 1000

exact_dist = stats.norm(loc=mu, scale=sigma)
data = exact_dist.rvs(nsample)

# todo: avoid setting from data
# The prior is bootstrapped from the sample: its first point is the prior mean and the
# sample standard deviation serves as both the prior width and the observation spread.
sx = np.std(data)
mu0 = data[0]
sigma0 = sx

pars = initial_params_normal(mu=mu0, sigma=sigma0, sx=sx)
# data[0] was used for the prior mean, so only the remaining points update it.
pars = posterior_params_normal(data[1:], pars)

post_mu = posterior_mu_dist(pars)

npostsamp = 10000
post_samp = posterior_rvs(pars, npostsamp)

x = np.linspace(0, 10, 1000)
sample_mean = np.sum(data) / len(data)
post_mu_peak = max(post_mu.pdf(x))  # common height for the vertical marker lines

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=exact_dist.pdf(x), line_color='black', name='Точное'))
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
fig.add_trace(go.Scatter(x=x, y=post_mu.pdf(x), line_color='blue', name=r'$\mbox{Оценка }\mu$'))
fig.add_trace(go.Scatter(x=[sample_mean, sample_mean], y=[0, post_mu_peak],
                         line_color='black', mode='lines', line_dash='dash', name='Среднее в выборке'))
fig.add_trace(go.Scatter(x=[exact_dist.mean(), exact_dist.mean()], y=[0, post_mu_peak*1.05],
                         line_color='red', mode='lines', line_dash='dash', name='Точное среднее'))
fig.add_trace(go.Histogram(x=post_samp, histnorm='probability density', name='Апострериорное', nbinsx=100,
                           marker_color='green', opacity=0.5))
fig.update_layout(title='Апостериорное распределение',
                  xaxis_title=r'$\mu$',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()

Меняются и среднее, и дисперсия: среднее — за счет $\mu$, дисперсия — за счет $\lambda$.  

$$
P(data | model) = N(x ; \mu, \frac{\sigma_0^2}{\lambda}) = 
\left( \frac{\lambda}{2 \pi \sigma_0^2} \right)^{1/2} e^{- \tfrac{\lambda}{2 \sigma_0^2}(x - \mu)^2}.
$$

Сопряженное априорное распределение - произведение нормального и гамма-распределений.

$$
P(model) = 
Norm(\mu | \lambda; \mu_0, k_0, \sigma_0) Gamma(\lambda; a, b) = 
\left(\frac{\lambda k_0}{2 \pi \sigma_0^2}\right)^{1/2} e^{- \tfrac{\lambda k_0}{2 \sigma_0^2} (\mu-\mu_0)^2}
\frac{b^a}{\Gamma(a)} \lambda^{a-1} e^{-b \lambda}, \quad \lambda>0, \quad a,b>0 .
$$


$$
\begin{split}
P(model | data) 
& \propto
P(data | model) P(model) 
\\
& \propto
\left( \frac{\lambda}{2 \pi \sigma_0^2} \right)^{1/2} e^{- \tfrac{\lambda}{2 \sigma_0^2} (x-\mu)^2}
\left(\frac{\lambda k_0}{2 \pi \sigma_0^2}\right)^{1/2} e^{- \tfrac{\lambda k_0}{2 \sigma_0^2} (\mu-\mu_0)^2}
\frac{b^a}{\Gamma(a)} \lambda^{a-1} e^{-b \lambda}
\\
& \propto
\lambda^{1/2} e^{- \tfrac{\lambda k_1}{2 \sigma_0^2} (\mu-\mu_1)^2}
\lambda^{a_1-1} e^{-b_1 \lambda}
\\
& = Norm(\mu | \lambda; \mu_1, k_1, \sigma_0) Gamma(\lambda; a_1, b_1)
\end{split}
$$

$$
a_1 = a + 1/2
$$

$$
\frac{1}{2 \sigma_0^2}(x-\mu)^2 + \frac{k_0}{2 \sigma_0^2} (\mu-\mu_0)^2 + b =
\\
\frac{1}{2 \sigma_0^2} \left( \mu^2 (k_0 + 1) - 2 \mu (x + k_0 \mu_0) + x^2 + k_0 \mu_0^2 \right) + b = 
\\
\frac{1}{2 \sigma_0^2} \left(
(k_0 + 1) \left[ \mu^2 - 2 \mu \frac{x + k_0 \mu_0}{k_0 + 1} + \left(\frac{x + k_0 \mu_0}{k_0 + 1}\right)^2 \right]
- \frac{(x + k_0 \mu_0)^2}{k_0 + 1} + x^2 + k_0 \mu_0^2 \right) + b
\\
= \frac{k_1}{2 \sigma_0^2} (\mu-\mu_1)^2 + b_1
$$

$$
k_1 = k_0 + 1
\\
\mu_1 = \frac{x + k_0 \mu_0}{k_0 + 1}
\\
b_1 = b + \frac{1}{2 \sigma_0^2} \left( x^2 + k_0 \mu_0^2 - \frac{(x + k_0 \mu_0)^2}{k_0 + 1} \right)
\\
= b + \frac{1}{2 \sigma_0^2} \left( k_0 \mu_0^2 - k_1 \mu_1^2 + x^2 \right)
$$

$$
b_1 = b + \frac{1}{2 \sigma_0^2} \frac{x^2 k_0 + x^2 + k_0^2 \mu_0^2 + k_0 \mu_0^2 - x^2 - 2xk_0\mu_0 - k_0^2 \mu_0^2}{k_0+1}
= b + \frac{1}{2 \sigma_0^2} \frac{x^2 k_0 + k_0 \mu_0^2 - 2xk_0\mu_0}{k_0+1}
\\
= b + \frac{1}{2 \sigma_0^2} \frac{k_0}{k_0+1}(x - \mu_0)^2
$$

Для $N$ точек:

$$
\begin{split}
P(model | data) 
& \propto
P(data | model) P(model) 
\\
& \propto
\left[ \prod_{i=1}^N \left(\frac{\lambda}{2 \pi \sigma_0^2}\right)^{1/2} e^{- \tfrac{\lambda}{2 \sigma_0^2} (x_i-\mu)^2} \right]
\left(\frac{\lambda k_0}{2 \pi \sigma_0^2}\right)^{1/2} e^{- \tfrac{\lambda k_0}{2 \sigma_0^2} (\mu-\mu_0)^2}
\frac{b^a}{\Gamma(a)} \lambda^{a-1} e^{-b \lambda}
\\
& \propto
\lambda^{1/2} e^{- \tfrac{\lambda k_N}{2 \sigma_0^2} (\mu-\mu_N)^2}
\lambda^{a_N-1} e^{-b_N \lambda}
\\
& = Norm(\mu | \lambda; \mu_N, k_N, \sigma_0) Gamma(\lambda; a_N, b_N)
\end{split}
$$

$$
a_N = a + N/2
$$

$$
\frac{\sum_i^N (x_i - \mu)^2}{2 \sigma_0^2} + \frac{k_0}{2 \sigma_0^2} (\mu-\mu_0)^2 + b =
\\
\frac{1}{2 \sigma_0^2} 
\left( \mu^2 (k_0 + N) - 2 \mu (k_0 \mu_0 + \sum_i^N x_i) + \sum_i^N x_i^2 + k_0 \mu_0^2 \right) + b = 
\\
\frac{1}{2 \sigma_0^2} 
\left( 
(k_0 + N) \left[ \mu^2 - 2 \mu \frac{k_0 \mu_0 + \sum_i^N x_i}{k_0 + N} + 
\left(\frac{k_0 \mu_0 + \sum_i^N x_i}{k_0 + N}\right)^2 \right]
- \frac{(k_0 \mu_0 + \sum_i^N x_i)^2}{k_0 + N} + \sum_i^N x_i^2 + k_0 \mu_0^2 \right) + b
\\
= \frac{k_N}{2 \sigma_0^2} (\mu-\mu_N)^2 + b_N
$$

$$
k_N = k_0 + N
\\
\mu_N = \frac{k_0 \mu_0 + \sum_i^N x_i}{k_0 + N}
\\
b_N = b + \frac{1}{2 \sigma_0^2} 
\left( \sum_i^N x_i^2 + k_0 \mu_0^2 - \frac{(k_0 \mu_0 + \sum_i^N x_i)^2}{k_0 + N} \right)
\\
b_N = b + \frac{1}{2 \sigma_0^2} 
\left( k_0 \mu_0^2 - k_N \mu_N^2 + \sum_i^N x_i^2 \right)
$$

Маргинальные распределения

По $\lambda$ гамма-распределение

$$
P(\lambda | data) = Gamma(\lambda; a_N, b_N)
$$

По $\mu$ $t$-распределение  
https://en.wikipedia.org/wiki/Student%27s_t-distribution#Location-scale_t_distribution  

$$
\begin{split}
P(\mu | data) & = \int d\lambda P(\mu, \lambda | data)
\\
& = \int d\lambda Norm(\mu | \lambda; \mu_N, k_N, \sigma_0) Gamma(\lambda; a_N, b_N)
\\
& \propto_{\mu}
\int d\lambda 
\lambda^{1/2} e^{- \frac{\lambda k_N}{2 \sigma_0^2} (\mu-\mu_N)^2}
\lambda^{a_N-1} e^{-b_N \lambda}
\\
& \propto_{\mu}
\int d\lambda
\lambda^{a_N + 1/2 -1} e^{-\lambda (b_N + \tfrac{k_N}{2 \sigma_0^2} (\mu-\mu_N)^2)}
\\
& \propto_{\mu}
\frac{\Gamma(a_N + 1/2)}{(b_N + \tfrac{k_N}{2 \sigma_0^2} (\mu-\mu_N)^2)^{a_N + 1/2}}
\\
& \propto_{\mu}
\left(1 + \frac{1}{2 a_N} \frac{2 a_N k_N}{2 \sigma_0^2 b_N} (\mu-\mu_N)^2\right)^{-(2 a_N + 1)/2}
\\
& = lst \left( \mu; \nu = 2 a_N, \mu_t = \mu_N, \tau= \left( \frac{b_N \sigma_0^2}{a_N k_N} \right)^{1/2} \right)
\end{split}
$$

In [None]:
# Parameters of the Normal-Gamma model:
#   mu - location of the Normal distribution over the unknown mean,
#   k  - factor multiplying lambda in the precision of that Normal distribution,
#   a  - shape of the Gamma distribution over lambda,
#   b  - rate of the Gamma distribution over lambda,
#   s0 - base standard deviation; an observation has spread s0 / sqrt(lambda).
ConjugateNormalParamsLmd = namedtuple('ConjugateNormalParamsLmd', ['mu', 'k', 'a', 'b', 's0'])


def initial_params_normal_lmd(mu, k, a, b, s0):
    """Pack the prior hyperparameters into a ``ConjugateNormalParamsLmd`` tuple."""
    prior = ConjugateNormalParamsLmd(mu, k, a, b, s0)
    return prior

def posterior_params_normal_lmd(data, initial_pars):
    """Conjugate Normal-Gamma update for an unknown mean and precision factor.

    Closed-form update:
        k_N  = k_0 + N
        a_N  = a + N / 2
        mu_N = (k_0 * mu_0 + sum(x_i)) / k_N
        b_N  = b + (k_0 * mu_0^2 - k_N * mu_N^2 + sum(x_i^2)) / (2 * s0^2)

    Parameters
    ----------
    data : observations x_i; passed to ``len``, ``np.sum`` and ``np.dot``.
    initial_pars : prior parameters exposing ``mu``, ``k``, ``a``, ``b``, ``s0``.

    Returns
    -------
    ConjugateNormalParamsLmd with the updated ``mu``, ``k``, ``a``, ``b`` and
    the unchanged ``s0``.
    """
    prior = initial_pars
    n_obs = len(data)

    k_post = prior.k + n_obs
    a_post = prior.a + n_obs / 2
    mean_post = (prior.mu * prior.k + np.sum(data)) / k_post

    sum_of_squares = np.dot(data, data)
    quadratic = prior.k * prior.mu**2 - k_post * mean_post**2 + sum_of_squares
    b_post = prior.b + 1/(2 * prior.s0**2) * quadratic

    return ConjugateNormalParamsLmd(mu=mean_post, k=k_post, a=a_post, b=b_post, s0=prior.s0)

def posterior_normal_lmd_rvs(params, nsamp):
    """Draw *nsamp* values from the Normal-Gamma posterior predictive distribution.

    For every sample: lambda is drawn from Gamma(shape=a, scale=1/b), a mean is
    drawn from a Normal centred at ``params.mu`` with spread
    ``s0 / sqrt(lambda * k)``, and finally an observation is drawn around that
    mean with spread ``s0 / sqrt(lambda)``.
    """
    precision = stats.gamma.rvs(a=params.a, scale=1/params.b, size=nsamp)
    mean_spread = params.s0 / np.sqrt(precision * params.k)
    mean_draws = stats.norm.rvs(loc=params.mu, scale=mean_spread, size=nsamp)
    obs_spread = params.s0 / np.sqrt(precision)
    return stats.norm.rvs(loc=mean_draws, scale=obs_spread, size=nsamp)

def posterior_normal_lmd_marginal_mu_dist(params):
    """Return the marginal distribution of the mean as a frozen Student t.

    Integrating lambda out of the Normal-Gamma posterior leaves a
    location-scale t distribution with ``df = 2 * a``, location ``mu`` and
    scale ``sqrt(b * s0^2 / (a * k))``.
    """
    scale_squared = params.b * params.s0**2 / (params.a * params.k)
    return stats.t(df=2 * params.a, loc=params.mu, scale=np.sqrt(scale_squared))

# Demo of the Normal-Gamma model on a Normal(loc=1, scale=5) sample.
loc = 1
scale = 5
nsample = 300

exact_dist_norm = stats.norm(loc=loc, scale=scale)
data_norm = exact_dist_norm.rvs(nsample)

#todo: avoid setting from data
s0 = np.std(data_norm)
pars_norm = posterior_params_normal_lmd(
    data_norm,
    initial_params_normal_lmd(mu=1, k=1, a=2, b=1, s0=s0),
)

post_samp_normal = posterior_normal_lmd_rvs(pars_norm, nsamp=200000)
postdist_normal_mu = posterior_normal_lmd_marginal_mu_dist(pars_norm)
#postdist_normal_tau = posterior_normal_marginal_tau_dist(pars_norm)
#postdist

# Posterior predictive draws against the density the data were generated from.
x = np.linspace(-20, 20, 20000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=exact_dist_norm.pdf(x), name='Norm'),
    go.Histogram(x=post_samp_normal, histnorm='probability density', name='Post Samp Hist', nbinsx=1000),
])
fig.update_layout(
    title='Normal Distribution',
    xaxis_title='$x$',
    yaxis_title='Prob Density',
    #xaxis_range=[0,10],
    hovermode="x",
    height=550,
)
fig.show()


# Marginal posterior of the mean, with the mean of the generating distribution marked.
x = np.linspace(-1, 10, 20000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=postdist_normal_mu.pdf(x), mode='lines', name='Posterior Mu'),
])
fig.add_vline(exact_dist_norm.mean(), name='Original Distribution Mean')
fig.update_layout(
    title='Mu Distribution',
    xaxis_title='$x$',
    yaxis_title='Prob Density',
    hovermode="x",
    height=550,
)
fig.show()

# x = np.linspace(-3, 3, 20000)
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=x, y=postdist_normal_tau.pdf(x), mode='lines', name=f'Posterior Tau'))
# fig.add_vline(1/(2 * exact_dist_norm.std()**2), name='Original Distribution Sigma')
# fig.update_layout(title='Tau Distribution',
#                   xaxis_title='$x$',
#                   yaxis_title='Prob Density',
#                   hovermode="x",
#                   height=550)
# fig.show()

In [None]:
# One Normal sample analysed with both conjugate models.
mu = 3
sigma = 1
nsample = 1000
npostsamp = 50000

exact_dist = stats.norm(loc=mu, scale=sigma)
data = exact_dist.rvs(nsample)

# Model with a fixed observation spread: only the mean is inferred.
# todo: avoid setting from data
sx = np.std(data)
mu0 = data[0]
sigma0 = sx
pars = posterior_params_normal(
    data[1:],
    initial_params_normal(mu=mu0, sigma=sigma0, sx=sx),
)
post_mu = posterior_mu_dist(pars)
post_samp = posterior_rvs(pars, npostsamp)

# Normal-Gamma model: the mean and the precision factor are inferred together.
# todo: avoid setting from data
s0 = np.std(data)
pars_norm_lmd = posterior_params_normal_lmd(
    data,
    initial_params_normal_lmd(mu=1, k=1, a=2, b=1, s0=s0),
)
postdist_normal_mu_lmd = posterior_normal_lmd_marginal_mu_dist(pars_norm_lmd)
post_samp_lmd = posterior_normal_lmd_rvs(pars_norm_lmd, nsamp=npostsamp)



# Figure 1: posterior densities of the mean from both models, with the sample mean and
# the mean of the generating distribution marked by dashed vertical lines.
# LaTeX labels are raw strings so the backslashes are not parsed as (invalid) escapes.
x = np.linspace(0, 10, 1000)
sample_mean = np.sum(data) / len(data)
post_mu_peak = max(post_mu.pdf(x))  # common height for the vertical marker lines

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=post_mu.pdf(x), line_color='blue', name=r'$\mbox{Оценка }\mu$'))
fig.add_trace(go.Scatter(x=x, y=postdist_normal_mu_lmd.pdf(x), line_color='green', name=r'$\mbox{Оценка }\mu\mbox{ с }\lambda$'))
fig.add_trace(go.Scatter(x=[sample_mean, sample_mean], y=[0, post_mu_peak],
                         line_color='black', mode='lines', line_dash='dash', name='Среднее в выборке'))
fig.add_trace(go.Scatter(x=[exact_dist.mean(), exact_dist.mean()], y=[0, post_mu_peak*1.05],
                         line_color='red', mode='lines', line_dash='dash', name='Точное среднее'))
fig.update_layout(title=r'$\mbox{Оценка }\mu$',
                  xaxis_title=r'$\mu$',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()


# Figure 2: posterior predictive samples of both models over the generating density.
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=exact_dist.pdf(x), line_color='black', name='Точное'))
fig.add_trace(go.Histogram(x=post_samp, histnorm='probability density', name=r'$\mbox{Апострериорное, }\mu$', nbinsx=300,
                           marker_color='green', opacity=0.2))
fig.add_trace(go.Histogram(x=post_samp_lmd, histnorm='probability density', name=r'$\mbox{Апострериорное, }\mu, \lambda$', nbinsx=300,
                           marker_color='blue', opacity=0.2))
fig.update_layout(title='Апостериорное распределение',
                  xaxis_title='x',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()

In [None]:
def reshape_and_compute_means(sample, n_split):
    """Average *sample* over consecutive groups of *n_split* values.

    Parameters
    ----------
    sample : sequence or array of numbers; it is sliced and passed to ``np.reshape``.
    n_split : int
        Number of consecutive values in each group.

    Returns
    -------
    numpy.ndarray
        Array of ``len(sample) // n_split`` group means.  Trailing values that
        do not fill a whole group are ignored.
    """
    n_means = len(sample) // n_split
    # Drop the incomplete tail so that what remains reshapes into full rows.
    grouped = np.reshape(sample[:n_means * n_split], (n_means, n_split))
    # A single vectorised reduction replaces the Python-level loop over rows.
    return grouped.mean(axis=1)

def exact_clt_dist(exact_dist, n_split):
    """Return the Normal distribution the CLT predicts for means of *n_split* draws.

    It keeps the mean of *exact_dist* and divides its standard deviation by
    ``sqrt(n_split)``.
    """
    return stats.norm(loc=exact_dist.mean(), scale=exact_dist.std() / np.sqrt(n_split))

def sample_clt_dist(means):
    """Return a Normal distribution fitted to *means*.

    The location is ``means.mean()`` and the scale is ``means.std()``.
    """
    center, spread = means.mean(), means.std()
    return stats.norm(loc=center, scale=spread)

# CLT demo on a Gamma sample (shape a, rate b): means of nsplit-point groups are
# compared with the Normal distribution predicted by the CLT.
nsample = 30000
npostsamp = 50000

a = 4
b = 2
exact_dist = stats.gamma(a=a, scale=1/b)
data = exact_dist.rvs(nsample)

nsplit = 30
means = reshape_and_compute_means(data, nsplit)
clt_dist_exact = exact_clt_dist(exact_dist, nsplit)
#clt_dist_samp = sample_clt_dist(means)


x = np.linspace(0, 10, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=exact_dist.pdf(x),
               mode='lines', line_color='black', line_dash='solid', name='Исходное распределение'),
    go.Histogram(x=data, histnorm='probability density', name='Выборка', nbinsx=500,
                 marker_color='black', opacity=0.3),
    #fig.add_vline(exact_dist.mean(), name='Точное среднее')
    go.Scatter(x=x, y=clt_dist_exact.pdf(x),
               mode='lines', line_color='black', line_dash='dash', name='ЦПТ-распределение'),
    go.Histogram(x=means, histnorm='probability density', name='Выборочные средние', nbinsx=50,
                 marker_color='green', opacity=0.5),
])
fig.update_layout(
    title='Выборочные средние',
    xaxis_title='x',
    yaxis_title='Плотность вероятности',
    barmode='overlay',
    hovermode="x",
    height=550,
)
fig.update_layout(xaxis_range=[0, 5])
fig.show()



# Both conjugate models are applied to the group means rather than to the raw sample.
# todo: avoid setting from data
sx = np.std(means)
mu0 = means[0]
sigma0 = sx
pars = initial_params_normal(mu=mu0, sigma=sigma0, sx=sx)
# means[0] was used for the prior mean, so only the remaining means update it.
pars = posterior_params_normal(means[1:], pars)
post_mu = posterior_mu_dist(pars)
post_samp = posterior_rvs(pars, npostsamp)

# todo: avoid setting from data
s0 = np.std(means)
pars_norm_lmd = initial_params_normal_lmd(mu=1, k=1, a=2, b=1, s0=s0)
pars_norm_lmd = posterior_params_normal_lmd(means, pars_norm_lmd)
postdist_normal_mu_lmd = posterior_normal_lmd_marginal_mu_dist(pars_norm_lmd)
post_samp_lmd = posterior_normal_lmd_rvs(pars_norm_lmd, nsamp=npostsamp)



# Figure 1: posterior densities of the mean from both models.
# LaTeX labels are raw strings so the backslashes are not parsed as (invalid) escapes.
x = np.linspace(0, 10, 10000)
sample_mean = np.sum(data) / len(data)
post_mu_peak = max(post_mu.pdf(x))  # common height for the vertical marker lines

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=post_mu.pdf(x), line_color='blue', name=r'$\mbox{Оценка }\mu$'))
fig.add_trace(go.Scatter(x=x, y=postdist_normal_mu_lmd.pdf(x), line_color='green', name=r'$\mbox{Оценка }\mu\mbox{ с }\lambda$'))
fig.add_trace(go.Scatter(x=[sample_mean, sample_mean], y=[0, post_mu_peak],
                         line_color='black', mode='lines', line_dash='dash', name='Среднее в выборке'))
fig.add_trace(go.Scatter(x=[exact_dist.mean(), exact_dist.mean()], y=[0, post_mu_peak*1.05],
                         line_color='red', mode='lines', line_dash='dash', name='Точное среднее'))
fig.update_layout(title=r'$\mbox{Оценка }\mu$',
                  xaxis_title=r'$\mu$',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()


# Figure 2: posterior predictive samples against the generating density, the CLT
# distribution and the histogram of the group means.
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=exact_dist.pdf(x), line_dash='solid', line_color='black', name='Точное'))
fig.add_trace(go.Histogram(x=post_samp, histnorm='probability density', name=r'$\mbox{Апострериорное, }\mu$', nbinsx=300,
                           marker_color='green', opacity=0.2))
fig.add_trace(go.Histogram(x=post_samp_lmd, histnorm='probability density', name=r'$\mbox{Апострериорное, }\mu, \lambda$', nbinsx=300,
                           marker_color='blue', opacity=0.2))
fig.add_trace(go.Scatter(x=x, y=clt_dist_exact.pdf(x),
                         mode='lines', line_color='black', line_dash='dash', name='ЦПТ-распределение'))
fig.add_trace(go.Histogram(x=means, histnorm='probability density', name='Выборочные средние', nbinsx=100,
                           marker_color='green', opacity=0.5))
fig.update_layout(title='Апостериорное распределение',
                  xaxis_title='x',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()

In [None]:
# Same experiment on a Lomax sample; the Normal curve is fitted to the group means
# instead of being derived from the generating distribution.
nsample = 30000
npostsamp = 50000

c = 3.3
exact_dist = stats.lomax(c=c)
data = exact_dist.rvs(nsample)

nsplit = 30
means = reshape_and_compute_means(data, nsplit)
#clt_dist_exact = exact_clt_dist(exact_dist, nsplit)
clt_dist_samp = sample_clt_dist(means)


xaxis_max=10
x = np.linspace(0, xaxis_max, 2000)
# Values at or above xaxis_max are left out of both histograms.
fig = go.Figure(data=[
    go.Scatter(x=x, y=exact_dist.pdf(x),
               mode='lines', line_color='black', line_dash='solid', name='Исходное распределение'),
    go.Histogram(x=data[data < xaxis_max], histnorm='probability density', name='Выборка', nbinsx=500,
                 marker_color='black', opacity=0.3),
    #fig.add_vline(exact_dist.mean(), name='Точное среднее')
    go.Scatter(x=x, y=clt_dist_samp.pdf(x),
               mode='lines', line_color='black', line_dash='dash', name='ЦПТ-подобное распределение'),
    go.Histogram(x=means[means<xaxis_max], histnorm='probability density', name='Выборочные средние', nbinsx=100,
                 marker_color='green', opacity=0.5),
])
fig.update_layout(
    title='Выборочные средние',
    xaxis_title='x',
    yaxis_title='Плотность вероятности',
    barmode='overlay',
    hovermode="x",
    xaxis_range=[0, xaxis_max],
    height=550,
)
# This second call overrides the x-axis range set just above.
fig.update_layout(xaxis_range=[0, 5])
fig.show()



# Both conjugate models are applied to the group means rather than to the raw sample.
# todo: avoid setting from data
sx = np.std(means)
mu0 = means[0]
sigma0 = sx
pars = initial_params_normal(mu=mu0, sigma=sigma0, sx=sx)
# means[0] was used for the prior mean, so only the remaining means update it.
pars = posterior_params_normal(means[1:], pars)
post_mu = posterior_mu_dist(pars)
post_samp = posterior_rvs(pars, npostsamp)

# todo: avoid setting from data
s0 = np.std(means)
pars_norm_lmd = initial_params_normal_lmd(mu=1, k=1, a=2, b=1, s0=s0)
pars_norm_lmd = posterior_params_normal_lmd(means, pars_norm_lmd)
postdist_normal_mu_lmd = posterior_normal_lmd_marginal_mu_dist(pars_norm_lmd)
post_samp_lmd = posterior_normal_lmd_rvs(pars_norm_lmd, nsamp=npostsamp)



# Figure 1: posterior densities of the mean from both models.
# LaTeX labels are raw strings so the backslashes are not parsed as (invalid) escapes.
x = np.linspace(0, 10, 10000)
sample_mean = np.sum(data) / len(data)
post_mu_peak = max(post_mu.pdf(x))  # common height for the vertical marker lines

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=post_mu.pdf(x), line_color='blue', name=r'$\mbox{Оценка }\mu$'))
fig.add_trace(go.Scatter(x=x, y=postdist_normal_mu_lmd.pdf(x), line_color='green', name=r'$\mbox{Оценка }\mu\mbox{ с }\lambda$'))
fig.add_trace(go.Scatter(x=[sample_mean, sample_mean], y=[0, post_mu_peak],
                         line_color='black', mode='lines', line_dash='dash', name='Среднее в выборке'))
fig.add_trace(go.Scatter(x=[exact_dist.mean(), exact_dist.mean()], y=[0, post_mu_peak*1.05],
                         line_color='red', mode='lines', line_dash='dash', name='Точное среднее'))
fig.update_layout(title=r'$\mbox{Оценка }\mu$',
                  xaxis_title=r'$\mu$',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()


# Figure 2: posterior predictive samples against the generating density, the Normal
# curve fitted to the group means and the histogram of those means.
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=exact_dist.pdf(x), line_dash='solid', line_color='black', name='Точное'))
fig.add_trace(go.Histogram(x=post_samp, histnorm='probability density', name=r'$\mbox{Апострериорное, }\mu$', nbinsx=300,
                           marker_color='green', opacity=0.2))
fig.add_trace(go.Histogram(x=post_samp_lmd, histnorm='probability density', name=r'$\mbox{Апострериорное, }\mu, \lambda$', nbinsx=300,
                           marker_color='blue', opacity=0.2))
fig.add_trace(go.Scatter(x=x, y=clt_dist_samp.pdf(x),
                         mode='lines', line_color='black', line_dash='dash', name='ЦПТ-распределение'))
fig.add_trace(go.Histogram(x=means, histnorm='probability density', name='Выборочные средние', nbinsx=100,
                           marker_color='green', opacity=0.5))
fig.update_layout(title='Апостериорное распределение',
                  xaxis_title='x',
                  yaxis_title='Плотность вероятности',
                  #xaxis_range=[0, 10],
                  barmode='overlay',
                  hovermode="x",
                  height=500)
fig.show()

# Берри-Эссеен

In [None]:
import stats

def berry_esseen_n(data, diff=0.05):
    """Estimate the sample size at which the CLT error bound drops below *diff*.

    The Berry-Esseen inequality bounds the distance between the CDF of the
    standardised sample mean and the standard Normal CDF by
    ``C * rho / (s**3 * sqrt(n))``, where ``s`` is the standard deviation and
    ``rho = E|X - E[X]|**3`` is the third *absolute* central moment.  Solving
    ``C * rho / (s**3 * sqrt(n)) <= diff`` for ``n`` gives the returned value;
    both moments are estimated from *data*.

    Parameters
    ----------
    data : array-like
        Sample used to estimate ``s`` and ``rho``.
    diff : float, optional
        Largest tolerated CDF difference; must be positive.

    Returns
    -------
    int
        Smallest integer ``n`` for which the bound does not exceed *diff*.

    Raises
    ------
    ValueError
        If *diff* is not positive or the sample has zero standard deviation.
    """
    if diff <= 0:
        raise ValueError("diff must be positive")

    values = np.asarray(data, dtype=float)
    C = 0.5  # constant of the inequality, kept as in the original estimate
    s = values.std()
    if s == 0:
        raise ValueError("data has zero standard deviation")

    # The inequality needs E|X - mean|^3.  The signed third central moment
    # vanishes for symmetric data and would wrongly yield n = 0.
    rho = np.mean(np.abs(values - values.mean())**3)

    # Round up: truncating could return an n that does not satisfy the bound.
    return int(np.ceil(((C * rho) / (s**3 * diff))**2))

# Apply the Berry-Esseen estimate to a Gamma sample (shape a, rate b).
nsample = 3000

a = 4
b = 2
exact_dist = stats.gamma(a=a, scale=1/b)
data = exact_dist.rvs(size=nsample)

# Last expression of the cell, so the notebook displays the estimate.
berry_esseen_n(data)