In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import plotly.graph_objects as go

from collections import namedtuple

# Seed NumPy's legacy global RNG so that the sampling cells below give the same
# draws on every "Restart Kernel -> Run All".
# NOTE(review): this relies on scipy's `.rvs()` falling back to the global NumPy
# state when no `random_state` is passed — confirm for the installed scipy version.
np.random.seed(7)

Распространение ошибок: [Propagation of uncertainty](https://en.wikipedia.org/wiki/Propagation_of_uncertainty).  
В статистике этот подход называют дельта-методом: [Delta method](https://en.wikipedia.org/wiki/Delta_method).

Относительная разность.

В итогах удобно видеть относительную разность

$$
\Delta = \frac{B - A}{A}
$$

For example, given a differentiable function $f(x, y)$ with real values, one can approximate $f(x, y)$ for $(x, y)$ close to $(a, b)$ by the formula 
$$
    f\left(x,y\right)\approx f\left(a,b\right) + \frac{\partial f}{\partial x} \left(a,b\right)\left(x-a\right) + \frac{\partial f}{\partial y} \left(a,b\right)\left(y-b\right).
$$

(вспомнить разложение в ряд функций случайной величины)

$$
\frac{B - A}{A} \approx \frac{\mu_B - \mu_A}{\mu_A} + \frac{1}{\mu_A} (B - \mu_B) - \frac{\mu_B}{\mu_A^2}(A - \mu_A)
\\
E\left[\frac{B - A}{A}\right] \approx E\left[\frac{\mu_B - \mu_A}{\mu_A} + \frac{1}{\mu_A} (B - \mu_B) - \frac{\mu_B}{\mu_A^2}(A - \mu_A)\right] = \frac{\mu_B - \mu_A}{\mu_A}
\\
Var\left[\frac{B - A}{A}\right] \approx Var\left[\frac{\mu_B - \mu_A}{\mu_A} + \frac{1}{\mu_A} (B - \mu_B) - \frac{\mu_B}{\mu_A^2}(A - \mu_A)\right]
\\
Var[\text{const}] = 0
\\
Var(aX + bY) = a^2 Var(X) + b^2 Var(Y) + 2ab \operatorname{Cov}(X,Y)
\\
Var\left[\frac{B - A}{A}\right] \approx \frac{1}{\mu_A^2} \operatorname{Var}{B} + 
\frac{\mu_B^2}{\mu_A^4} \operatorname{Var}{A} - 2 \frac{\mu_B}{\mu_A^3} \operatorname{Cov}(A,B)
\\
Var\left[\frac{B - A}{A}\right] \approx \frac{\mu_B^2}{\mu_A^2} \left(
\frac{\operatorname{Var}{A}}{\mu_A^2} + \frac{\operatorname{Var}{B}}{\mu_B^2} - 
\frac{2 \operatorname{Cov}(A,B)}{\mu_A \mu_B} \right)
$$


Если распределения $p_A$ и $p_B$ нормальные, то распределение $\Delta p$ также можно приближенно считать нормальным. 
Также нужно $\mathrm{Cov}(p_A, p_B) = 0$ и малая плотность вероятности $p_A$ вблизи 0.

$$
P_{p_A}(x) = \text{Norm}(x; \mu_A, \sigma^2_A),
\qquad
P_{p_B}(x) = \text{Norm}(x; \mu_B, \sigma^2_B)
\\
P_{\Delta p}(x) \approx \text{Norm}(x; \mu_{\Delta}, \sigma_{\Delta}^2), 
\quad
\mu_{\Delta} = \frac{\mu_B - \mu_A}{\mu_A},
\quad
\sigma_{\Delta} = \frac{|\mu_B|}{|\mu_A|}
\sqrt{
\frac{\sigma_{A}^{2}}{\mu_A^{2}}
+ \frac{\sigma_{B}^{2}}{\mu_B^{2}}
}
$$

Условие "малая плотность вероятности $p_A$ вблизи 0" может быть нарушено, например, при оценке средней выручки на пользователя с учетом неплатящих. 

In [None]:
# Delta-method check, case 1: A ~ Norm(1, 0.1**2), B ~ Norm(1.1, 0.1**2),
# i.e. the mean of A is 10 standard deviations away from zero.
mua, sa = 1, 0.1
mub, sb = 1.1, 0.1

# Error-propagation moments of (B - A) / A (covariance term omitted).
mur = mub / mua - 1
sr = np.abs(mub / mua) * np.sqrt((sa / mua)**2 + (sb / mub)**2)

da = stats.norm(mua, sa)
db = stats.norm(mub, sb)
dr = stats.norm(mur, sr)

# Layout options shared by both figures of this cell.
solid_black = dict(color='black', dash='solid')
common_layout = dict(
    xaxis_title='x',
    yaxis_title='Плотность вероятности',
    barmode='overlay',
    hovermode="x",
    height=550,
)

# Figure 1: densities of A and B, with a dashed marker at x = 0.
x = np.linspace(-1, 10, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=da.pdf(x), mode='lines', line=solid_black, name='А'),
    go.Scatter(x=x, y=db.pdf(x), mode='lines', line=solid_black, name='Б', opacity=0.3),
])
fig.add_vline(0, line_dash='dash', line_color='black')
fig.update_layout(title='Распределения А, Б', xaxis_range=[-0.5, 3], **common_layout)
fig.show()
fig.write_image("./rel_diff_orig.png", scale=2)

# Monte Carlo reference: draw A and B, form the relative difference per draw.
N = 100000
samp_a = da.rvs(size=N)
samp_b = db.rvs(size=N)
samp_r = (samp_b - samp_a) / samp_a

# Figure 2: normal approximation (line) against the sampled histogram.
x = np.linspace(-3, 3, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=dr.pdf(x), mode='lines', line=solid_black,
               name='Распространение ошибок'),
    go.Histogram(x=samp_r, histnorm='probability density', name='Сэмплирование',
                 nbinsx=1000, marker_color='green', opacity=0.5),
])
fig.update_layout(title='Относительная разность', xaxis_range=[-3, 3], **common_layout)
fig.show()
fig.write_image("./rel_diff_delta.png", scale=2)

In [None]:
# Delta-method check, case 2: A ~ Norm(0.1, 0.07**2), B ~ Norm(0.13, 0.07**2),
# i.e. the mean of A is only ~1.4 standard deviations away from zero.
mua, sa = 0.1, 0.07
mub, sb = 0.13, 0.07

# Error-propagation moments of (B - A) / A (covariance term omitted).
mur = mub / mua - 1
sr = np.abs(mub / mua) * np.sqrt((sa / mua)**2 + (sb / mub)**2)

da = stats.norm(mua, sa)
db = stats.norm(mub, sb)
dr = stats.norm(mur, sr)

# Layout options shared by both figures of this cell.
solid_black = dict(color='black', dash='solid')
common_layout = dict(
    xaxis_title='x',
    yaxis_title='Плотность вероятности',
    barmode='overlay',
    hovermode="x",
    height=550,
)

# Figure 1: densities of A and B, with a dashed marker at x = 0.
x = np.linspace(-1, 1, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=da.pdf(x), mode='lines', line=solid_black, name='А'),
    go.Scatter(x=x, y=db.pdf(x), mode='lines', line=solid_black, name='Б', opacity=0.3),
])
fig.add_vline(0, line_dash='dash', line_color='black')
fig.update_layout(title='Распределения А, Б', xaxis_range=[-1, 1], **common_layout)
fig.show()
fig.write_image("./rel_diff_near0.png", scale=2)

# Monte Carlo reference: draw A and B, form the relative difference per draw.
N = 100000
samp_a = da.rvs(size=N)
samp_b = db.rvs(size=N)
samp_r = (samp_b - samp_a) / samp_a

# Only draws strictly inside (-10, 10) are passed to the histogram.
# NOTE(review): the density normalisation is then presumably taken over the kept
# draws only, not over all N — confirm against plotly's `histnorm` behaviour.
in_window = (samp_r > -10) & (samp_r < 10)

# Figure 2: normal approximation (line) against the sampled histogram.
x = np.linspace(-10, 10, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=dr.pdf(x), mode='lines', line=solid_black,
               name='Распространение ошибок'),
    go.Histogram(x=samp_r[in_window], histnorm='probability density', name='Сэмплирование',
                 nbinsx=1000, marker_color='green', opacity=0.5),
])
fig.update_layout(title='Относительная разность', xaxis_range=[-7, 10], **common_layout)
fig.show()
fig.write_image("./rel_diff_delta_near0.png", scale=2)

In [None]:
# Probability that the relative difference (p_b - p_a) / p_a exceeds `rd`,
# computed two ways: delta-method normal approximation vs. direct sampling.

# Observed data: trials (na, nb) and successes (sa, sb) per group.
na = 10000
sa = 1000
nb = 10000
sb = 1100

# Threshold for the relative difference.
rd = 0.01

# Beta(successes + 1, failures + 1) distribution for each rate
# (presumably posteriors under a uniform prior — the prior is not stated here).
p_dist_a = stats.beta(a=sa+1, b=na-sa+1)
p_dist_b = stats.beta(a=sb+1, b=nb-sb+1)

# Delta-method moments of (p_b - p_a) / p_a, covariance term omitted.
mean_a, mean_b = p_dist_a.mean(), p_dist_b.mean()
rd_mu = (mean_b - mean_a) / mean_a
rd_s = np.abs(mean_b / mean_a) * np.sqrt((p_dist_a.std() / mean_a)**2 + (p_dist_b.std() / mean_b)**2)

approx_reldiff_dist = stats.norm(loc=rd_mu, scale=rd_s)
approx_reldiff_p = 1 - approx_reldiff_dist.cdf(rd)

# Sampling reference: share of paired draws whose relative difference exceeds rd.
npost = 100000
samp_a = p_dist_a.rvs(size=npost)
samp_b = p_dist_b.rvs(size=npost)
samp_reldiff_p = np.mean((samp_b - samp_a) / samp_a > rd)

# The threshold of this cell is `rd`; interpolating `d` (defined only in a later
# cell) raises NameError on a fresh kernel.
print(f"P((p_b-p_a)/p_a > {rd}) rel diff dist: {approx_reldiff_p}")
print(f"P((p_b-p_a)/p_a > {rd}) post samples: {samp_reldiff_p}")

# Output recorded from an earlier run:
# P((p_b-p_a)/p_a > 0.01) rel diff dist: 0.97604762812634
# P((p_b-p_a)/p_a > 0.01) post samples: 0.98068

In [None]:
# Probability that the absolute difference p_b - p_a exceeds `d`,
# computed two ways: normal approximation of the difference vs. direct sampling.

# Observed data: trials (na, nb) and successes (sa, sb) per group.
na, sa = 10000, 1000
nb, sb = 10000, 1100

# Threshold for the absolute difference.
d = 0.01

# Beta(successes + 1, failures + 1) distribution for each rate.
p_dist_a = stats.beta(sa + 1, na - sa + 1)
p_dist_b = stats.beta(sb + 1, nb - sb + 1)

# Normal approximation: difference of the means, variances added.
diff_loc = p_dist_b.mean() - p_dist_a.mean()
diff_scale = np.sqrt(p_dist_b.std()**2 + p_dist_a.std()**2)
approx_diff_dist = stats.norm(loc=diff_loc, scale=diff_scale)
approx_diff_p = 1 - approx_diff_dist.cdf(d)

# Sampling reference: share of paired draws whose difference exceeds d.
npost = 100000
samp_a = p_dist_a.rvs(size=npost)
samp_b = p_dist_b.rvs(size=npost)
exceeds_d = (samp_b - samp_a) > d
samp_diff_p = np.sum(exceeds_d) / npost

for label, value in (("diff dist", approx_diff_p), ("post samples", samp_diff_p)):
    print(f"P(p_b - p_a > {d}) {label}: {value}")

# Output recorded from an earlier run:
# P(p_b - p_a > 0.01) diff dist: 0.49981600299255935
# P(p_b - p_a > 0.01) post samples: 0.50065

In [None]:
# Compare two normal approximations of (B - A) / A with the sampled distribution:
# the plain error-propagation mean and a mean with the sa**2 / mua**2 correction.
# A ~ Norm(1, 0.1**2), B ~ Norm(1.1, 0.1**2).
mua, sa = 1, 0.1
mub, sb = 1.1, 0.1

da = stats.norm(mua, sa)
db = stats.norm(mub, sb)

# Plain error-propagation moments (covariance term omitted).
mur = mub / mua - 1
sr = (mub / mua) * np.sqrt((sa / mua)**2 + (sb / mub)**2)
dr = stats.norm(mur, sr)

# Same scale, mean multiplied by (1 + sa**2 / mua**2).
murc = mub / mua * (1 + sa**2 / mua**2) - 1
drc = stats.norm(murc, sr)

# Layout options shared by both figures of this cell.
solid_black = dict(color='black', dash='solid')
common_layout = dict(
    xaxis_title='x',
    yaxis_title='Плотность вероятности',
    barmode='overlay',
    hovermode="x",
    height=550,
)

# Figure 1: densities of A and B.
x = np.linspace(0, 10, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=da.pdf(x), mode='lines', line=solid_black, name='A'),
    go.Scatter(x=x, y=db.pdf(x), mode='lines', line=solid_black, name='B', opacity=0.3),
])
fig.update_layout(title='A, B', xaxis_range=[0, 5], **common_layout)
fig.show()

# Monte Carlo reference: draw A and B, form the relative difference per draw.
N = 100000
samp_a = da.rvs(size=N)
samp_b = db.rvs(size=N)
samp_d = (samp_b - samp_a) / samp_a

# Figure 2: both approximations (lines) against the sampled histogram.
x = np.linspace(-3, 3, 1000)
fig = go.Figure(data=[
    go.Scatter(x=x, y=dr.pdf(x), mode='lines', line=solid_black, name='Error Prop'),
    go.Scatter(x=x, y=drc.pdf(x), mode='lines', line=solid_black,
               name='EB/EA corrected', opacity=0.3),
    go.Histogram(x=samp_d, histnorm='probability density', name='Samp', nbinsx=1000,
                 marker_color='green', opacity=0.5),
])
fig.update_layout(title='Rel Diff', xaxis_range=[-3, 3], **common_layout)
fig.show()

In [None]:
# Compare two normal approximations of (B - A) / A with the sampled distribution
# when A is close to zero: A ~ Norm(0.1, 0.04**2) has its mean only 2.5 standard
# deviations away from 0; B ~ Norm(0.1, 0.05**2).
mua = 0.1
sa = 0.04
mub = 0.1
sb = 0.05

da = stats.norm(loc=mua, scale=sa)
db = stats.norm(loc=mub, scale=sb)

# Plain error-propagation moments (covariance term omitted).
mur = mub / mua - 1
sr = (mub / mua) * np.sqrt( (sa/mua)**2 + (sb/mub)**2 )
dr = stats.norm(loc=mur, scale=sr)

# Corrected mean: mub/mua is multiplied by (1 + sa**2 / mua**2); the scale `sr`
# is shared with the uncorrected approximation, so it is computed only once.
murc = mub / mua * (1 + sa**2 / mua**2) - 1
drc = stats.norm(loc=murc, scale=sr)


# Figure 1: densities of A and B.
x = np.linspace(-1, 1, 1000)
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=da.pdf(x), 
                         mode='lines', line_color='black', line_dash='solid', name='A'))
fig.add_trace(go.Scatter(x=x, y=db.pdf(x), 
                         mode='lines', line_color='black', line_dash='solid', name='B', opacity=0.3))
fig.update_layout(title='A, B',
                  xaxis_title='x',
                  yaxis_title='Плотность вероятности',
                  barmode='overlay',
                  hovermode="x",
                  height=550)
fig.update_layout(xaxis_range=[-1, 1])
fig.show()


# Monte Carlo reference: draw A and B, form the relative difference per draw.
N = 100000
samp_a = da.rvs(N)
samp_b = db.rvs(N)
samp_d = (samp_b - samp_a) / samp_a


# Figure 2: both approximations (lines) against the sampled histogram.
# The curve grid spans the same [-10, 10] window as the histogram filter and the
# x-axis range, so the lines are drawn across the whole visible plot.
x = np.linspace(-10, 10, 1000)
fig = go.Figure()
# Both curves are centred on mub/mua - 1 (plain / corrected), hence the labels.
fig.add_trace(go.Scatter(x=x, y=dr.pdf(x), 
                         mode='lines', line_color='black', line_dash='solid', name='mub/mua'))
fig.add_trace(go.Scatter(x=x, y=drc.pdf(x), 
                         mode='lines', line_color='black', line_dash='solid', opacity=0.3, name='mub/mua corrected'))
# Only draws strictly inside (-10, 10) are passed to the histogram.
fig.add_trace(go.Histogram(x=samp_d[(samp_d > -10) & (samp_d < 10)] , histnorm='probability density', name='Samp', nbinsx=10000,
                           marker_color='green', opacity=0.5))
fig.update_layout(title='Rel Diff',
                  xaxis_title='x',
                  yaxis_title='Плотность вероятности',
                  barmode='overlay',
                  hovermode="x",
                  height=550)
fig.update_layout(xaxis_range=[-10, 10])
fig.show()

$$
\begin{align}
R(a,b) &= \frac{b - a}{a}, \\[6pt]
\frac{\partial R}{\partial a} &= -\frac{b}{a^{2}}, \qquad
\frac{\partial R}{\partial b} = \frac{1}{a}, \\[10pt]
\sigma_R^{2}
&=
\left(\frac{\partial R}{\partial a}\right)^{2} \sigma_a^{2}
+ \left(\frac{\partial R}{\partial b}\right)^{2} \sigma_b^{2}
+ 2\,\frac{\partial R}{\partial a}\frac{\partial R}{\partial b}\,\mathrm{Cov}(a,b) \\[6pt]
&=
\left(\frac{b}{a^{2}}\right)^{2} \sigma_a^{2}
+ \left(\frac{1}{a}\right)^{2} \sigma_b^{2}
- 2\,\frac{b}{a^{3}}\,\mathrm{Cov}(a,b), \\[10pt]
\sigma_R
&=
\sqrt{
\frac{b^{2}}{a^{4}}\,\sigma_a^{2}
+ \frac{1}{a^{2}}\,\sigma_b^{2}
- \frac{2b}{a^{3}}\,\mathrm{Cov}(a,b)
}
\\
&=
\frac{|b|}{|a|}
\sqrt{
\frac{\sigma_a^{2}}{a^{2}}
+ \frac{\sigma_b^{2}}{b^{2}}
- 2\,\frac{\mathrm{Cov}(a,b)}{ab}
}.
\end{align}
$$

\begin{align}
\text{General definition:}\quad
\mathbb{E}\!\left[\frac{B}{A}\right]
&= \iint \frac{b}{a}\, f_{A,B}(a,b)\, da\, db. \\[10pt]
\text{Delta--method approximation:}\quad
\mathbb{E}\!\left[\frac{B}{A}\right]
&\approx
\frac{\mu_B}{\mu_A}
\left(
1
+ \frac{\sigma_A^{2}}{\mu_A^{2}}
- \frac{\sigma_{AB}}{\mu_A \mu_B}
\right), \\[6pt]
\text{where}\quad
\mu_A &= \mathbb{E}[A],\quad
\mu_B = \mathbb{E}[B],\quad
\sigma_A^{2} = \mathrm{Var}(A),\quad
\sigma_{AB}=\mathrm{Cov}(A,B). \\[12pt]
\text{If $A$ and $B$ are independent:}\quad
\mathbb{E}\!\left[\frac{B}{A}\right]
&\approx
\frac{\mu_B}{\mu_A}
\left(
1 + \frac{\sigma_A^{2}}{\mu_A^{2}}
\right). \\[12pt]
\text{Lognormal case (exact for independent $A$, $B$; here $\mu$, $\sigma^{2}$ are log-scale parameters):}\quad
A &\sim \mathrm{Lognormal}(\mu_A,\sigma_A^{2}),\quad
B \sim \mathrm{Lognormal}(\mu_B,\sigma_B^{2}), \\[4pt]
\mathbb{E}\!\left[\frac{B}{A}\right]
&=
\exp\!\left(
(\mu_B - \mu_A) + \tfrac{1}{2}(\sigma_B^{2} + \sigma_A^{2})
\right).
\end{align}
