In [0]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sps

**Гипотеза 6.** Две независимые нормальные выборки, дисперсии равны, но не известны, т.е. $\sigma_x=\sigma_y$

$$x_1, x_2, ..., x_n \ \sim \ \mathcal{N}(\theta_x, \sigma^2)$$
$$y_1, y_2, ..., y_m \ \sim \ \mathcal{N}(\theta_y, \sigma^2)$$
$$\delta = \theta_x - \theta_y$$

Возьмем равномерный априор.


$$\sqrt{\frac{n+m-2}{\frac{1}{n} + \frac{1}{n}}}\frac{\delta - (\overline{X} - \overline{Y})}{\sqrt{nS_x^2 + mS_y^2}} \ |\  (X, Y) \sim T_{n+m-2}$$


In [0]:
class Hypotheses2NormEqualSigmaUniformPrior():
  def __init__(self, alternative="<>"):
    assert alternative in ('<>', '<', '>')

    self.alternative=alternative

  def test(self, X, Y, method='Lindley', level=0.05, *args, **kwargs):
    assert method in ('modify', 'Lindley', 'atom')
    assert 0 < level < 1

    self.level=level


    n = len(X)
    m = len(Y)
    self.df = n+m-2
    self.scale = np.sqrt(np.sum(X**2) + np.sum(Y**2)) / np.sqrt((n+m-2) / (1./n + 1./m))
    self.loc = X.mean() - Y.mean()
    posterior = sps.t(df=self.df, loc=self.loc, scale=self.scale)

    if method=='Lindley' or method=='modify':
      HDR = (posterior.ppf(level/2.),
              posterior.ppf(1 - level/2.))
      
    if method in ('modify'):
      if not 'eps' in args:
        eps = 1e-3
      if self.alternative== '<>':
        p0 = posterior.cdf(eps) - posterior.cdf(-eps)
        p1 = 2*posterior.cdf(-eps)
      if self.alternative== '<':
         p0 =  posterior.cdf(0) - posterior.cdf(-eps)
         p1 = posterior.cdf(-eps)
      if self.alternative== '>':
        p0 =  posterior.cdf(eps) - posterior.cdf(0)
        p1 = posterior.cdf(-eps)
      p0 /= p0 + p1
      p1 /= p0 + p1


    if method == 'Lindley':
      return {"posterior_df":self.df, 
              "posterior_loc":self.loc, 
              "posterior_scale":self.scale, "HDR":HDR}

    if method == 'modify':
      return {"posterior_df":self.df, "posterior_loc":self.loc, 
              "posterior_scale":self.scale, "HDR":HDR, "p0":p0, "p1" : p1}

In [4]:
# тестирование

X = sps.norm(1, 5).rvs(10)
Y = sps.norm(1, 5).rvs(20)

stat_test = Hypotheses2NormEqualSigmaUniformPrior()

stat_test.test(X, Y)

{'HDR': (-2.8678050663255195, 5.937863121601016),
 'posterior_df': 28,
 'posterior_loc': 1.5350290276377483,
 'posterior_scale': 2.1493940360433332}

In [5]:
stat_test.test(X, Y, method='modify')

{'HDR': (-2.8678050663255195, 5.937863121601016),
 'p0': 0.0005887005625250925,
 'p1': 0.9987769557486965,
 'posterior_df': 28,
 'posterior_loc': 1.5350290276377483,
 'posterior_scale': 2.1493940360433332}

In [6]:
X = sps.norm(1, 5).rvs(200)
Y = sps.norm(3, 5).rvs(200)

stat_test = Hypotheses2NormEqualSigmaUniformPrior()

print(stat_test.test(X, Y, method='Lindley'))

stat_test.test(X, Y, method='modify', eps=1e-5)

{'posterior_df': 398, 'posterior_loc': -2.3679625973302203, 'posterior_scale': 0.5476934922640309, 'HDR': (-3.4446964143384062, -1.2912287803220346)}


{'HDR': (-3.4446964143384062, -1.2912287803220346),
 'p0': 7.682242929407018e-08,
 'p1': 0.9999999615884106,
 'posterior_df': 398,
 'posterior_loc': -2.3679625973302203,
 'posterior_scale': 0.5476934922640309}

In [11]:
stat_test = Hypotheses2NormEqualSigmaUniformPrior(alternative="<")

stat_test.test(X, Y, method='modify', eps=1e-5)

{'HDR': (-3.4446964143384062, -1.2912287803220346),
 'p0': 7.711277295506089e-08,
 'p1': 0.9999999228864779,
 'posterior_df': 398,
 'posterior_loc': -2.3679625973302203,
 'posterior_scale': 0.5476934922640309}