## Лабораторная работа по мат. статистике №2


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from IPython.display import display, Latex

def pretty_print_confidence_interval(value_latex: str, left_bound: float, right_bound: float, confidence_prob: float) -> None:
    """Render a confidence interval as a LaTeX formula in the notebook output.

    Args:
        value_latex: LaTeX source of the estimated quantity (e.g. ``r"\\mu"``).
        left_bound: Lower bound of the interval.
        right_bound: Upper bound of the interval.
        confidence_prob: Confidence level printed on the right-hand side.
    """
    # Raw f-string: "\leq" must reach LaTeX verbatim and "\l" is not a valid
    # Python escape sequence, so a non-raw literal triggers a SyntaxWarning.
    display(Latex(rf"$P({left_bound} \leq {value_latex} \leq {right_bound}) = {confidence_prob}$"))

### 1. Построение доверительных интервалов для среднего и дисперсии


#### 1.1. Нормальное распределение N(5, 10), n=500, α = 0.05


Генерирую выборку, строю ее график


In [None]:
# Sample size, mean and standard deviation of the simulated normal population.
n = 500
m = 5
v = np.sqrt(10)  # standard deviation (the variance is 10)
alpha = 0.05

selection = np.random.normal(m, v, n)

# Normalised histogram of the sample.
count, bins, ignored = plt.hist(selection, 250, density=True)

# Theoretical density evaluated on the bin edges.
plt.plot(
    bins,
    stats.norm.pdf(bins, loc=m, scale=v),
    linewidth=2,
    color="r",
)
# Vertical marker at the true mean. ymin/ymax of axvline are axes fractions
# in [0, 1], so the defaults already span the whole plot height.
plt.axvline(x=m, color="black")

plt.show()

Доверительный интервал для среднего


In [None]:
def get_mean_confidence_interval(sel, var, count, n, alpha):
    """Return the two-sided (1 - alpha) confidence interval for the mean.

    The interval is the classical one for a population with a known standard
    deviation: ``mean -/+ z_{1 - alpha/2} * sigma / sqrt(n)``. It works both
    for a raw sample and for a grouped one (values plus frequencies).

    Args:
        sel: Observed values (raw sample) or bin midpoints (grouped sample).
        var: Known standard deviation of the population. The parameter name is
            kept for backward compatibility; it is not the variance.
        count: Frequency of every entry of ``sel`` (all ones for raw data).
        n: Total number of observations, i.e. the sum of ``count``.
        alpha: Significance level.

    Returns:
        Tuple ``(left, right)`` with the interval bounds.
    """
    values = np.asarray(sel, dtype=float)
    weights = np.asarray(count, dtype=float)

    # Weighted sample mean; reduces to the plain mean when all counts are 1.
    mean = np.sum(values * weights) / n

    # Quantile of the standard normal law, not an empirical percentile of the
    # data; the standard error is based on the total number of observations.
    z = stats.norm.ppf(1 - alpha / 2)
    half_width = z * var / np.sqrt(n)

    return mean - half_width, mean + half_width

# Raw (ungrouped) sample: every observation has frequency 1.
unit_counts = [1] * n
l_m, r_m = get_mean_confidence_interval(selection, v, unit_counts, n, alpha)
pretty_print_confidence_interval(r"\mu", l_m, r_m, 1 - alpha)

Доверительный интервал для дисперсии


In [None]:
def get_var_confidence_interval(sel, counts, n, alpha=0.05):
    """Return the two-sided (1 - alpha) confidence interval for the variance.

    Uses the chi-square interval
    ``(n - 1) * s^2 / chi2_{1 - alpha/2} <= sigma^2 <= (n - 1) * s^2 / chi2_{alpha/2}``
    with ``n - 1`` degrees of freedom. It works both for a raw sample and for
    a grouped one (values plus frequencies).

    Args:
        sel: Observed values (raw sample) or bin midpoints (grouped sample).
        counts: Frequency of every entry of ``sel`` (all ones for raw data).
        n: Total number of observations, i.e. the sum of ``counts``.
        alpha: Significance level.

    Returns:
        Tuple ``(left, right)`` with the interval bounds.

    Raises:
        ValueError: If ``n`` is smaller than 2 (the corrected variance is
            undefined).
    """
    if n < 2:
        raise ValueError("at least two observations are required")

    values = np.asarray(sel, dtype=float)
    weights = np.asarray(counts, dtype=float)

    # The mean and the spread are weighted by the frequencies so that grouped
    # data are handled with the real sample size, not the number of bins.
    mean = np.sum(values * weights) / n
    biased_var = np.sum(weights * (values - mean) ** 2) / n
    s = n * biased_var / (n - 1)  # corrected (unbiased) sample variance

    df = n - 1
    chi2_high = stats.chi2.ppf(1 - alpha / 2, df=df)  # gives the lower bound
    chi2_low = stats.chi2.ppf(alpha / 2, df=df)  # gives the upper bound

    l_v = df * s / chi2_high
    r_v = df * s / chi2_low
    return l_v, r_v


# Raw (ungrouped) sample: every observation has frequency 1.
unit_counts = [1] * n
l_v, r_v = get_var_confidence_interval(selection, unit_counts, n, alpha)
# The interval leaves alpha / 2 in each tail, so its confidence is 1 - alpha.
pretty_print_confidence_interval(r"\sigma^2", l_v, r_v, 1 - alpha)

#### 1.2 Группирование выборки, сгенерированной на предыдущем шаге


In [None]:
# Number of bins: 1 + floor(log2(n)).
k = int(np.log2(n)) + 1
count, bins, ignored = plt.hist(selection, k, density=False)
# Midpoint of every bin represents all observations that fell into it.
middle_points = (bins[:-1] + bins[1:]) / 2

l_m, r_m = get_mean_confidence_interval(middle_points, v, count, n, alpha)
pretty_print_confidence_interval(r"\mu", l_m, r_m, 1 - alpha)

l_v, r_v = get_var_confidence_interval(middle_points, count, n, alpha)
# The interval leaves alpha / 2 in each tail, so its confidence is 1 - alpha.
pretty_print_confidence_interval(r"\sigma^2", l_v, r_v, 1 - alpha)

#### 1.3 Биномиальное распределение B(0.3, 500), n=500, α = 0.05


In [None]:
n = 500  # number of observations in the sample
n_e = 500  # number of Bernoulli trials behind every observation
p = 0.3
alpha = 0.05

selection = np.random.binomial(n_e, p, n)

count, bins, ignored = plt.hist(selection, 250, density=True)

# Point estimate of p: average share of successes per observation.
p_est = np.mean(selection) / n_e
t_alph = stats.norm.ppf(1 - alpha / 2)

# p_est aggregates n * n_e Bernoulli trials, so its standard error is
# sqrt(p (1 - p) / (n * n_e)), not sqrt(p (1 - p) / n_e).
total_trials = n * n_e
half_width = t_alph * np.sqrt(p_est * (1 - p_est) / total_trials)

l_m = p_est - half_width
r_m = p_est + half_width

pretty_print_confidence_interval(r"p", l_m, r_m, 1 - alpha)

#### 1.3' Биномиальное распределение B(0.3, 50), n=500, α = 0.05


In [None]:
n_e = 50  # number of Bernoulli trials behind every observation

selection = np.random.binomial(n_e, p, n)
count, bins, ignored = plt.hist(selection, 250, density=True)

# Point estimate of p: average share of successes per observation.
p_est = np.mean(selection) / n_e
t_alph = stats.norm.ppf(1 - alpha / 2)

# Wilson score interval:
#   N / (N + z^2) * (p + z^2 / (2N) -/+ z * sqrt(p (1 - p) / N + z^2 / (4 N^2)))
# where N is the total number of Bernoulli trials behind p_est.
total_trials = n * n_e
z_sq = t_alph**2
shrink = total_trials / (total_trials + z_sq)
center = p_est + z_sq / (2 * total_trials)
spread = t_alph * np.sqrt(
    p_est * (1 - p_est) / total_trials + z_sq / (4 * total_trials**2)
)

l_m = shrink * (center - spread)
r_m = shrink * (center + spread)

pretty_print_confidence_interval(r"p", l_m, r_m, 1 - alpha)

#### 1.4 Распределение Пуассона P(2), n=500, α = 0.05


In [None]:
# Sample size, Poisson parameter and significance level.
n = 500
lamb = 2
alpha = 0.05

selection = np.random.poisson(lamb, n)
count, bins, ignored = plt.hist(selection, 250, density=True)

# The sample mean serves both as the estimate and as the variance estimate.
mean_p = np.mean(selection)
t_alph = stats.norm.ppf(1 - alpha / 2)

# Normal-approximation interval: mean -/+ z * sqrt(mean / n).
half_width = t_alph * np.sqrt(mean_p / n)
l_m, r_m = mean_p - half_width, mean_p + half_width

pretty_print_confidence_interval(r"\lambda", l_m, r_m, 1 - alpha)

#### 1.5 Экспоненциальное распределение E(4), n=500, α = 0.05


In [None]:
# Sample size, distribution parameter and significance level.
n = 500
lamb = 4
alpha = 0.05

# NOTE(review): np.random.exponential takes the scale (mean) as its first
# argument, so the interval below is built for that scale value - confirm that
# this matches the intended parametrisation of E(4).
selection = np.random.exponential(lamb, n)
count, bins, ignored = plt.hist(selection, 250, density=True)

mean_p = np.mean(selection)

# Chi-square quantiles with 2n degrees of freedom.
chi2_lower = stats.chi2.ppf(alpha / 2, df=2 * n)
chi2_upper = stats.chi2.ppf(1 - alpha / 2, df=2 * n)

# Interval bounds: 2 * sum(x) divided by the upper / lower quantile.
doubled_total = 2 * sum(selection)
l_m, r_m = doubled_total / chi2_upper, doubled_total / chi2_lower

pretty_print_confidence_interval(r"\lambda", l_m, r_m, 1 - alpha)

### 2. Ядерные оценки


In [None]:
def get_pdf_ker(
    kernel_function,
    selection: np.ndarray,
    h_func=lambda sel: 1.05 * np.std(sel) / np.power(len(sel), 1 / 5),
):
    """Build a kernel density estimate for a sample.

    The returned function evaluates
    ``f(x) = 1 / (n * h) * sum_i K((x - x_i) / h)``.

    Args:
        kernel_function: Kernel ``K``; it is called with ``(x - x_i) / h`` and
            must accept whatever type ``x`` has (scalar or array).
        selection: Sample the density is estimated from.
        h_func: Callable that maps the sample to the bandwidth ``h``. The
            default is ``1.05 * std(sample) * n ** (-1 / 5)``.

    Returns:
        Function ``pdf(x)`` that returns the estimated density at ``x``, with
        negative values replaced by zero.
    """
    # The bandwidth is computed once, when the estimator is built.
    h = h_func(selection)
    size = len(selection)

    def pdf(x: np.ndarray) -> np.ndarray:
        res = (1 / (size * h)) * sum(
            kernel_function((x - point) / h) for point in selection
        )
        # Zero out negative values (possible for kernels that go below zero).
        return res * (res > 0)

    return pdf

### 2.1 Гауссово ядро для оценки плотности распределения случайной выборки ~ N(5, 10)


In [None]:
# Sample size, mean and standard deviation of the simulated normal sample.
n = 500
m = 5
v = np.sqrt(10)

selection = np.random.normal(m, v, n)

plt.hist(selection, bins=30, density=True)

# Evaluation grid: the sample range widened by 1 on both sides.
x = np.linspace(np.min(selection) - 1, np.max(selection) + 1, 1000)


def gauss_kernel(u):
    """Standard normal density."""
    return 1 / np.sqrt(2 * np.pi) * np.exp(-(u**2) / 2)


def triangle_kernel(u):
    """Triangular kernel: 1 - |u| on [-1, 1], zero elsewhere."""
    return 1 - np.abs(np.where(np.abs(u) <= 1, u, 1))


def parabolic_kernel(u):
    """Parabolic kernel: 3/4 * (1 - u^2) on [-1, 1], zero elsewhere."""
    return 3 / 4 * (1 - np.where(np.abs(u) <= 1, u, 1) ** 2)


# Gaussian kernel with the default bandwidth rule of get_pdf_ker.
f_est_gauss = get_pdf_ker(kernel_function=gauss_kernel, selection=selection)
pdf_gauss = f_est_gauss(x)

# Triangular and parabolic kernels with a fixed bandwidth of 2.
f_est_triag = get_pdf_ker(
    kernel_function=triangle_kernel,
    selection=selection,
    h_func=lambda _sel: 2,
)
pdf_triag = f_est_triag(x)

f_est_parabolic = get_pdf_ker(
    kernel_function=parabolic_kernel,
    selection=selection,
    h_func=lambda _sel: 2,
)
pdf_parabolic = f_est_parabolic(x)

plt.plot(x, pdf_gauss, color="red", linewidth=2, label="Gauss")
# The triangular and parabolic curves are computed above but not drawn:
# plt.plot(
#     x,
#     pdf_triag,
#     color="violet",
#     linewidth=2,
#     label="Triangle",
# )
# plt.plot(
#     x,
#     pdf_parabolic,
#     color="black",
#     linewidth=2,
#     label="Parabolic",
# )
plt.title("Ядерная оценка плотности распределения")
plt.xlabel("Значения")
plt.ylabel("Плотность")
plt.legend()
plt.grid(True)
plt.show()

### 2.2 Гауссово ядро для оценки плотности распределения случайной выборки ~ U[1,2]


In [None]:
# Sample size and bounds of the simulated uniform sample.
n = 500
a = 1
b = 2

selection = np.random.uniform(a, b, n)

plt.hist(selection, 30, density=True)

# Gaussian kernel with the default bandwidth rule of get_pdf_ker.
f_est_gauss = get_pdf_ker(
    kernel_function=lambda u: 1 / np.sqrt(2 * np.pi) * np.exp((-1) * u**2 / 2),
    selection=selection,
)
# Evaluation grid: the sample range widened by 1 on both sides.
x = np.linspace(np.min(selection) - 1, np.max(selection) + 1, 1000)
pdf_gauss = f_est_gauss(x)
plt.plot(
    x,
    pdf_gauss,
    color="red",
    linewidth=2,
    label="Ядерная оценка (KDE)",
)
# Without a legend the label given to plt.plot is never displayed.
plt.legend()

plt.show()