### Лабораторная работа по мат. статистике №1

In [None]:
import numpy as np
from scipy.stats import skew, kurtosis
from IPython.display import display, Latex
import matplotlib.pyplot as plt


# LaTeX multiplication dot; concatenated into the formula strings built by the display_* helpers.
cdot = r"\cdot"

**Получение данных**

In [None]:
def read_data_from_file(filename: str):
    """Read a grouped sample from a text file.

    Every non-blank line must have the form ``(a;b) freq``: an interval whose
    bounds are separated by a semicolon, then whitespace, then an integer
    frequency. Blank lines (for example a trailing newline at the end of the
    file) are ignored.

    Args:
        filename: Path to a UTF-8 encoded text file.

    Returns:
        A pair ``(bins, freqs)``: ``bins`` is a list of tuples of floats parsed
        from the interval bounds, ``freqs`` is the list of integer frequencies,
        both in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not consist of exactly two
            whitespace-separated fields, or a field cannot be parsed as a number.
    """
    bins, freqs = [], []
    with open(filename, "r", encoding="UTF-8") as f:
        # Iterate lazily instead of materialising the whole file with readlines().
        for line in f:
            if not line.strip():
                # A blank line would otherwise break the two-value unpacking below.
                continue
            bin_, freq = line.split()
            bins.append(tuple(map(float, bin_.strip("()").split(";"))))
            freqs.append(int(freq))
    return bins, freqs


# The parser splits interval bounds on ";", so the prompt must advertise "(a;b)".
# Surrounding whitespace is stripped so a pasted path with a trailing space still opens.
filename = input("Enter filename with data\n<(a;b) freq> in each line").strip()

bins, freqs = read_data_from_file(filename)

n = sum(freqs)  # total number of observations
if n == 0:
    # Fail with a clear message instead of a ZeroDivisionError in the probabilities below.
    raise ValueError(f"No observations found in {filename!r}")

midpoints = [(a + b) / 2 for a, b in bins]  # midpoint of every interval
labels = [f"({a};{b})" for a, b in bins]  # interval labels for the x axis
bin_lengths = [b - a for a, b in bins]  # width of every interval
probs = [freq / n for freq in freqs]  # relative frequencies
cumulative_probs = np.cumsum(probs)  # running total of the relative frequencies

# Expand the grouped sample: each midpoint repeated by its frequency, so that
# array-based numpy/scipy functions can be applied directly.
data = np.repeat(midpoints, freqs)

FileNotFoundError: [Errno 2] No such file or directory: ''

**Гистограмма**

In [None]:
# Histogram: one bar per interval, centred on the midpoint and as wide as the interval.
fig, ax = plt.subplots()
ax.bar(midpoints, freqs, width=bin_lengths, edgecolor="black", color="skyblue")

# Axis decoration: interval labels on the x ticks, titles, dashed horizontal grid.
ax.set_xticks(midpoints)
ax.set_xticklabels(labels)
ax.set_xlabel("Интервалы")
ax.set_ylabel("Частота")
ax.set_title("Гистограмма распределения")
ax.grid(axis="y", linestyle="--", alpha=0.7)

# Print the frequency just above each bar.
for center, count in zip(midpoints, freqs):
    ax.text(center, count + 1, str(count), ha="center", va="bottom")

plt.show()

**Функция распределения**

In [None]:
# Cumulative relative frequencies drawn as bars. Each bar is as wide as its own
# interval (bin_lengths) rather than a hard-coded 5, so the plot stays correct
# for data whose intervals have a different or non-uniform width.
plt.bar(
    midpoints,
    cumulative_probs,
    width=bin_lengths,
    edgecolor="black",
    color="skyblue",
)

# Axis decoration
plt.xticks(midpoints, labels)  # interval labels on the x axis
plt.xlabel("Интервалы")
plt.ylabel(r"$F^*_\xi (x)$", rotation=0, size=11)
plt.title("Функция распределения")

plt.grid(axis="y", linestyle="--", alpha=0.7)
# Print the cumulative value (3 decimals) just above each bar.
for x, y in zip(midpoints, cumulative_probs):
    plt.text(x, y + 0.01, str(np.round(y, 3)), ha="center", va="bottom")

plt.show()

**Оценка среднего**

In [None]:
def display_mean(data, freqs, midpoints):
    """Render the sample-mean estimate as a LaTeX formula.

    Args:
        data: Expanded sample; its arithmetic mean (rounded to 3 decimals) is
            the value shown on the right-hand side.
        freqs: Group frequencies; shown as weights in the expanded sum and
            summed to obtain the denominator.
        midpoints: Group midpoints, paired element-wise with ``freqs``.
    """
    mean = np.round(np.mean(data), 3)
    # One "freq \cdot midpoint" term per group.
    terms = " + ".join(f"{freq}{cdot}{mp}" for freq, mp in zip(freqs, midpoints))
    # Raw strings: "\h" and "\o" are invalid escape sequences in a normal literal.
    mean_latex = (
        r"\hat{m} = \overline{X}_n = \frac{1}"
        + "{"
        + str(sum(freqs))
        + "}"
        + f"({terms})"
        + f"= {mean}"
    )
    display(Latex(f"${mean_latex}$"))


display_mean(data, freqs, midpoints)

**Оценка дисперсии**
1) Выборочная неисправленная дисперсия

In [None]:
def display_var(data, freqs, midpoints):
    """Render the uncorrected (biased) sample variance as a LaTeX formula.

    Args:
        data: Expanded sample; the displayed value is ``np.var(data)`` rounded
            to 3 decimals.
        freqs: Group frequencies; their sum is the sample size shown in the
            formula.
        midpoints: Group midpoints, paired element-wise with ``freqs``.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    mean = np.round(np.mean(data), 3)  # rounded only for display in the formula
    # Let numpy use the exact mean: passing the rounded one skews the result and
    # the `mean=` keyword is unavailable before NumPy 2.0.
    variance = np.round(np.var(data), 3)

    # One "(midpoint - mean)^2 \cdot freq" term per group.
    terms = " + ".join(
        f"({mp}-{mean})^2{cdot}{freq}" for freq, mp in zip(freqs, midpoints)
    )
    var_latex = (
        r"S^2_n = \hat{\mu}_2= \frac{1}"
        + "{"
        + str(n)
        + "}"
        + f"({terms})"
        + f"= {variance}"
    )
    display(Latex(f"${var_latex}$"))


display_var(data, freqs, midpoints)

2) Выборочная исправленная дисперсия

In [None]:
def display_var_fixed(data, freqs, midpoints):
    """Render the corrected (unbiased) sample variance as a LaTeX formula.

    Args:
        data: Expanded sample; the displayed value is its variance with
            ``ddof=1`` (i.e. ``n / (n - 1)`` times the uncorrected variance),
            rounded to 3 decimals.
        freqs: Group frequencies; their sum is the sample size shown in the
            formula.
        midpoints: Unused; accepted so that all display_* helpers share one
            signature.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    # ddof=1 divides by (n - 1), which is exactly the n / (n - 1) correction.
    variance = np.round(np.var(data, ddof=1), 3)

    var_latex = (
        r"s^2_n = \hat{\sigma}{^2} = \frac"
        + "{"
        + str(n)
        + "}"
        + "{"
        + str(n - 1)
        + "}"
        + "S^2_n"
        + f"= {variance}"
    )
    display(Latex(f"${var_latex}$"))


display_var_fixed(data, freqs, midpoints)

**Медиана**

In [None]:
def display_median(data, freqs, midpoints):
    """Render the sample median together with its order-statistic definition.

    Args:
        data: Expanded sample; the displayed value is ``np.median(data)``.
        freqs: Group frequencies; their sum is the sample size shown in the
            formula.
        midpoints: Unused; accepted so that all display_* helpers share one
            signature.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    median = np.median(data)

    # Raw strings avoid invalid escape sequences; \mathbb{Z} replaces \Z, which
    # is not a macro in a default MathJax setup.
    median_latex = (
        r"\hat{u}_{\frac{1}{2}} ="
        + r"\begin{cases} x_{[\frac{1}{2}\cdot n] + 1}, \frac{1}{2}\cdot n \notin \mathbb{Z} \\ \frac{x_{[\frac{1}{2}\cdot n]} + x_{[\frac{1}{2}\cdot n] + 1}}{2}, \frac{1}{2}\cdot n \in \mathbb{Z} \end{cases}"
        + f"= [n = {n}]"
        + f"= {median}"
    )
    display(Latex(f"${median_latex}$"))


display_median(data, freqs, midpoints)

**Коэффициент асимметрии**

In [None]:
def display_skewness(data, freqs, midpoints):
    """Render the sample skewness coefficient as a LaTeX formula.

    Args:
        data: Expanded sample; the displayed value is ``scipy.stats.skew(data)``
            rounded to 3 decimals.
        freqs: Group frequencies; shown as weights in the expanded sums and
            summed to obtain the sample size.
        midpoints: Group midpoints, paired element-wise with ``freqs``.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    mean = np.round(np.mean(data), 3)  # rounded only for display in the formula
    skewness = np.round(skew(data), 3)

    def moment_sum(power):
        # Expanded sum of "(midpoint - mean)^power \cdot freq" over all groups.
        return " + ".join(
            f"({mp}-{mean})^{power}{cdot}{freq}" for freq, mp in zip(freqs, midpoints)
        )

    inv_n = r"\frac{1}{" + str(n) + "}"
    skewness_latex = (
        r"\hat{\rho}"
        + r"= \frac{\hat{\mu}_3}{\hat{\mu}_2^\frac{3}{2}}"
        + r"= \frac{"
        + inv_n
        + f"({moment_sum(3)})"
        + "}{"
        + "("
        + inv_n
        + f"({moment_sum(2)})"
        + r")^\frac{3}{2}}"
        + f"= {skewness}"
    )
    display(Latex(f"${skewness_latex}$"))


display_skewness(data, freqs, midpoints)

**Коэффициент эксцесса**

In [None]:
def display_kurtosis(data, freqs, midpoints):
    """Render the sample excess-kurtosis coefficient as a LaTeX formula.

    Args:
        data: Expanded sample; the displayed value is
            ``scipy.stats.kurtosis(data)`` rounded to 3 decimals.
        freqs: Group frequencies; shown as weights in the expanded sums and
            summed to obtain the sample size.
        midpoints: Group midpoints, paired element-wise with ``freqs``.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    mean = np.round(np.mean(data), 3)  # rounded only for display in the formula
    kurt = np.round(kurtosis(data), 3)

    def moment_sum(power):
        # Expanded sum of "(midpoint - mean)^power \cdot freq" over all groups.
        return " + ".join(
            f"({mp}-{mean})^{power}{cdot}{freq}" for freq, mp in zip(freqs, midpoints)
        )

    inv_n = r"\frac{1}{" + str(n) + "}"
    kurt_latex = (
        r"\hat{\kappa}"
        + r"= \frac{\hat{\mu}_4}{\hat{\mu}_2^2} - 3"
        + r"= \frac{"
        + inv_n
        + f"({moment_sum(4)})"
        + "}{"
        + "("
        + inv_n
        + f"({moment_sum(2)})"
        + ")^2} - 3"
        + f"= {kurt}"
    )
    display(Latex(f"${kurt_latex}$"))


display_kurtosis(data, freqs, midpoints)

**Нижний квартиль**

In [None]:
def display_q1(data, freqs, midpoints):
    """Render the lower quartile together with its order-statistic definition.

    Args:
        data: Expanded sample from which the 25th percentile is taken.
        freqs: Group frequencies; their sum is the sample size shown in the
            formula.
        midpoints: Unused; accepted so that all display_* helpers share one
            signature.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    # The displayed definition picks x_([pn]+1) when p*n is not an integer and
    # averages the two neighbouring order statistics otherwise. That rule is
    # numpy's "averaged_inverted_cdf" method; the default ("linear") interpolates
    # and can give a value that contradicts the formula.
    q1 = np.percentile(data, 25, method="averaged_inverted_cdf")

    # Raw strings avoid invalid escape sequences; \mathbb{Z} replaces \Z, which
    # is not a macro in a default MathJax setup.
    q1_latex = (
        r"\hat{u}_{\frac{1}{4}} ="
        + r"\begin{cases} x_{[\frac{1}{4}\cdot n] + 1}, \frac{1}{4}\cdot n \notin \mathbb{Z} \\ \frac{x_{[\frac{1}{4}\cdot n]} + x_{[\frac{1}{4}\cdot n] + 1}}{2}, \frac{1}{4}\cdot n \in \mathbb{Z} \end{cases}"
        + f"= [n = {n}]"
        + f"= {q1}"
    )
    display(Latex(f"${q1_latex}$"))


display_q1(data, freqs, midpoints)

**Верхний квартиль**

In [None]:
def display_q3(data, freqs, midpoints):
    """Render the upper quartile together with its order-statistic definition.

    Args:
        data: Expanded sample from which the 75th percentile is taken.
        freqs: Group frequencies; their sum is the sample size shown in the
            formula.
        midpoints: Unused; accepted so that all display_* helpers share one
            signature.
    """
    # Take the sample size from the arguments rather than from a module-level
    # variable, so the formula always matches the data that was passed in.
    n = sum(freqs)
    # The displayed definition picks x_([pn]+1) when p*n is not an integer and
    # averages the two neighbouring order statistics otherwise. That rule is
    # numpy's "averaged_inverted_cdf" method; the default ("linear") interpolates
    # and can give a value that contradicts the formula.
    q3 = np.percentile(data, 75, method="averaged_inverted_cdf")

    # Raw strings avoid invalid escape sequences; \mathbb{Z} replaces \Z, which
    # is not a macro in a default MathJax setup.
    q3_latex = (
        r"\hat{u}_{\frac{3}{4}} ="
        + r"\begin{cases} x_{[\frac{3}{4}\cdot n] + 1}, \frac{3}{4}\cdot n \notin \mathbb{Z} \\ \frac{x_{[\frac{3}{4}\cdot n]} + x_{[\frac{3}{4}\cdot n] + 1}}{2}, \frac{3}{4}\cdot n \in \mathbb{Z} \end{cases}"
        + f"= [n = {n}]"
        + f"= {q3}"
    )
    display(Latex(f"${q3_latex}$"))


display_q3(data, freqs, midpoints)

**Добавление выброса (одного наблюдения 10000) и пересчёт оценок по данным с выбросом**

In [None]:
# Extend the grouped sample with a degenerate interval that holds a single
# observation at 10000.
new_bins = [*bins, (10000, 10000)]
new_freqs = [*freqs, 1]

# NOTE(review): this rebinds the module-level `midpoints`, `n` and `data`;
# cells above that are re-run after this one will see the extended values.
midpoints = [(low + high) / 2 for low, high in new_bins]
n = sum(new_freqs)

data = np.repeat(midpoints, new_freqs)

# Recompute every estimate on the sample that now contains the outlier,
# in the same order as they were shown for the original sample.
for show in (
    display_mean,
    display_var,
    display_var_fixed,
    display_median,
    display_skewness,
    display_kurtosis,
    display_q1,
    display_q3,
):
    show(data, new_freqs, midpoints)