## Лабораторная работа по мат. статистике №3

### Линейная регрессия


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from IPython.display import display, Latex

n = 500  # number of samples drawn in every experiment below


def pretty_print_coefficients(*coefs):
    """Display regression coefficients and the fitted equation as LaTeX.

    One line ``beta_i = value`` is rendered per coefficient, followed by the
    approximate model ``y ≈ beta_0 + beta_1 * x ...``.

    Args:
        *coefs: Numeric coefficients, intercept first, then one coefficient
            per regressor in order.
    """
    # Braced subscripts keep multi-digit indices intact (\beta_{10}).
    lines = [f"\\beta_{{{i}}} = {c}" for i, c in enumerate(coefs)]

    # A single regressor is written as plain x; several regressors are
    # numbered x_1, x_2, ... so that every term is labelled the same way.
    single_regressor = len(coefs) <= 2

    pieces = []
    for i, c in enumerate(coefs):
        if i == 0:
            pieces.append(str(c))
            continue
        variable = "x" if single_regressor else f"x_{{{i}}}"
        # Fold the sign into the operator so negatives do not print as "+-".
        sign, magnitude = ("-", -c) if c < 0 else ("+", c)
        pieces.append(f" {sign} {magnitude}\\cdot {variable}")

    tex = "\\newline".join(lines) + "\\newline y \\approx " + "".join(pieces)
    display(Latex(f"${tex}$"))


def plot_regr(title, x, y, *coefs):
    """Scatter the sample and overlay the fitted polynomial curve.

    Args:
        title: Title shown above the plot.
        x: Abscissas of the sample points.
        y: Ordinates of the sample points.
        *coefs: Polynomial coefficients in ascending order of power; the
            curve drawn is ``sum(coefs[i] * x**i)``.
    """
    plt.scatter(x, y, s=1)

    # Draw the curve across the x-range that the scatter plot settled on.
    x_min, x_max = plt.gca().get_xlim()
    xx = np.linspace(x_min, x_max, 30)
    yy = np.sum([c * xx**i for i, c in enumerate(coefs)], axis=0)

    # Legend text: beta_0 + beta_1 x + beta_2 x^2 + ...; indices and
    # exponents are braced so multi-digit values render correctly.
    terms = []
    for i in range(len(coefs)):
        if i == 0:
            power = ""
        elif i == 1:
            power = "x"
        else:
            power = f"x^{{{i}}}"
        terms.append(f"\\beta_{{{i}}}{power}")
    label = "$ y =" + "+".join(terms) + "$"

    plt.plot(xx, yy, label=label, color="r")
    plt.title(title)
    plt.xlabel("x")
    plt.ylabel("y", rotation=0)
    plt.legend()

    plt.show()

### 1. Линейная регрессия


Строим случайную выборку $ x \in N(1,5) $


In [None]:
# Regressor sample: normal with mean 1 and variance 5 (std = sqrt(5)).
x = np.random.normal(1, np.sqrt(5), n)
# Noise-free response of the true model y = 3x - 4.
y_base = 3 * x - 4

# Unbiased sample variance of x (ddof=1 divides by n - 1), computed by NumPy
# instead of iterating the array with the builtin sum.
sx = np.var(x, ddof=1)

#### 1.1. $ \epsilon \in N(0, 10) $

In [None]:
# Additive Gaussian noise: zero mean, variance 10 (std = sqrt(10)).
eps = np.random.normal(loc=0, scale=np.sqrt(10), size=n)
# Observed response is the exact model output perturbed by the noise.
y = eps + y_base

Вычисление коэффициентов регрессии ($ \beta_0, \beta_1 $)


In [None]:
# Slope: sum of co-deviations of x and y over (n - 1) times the variance of x.
b1 = (x - x.mean()) @ (y - y.mean()) / ((n - 1) * sx)
# Intercept chosen so the fitted line passes through the point of sample means.
b0 = y.mean() - x.mean() * b1
pretty_print_coefficients(b0, b1)

График


In [None]:
# Raw string: the LaTeX backslashes (\epsilon, \in) are not valid Python escapes.
plot_regr(r"Линейная регрессия, $ \epsilon \in N(0, 10) $", x, y, b0, b1)

#### 1.2. $ \epsilon \in U(-15, 15) $

In [None]:
# Uniform noise on [-15, 15): the lower bound must come first, since NumPy
# leaves the result undefined when high < low.
eps = np.random.uniform(-15, 15, n)
y = y_base + eps

Вычисление коэффициентов регрессии ($ \beta_0, \beta_1 $)


In [None]:
# Slope: sum of co-deviations of x and y over (n - 1) times the variance of x.
b1 = (x - x.mean()) @ (y - y.mean()) / ((n - 1) * sx)
# Intercept chosen so the fitted line passes through the point of sample means.
b0 = y.mean() - x.mean() * b1
pretty_print_coefficients(b0, b1)

График


In [None]:
# Raw string: the LaTeX backslashes (\epsilon, \in) are not valid Python escapes.
plot_regr(r"Линейная регрессия, $ \epsilon \in U(-15, 15) $", x, y, b0, b1)

#### 1.3. $ f(\epsilon) = \frac{\sqrt{10}}{\pi(\epsilon^2 + 10)} $ (распределение Коши)


In [None]:
# Cauchy noise centred at 0 with scale sqrt(10).
eps = stats.cauchy.rvs(loc=0, scale=np.sqrt(10), size=n)
# Clamp the heavy tails to the sample's 2nd and 98th percentiles.
lower, upper = np.percentile(eps, [2, 98])
eps = eps.clip(lower, upper)
y = eps + y_base

In [None]:
# Slope: sum of co-deviations of x and y over (n - 1) times the variance of x.
b1 = (x - x.mean()) @ (y - y.mean()) / ((n - 1) * sx)
# Intercept chosen so the fitted line passes through the point of sample means.
b0 = y.mean() - x.mean() * b1
pretty_print_coefficients(b0, b1)


In [None]:
# Raw string: the LaTeX backslashes (\epsilon, \in) are not valid Python escapes.
plot_regr(r"Линейная регрессия, $ \epsilon \in K(0, 10) $", x, y, b0, b1)

### 2. Трехмерная линейная регрессия


Строим случайную выборку $ x_1 \in N(10,15), x_2 \in N(3, 7) $


In [None]:
# Two normal regressors: x1 with mean 10 and variance 15, x2 with mean 3 and
# variance 7 (the scale argument is the standard deviation).
x1 = np.random.normal(loc=10, scale=np.sqrt(15), size=n)
x2 = np.random.normal(loc=3, scale=np.sqrt(7), size=n)
# Design matrix with one row per model term: constant, x1, x2 (shape 3 x n).
x = np.vstack((np.ones(n), x1, x2))

# Noise-free response of the true model y = 3 + 2*x1 - 4*x2.
y_base = 3 + 2 * x1 - 4 * x2

#### 2.1. $ \epsilon \in N(0, 10) $

In [None]:
# Additive Gaussian noise: zero mean, variance 10 (std = sqrt(10)).
eps = np.random.normal(loc=0, scale=np.sqrt(10), size=n)
# Observed response is the exact model output perturbed by the noise.
y = eps + y_base

In [None]:
# Least-squares estimate from the normal equations (x x^T) beta = x y, where
# each row of x is one model term. Solving the system directly avoids forming
# an explicit matrix inverse.
coefficients = np.linalg.solve(x @ x.T, x @ y)
pretty_print_coefficients(*coefficients)

#### 2.2. $ \epsilon \in U(-15, 15) $

In [None]:
# Uniform noise on [-15, 15): the lower bound must come first, since NumPy
# leaves the result undefined when high < low.
eps = np.random.uniform(-15, 15, n)
y = y_base + eps

In [None]:
# Least-squares estimate from the normal equations (x x^T) beta = x y, where
# each row of x is one model term. Solving the system directly avoids forming
# an explicit matrix inverse.
coefficients = np.linalg.solve(x @ x.T, x @ y)
pretty_print_coefficients(*coefficients)

#### 2.3. $ f(\epsilon) = \frac{\sqrt{10}}{\pi(\epsilon^2 + 10)} $ (распределение Коши)


In [None]:
# Cauchy noise centred at 0 with scale sqrt(10).
eps = stats.cauchy.rvs(loc=0, scale=np.sqrt(10), size=n)
# Clamp the heavy tails to the sample's 2nd and 98th percentiles.
lower, upper = np.percentile(eps, [2, 98])
eps = eps.clip(lower, upper)
y = eps + y_base

In [None]:
# Least-squares estimate from the normal equations (x x^T) beta = x y, where
# each row of x is one model term. Solving the system directly avoids forming
# an explicit matrix inverse.
coefficients = np.linalg.solve(x @ x.T, x @ y)
pretty_print_coefficients(*coefficients)