In [None]:
%matplotlib notebook
%matplotlib inline

from scipy.stats import chi2, t, norm
import matplotlib.pyplot as plt
import numpy as np

from ipywidgets import interact, interactive, fixed, interact_manual, IntSlider, FloatSlider
import ipywidgets as widgets
from IPython.display import display

# Freiheitsgrade der empirischen Standardabweichung

$$
\begin{aligned}
E\left(\frac{1}{n-1} \sum_{i = 1}^n \left(X_i - \overline{X}\right)^2\right) &= \sigma^2 \\
\frac{1}{n-1} E\left(\sum_{i = 1}^n \left(X_i - \overline{X}\right)^2\right) &= \sigma^2 \\
E\left(\sum_{i = 1}^n \left(X_i - \overline{X}\right)^2\right) &= (n-1) \, \sigma^2
\end{aligned}
$$
Wir simulieren das Ziehen einer Stichprobe vom Umfang $n$ mit unabhängigen, identisch verteilten
$$
X_i \sim N(\mu,\sigma^2),
$$
dann gilt
$$
\frac{1}{\sigma^2}E\left(\sum_{i = 1}^n \left(X_i - \overline{X}\right)^2\right) = n-1
$$



In [None]:
def simualte_dof_stdev(mu, sd, n, sample_count):
    """Estimate by simulation the degrees of freedom of the sum of squared deviations.

    Draws ``sample_count`` samples of size ``n`` from a normal distribution
    with mean ``mu`` and standard deviation ``sd``. For every sample the sum
    of squared deviations from that sample's own mean is computed; the
    average of these sums divided by ``sd**2`` is printed next to the
    expected value ``n - 1``.

    Parameters
    ----------
    mu : float
        Mean of the normal distribution (``loc``).
    sd : float
        Standard deviation of the normal distribution (``scale``).
    n : int
        Size of each individual sample.
    sample_count : int
        Number of samples to simulate.

    Returns
    -------
    None
        The estimate is only printed, nothing is returned.
    """
    rv = norm(loc=mu, scale=sd)
    y = np.empty([sample_count], dtype=float)

    # Draw many samples per call (one row per sample) instead of one sample
    # per Python iteration. The batch size is capped so that only about
    # ``batch_values`` random numbers are held in memory at any time.
    batch_values = 100_000
    rows_per_batch = max(1, batch_values // max(n, 1))
    for start in range(0, sample_count, rows_per_batch):
        stop = min(start + rows_per_batch, sample_count)
        rvals = rv.rvs(size=(stop - start, n))
        # Centre every row on its own sample mean before squaring.
        centered = rvals - rvals.mean(axis=1, keepdims=True)
        y[start:stop] = np.sum(centered**2, axis=1)

    print(f'Freiheitsgrade der Simulation: {np.mean(y)/(sd**2)} ... erwartete Freiheitsgrade: {n - 1}')
    
# Interactive front end for simualte_dof_stdev: one slider per parameter.
# Slider ranges: mu in [-10, 200], sd in [1, 10], n in [5, 100] and
# sample_count in [100, 300000]; the initial values are 0, 1, 10 and 1000.
# NOTE(review): continuous_update=False is presumably meant to avoid re-running
# the simulation while a slider is still being dragged - confirm against the
# ipywidgets documentation.
interact(simualte_dof_stdev,
         mu = FloatSlider(min=-10.0, max=200.0, value=0.0, continuous_update=False),
         sd = FloatSlider(min=1.0, max=10.0, value=1.0, continuous_update=False),
         n = IntSlider(min=5, max=100, value=10, continuous_update=False),
         sample_count = IntSlider(min=100, max=300_000, value=1000, continuous_update=False))

# Grundlagen der Induktiven Statistik

## 1. Die $\chi^2$-Verteilung

Sind $X_1, \dots, X_n$ unabhängig und standardnormalverteilt, $X_i \sim N(0,1)$, so ist
$$Z = \sum_{i=1}^n X_i^2$$
$\chi^2$-verteilt mit $n$ Freiheitsgraden,

$$Z \sim \chi^2(n),$$

mit der Dichtefunktion:

$$
x > 0: \, f_n(x) = \frac{1}{2^{\frac{n}{2}}\Gamma\left(\frac{n}{2} \right)}x^{\frac{n}{2} - 1} e^{-\frac{x}{2}}
$$

Gammafunktion:

$$
\Gamma(x) = \int_0^\infty t^{x-1}e^{-t}dt
$$

$$
\Gamma\left(\frac{1}{2}\right) = \sqrt{\pi}, \, \Gamma(1) = 1, \, \Gamma(r + 1) = r \cdot \Gamma(r), \, r \in \mathbb{R}^+
$$

Simulation von $Z$ mit $n$ verschiedenen $X_i$, die i.i.d. aus $N(0,1)$ gezogen werden.

In [None]:
def simulate_square_sum(n = 2, N = 10000, num_bins = 30):
    """Plot simulated sums of ``n`` squared N(0,1) draws against the chi2(n) density.

    ``n * N`` standard normal values are drawn and arranged as an ``(n, N)``
    array; summing the squares along the first axis yields ``N`` simulated
    values. Their density-normalised histogram is drawn together with
    ``chi2.pdf(., n)`` evaluated on the range covered by the histogram bins.

    Parameters
    ----------
    n : int
        Number of squared standard normal variables per sum (degrees of freedom).
    N : int
        Number of simulated sums.
    num_bins : int
        Number of histogram bins.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    mu, sigma = 0, 1
    x = np.random.normal(mu, sigma, n * N)
    # Every column of the (n, N) array is one realisation of the sum.
    z = np.sum(np.reshape(x, (n, N))**2, axis=0)

    fig, ax = plt.subplots(1, 1)
    density, bins, _ = ax.hist(z, num_bins, density=True, histtype='stepfilled', alpha=0.6, label = "Simulation")
    # The bin edges are ascending, so the first and last edge span the data range.
    y = np.linspace(bins[0], bins[-1], 100)
    ax.plot(y, chi2.pdf(y, n), "r-", lw=2, alpha=0.9, label=f'Chi2({n}) PDF')

    if n < 2:
        # The chi2 density with fewer than two degrees of freedom is unbounded
        # near 0. Its value at the smallest simulated point would dominate the
        # autoscaled y-axis and flatten the histogram, so the axis is capped
        # slightly above the tallest histogram bar instead.
        top = 1.1 * np.max(density)
        if np.isfinite(top) and top > 0:
            ax.set_ylim(0, top)

    ax.legend(loc='best', frameon=False)
    ax.grid(True)
    plt.show()

# Interactive controls for simulate_square_sum, given as ipywidgets
# (min, max, step) tuples: n from 1 to 30 in steps of 1, N from 100 to
# 1000000 in steps of 10000 and num_bins from 10 to 80 in steps of 5.
chi2_simulation_widget = interactive( simulate_square_sum,
                                      n = (1,30,1),
                                      N = (100,1000000,10000),
                                      num_bins = (10,80,5))
# Show the controls together with the plot output below this cell.
display(chi2_simulation_widget)

Wie hängt die $\chi^2$-Verteilung von den Freiheitsgraden ab?

In [None]:
def plot_chi2(df, x_1 = 0, x_2 = 50):
    """Plot the chi2 density with ``df`` degrees of freedom on ``[x_1, x_2]``.

    Parameters
    ----------
    df : int or float
        Degrees of freedom of the chi2 distribution.
    x_1 : float
        Left end of the plotted x-range.
    x_2 : float
        Right end of the plotted x-range.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    fig, ax = plt.subplots(1, 1)
    x = np.linspace(x_1, x_2, 100)
    pdf = chi2.pdf(x, df)
    ax.set_xlim(x_1, x_2)

    # The y-axis stays fixed at 0.25 so that curves for different df are
    # directly comparable. It only grows when the curve would be cut off,
    # e.g. chi2(2) reaches 0.5 at x = 0. Non-finite values are ignored when
    # looking for the peak.
    finite_pdf = pdf[np.isfinite(pdf)]
    peak = finite_pdf.max() if finite_pdf.size else 0.0
    y_top = 0.25 if peak <= 0.25 else 1.05 * peak
    ax.set_ylim(0, y_top)

    ax.plot(x, pdf, 'r-', lw=2, alpha=0.9, label=f'Chi2({df}) PDF')
    ax.legend(loc='best', frameon=False)
    ax.grid(True)
    plt.show()
    
# Interactive controls for plot_chi2, given as ipywidgets (min, max, step)
# tuples: df from 2 to 30, x_1 from 0 to 10 and x_2 from 15 to 150 (step 5).
chi2_widget = interactive(plot_chi2, 
                          df = (2,30,1),
                          x_1 = (0,10,1), x_2 = (15,150,5))
# Show the controls together with the plot output below this cell.
display(chi2_widget)

## 2. Die Student $t$-Verteilung

Sind 
$$X \sim N(0,1), \quad Z \sim \chi^2(n)$$ 
unabhängig, so ist
$$T=\frac{X}{\sqrt{\frac{1}{n}Z}} \sim t(n)$$
Student-$t$-verteilt mit $n$ Freiheitsgraden,

mit der Dichtefunktion:

$$
-\infty < x < \infty: \, f_n(x) = \frac{\Gamma\left(\frac{n + 1}{2} \right)}{\sqrt{n\pi} \, \Gamma\left(\frac{n}{2} \right)} \left(1 + \frac{x^2}{n}\right)^{-\frac{n + 1}{2}}
$$


In [None]:
def plot_t(df, x_b = 20):
    """Plot the Student t density with ``df`` degrees of freedom against N(0,1).

    Both densities are drawn on the symmetric range ``[-x_b, x_b]`` with a
    fixed y-range of ``[0, 0.5]``.

    Parameters
    ----------
    df : int or float
        Degrees of freedom of the t distribution.
    x_b : float
        Half-width of the plotted x-range.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    fig, ax = plt.subplots(1, 1)
    # An odd number of grid points puts x = 0 (the common mode of both
    # densities) on the grid. The dense grid keeps the narrow peak resolved
    # for wide ranges; 100 points would leave a step of about 1.6 at x_b = 78.
    num_points = 1001
    x = np.linspace(-x_b , x_b, num_points)
    ax.set_xlim(-x_b, x_b)
    ax.set_ylim(0,0.5)
    ax.plot(x, norm.pdf(x), 'b-', lw=2, alpha=0.9, label='N(0,1) PDF')
    ax.plot(x, t.pdf(x, df), 'r-', lw=2, alpha=0.9, label=f'Student t({df}) PDF')
    ax.legend(loc='best', frameon=False)
    ax.grid(True)
    plt.show()
    
# Interactive controls for plot_t, given as ipywidgets (min, max, step)
# tuples: df from 1 to 30 in steps of 1 and x_b from 3 to 80 in steps of 5.
t_widget = interactive(plot_t, 
                          df = (1,30,1),
                          x_b = (3,80,5))
# Show the controls together with the plot output below this cell.
display(t_widget)