In [None]:
%matplotlib notebook
from scipy.stats import norm, uniform
#from scipy.signal import fftconvolve
from ipywidgets import interact, IntSlider, FloatSlider
import numpy as np
from numpy.random import multivariate_normal
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter

# Lageparameter

## Jensensche Ungleichung


In [None]:
def sqrt(x_vec):
    """Return the element-wise square root of ``x_vec`` (delegates to ``np.sqrt``)."""
    return np.sqrt(x_vec)


def sqr(x_vec):
    """Return ``x_vec`` raised to the power of two (element-wise for arrays)."""
    return x_vec ** 2

def plot_transformation(fun, sampler = 'norm', n = 1000):
    """Plot random samples x against fun(x) together with both marginal histograms.

    Draws ``n`` samples, drops the negative ones, applies ``fun`` and shows

    * a scatter plot of (x, fun(x)) with the curve g(x) and the point
      (mean(x), fun(mean(x))) marked in red,
    * a histogram of x below the scatter plot with mean(x) marked,
    * a histogram of fun(x) to the right with both mean(fun(x)) (green) and
      fun(mean(x)) (red) marked, so the two can be compared visually.

    Parameters
    ----------
    fun : callable
        Transformation g. It is called with a NumPy array of samples, with a
        NumPy array of 100 grid points and with the scalar sample mean.
    sampler : {'norm', 'unif'}
        ``'norm'`` draws from a normal distribution with loc 2.5 and scale 1.0,
        ``'unif'`` from a uniform distribution on [0, 10].
    n : int
        Number of samples drawn (before negative samples are discarded).

    Raises
    ------
    ValueError
        If ``sampler`` is not a supported name or if no non-negative samples
        are left to plot.
    """
    # Sample random data. This is done before any figure is touched so that
    # invalid arguments fail fast and leave the current figure alone.
    if sampler == 'norm':
        mu, sig = 2.5, 1.0
        x = norm.rvs(mu, sig, n)
    elif sampler == 'unif':
        a, b = 0, 10
        x = uniform.rvs(loc=a, scale=b - a, size=n)
    else:
        raise ValueError(f"unknown sampler {sampler!r}; expected 'norm' or 'unif'")

    # Only non-negative samples are kept (presumably so that transformations
    # such as the square root stay real-valued; the axes also start at 0).
    x = x[x >= 0]
    if x.size == 0:
        raise ValueError("no non-negative samples to plot; increase n")
    y = fun(x)

    plt.close()

    nullfmt = NullFormatter()         # no labels

    # definitions for the axes (figure-relative coordinates)
    left, width = 0.05, 0.62
    bottom, height = 0.05, 0.62
    left_h = left + width + 0.02
    bottom_h = bottom + 0.2 + 0.1

    rect_scatter = [left, bottom_h, width, height]
    rect_histx = [left, bottom, width, 0.2]
    rect_histy = [left_h, bottom_h, 0.2, height]

    # start with a rectangular Figure
    fig = plt.figure(1, figsize=(8, 8))

    axScatter = fig.add_axes(rect_scatter)
    axHistx = fig.add_axes(rect_histx)
    axHisty = fig.add_axes(rect_histy)

    # no tick labels on the axes shared with the scatter plot
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)

    # the scatter plot:
    axScatter.scatter(x, y)

    # Determine limits:
    xlim = np.max(x)
    ylim = np.max(y)

    x_mean = np.mean(x)
    y_mean = np.mean(y)
    g_of_mean = fun(x_mean)

    x_line = np.linspace(0, xlim, 100)
    axScatter.plot(x_line, fun(x_line), '--', color='red', lw=2, alpha=0.9, label='$g(x)$')
    # ymax / xmin are passed as fractions of the axis range; because the axes
    # are set to span [0, xlim] x [0, ylim] below, the two red lines end at
    # the point (E(x), g(E(x))).
    axScatter.axvline(x_mean, color='red', ymax=g_of_mean / ylim,
                      label=r'$E(x) \mapsto g(E(x))$')
    axScatter.axhline(g_of_mean, color='red', xmin=x_mean / xlim)
    axScatter.legend(loc='best', frameon=False)

    axScatter.set_xlim((0, xlim))
    axScatter.set_ylim((0, ylim))

    bins = 100
    axHistx.hist(x, bins=bins, alpha=0.8)
    axHistx.axvline(x_mean, color='red', label='$E(x)$')
    axHistx.legend(loc='best', frameon=False)

    axHisty.hist(y, bins=bins, orientation='horizontal', alpha=0.8)
    axHisty.axhline(y_mean, color='limegreen', label='$E(g(x))$')
    axHisty.axhline(g_of_mean, color='red', label='$g(E(x))$')
    axHisty.legend(loc='best', frameon=False)

    # align the marginal histograms with the scatter plot
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())

    plt.show()
    
#plot_transformation(sqr)

* $g$ konvexe Funktion und $X$ Zufallsvariable:

$$ g(E(X)) \le E(g(X)) $$

* $g$ konkave Funktion und $X$ Zufallsvariable:

$$ g(E(X)) \ge E(g(X)) $$

In [None]:
# Interactive controls for plot_transformation: a dropdown for the
# transformation, a dropdown for the sampler and a slider for the sample size.
# continuous_update=False defers the redraw until the slider is released.
# The trailing semicolon keeps the notebook from echoing the returned function.
interact(plot_transformation,
         fun = [('Square', sqr), ('Square Root', sqrt)],
         sampler = ['norm', 'unif'],
         n = IntSlider(min=100, max=10000, value=1000, continuous_update=False));

# Mehrdimensionale Zufallsvariablen

## Stetige Mehrdimensionale Zufallsvariable

Für den Vektor von Zufallsvariablen $X_1,\cdots,X_n$ existiert die Funktion $f(x_1,\cdots,x_n)$, so dass sich die Verteilungsfunktion $F(x_1,\cdots,x_n)$ ergibt:

$$
F(x_1,\cdots,x_n) = \int_{-\infty}^{x_1} \cdots \int_{-\infty}^{x_n} f(t_1,\cdots,t_n) \; dt_n \cdots dt_1 
$$

 $f(x_1,\cdots,x_n)$ ist dann die gemeinsame Wahrscheinlichkeitsdichte von $(X_1,\cdots,X_n)$

In [None]:
def plot_multi(mu1, sigma1, mu2, sigma2, rho = 0, n = 1000):
    """Plot samples of a bivariate normal distribution with marginal histograms.

    Draws ``n`` samples from a 2-dimensional normal distribution with mean
    ``[mu1, mu2]`` and covariance matrix
    ``[[sigma1**2, cov], [cov, sigma2**2]]`` where
    ``cov = sigma1 * sigma2 * rho``. The samples are shown as a 2-D histogram,
    with the 1-D histogram of the first component below it and of the second
    component to its right.

    Parameters
    ----------
    mu1, mu2 : float
        Means of the two components.
    sigma1, sigma2 : float
        Standard deviations of the two components.
    rho : float
        Correlation coefficient; must lie in [-1, 1].
    n : int
        Number of samples to draw.

    Raises
    ------
    ValueError
        If ``rho`` is outside [-1, 1] (including NaN). Such a value makes the
        covariance matrix non-positive-semidefinite, for which NumPy would
        only warn and return meaningless samples.
    """
    # Validate before touching any figure so bad input leaves the plot alone.
    if not -1.0 <= rho <= 1.0:
        raise ValueError(f"rho must lie in [-1, 1], got {rho!r}")

    plt.close()
    bins = 100
    cov = sigma1 * sigma2 * rho
    # Sample random data
    data = multivariate_normal([mu1, mu2],
                               [[sigma1**2, cov], [cov, sigma2**2]],
                               size=n)
    x1, x2 = data[:, 0], data[:, 1]

    nullfmt = NullFormatter()         # no labels

    # definitions for the axes (figure-relative coordinates)
    left, width = 0.05, 0.62
    bottom, height = 0.05, 0.62
    left_h = left + width + 0.02
    bottom_h = bottom + 0.2 + 0.1

    rect_hist2d = [left, bottom_h, width, height]
    rect_histx = [left, bottom, width, 0.2]
    rect_histy = [left_h, bottom_h, 0.2, height]

    # start with a rectangular Figure
    fig = plt.figure(1, figsize=(8, 8))

    axHist2d = fig.add_axes(rect_hist2d)
    axHistx = fig.add_axes(rect_histx)
    axHisty = fig.add_axes(rect_histy)

    # no tick labels on the axes shared with the 2-D histogram
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)

    # the joint 2-D histogram and the two marginal histograms
    axHist2d.hist2d(x1, x2, bins=bins)
    axHistx.hist(x1, bins=bins, alpha=0.8)
    axHisty.hist(x2, bins=bins, orientation='horizontal', alpha=0.8)

    # align the marginal histograms with the 2-D histogram
    axHistx.set_xlim(axHist2d.get_xlim())
    axHisty.set_ylim(axHist2d.get_ylim())

    plt.show()

### Beispiel zweier normalverteilter Zufallsvariablen



$$ 
X_1 \sim N(\mu_1, \sigma_1) \, \wedge \, X_2 \sim N(\mu_2, \sigma_2)
$$

Folgt der Vektor $(X_1,X_2)$ einer 2-dimensionalen Normalverteilung mit Korrelationskoeffizient $\rho$, dann sind $X_1$ und $X_2$ für $\rho \ne 0$ abhängig. Für $\rho = 0$ sind sie unabhängig und es gilt

$$
f(x_1,x_2) = f_{X_1}(x_1) \cdot f_{X_2}(x_2)  
$$

In [None]:
# Interactive controls for plot_multi: one slider per distribution parameter
# plus the sample size. continuous_update=False defers the redraw until a
# slider is released. The trailing semicolon keeps the notebook from echoing
# the returned function.
interact(plot_multi,
         mu1 = FloatSlider(min=-10.0, max=10.0, value=0.0, continuous_update=False),
         sigma1 = FloatSlider(min=0.1, max=10.0, value=1.0, continuous_update=False),
         mu2 = FloatSlider(min=-10.0, max=10.0, value=0.0, continuous_update=False),
         sigma2 = FloatSlider(min=0.1, max=10.0, value=1.0, continuous_update=False),
         rho = FloatSlider(min=-1.0, max=1.0, value=0.0, continuous_update=False),
         n = IntSlider(min=100, max=30000, value=1000, continuous_update=False));

# Zentraler Grenzwertsatz oder warum die Normalverteilung so bedeutend ist

## Verteilung der Summe zweier stetiger Zufallsvariablen

Gegeben seien zwei __unabhängige__ Zufallsvariablen $X_1 \sim F_1(x)$ und $X_2 \sim F_2(x)$ mit den entsprechenden Wahrscheinlichkeitsdichten $f_{X_1}(x)$ und $f_{X_2}(x)$. Damit ergibt sich für die gemeinsame Wahrscheinlichkeitsdichte:

$$ 
f_{(X_1, X_2)}(x_1,x_2) = f_{X_1}(x_1) \cdot f_{X_2}(x_2) \quad \text{Unabhängigkeit}
$$

Welcher Wahrscheinlichkeitsdichte folgt dann die Summe $Z = X_1 + X_2$ der beiden Zufallsvariablen?

$$ 
\begin{aligned}
z = x_1 + x_2 \, \wedge \, x_1 &= t \; \Rightarrow \; x_2 = z - t \\ 
f_{X_1 + X_2}(z) &= \int_{-\infty}^{\infty}f_{X_1}(t) \cdot f_{X_2}(z-t) \; dt 
\end{aligned}
$$

## Verteilung der Summe mehrerer Zufallsvariablen

Sind die $n$ Zufallsvariablen $X_i$ _unabhängig identisch verteilt_ (_i.i.d._) mit Erwartungswert $\mu$ und Standardabweichung $\sigma$, dann gilt für große $n$ näherungsweise:

$$ \sum_{i=1}^n X_i \sim N(n\mu , \sigma \sqrt{n}) $$

In [None]:
def plot_sum_random_variables(n, num_bins, sample_size):
    """Plot a normalised histogram of sums of ``n`` uniform random numbers.

    Generates ``sample_size`` realisations of the sum of ``n`` independent
    draws from the uniform distribution on [0, 1] and shows their histogram
    with ``density=True``.

    Parameters
    ----------
    n : int
        Number of uniform random numbers added up per realisation (>= 1).
    num_bins : int
        Passed on unchanged as the bin specification of ``Axes.hist``.
    sample_size : int
        Number of realisations of the sum (>= 1).

    Raises
    ------
    ValueError
        If ``n`` or ``sample_size`` is smaller than 1.
    """
    # Validate before touching any figure so bad input leaves the plot alone.
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")
    if sample_size < 1:
        raise ValueError(f"sample_size must be at least 1, got {sample_size!r}")

    # Close the current figure first (as the other plot functions in this
    # notebook do); otherwise every slider change would leave another open
    # figure behind.
    plt.close()

    a, b = 0, 1
    rv = uniform(loc=a, scale=b - a)
    # One row per realisation, one column per summand: a single vectorised
    # draw replaces sample_size separate rvs() calls.
    y = rv.rvs(size=(sample_size, n)).sum(axis=1)

    fig, ax = plt.subplots()

    # the histogram of the data
    ax.hist(y, num_bins, density=True)

    plt.show()
    
#plot_sum_random_variables(2,10000)
# Interactive controls for plot_sum_random_variables: number of summands,
# number of histogram bins and number of realisations. continuous_update=False
# defers the redraw until a slider is released. The trailing semicolon keeps
# the notebook from echoing the returned function.
interact(plot_sum_random_variables,
         n = IntSlider(min=1, max=40, value=1, continuous_update=False),
         num_bins = IntSlider(min=20, max=120, value=50, continuous_update=False),
         sample_size = IntSlider(min=100, max=100000, value=1000, continuous_update=False));