# Continuous Probability

In [None]:
%matplotlib inline

In [None]:
# Wildcard import: brings every public SymPy name (including `symbols` and
# `init_printing`, both used below) into the notebook namespace.
from sympy import *
# Declare two SymPy symbols named x and y.
x, y = symbols('x y')

# Turn on SymPy pretty-printing, allowing Unicode characters in the output.
init_printing(use_unicode=True)
from sympy.plotting import plot
# NOTE(review): `symbols` is already in scope (it is called above, right after
# the wildcard import), so this explicit import looks redundant.
from sympy import symbols
# NOTE(review): exact duplicate of the `plot` import a few lines up.
from sympy.plotting import plot
import math
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets
import seaborn as sns

In [None]:
import functools

In [None]:
from scipy.integrate import quad

In [None]:
# Shared sampling grid: spacing `dx`, running from -5 up to (about) +5.
# The stop value is 5 + dx so that the grid reaches the right-hand end.
dx = 0.05
xs = np.arange(start=-5, stop=5 + dx, step=dx)

#### Normal Distribution

In [None]:
def gauss(x, A=1, mu=0.0, s=1.0):
    """Scaled Gaussian (normal) curve evaluated at ``x``.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the curve.
    A : float
        Multiplicative weight applied to the normalised density.
    mu : float
        Centre of the curve.
    s : float
        Standard deviation (width) of the curve.

    Returns
    -------
    ``A * exp(-(x - mu)**2 / (2 * s**2)) / (s * sqrt(2 * pi))``, with the same
    shape as ``x``.
    """
    exponent = -((x - mu) ** 2) / (2 * s ** 2)
    normaliser = s * np.sqrt(2 * np.pi)
    return A * np.exp(exponent) / normaliser


#### Partial Application of Functions

In [None]:
# Three zero-centred Gaussians with A=1.0 that differ only in their width `s`.
# functools.partial freezes the keyword arguments, leaving a function of x alone.
g1, g2, g3 = (
    functools.partial(gauss, A=1.0, s=sigma, mu=0.0)
    for sigma in (1.0, 1.5, 2.0)
)

#### [Rayleigh Distribution](https://en.wikipedia.org/wiki/Rayleigh_distribution)

In [None]:
def rayleigh(x, s=0.5):
    """Rayleigh probability density with scale ``s``, evaluated at ``x``.

    The density is ``(x / s**2) * exp(-x**2 / (2 * s**2))`` for ``x >= 0``
    and ``0`` for ``x < 0``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the density.
    s : float
        Scale parameter of the distribution.

    Returns
    -------
    The density value(s); negative inputs map to 0.
    """
    # The Rayleigh distribution is supported on [0, inf). Evaluating the raw
    # formula at a negative x would give a negative "density", so clamp the
    # input at 0 first: the formula is exactly 0 at x == 0.
    x_pos = np.maximum(x, 0)
    return (x_pos / s**2) * np.exp(-x_pos**2 / (2 * s**2))

#### Partial Application of Functions

In [None]:
# Three Rayleigh curves that differ only in the scale parameter `s`;
# each partial is a function of x alone.
r1, r2, r3 = (
    functools.partial(rayleigh, s=scale)
    for scale in (0.5, 1.0, 2.0)
)

### Probabilities are areas under curves
#### Integration

* Symbolic (SymPy)
* Numeric (SciPy)

### Visualizing Cumulative Probabilities

In [None]:
@interact(
    func={
        "gauss sigma=1.0": g1,
        "gauss sigma=1.5": g2,
        "gauss sigma=2.0": g3,
    },
    T=(-10, 10, 0.05),
)
def disp1a(func, T):
    """Plot ``func`` on [-5, 5] and shade the area to the left of ``T``.

    The shaded area is the cumulative probability p(x < T). Its value is
    obtained by numerically integrating ``func`` from -infinity to ``T`` and
    is shown in the figure title.
    """
    step = 0.05
    grid = np.arange(-5, 5 + step, step)
    density = func(grid)
    # quad returns (integral estimate, absolute-error estimate).
    prob, _abs_err = quad(func, -np.inf, T)
    fig, ax = plt.subplots(1)
    ax.plot(grid, density)
    ax.fill_between(grid, density, where=grid < T)
    ax.set_title(r"$p(x<T)=$%0.4f" % prob)


### Visualizing Interval Probabilities

In [None]:
@interact(func={"gauss sigma=1.0":g1,
                "gauss sigma=1.5":g2,
                "gauss sigma=2.0":g3}, 
         L=(-10,10,0.05),
         H=(-10, 10, 0.05))
def disp1b(func, L, H):
    """Plot ``func`` on [-5, 5] and shade the area between ``L`` and ``H``.

    The shaded area is the interval probability p(L < x < H). Its value is
    obtained by numerically integrating ``func`` from ``L`` to ``H`` and is
    shown in the figure title. When the sliders are set so that ``H <= L``
    the interval is empty: nothing is shaded and the reported probability
    is 0.
    """
    dx = 0.05
    xs = np.arange(-5,5+dx,dx)
    ys = func(xs)
    # quad computes a *signed* integral, so reversed limits (H < L) would
    # yield a negative number. A probability cannot be negative; an empty
    # interval has probability 0, which also matches the empty shading mask.
    if L < H:
        prob = quad(func, L, H)[0]
    else:
        prob = 0.0
    fig,ax = plt.subplots(1)
    ax.plot(xs,ys)
    ax.fill_between(xs,ys, where =(L < xs) & (xs < H))
    ax.set_title(r"$p(L < x<H)=$%0.4f"%prob)

In [None]:
@interact(
    func={
        "rayleigh sigma=0.5": r1,
        "rayleigh sigma=1.0": r2,
        "rayleigh sigma=2.0": r3,
    },
    T=(0, 20, 0.1),
)
def disp2(func, T):
    """Plot ``func`` on [0, 10] and shade the area to the left of ``T``.

    The figure title shows p(x < T), computed by numerically integrating
    ``func`` from 0 to ``T``.
    """
    fig, ax = plt.subplots(1)
    step = 0.01
    grid = np.arange(0, 10 + step, step)
    density = func(grid)
    # quad returns (integral estimate, absolute-error estimate).
    prob, _abs_err = quad(func, 0, T)
    ax.plot(grid, density, color='k')
    # Translucent red (RGBA) fill under the part of the curve with x < T.
    translucent_red = (1.0, 0.0, 0.0, 0.2)
    ax.fill_between(grid, density, where=grid < T, color=translucent_red)
    ax.set_title(r"$p(x<T)=$%0.4f" % prob)


In [None]:
# Integrate gauss (with its default parameters) over the whole real line;
# the cell displays the (integral, error estimate) pair returned by quad.
quad(gauss, a=-np.inf, b=np.inf)

In [None]:
# Rebind g1, g2, g3 to three Gaussians that now differ in weight (A),
# width (s) and centre (mu).
# NOTE(review): this reuses names already assigned earlier in the notebook,
# so re-running earlier cells after this one will pick up these versions.
g1, g2, g3 = (
    functools.partial(gauss, A=weight, s=width, mu=centre)
    for weight, width, centre in (
        (0.5, 1.0, 0.0),
        (0.8, 1.5, 4.0),
        (0.7, 1.1, -3.0),
    )
)

In [None]:
def f1(xs):
    """Return g1(xs) + g2(xs) + g3(xs), using the notebook-level g1, g2, g3.

    The three callables are looked up when f1 is called, so f1 reflects
    whatever g1, g2 and g3 are bound to at that moment.
    """
    total = g1(xs)
    total = total + g2(xs)
    total = total + g3(xs)
    return total

In [None]:
# Plot the summed curve f1 on a grid from -10 up to (not including) 10.
x2 = np.arange(start=-10, stop=10, step=0.01)
plt.plot(x2, f1(x2))

### How could we convert this into a PDF?

In [None]:
# Total area under f1 over the whole real line; the cell displays the
# (integral, error estimate) pair returned by quad.
quad(f1, a=-np.inf, b=np.inf)

In [None]:
import pandas as pd
from scipy.optimize import curve_fit

In [None]:
# Read the spreadsheet "usa_age.xlsx" (path relative to the working directory)
# into a DataFrame, then display its column labels.
age = pd.read_excel(io="usa_age.xlsx")
age.columns

In [None]:
# Draw the "Male" and "Female" columns against the interval start age on one
# shared Axes. The first call passes ax=None, so seaborn picks the current Axes.
ax = None
for column in ("Male", "Female"):
    ax = sns.lineplot(
        data=age,
        x="Age (interval start)",
        y=column,
        legend="brief",
        ax=ax,
    )
ax.set_ylabel("count (Millions)")
# Legend labels are given in the order the lines were drawn.
ax.legend(["Male", "Female"])

In [None]:
def age_func(x, a, b, c, d, e):
    """Quartic polynomial in ``x`` that is switched off for ``x <= 0``.

    Parameters
    ----------
    x : scalar or numpy array
        Point(s) at which to evaluate the model.
    a, b, c, d, e : float
        Polynomial coefficients, from the x**4 term down to the constant.

    Returns
    -------
    ``a*x**4 + b*x**3 + c*x**2 + d*x + e`` multiplied by a unit step in ``x``.
    The step is called as ``np.heaviside(x, 0)``, so the result is 0 for
    negative ``x`` and also at exactly ``x == 0``.
    """
    step = np.heaviside(x, 0)
    quartic = a * x**4 + b * x**3 + c * x**2 + d * x + e
    return step * quartic

In [None]:
# Least-squares fit of age_func to the "Male" column as a function of the
# interval start age. mpopt holds the fitted (a, b, c, d, e); mpcov is the
# covariance estimate returned alongside it.
mpopt, mpcov = curve_fit(
    f=age_func,
    xdata=age["Age (interval start)"],
    ydata=age["Male"],
)

In [None]:
# Show the unit step used inside age_func on the shared grid `xs`
# (the second argument, 0, is the value taken at exactly x == 0).
plt.plot(xs, np.heaviside(xs, 0))

In [None]:
# Evaluate the fitted model on a fine age grid and plot it.
# The parameters come from the curve_fit call on the "Male" column, which
# stored them in `mpopt`; the name `popt` is never defined in this notebook
# and would raise NameError on a fresh kernel.
age_grid = np.arange(0, 105, 0.5)
plt.plot(age_grid, age_func(age_grid, *mpopt))

### [How could we use this as a random number generator?](http://matlabtricks.com/post-44/generate-random-numbers-with-a-given-distribution)
* [See also: inverse transform sampling](https://en.wikipedia.org/wiki/Inverse_transform_sampling)