# [Introduction to Data Science](http://datascience-intro.github.io/1MS041-2022/)    
## 1MS041, 2022 
&copy;2022 Raazesh Sainudiin, Benny Avelin. [Attribution 4.0 International     (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/)

# Common discrete random variables

## Bernoulli random variable

A single trial with success probability $p$: the outcome is $1$ (success) with probability $p$ and $0$ (failure) with probability $1-p$.

In [None]:
from Utils import plotEMF

In [None]:
# Bernoulli(p) mass function: the success outcome 1 has probability p and the
# failure outcome 0 has probability 1 - p.
p = 0.1
plotEMF([(0, 1 - p), (1, p)])

In [None]:
import numpy as np

In [None]:
# Draw 10 Bernoulli(p) observations. A Bernoulli trial is a binomial with a
# single trial, so this honours the success probability p set earlier instead
# of always sampling 0/1 with equal probability.
np.random.binomial(1, p, size=10)

In [None]:
from Utils import plotEDF

In [None]:
# Bernoulli(p) distribution function: F(0) = P(X <= 0) = 1 - p and F(1) = 1.
# NOTE(review): assumes plotEDF expects (x, F(x)) pairs, i.e. cumulative
# probabilities rather than mass-function values — confirm against Utils.plotEDF.
plotEDF([(0, 1 - p), (1, 1)])

## Binomial random variable

If we do $n$ trials with success probability $p$, then the binomial random variable is the number of successes. The PMF is
$$
    f(x) = {n \choose x} p^x (1-p)^{n-x}
$$
The binomial random variable can only take the values $0,1,\ldots,n$.

In [None]:
from scipy.special import binom as binomial
n = 20
p = 0.5
# The support of Binomial(n, p) is 0, 1, ..., n, so the range has to include n
# itself; range(n) would silently drop the outcome "all n trials succeed".
plotEMF([(i, binomial(n, i) * p**i * (1 - p)**(n - i)) for i in range(n + 1)])

In [None]:
# Ten independent draws, each counting successes in 20 trials with p = 0.5
np.random.binomial(n=20, p=0.5, size=10)

In [None]:
# Binomial(n, p) distribution function on the full support 0, 1, ..., n:
# F(x) is the running sum of the mass-function values up to and including x.
# NOTE(review): assumes plotEDF expects (x, F(x)) pairs, i.e. cumulative
# probabilities rather than mass-function values — confirm against Utils.plotEDF.
plotEDF(list(zip(
    range(n + 1),
    np.cumsum([binomial(n, i) * p**i * (1 - p)**(n - i) for i in range(n + 1)]),
)))

## Poisson random variable
The Poisson random variable Pois($\lambda$), where $\lambda \in (0,\infty)$ is called the rate, has the PMF
$$
    f(x) = \frac{\lambda^x e^{-\lambda}}{x!}
$$

In [None]:
from scipy.special import factorial
from math import exp
# Rate of the Poisson distribution; later cells reuse this name
l = 2
# Pois(l) mass function evaluated at 0, 1, ..., 9 (the infinite support is
# truncated for plotting)
plotEMF([
    (k, exp(-l) * l**k / factorial(k))
    for k in range(10)
])

In [None]:
# Ten independent draws from a Poisson distribution with rate 2
np.random.poisson(lam=2, size=10)

In [None]:
# Pois(l) distribution function at 0, 1, ..., 9: F(x) is the running sum of
# the mass-function values up to and including x.
# NOTE(review): assumes plotEDF expects (x, F(x)) pairs, i.e. cumulative
# probabilities rather than mass-function values — confirm against Utils.plotEDF.
plotEDF(list(zip(
    range(10),
    np.cumsum([l**i * exp(-l) / factorial(i) for i in range(10)]),
)))

## Empirical means

In [None]:
from random import randint

def X():
    """Draw one observation from DeMoivre(1/3,1/3,1/3), i.e. uniform on {0, 1, 2}."""
    # randint includes both endpoints, so 0, 1 and 2 are all possible
    outcome = randint(0, 2)
    return outcome

def empirical_mean(n=1):
    """Produces the empirical mean of n experiments of the X above.

    Parameters:
        n: number of observations of X to average; must be at least 1.

    Returns:
        The sum of the n observations divided by n, as a float.

    Raises:
        ValueError: if n is smaller than 1. An average over no observations
            is undefined, and a negative n would otherwise silently
            return -0.0.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    # Sum the draws lazily; there is no need to keep the individual observations
    return sum(X() for _ in range(n)) / n

In [None]:
# Run this cell to draw a single observation of X; rerun it for another draw
X()

In [None]:
# Run this to get an observation of the empirical mean of X
# when doing 100 experiments
empirical_mean(100)

# Common continuous random variables

## The uniform [0,1] random variable
In this case we have

$$
    f(x) = 
    \begin{cases}
    1 & \text{if } 0 \leq x \leq 1 \\
    0 & \text{otherwise}
    \end{cases}
$$

Also, for $x \in [0,1]$ we have

$$
    F(x) = \int_{-\infty}^x f(v) dv = \int_0^x dv = x
$$

<table style="width:95%">
  <tr>
    <th><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/96/Uniform_Distribution_PDF_SVG.svg/500px-Uniform_Distribution_PDF_SVG.svg.png" alt="500px-Uniform_Distribution_PDF_SVG.svg.png" width=250></th>
    <th><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/6/63/Uniform_cdf.svg/500px-Uniform_cdf.svg.png" alt="wikipedia image 500px-Uniform_cdf.svg.png" width=250></th> 
  </tr>
</table>

In [None]:
import matplotlib.pyplot as plt

In [None]:
# One million independent Uniform[0,1] draws, reused by the histogram below
x = np.random.uniform(low=0, high=1, size=1_000_000)

In [None]:
# Histogram of the uniform sample, drawn with density=True. Binding the return
# value to _ keeps the notebook from echoing the raw arrays, matching the
# histogram cell at the end of this notebook.
_ = plt.hist(x, density=True)

In [None]:
from Utils import makeEDF,makeEMF
# Empirical mass function of 100 fresh Uniform[0,1] draws
plotEMF(
    makeEMF(
        np.random.uniform(size=100)
    )
)

In [None]:
import numpy as np
from Utils import makeEDF,makeEMF,plotEDF
# Empirical distribution function of 100 fresh Uniform[0,1] draws. The EDF plot
# is fed from makeEDF (imported in this cell) rather than makeEMF.
# NOTE(review): assumes plotEDF consumes makeEDF's output — confirm against Utils.
plotEDF(makeEDF(np.random.uniform(size=100)))

## The Gaussian random variable (Normal)
In this case we have
$$
    f(x) = \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{1}{2} \left ( \frac{x-\mu}{\sigma}\right )^2}
$$
Here we have two parameters: the mean $\mu$ and the standard deviation $\sigma$.

In [None]:
# Ten independent standard normal draws (mean 0, standard deviation 1)
np.random.normal(loc=0.0, scale=1.0, size=10)

In [None]:
# Histogram of 100000 standard normal draws using 200 bins; the result is
# bound to _ so the notebook does not echo the returned arrays
_ = plt.hist(
    np.random.normal(size=100000),
    bins=200,
)