## Probability Distributions

In [1]:
# imports
import numpy as np
import scipy.stats as stats

### Random Variables

In [2]:
# Simulate rolling a fair six-sided die: every roll is an independent draw
# (sampling with replacement) from the faces 1 through 6.
# NOTE(review): no seed is set in this cell, so the result varies between
# runs unless the global NumPy RNG is seeded elsewhere.
num_rolls = 5
die_6 = range(1, 7)
rolls = np.random.choice(die_6, num_rolls)
rolls

array([2, 6, 6, 1, 3])

### Mass Functions
- x: the value of interest
- n: the number of trials
- p: the probability of success on each trial

In [3]:
# P(X = x) for a binomial random variable: the probability of exactly `x`
# successes in `n` independent trials, each succeeding with probability 0.5.
n = 10  # number of trials (sample size)
x = 3   # value of interest

prob_1 = stats.binom.pmf(x, n=n, p=0.5)
prob_1

0.11718750000000004

In [4]:
# P(4 <= X <= 6): probability of observing between 4 and 6 heads (inclusive)
# in 10 fair coin flips, obtained by adding the mass-function value of each
# outcome in the range.
prob_2 = sum(stats.binom.pmf(heads, n=10, p=.5) for heads in (4, 5, 6))
print(prob_2)

0.6562500000000002


In [5]:
# P(X > 2): probability of observing more than 2 heads in 10 fair coin flips.
# Uses the complement rule: 1 - [P(X=0) + P(X=1) + P(X=2)].
prob_3 = 1 - sum(stats.binom.pmf(heads, n=10, p=0.5) for heads in range(3))
prob_3


0.9453124999999999

### Cumulative Distribution
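Suppose we want the probability that X falls between 3 and 6, inclusive of both endpoints. The CDF at 6 covers every outcome up to and including 6, and the CDF at 2 covers every outcome below 3, so subtracting the second from the first leaves exactly the range 3 to 6. Mathematically, this looks like the following: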

- P(X≤6) = CDF(X=6) = 0.83
- P(X≤2) = CDF(X=2) = 0.05

- P(X≤6) - P(X≤2) = P(3≤X≤6)
- CDF(X=6) - CDF(X=2) = P(3≤X≤6)
- 0.83 - 0.05 = 0.78 (using the rounded CDF values; for 10 fair coin flips the unrounded difference is ≈ 0.77)

In [6]:
# P(X <= 6): probability of 6 or fewer heads from 10 fair coin flips
stats.binom.cdf(6, n=10, p=0.5)

0.828125

In [7]:
# P(4 <= X <= 8) = P(X <= 8) - P(X <= 3) for heads in 10 fair coin flips.
# Subtracting the CDF at 3 removes the outcomes 0..3, keeping 4 itself.
stats.binom.cdf(8, n=10, p=0.5) - stats.binom.cdf(3, n=10, p=0.5)

0.8173828125

In [8]:
# P(X > 6) = 1 - P(X <= 6): more than 6 heads from 10 fair coin flips.
# "More than 6" excludes 6 itself, so the whole CDF at 6 is subtracted.
1 - stats.binom.cdf(6, n=10, p=0.5)

0.171875

###  Density Functions
- x: the value of interest
- loc: the mean of the probability distribution
- scale: the standard deviation of the probability distribution

In [9]:
# P(height < 175 cm) for a randomly chosen woman, using a Normal model with
# mean 167.64 and standard deviation 8.
# Call signature: stats.norm.cdf(x, loc, scale)
stats.norm.cdf(175, loc=167.64, scale=8)

0.8212136203856288

#### Probability Density Functions and Cumulative Distribution Function

In [10]:
# Model: the weather in the Galapagos islands follows a Normal distribution
# with a mean of 20 degrees Celsius and a standard deviation of 3 degrees.
degrees_mean, degrees_std = 20, 3

# P(18 <= T <= 25): probability that a randomly selected day is between
# 18 and 25 degrees Celsius, computed as CDF(25) - CDF(18).
stats.norm.cdf(25, loc=degrees_mean, scale=degrees_std) - stats.norm.cdf(18, loc=degrees_mean, scale=degrees_std)

0.6997171101802624

In [11]:
# P(T > 24) = 1 - P(T <= 24): probability that a randomly selected day is
# warmer than 24 degrees Celsius. Relies on degrees_mean and degrees_std
# defined in the preceding cell.
1 - stats.norm.cdf(24, loc=degrees_mean, scale=degrees_std)

0.09121121972586788

### Poisson Distribution