In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import math

In [2]:
# Fix the seed of NumPy's global random generator so re-running the notebook
# reproduces the same sample.
np.random.seed(seed=0)

In [3]:
# Parameters of the normal distribution used throughout the notebook:
# mean and standard deviation of height, in cm.
mu, sd = 170, 7

In [4]:
# Draw 100 samples from the normal distribution N(mu, sd**2)
x = norm(loc=mu, scale=sd).rvs(size=100)

In [5]:
# Maximum likelihood estimate of the mean: the sample mean
np.mean(x)

170.41865610874137

In [6]:
# Manual calc of the maximum likelihood mean: the average of all values
sum(x) / x.size

170.4186561087414

In [7]:
# Maximum likelihood estimate of the variance (divides by n)
np.var(x)

49.77550434153163

In [8]:
# Manual calc of the variance: mean of the squared deviations from the sample mean
np.mean(np.square(x - np.mean(x)))

49.77550434153163

In [9]:
# Maximum likelihood estimate of the standard deviation
np.std(x)

7.055175713016057

In [10]:
# The standard deviation is the square root of the maximum likelihood variance
math.sqrt(np.var(x))

7.055175713016057

In [11]:
# Unbiased variance estimate.
# ddof ("delta degrees of freedom") is subtracted from n in the divisor,
# so ddof=1 divides by (n - 1) instead of n.
np.var(x, ddof=1)

50.278287213668314

In [12]:
# For this sample the unbiased estimate (50.28) is further from the true
# variance (sd**2 = 49) than the maximum likelihood estimate (49.78)

In [13]:
# Manual calc of the unbiased variance: sum of squared deviations over (n - 1)
np.sum(np.square(x - np.mean(x))) / (x.size - 1)

50.278287213668314

In [14]:
# Standard deviation computed with the (n - 1) divisor (ddof=1)
np.std(x, ddof=1)

7.090718384879512

In [15]:
# At what height are you in the 95th percentile?
# ppf (percent point function) is the inverse of the cdf.
norm(loc=mu, scale=sd).ppf(0.95)

181.5139753886603

### Get a percentile given a datapoint

In [16]:
# You are 160 cm tall. What percentile are you in?
# The cdf gives the fraction of the population at or below 160 cm; x100 turns it into a percentile.
100 * norm(loc=mu, scale=sd).cdf(160)

7.656372550983476

### Get a probability of being over/under a datapoint

In [17]:
# You are 180 cm tall. What is the probability (in %) that someone is taller than you?
# P(X > 180) = 1 - cdf(180)
100 * (1 - norm(loc=mu, scale=sd).cdf(180))

7.656372550983481

In [18]:
# You are 170 cm tall. What is the probability (in %) that someone is taller than you?
# 170 is the mean of the distribution, so half of the population is taller.
100 * (1 - norm(loc=mu, scale=sd).cdf(170))

50.0

In [19]:
# 1 - cdf is usually called the survival function
# Can be called directly with norm.sf

In [20]:
# Survival function at 180 cm, as a percentage: P(X > 180) * 100
100 * norm(loc=mu, scale=sd).sf(180)

7.656372550983476