<a href="https://colab.research.google.com/github/jasondupree/jasondupree.github.io/blob/main/distilled_statistics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [71]:
import pandas as pd
import numpy as np
from fractions import Fraction
from numpy import sqrt
import math
import scipy as sp

### Independent Events

In [72]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # Do not impact one another.

# P(A and B) = P(A) * P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting a 1 on a Die

def indep_event(a,b):
  indep_prob = round(a * b,4)
  print('Independent Probability:', indep_prob)
  return

In [73]:
indep_event(a,b)

Independent Probability: 0.0417


### Dependent Events

In [None]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # Do not impact one another.

# P(A and B) = P(A) * P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting a 1 on a Die

def dep_event(a,b):
  dep_prob = round(a * b,4)
  print('Dependent Probability:', dep_prob)
  return

In [None]:
dep_event(a,b)

Independent Probability: 0.0417


### Conditional Probability

In [29]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # DO impact one another.

# P(A|B) = P(A AND B) / P(B)

# Note that if A and B are independent, it doesn't matter that B occurred and the formula above becomes P(A|B) = P(A). Or, inversely, P(B|A) = P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting Wet

def cond_prob(a,b):
  cond_prob = round(((a * b) / b),4)
  print('Conditional Probability:', cond_prob)
  return

In [30]:
cond_prob(a,b)

Conditional Probability: 0.25


### Standardized Data
Standardize for Normal Distribution

In [77]:
# Variables
x = 67 #
μ = 66 #
σ = 2 #

def standardize(x,μ,σ):
  # mean = 0
  # std = 1
  # Z-score = (x - μ) / σ // data point x minus mean divided by sigma
  # Z-score tells you have many standard deviations the data point is away from the mean
  z_score = (x - μ) / σ
  print('Z-score:', z_score)
  p_value = sp.special.ndtr(z_score)
  print('P-value:', p_value) # Means there is a 69.15% chance of having a z-score less than 0.5.
  return

In [78]:
standardize(x,μ,σ)

Z-score: 0.5
P-value: 0.6914624612740131


In [None]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # DO impact one another.

# P(A|B) = P(A AND B) / P(B)

# Note that if A and B are independent, it doesn't matter that B occurred and the formula above becomes P(A|B) = P(A). Or, inversely, P(B|A) = P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting Wet

def cond_prob(a,b):
  cond_prob = round(((a * b) / b),4)
  print('Conditional Probability:', cond_prob)
  return

In [None]:
cond_prob(a,b)

Conditional Probability: 0.25


### Normal Distribution

#### What's the prob. of getting free pizza?

In [104]:
### When to Use:
# Heights, weights, etc.


# Example:

# Your local pizza shop claims their large is at least 16 in. or it's free.
# Their pizza is normally distributed with μ = 16.3 in. and σ = 0.2 in.


# Variables
x = 16 #
μ = 16.3 #
σ = 0.2 #

def standardize(x,μ,σ):
  # mean = 0
  # std = 1
  # Z-score = (x - μ) / σ // data point x minus mean divided by sigma
  # Z-score tells you have many standard deviations the data point is away from the mean
  z_score = (x - μ) / σ
  print('Z-score:', z_score)
  p_value = sp.special.ndtr(z_score) # Means there's a z-score chance of getting a pizza smaller than 16 in. of -1.5
  print('P-value:', p_value) # Means there is a 6.68% chance of hgetting a free pizza.
  return

In [105]:
standardize(x,μ,σ)

Z-score: -1.5000000000000036
P-value: 0.0668072012688576


#### What's the prob. of getting a pizza over 16.5 in?

In [106]:
# Example:

# Your local pizza shop claims their large is at least 16 in. or it's free.
# Their pizza is normally distributed with μ = 16.3 in. and σ = 0.2 in.


# Variables
x = 16.5 # Since "over": Final Solution = 1 - p_value
μ = 16.3 #
σ = 0.2 #

def standardize(x,μ,σ):
  z_score = (x - μ) / σ
  print('Z-score:', z_score)
  p_value = sp.special.ndtr(z_score)
  p_value = 1-p_value # Subtracting here because we want area to the right of z-score.
  print('P-value:', p_value)
  return

In [107]:
standardize(x,μ,σ)

Z-score: 0.9999999999999964
P-value: 0.15865525393145785


#### What's the prob. of getting pizza between 15.95 and 16.63?

In [108]:
# Example:

# Your local pizza shop claims their large is at least 16 in. or it's free.
# Their pizza is normally distributed with μ = 16.3 in. and σ = 0.2 in.


# Variables
x1 = 15.95 #
x2 = 16.63 #
μ = 16.3 #
σ = 0.2 #

def standardize(x1,x2,μ,σ):
  # x1 Section
  z_score_1 = (x1 - μ) / σ
  print('Z-score_1:', z_score_1)
  p_value_1 = sp.special.ndtr(z_score_1)
  print('P-value_1:', p_value_1)
  # x2 Section
  z_score_2 = (x2 - μ) / σ
  print('Z-score_2:', z_score_2)
  p_value_2 = sp.special.ndtr(z_score_2)
  print('P-value_2:', p_value_2)
  p_value = round(p_value_2 - p_value_1,4)
  print('Range:', p_value)
  return

In [109]:
standardize(x1,x2,μ,σ)

Z-score_1: -1.750000000000007
P-value_1: 0.040059156863816475
Z-score_2: 1.6499999999999915
P-value_2: 0.950528531966351
Range: 0.9105


### Binominal Distribution

In [None]:
### When to Use:
# Remember BINS for Binomial Distribution
# *   B - Binary Outcomes
# *   I - Independent Trials
# *   N - Number of Trials
# *   S = Same Probability per Trial

# Variables
n = 4 # Four Dice Rolls
p = 1/6 # Probability of Getting a 1
x = 2 # Objective of Rolling 1 Twice
q = round(1 - p,2) # Complement (Probability of Failure)

def binom_dist(p,x,n,q):
  # Mean
  mean = round(n*p,2)
  print('Mean:', mean)
  # Variance
  var = n*p*(1-p)
  std = sqrt(n*p*(1-p))
  print('Variance:', var)
  print('Standard Deviation: ', std)
  # Factorial
  fact_num = math.factorial(n)
  fact_denom = math.factorial(n-x) * math.factorial(x)
  fraction = fact_num/fact_denom
  print('Factorial Numerator:', fact_num)
  print('Factorial Denominator:', fact_denom)
  print('n choose x:', fact_num/fact_denom)
  # Probability
  prob = round((p**x) * (q**(n-x)),4)
  print('Probability:', prob)
  # Binomial Distribution
  binom_dist = fraction * prob
  print('Binomial Distribution:', binom_dist)
  return

In [None]:
binom_dist(p,x,n,q)

Mean: 0.67
Variance: 0.5555555555555556
Standard Deviation:  0.7453559924999299
Factorial Numerator: 24
Factorial Denominator: 4
n choose x: 6.0
Probability: 0.0191
Binomial Distribution: 0.1146


### Poisson Distribution

In [None]:
### When to Use:
# Probability of x event happening over a given time period.
# Example: Number of raindrops over x minutes.
# Example: Probability of two car parts failing over next 50,000 miles.

# Variables
n = 4 # Four Dice Rolls
p = 1/6 # Probability of Getting a 1
x = 2 # Objective of Rolling 1 Twice
q = round(1 - p,2) # Complement (Probability of Failure)

def binom_dist(p,x,n,q):
  # # Mean
  # mean = round(n*p,2)
  # print('Mean:', mean)
  # # Variance
  # var = n*p*(1-p)
  # std = sqrt(n*p*(1-p))
  # print('Variance:', var)
  # print('Standard Deviation: ', std)
  # # Factorial
  # fact_num = math.factorial(n)
  # fact_denom = math.factorial(n-x) * math.factorial(x)
  # fraction = fact_num/fact_denom
  # print('Factorial Numerator:', fact_num)
  # print('Factorial Denominator:', fact_denom)
  # print('n choose x:', fact_num/fact_denom)
  # # Probability
  # prob = round((p**x) * (q**(n-x)),4)
  # print('Probability:', prob)
  # # Binomial Distribution
  # binom_dist = fraction * prob
  # print('Binomial Distribution:', binom_dist)
  return

In [19]:
poisson_dist(p,x,n,q)

Mean: 0.67
Variance: 0.5555555555555556
Standard Deviation:  0.7453559924999299
Factorial Numerator: 24
Factorial Denominator: 4
n choose x: 6.0
Probability: 0.0191
Binomial Distribution: 0.1146


### Uniform Distribution

In [39]:
### When to Use:


# Characterized as having a constant probability within a Domain
# Additionally, due to distribution symmetry, the mean = median.

# Example: Bus is uniformly late between 2 and 10 minutes.
# How long can you expect to wait?
# With what standard deviation?
# If it's > 7 mins late, you'll be late for work.
# What's the probability of you being later for work?

# Implicit quesiton - what's the probability of the bus being > 7 mins late?
# P(7 <= X <= 10)) =

# Variables
a = 2 # lowerbound threshold
b = 10 # upperbound threshold
c = 7 # Bus late lowerbound threshhold
d = 10 # Bus late upperbound threshold


def uniform_dist(a,b,c,d):
  # Mean
  mean = round((a+b)/2,2)
  print('Mean:', mean)
  # Variance + Standard Deviation
  var = round(((b-a)**2)/12,4)
  std = round(sqrt(((b-a)**2)/12),4)
  print('Variance:', var)
  print('Standard Deviation: ', std)
  # Probability Density Function
  prob_dens = 1/(b-a)
  print('Probability Density Function: ', prob_dens)
  # Conditional Probability
  prob = round(((d-c)/(b-a)),4)
  print('Probability:', prob)
  return

In [40]:
uniform_dist(a,b,c,d)

Mean: 6.0
Variance: 5.3333
Standard Deviation:  2.3094
Probability Density Function:  0.125
Probability: 0.375


### Geometric Distribution

In [52]:
### When to Use:


# Example: Coming home from work, you always seem to hit every light.
# You calculate the odds of making it through a light to be 0.2.
# How many lights can you expect to hit before making it through one?
# With what standard deviation?
# What's the probability of the 3rd light being the first one that's green?

# Variables
p = 0.2 # Probability of Green Light
q = round(1 - p,2) # Complement (Probability of Red Light)
x = 3 # Probability of first Green Light on Third Light

def geo_dist(p,q,x):
  # Mean
  mean = round(1/p,2)
  print('Mean:', mean)
  # Variance
  var = round((1-p)/p,4)
  std = round((sqrt(1-p))/p,4)
  print('Variance:', var)
  print('Standard Deviation: ', std)
  # Probability
  prob = round(q**(x-1) * p,4)
  print('Probability:', prob)
  return

In [53]:
geo_dist(p,q,x)

Mean: 5.0
Variance: 4.0
Standard Deviation:  4.4721
Probability: 0.128


### Bayes' Theorem

In [111]:
### Equation: P(A|B) = (P(B|A) * P(A)) / P(B)

# When to Use: When you have some additional information and you'd like to refine your assessment.
# Look for fact patterns where you have the inverse given to you -i.e., you're given P(B|A) and you need P(A|B)

# Example:

# You've been planning a family picnic. You're trying to decide whether to postpone due to rain.
# The chance of rain on any day is 15%.
# The morning of the picnic, it's cloudy.
# The prob. of it being cloudy is 25% and on days where it rains, it's cloudy in the morning 80% of the time.
# Should you postpone the picnic?


# Variables
a = 0.15 # P(rain)
b = 0.25 # P(cloudy)
c = 0.80 # P(cloudy|rain)

# P(rain|cloudy) = (P(cloudy|rain) * P(rain)) / P(cloudy)


def bayes_theorem(a,b,c):
  # bayes
  bayes_theorem = round((c * a) / b,4)
  print('Bayes Theorem:', bayes_theorem)
  return

In [112]:
bayes_theorem(a,b,c)

Bayes Theorem: 0.48
