<a href="https://colab.research.google.com/github/jasondupree/jasondupree.github.io/blob/main/distilled_statistics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
import numpy as np
from fractions import Fraction
from numpy import sqrt
import math

### Independent Events

In [27]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # Do not impact one another.

# P(A and B) = P(A) * P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting a 1 on a Die

def indep_event(a,b):
  indep_prob = round(a * b,4)
  print('Independent Probability:', indep_prob)
  return

In [28]:
indep_event(a,b)

Independent Probability: 0.0417


### Dependent Events

In [None]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # Do not impact one another.

# P(A and B) = P(A) * P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting a 1 on a Die

def dep_event(a,b):
  dep_prob = round(a * b,4)
  print('Dependent Probability:', dep_prob)
  return

In [None]:
dep_event(a,b)

Independent Probability: 0.0417


### Conditional Probability

In [29]:
### When to Use:
# When you have two events happening that:
  # Are NOT mutually exclusive, and
  # DO impact one another.

# P(A|B) = P(A AND B) / P(B)

# Note that if A and B are independent, it doesn't matter that B occurred and the formula above becomes P(A|B) = P(A). Or, inversely, P(B|A) = P(B)

# Variables
a = 1/4 # Probability of it Raining
b = 1/6 # Probability of Getting Wet

def cond_prob(a,b):
  cond_prob = round(((a * b) / b),4)
  print('Conditional Probability:', cond_prob)
  return

In [30]:
cond_prob(a,b)

Conditional Probability: 0.25


### Normal Distribution

In [23]:
### When to Use:


# Variables
n = 4 # Four Dice Rolls
p = 1/6 # Probability of Getting a 1
x = 2 # Objective of Rolling 1 Twice
q = round(1 - p,2) # Complement (Probability of Failure)

def normal_dist(p,x,n,q):
  # # Mean
  # mean = round(n*p,2)
  # print('Mean:', mean)
  # # Variance
  # var = n*p*(1-p)
  # std = sqrt(n*p*(1-p))
  # print('Variance:', var)
  # print('Standard Deviation: ', std)
  # # Factorial
  # fact_num = math.factorial(n)
  # fact_denom = math.factorial(n-x) * math.factorial(x)
  # fraction = fact_num/fact_denom
  # print('Factorial Numerator:', fact_num)
  # print('Factorial Denominator:', fact_denom)
  # print('n choose x:', fact_num/fact_denom)
  # # Probability
  # prob = round((p**x) * (q**(n-x)),4)
  # print('Probability:', prob)
  # # Binomial Distribution
  # binom_dist = fraction * prob
  # print('Binomial Distribution:', binom_dist)
  return

In [18]:
normal_dist(p,x,n,q)

Mean: 0.67
Variance: 0.5555555555555556
Standard Deviation:  0.7453559924999299
Factorial Numerator: 24
Factorial Denominator: 4
n choose x: 6.0
Probability: 0.0191
Binomial Distribution: 0.1146


### Binominal Distribution

In [None]:
### When to Use:
# Remember BINS for Binomial Distribution
# *   B - Binary Outcomes
# *   I - Independent Trials
# *   N - Number of Trials
# *   S = Same Probability per Trial

# Variables
n = 4 # Four Dice Rolls
p = 1/6 # Probability of Getting a 1
x = 2 # Objective of Rolling 1 Twice
q = round(1 - p,2) # Complement (Probability of Failure)

def binom_dist(p,x,n,q):
  # Mean
  mean = round(n*p,2)
  print('Mean:', mean)
  # Variance
  var = n*p*(1-p)
  std = sqrt(n*p*(1-p))
  print('Variance:', var)
  print('Standard Deviation: ', std)
  # Factorial
  fact_num = math.factorial(n)
  fact_denom = math.factorial(n-x) * math.factorial(x)
  fraction = fact_num/fact_denom
  print('Factorial Numerator:', fact_num)
  print('Factorial Denominator:', fact_denom)
  print('n choose x:', fact_num/fact_denom)
  # Probability
  prob = round((p**x) * (q**(n-x)),4)
  print('Probability:', prob)
  # Binomial Distribution
  binom_dist = fraction * prob
  print('Binomial Distribution:', binom_dist)
  return

In [None]:
binom_dist(p,x,n,q)

Mean: 0.67
Variance: 0.5555555555555556
Standard Deviation:  0.7453559924999299
Factorial Numerator: 24
Factorial Denominator: 4
n choose x: 6.0
Probability: 0.0191
Binomial Distribution: 0.1146


### Poisson Distribution

In [None]:
### When to Use:
# Probability of x event happening over a given time period.
# Example: Number of raindrops over x minutes.
# Example: Probability of two car parts failing over next 50,000 miles.

# Variables
n = 4 # Four Dice Rolls
p = 1/6 # Probability of Getting a 1
x = 2 # Objective of Rolling 1 Twice
q = round(1 - p,2) # Complement (Probability of Failure)

def binom_dist(p,x,n,q):
  # # Mean
  # mean = round(n*p,2)
  # print('Mean:', mean)
  # # Variance
  # var = n*p*(1-p)
  # std = sqrt(n*p*(1-p))
  # print('Variance:', var)
  # print('Standard Deviation: ', std)
  # # Factorial
  # fact_num = math.factorial(n)
  # fact_denom = math.factorial(n-x) * math.factorial(x)
  # fraction = fact_num/fact_denom
  # print('Factorial Numerator:', fact_num)
  # print('Factorial Denominator:', fact_denom)
  # print('n choose x:', fact_num/fact_denom)
  # # Probability
  # prob = round((p**x) * (q**(n-x)),4)
  # print('Probability:', prob)
  # # Binomial Distribution
  # binom_dist = fraction * prob
  # print('Binomial Distribution:', binom_dist)
  return

In [19]:
poisson_dist(p,x,n,q)

Mean: 0.67
Variance: 0.5555555555555556
Standard Deviation:  0.7453559924999299
Factorial Numerator: 24
Factorial Denominator: 4
n choose x: 6.0
Probability: 0.0191
Binomial Distribution: 0.1146


### Uniform Distribution

In [39]:
### When to Use:


# Characterized as having a constant probability within a Domain
# Additionally, due to distribution symmetry, the mean = median.

# Example: Bus is uniformly late between 2 and 10 minutes.
# How long can you expect to wait?
# With what standard deviation?
# If it's > 7 mins late, you'll be late for work.
# What's the probability of you being later for work?

# Implicit quesiton - what's the probability of the bus being > 7 mins late?
# P(7 <= X <= 10)) =

# Variables
a = 2 # lowerbound threshold
b = 10 # upperbound threshold
c = 7 # Bus late lowerbound threshhold
d = 10 # Bus late upperbound threshold


def uniform_dist(a,b,c,d):
  # Mean
  mean = round((a+b)/2,2)
  print('Mean:', mean)
  # Variance + Standard Deviation
  var = round(((b-a)**2)/12,4)
  std = round(sqrt(((b-a)**2)/12),4)
  print('Variance:', var)
  print('Standard Deviation: ', std)
  # Probability Density Function
  prob_dens = 1/(b-a)
  print('Probability Density Function: ', prob_dens)
  # Conditional Probability
  prob = round(((d-c)/(b-a)),4)
  print('Probability:', prob)
  return

In [40]:
uniform_dist(a,b,c,d)

Mean: 6.0
Variance: 5.3333
Standard Deviation:  2.3094
Probability Density Function:  0.125
Probability: 0.375


### Bayes' Theorem

In [None]:
### When to Use:



# Variables
n = 4 # Four Dice Rolls
p = 1/6 # Probability of Getting a 1
x = 2 # Objective of Rolling 1 Twice
q = round(1 - p,2) # Complement (Probability of Failure)

def bayes_theorem(p,x,n,q):
  # # Mean
  # mean = round(n*p,2)
  # print('Mean:', mean)
  # # Variance
  # var = n*p*(1-p)
  # std = sqrt(n*p*(1-p))
  # print('Variance:', var)
  # print('Standard Deviation: ', std)
  # # Factorial
  # fact_num = math.factorial(n)
  # fact_denom = math.factorial(n-x) * math.factorial(x)
  # fraction = fact_num/fact_denom
  # print('Factorial Numerator:', fact_num)
  # print('Factorial Denominator:', fact_denom)
  # print('n choose x:', fact_num/fact_denom)
  # # Probability
  # prob = round((p**x) * (q**(n-x)),4)
  # print('Probability:', prob)
  # # Binomial Distribution
  # binom_dist = fraction * prob
  # print('Binomial Distribution:', binom_dist)
  return

In [None]:
bayes_theorem(p,x,n,q)