# Conditional Probability

In [None]:
import pandas as pd

In [None]:
# Toy dataset: one row per car -> (company, model, rating, type).
df = pd.DataFrame(
    data=[
        ('ford', 'mustang', 'A', 'coupe'),
        ('chevy', 'camaro', 'B', 'coupe'),
        ('ford', 'fiesta', 'C', 'sedan'),
        ('ford', 'focus', 'A', 'sedan'),
        ('ford', 'taurus', 'B', 'sedan'),
        ('toyota', 'camry', 'B', 'sedan'),
    ],
    columns=['company', 'model', 'rating', 'type'],
)
# Bare last expression so the notebook renders the frame.
df

In [None]:
# Marginal probability of each rating (A, B, C):
# number of rows per rating, then that count as a share of all rows.
rating_counts = df.groupby('rating')['model'].count()
print(rating_counts)
print("-" * 40)
print(rating_counts / len(df))

Prob(rating=A) = 0.333333 

Prob(rating=B) = 0.500000 

Prob(rating=C) = 0.166667 

In [None]:
 # P(type | rating): rows per (rating, type) pair divided by rows per rating.
 # The per-rating counts are aligned to the pair counts on the shared 'rating' level.
 (
     df.groupby(['rating', 'type'])['model'].count()
     / df.groupby('rating')['model'].count()
 )

Prob(type=coupe|rating=A) = 0.500000 

Prob(type=sedan|rating=A) = 0.500000 

Prob(type=coupe|rating=B) = 0.333333 

Prob(type=sedan|rating=B) = 0.666667 

Prob(type=coupe|rating=C) = 0.000000 

Prob(type=sedan|rating=C) = 1.000000 

## Bayes Theorem

In [None]:
# Bayes' theorem: derive P(A|B) from P(B|A), P(A) and P(B)
def bayes_theorem(p_a, p_b, p_b_given_a):
    """Return the posterior probability P(A|B).

    Args:
        p_a: Prior probability P(A).
        p_b: Marginal probability P(B); used as the divisor.
        p_b_given_a: Likelihood P(B|A).

    Returns:
        P(B|A) * P(A) / P(B).

    Raises:
        ZeroDivisionError: If ``p_b`` is 0 (for plain Python numbers).
    """
    numerator = p_b_given_a * p_a
    return numerator / p_b

Consider the case where an elderly person (over 80 years of age) falls; what is the probability that they will die from the fall?

Let’s assume that the base rate of an elderly person dying, P(A), is 10%; that the base rate of an elderly person falling, P(B), is 5%; and that, among the elderly people who die, 7% had a fall, P(B|A).

Let’s plug what we know into the theorem:

$$P(A|B) = \frac{P(B|A) \, P(A)}{P(B)}$$

$$P(\text{Die}|\text{Fall}) = \frac{P(\text{Fall}|\text{Die}) \, P(\text{Die})}{P(\text{Fall})}$$

or

$$P(\text{Die}|\text{Fall}) = \frac{0.07 \times 0.10}{0.05} = 0.14$$

That is, if an elderly person falls, then there is a 14 percent probability that they will die from the fall.

In [None]:
# Inputs for the elderly-fall example:
#   p_a         -> P(A)   = P(Die)
#   p_b         -> P(B)   = P(Fall)
#   p_b_given_a -> P(B|A) = P(Fall|Die)
p_a, p_b, p_b_given_a = 0.1, 0.05, 0.07

# P(A|B) = P(Die|Fall); keyword arguments guard against mixing up the three inputs
result = bayes_theorem(p_a=p_a, p_b=p_b, p_b_given_a=p_b_given_a)

# report the posterior as a percentage with three decimals
print('P(A|B) = %.3f%%' % (result * 100))

# Probability Distribution

## Uniform Distribution

In [None]:
# Enable IPython's "greedy" tab-completion mode (sets IPCompleter.greedy on the running kernel).
%config IPCompleter.greedy=True

In [None]:
# matplotlib's pyplot interface, used here to configure figure defaults
import matplotlib.pyplot as plt
# render figures inline in the notebook output
%matplotlib inline
# default size for every figure created after this cell: (width, height) = (17, 7)
plt.rcParams["figure.figsize"] = (17,7)

# seaborn (as `sns`) draws the distribution plots in the sections below
import seaborn as sns

# scipy.stats uniform distribution; its .rvs() method is used below to draw random samples
from scipy.stats import uniform

In [None]:
# Draw n random numbers from the continuous uniform distribution on
# [start, start + width] (scipy parameterizes it as loc=start, scale=width).
n = 1000
start = 10
width = 20
# A fixed random_state makes the sample, and therefore the figure, reproducible
# on Restart & Run All.
uniform_data = uniform.rvs(size=n, loc=start, scale=width, random_state=42)

# sns.distplot is deprecated (seaborn >= 0.11); sns.histplot is its replacement.
# - Bar styling (alpha) is passed directly instead of through hist_kws.
# - The old linewidth=10 is intentionally not carried over: histplot outlines
#   each bar by default, and 10-pt outlines would swamp 100 narrow bars.
# - histplot's default stat="count" keeps the y-axis in counts, so the
#   'Frequency' label is accurate (distplot switched the histogram to a density
#   scale whenever kde=True). The KDE curve is scaled to match the counts.
ax = sns.histplot(x=uniform_data,
                  bins=100,
                  kde=True,
                  color='blue',
                  alpha=1)
ax.set(xlabel='Uniform Distribution ', ylabel='Frequency');

# NEXT: Show how the plot changes by varying parameters (start, width)
# Encourage students to read help on these functions

## Poisson Distribution

In [None]:
from scipy.stats import poisson
# 1000 draws from a Poisson distribution with mean mu=3.
# A fixed random_state makes the sample, and therefore the figure, reproducible
# on Restart & Run All.
poisson_data = poisson.rvs(mu=3, size=1000, random_state=42)

# sns.distplot is deprecated (seaborn >= 0.11); sns.histplot is its replacement.
# - discrete=True draws one unit-width bar centred on each integer value. It
#   replaces bins=30, which split integer-valued data into fractional-width
#   bins and left artificial gaps.
# - Bar styling (alpha) is passed directly instead of through hist_kws; the old
#   linewidth=10 is dropped because histplot outlines each bar by default.
ax = sns.histplot(x=poisson_data,
                  discrete=True,
                  color='red',
                  alpha=1)
ax.set(xlabel='Poisson Distribution', ylabel='Frequency');

# NEXT: Show how the plot changes by varying parameters (mu) of the poisson_data
# Encourage students to read help on these functions

## Binomial Distribution

In [None]:
from scipy.stats import binom
# 1000 draws from a binomial distribution: n=10 trials, success probability p=0.8.
# A fixed random_state makes the sample, and therefore the figure, reproducible
# on Restart & Run All.
binom_data = binom.rvs(n=10, p=0.8, size=1000, random_state=42)

# sns.distplot is deprecated (seaborn >= 0.11); sns.histplot is its replacement.
# - discrete=True draws one unit-width bar centred on each integer outcome
#   instead of relying on automatic continuous binning of integer data.
# - Bar styling (alpha) is passed directly instead of through hist_kws; the old
#   linewidth=10 is dropped because histplot outlines each bar by default.
ax = sns.histplot(x=binom_data,
                  discrete=True,
                  color='grey',
                  alpha=1)
ax.set(xlabel='Binomial Distribution', ylabel='Frequency');

# NEXT: Show how the plot changes by varying parameters (n, p)
# Encourage students to read help on these functions

## Normal Distribution

In [None]:
from scipy.stats import norm
# Generate 1000 random numbers from N(0, 1): loc is the mean, scale the standard deviation.
# A fixed random_state makes the sample, and therefore the figure, reproducible
# on Restart & Run All.
normal_data = norm.rvs(size=1000, loc=0, scale=1, random_state=42)

# sns.distplot is deprecated (seaborn >= 0.11); sns.histplot is its replacement.
# - Bar styling (alpha) is passed directly instead of through hist_kws.
# - The old linewidth=10 is intentionally not carried over: histplot outlines
#   each bar by default, and 10-pt outlines would swamp 100 narrow bars.
# - histplot's default stat="count" keeps the y-axis in counts, so the
#   'Frequency' label is accurate (distplot switched the histogram to a density
#   scale whenever kde=True). The KDE curve is scaled to match the counts.
ax = sns.histplot(x=normal_data,
                  bins=100,
                  kde=True,
                  color='green',
                  alpha=1)
ax.set(xlabel='Normal Distribution', ylabel='Frequency');

# NEXT: Show how the plot changes by varying parameters
# Encourage students to read help on these functions