In [3]:
from datascience import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

In [4]:
# np.array(list) converts list to an array
# provided all the elements of list are of the same type

# Population design: 60% Second years and 40% Third years;
# 50% of Second years and 80% of Third years have declared a major.
n = 100
second = round(n * 0.6)
third = n - second  # remainder, so the two year groups always total n

# Within each year the Undeclared count is whatever is left after the
# Declared count. Rounding both halves independently would overshoot for an
# odd-sized group (round(1.5) + round(1.5) == 4, not 3), which would make
# `major` longer than `year` and break the table construction below.
second_declared = round(second * 0.5)
third_declared = round(third * 0.8)

year = np.array(['Second'] * second + ['Third'] * third)
major = np.array(
    ['Declared'] * second_declared + ['Undeclared'] * (second - second_declared)
    + ['Declared'] * third_declared + ['Undeclared'] * (third - third_declared)
)

students = Table().with_columns(
    'Year', year,
    'Major', major
)

In [5]:
def create_population(prior_disease_prob, n, false_positive_rate=0.05):
    """Build a population of ``n`` people and cross-tabulate disease status by test result.

    Everyone with the disease is given a positive test ('Test +'). Among the
    people without the disease, a fraction ``false_positive_rate`` is given
    'Test +' and the rest 'Test -'.

    Parameters:
        prior_disease_prob: proportion of the population that has the disease.
        n: total population size.
        false_positive_rate: proportion of disease-free people who test
            positive. Defaults to 0.05, the value that used to be hard-coded.

    Returns:
        The result of pivoting the two-column table ('Status', 'Test Result')
        with ``pivot('Test Result', 'Status')``: one row per status and one
        column of counts per test result.
    """
    total = round(n)
    disease = round(n * prior_disease_prob)
    # Complementary counts are taken as remainders instead of being rounded
    # on their own: round(1.5) + round(1.5) == 4, so independent rounding can
    # produce a population that is not of size n.
    no_disease = total - disease

    false_positive = round(no_disease * false_positive_rate)
    true_negative = no_disease - false_positive

    status = np.array(['Disease'] * disease + ['No disease'] * no_disease)
    # Same row order as `status`: diseased people first (all positive), then
    # the disease-free false positives, then the disease-free negatives.
    result = np.array(
        ['Test +'] * disease
        + ['Test +'] * false_positive
        + ['Test -'] * true_negative
    )

    t = Table().with_columns(
        'Status', status,
        'Test Result', result
    )
    return t.pivot('Test Result', 'Status')

## More Likely Than Not ##

In [6]:
# Display only the first 3 rows of the students table.
students.show(3)

Year,Major
Second,Declared
Second,Declared
Second,Declared


In [7]:
# Cross-classify the students: one row per Year, one column per Major,
# with each cell holding the number of students in that combination.
students.pivot('Major', 'Year')

Year,Declared,Undeclared
Second,30,30
Third,32,8


In [14]:
# Verify: 60% of students are Second years, 40% are Third years
# (row totals of the pivot table: 30 + 30 = 60 Second years and
# 32 + 8 = 40 Third years; the Third-year share is the complement)
60 / (60 + 40)

0.6

In [15]:
# Verify: 50% of Second years have Declared
# (30 Declared out of the 60 Second years in the pivot table)
30 / 60

0.5

In [16]:
# Verify: 80% of Third years have Declared
# (32 Declared out of the 40 Third years in the pivot table)
32 / 40

0.8

In [17]:
# Chance of third year, given that they have declared
# P(third year | declared)
# = Third-year Declared / all Declared = 32 / (30 + 32)

32 / 62

0.5161290322580645

In [18]:
# P(second year | declared)
# = Second-year Declared / all Declared = 30 / (30 + 32)

30 / 62

0.4838709677419355

## Tree Diagram Calculation

In [19]:
# P(second year | declared), from tree diagram
# = P(second) * P(declared | second)
#   / (P(second) * P(declared | second) + P(third) * P(declared | third))
# The result differs from the count-based 30 / 62 only in the last digit,
# because of floating-point rounding.

(0.6 * 0.5) / (0.6 * 0.5 + 0.4 * 0.8)

0.4838709677419354

## Decisions ##

In [8]:
# General population: prior probability of disease 1/1000, 10,000 people.
create_population(1/1000, 10000)

Status,Test +,Test -
Disease,10,0
No disease,500,9490


In [9]:
# P(disease | tested +) in the general population:
# 10 people with the disease among the 10 + 500 = 510 positive tests.
10 / 510

0.0196078431372549

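But what if the person being tested is not drawn from the general population, but is someone who already has symptoms? Then the prior probability of disease is no longer 1/1000, and we can redo the calculation with an updated prior.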

In [10]:
# P(disease | tested +)
# if prior probability of disease is 1/10
# Bayes' rule with P(+ | disease) = 1 and P(+ | no disease) = 0.05:
# P(disease) * P(+ | disease) / P(+)

(0.1 * 1) / (0.1*1 + 0.9*0.05)

0.689655172413793

In [11]:
# Same answer by counting: with a prior of 1/10 in a population of 10,000,
# 1000 of the 1000 + 450 positive tests belong to people with the disease.
create_population(1/10, 10000)

Status,Test +,Test -
Disease,1000,0
No disease,450,8550


In [12]:
# P(disease | tested +)
# if prior probability of disease is 0.5
# Bayes' rule with P(+ | disease) = 1 and P(+ | no disease) = 0.05:
# P(disease) * P(+ | disease) / P(+)

(0.5 * 1) / (0.5*1 + 0.5*0.05)

0.9523809523809523

In [13]:
# Same answer by counting: with a prior of 0.5 in a population of 10,000,
# 5000 of the 5000 + 250 positive tests belong to people with the disease.
create_population(0.5, 10000)

Status,Test +,Test -
Disease,5000,0
No disease,250,4750
