In [1]:
from datascience import *
import numpy as np
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)  

## Years and Majors

In [2]:
# np.array(list) converts list to an array
# provided all the elements of list are of the same type

# Cohort of n students: 60% are second-years, the remainder third-years.
n = 100
second = round(n * 0.6)
third = n - second

# Join the label lists with `+` before handing them to NumPy. Passing a list
# of unequal-length lists to np.sum relies on NumPy building a ragged object
# array, which NumPy >= 1.24 rejects with a ValueError.
year = np.array(
    ['Second'] * second + ['Third'] * third,
    dtype=object
)

# Rows line up with `year`: 50% of the second-years and 80% of the
# third-years have declared a major.
second_declared = round(second * 0.5)
third_declared = round(third * 0.8)

major = np.array(
    ['Declared'] * second_declared
    + ['Undeclared'] * (second - second_declared)
    + ['Declared'] * third_declared
    + ['Undeclared'] * (third - third_declared),
    dtype=object
)

# Combine the two label arrays into a two-column table, one row per student.
students = Table().with_columns(
    'Year', year,
    'Major', major
)

# Bare final expression so the notebook displays the table.
students

Year,Major
Second,Declared
Second,Declared
Second,Declared
Second,Declared
Second,Declared
Second,Declared
Second,Declared
Second,Declared
Second,Declared
Second,Declared


In [3]:
# Count the number of students in each year.
students.group('Year')

Year,count
Second,60
Third,40


In [4]:
# Cross-tabulate the students: one row per 'Major' value, one column per
# 'Year' value, each cell holding the number of matching students.
students.pivot('Year', 'Major')

Major,Second,Third
Declared,30,32
Undeclared,30,8


In [5]:
# I pick one student at random... That student has declared a major!
# Second Year or Third Year?

# Probability of third year:
# 32 declared third-years out of the 30 + 32 declared students
# (counts taken from the Year/Major pivot table).
32 / (30 + 32)

0.5161290322580645

In [6]:
# Probability of second year:
# 30 declared second-years out of the 30 + 32 declared students
# (counts taken from the Year/Major pivot table).
30 / (30 + 32)

0.4838709677419355

## Tree Diagram Calculation

In [7]:
# P(third year | declared), from tree diagram
# Bayes' rule: P(third) * P(declared | third) divided by the total P(declared).
#   0.4 = P(third),  0.8 = P(declared | third)
#   0.6 = P(second), 0.5 = P(declared | second)

(0.4 * 0.8) / (0.6 * 0.5 + 0.4 * 0.8)

0.5161290322580645

In [8]:
# P(second year | declared), from tree diagram
# Bayes' rule: P(second) * P(declared | second) divided by the total P(declared).
#   0.6 = P(second), 0.5 = P(declared | second)
#   0.4 = P(third),  0.8 = P(declared | third)

(0.6 * 0.5) / (0.6 * 0.5 + 0.4 * 0.8)

0.4838709677419354

## Monty Hall

In [9]:
# P(win car | switched), from tree diagram
# Same Bayes'-rule shape as the calculations above: one branch of the tree in
# the numerator, the sum of both branches in the denominator.
# Note: `*` and `/` share precedence and associate left to right, so
# `2/3 * 1/2` evaluates as ((2/3) * 1) / 2, which equals (2/3) * (1/2).

(2/3 * 1/2) / (2/3 * 1/2 + 1/3 * 1/2)

0.6666666666666666

## Disease Decisions

In [10]:
# P(disease | tested +)
# if prior probability of disease is 1/1000
#   0.001 = P(disease),    1    = P(test + | disease)
#   0.999 = P(no disease), 0.05 = P(test + | no disease)

(0.001 * 1) / (0.001*1 + 0.999*0.05)

0.019627085377821395

In [11]:
def create_population(prior_disease_prob, n, false_positive_rate=0.05):
    """Build a table of `n` people labelled by disease status and test result.

    The test is modelled as catching every case: each person with the disease
    tests positive. Among people without the disease, a fraction
    `false_positive_rate` also tests positive. All counts are rounded to whole
    people with the built-in `round`.

    Parameters
    ----------
    prior_disease_prob : float
        Fraction of the population that has the disease.
    n : int
        Population size (number of rows in the returned table).
    false_positive_rate : float, optional
        Fraction of disease-free people who test positive. Defaults to 0.05.

    Returns
    -------
    Table
        Two columns, 'Status' ('Disease' / 'No disease') and 'Test Result'
        ('Test +' / 'Test -'), with the diseased rows first.
    """
    disease = round(n * prior_disease_prob)
    no_disease = n - disease
    false_positives = round(no_disease * false_positive_rate)

    # Join the label lists with `+` rather than np.sum over a list of lists:
    # the latter needs NumPy to build a ragged object array (a ValueError on
    # NumPy >= 1.24) and also breaks when the groups happen to be equal-sized.
    status = np.array(
        ['Disease'] * disease + ['No disease'] * no_disease,
        dtype=object
    )

    # Row order matches `status`: true positives, then false positives,
    # then true negatives.
    result = np.array(
        ['Test +'] * (disease + false_positives)
        + ['Test -'] * (no_disease - false_positives),
        dtype=object
    )

    return Table().with_columns(
        'Status', status,
        'Test Result', result
    )

In [12]:
# Population of 10,000 people with a 1-in-1000 prior probability of disease.
create_population(1/1000, 10000)

Status,Test Result
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +
Disease,Test +


In [13]:
# Same population, cross-tabulated: one row per 'Status' value, one column
# per 'Test Result' value, each cell holding the number of people.
create_population(1/1000, 10000).pivot('Test Result', 'Status')

Status,Test +,Test -
Disease,10,0
No disease,500,9490


## More Common Disease

In [14]:
# P(disease | tested +)
# if prior probability of disease is 1/10
#   0.1 = P(disease),    1    = P(test + | disease)
#   0.9 = P(no disease), 0.05 = P(test + | no disease)

(0.1 * 1) / (0.1*1 + 0.9*0.05)

0.689655172413793

In [15]:
# Population of 10,000 with a 1-in-10 prior probability of disease,
# cross-tabulated by 'Status' (rows) and 'Test Result' (columns).
create_population(1/10, 10000).pivot('Test Result', 'Status')

Status,Test +,Test -
Disease,1000,0
No disease,450,8550


In [16]:
# P(disease | tested +)
# if prior probability of disease is 1/10
# Computed from the pivot counts: 1000 people with the disease tested
# positive, out of 1000 + 450 positive tests in total.

1000 / (1000 + 450)

0.6896551724137931