# Coin Flips and Die Rolls

In [1]:
# import numpy
import numpy as np

### 1. Two fair coin flips produce exactly two heads

In [2]:
# simulate 1 million tests of two fair coin flips
# encoding: 1 = heads, 0 = tails, so a row sum is that test's number of heads
tests = np.random.randint(2, size=(int(1e6), 2))

# number of heads in each test
test_sums = tests.sum(axis=1)

# proportion of tests that produced exactly two heads
(test_sums == 2).mean()

0.249632

### 2. Three fair coin flips produce exactly one head

In [3]:
# simulate 1 million tests of three fair coin flips
# encoding: 1 = heads, 0 = tails, so a row sum is that test's number of heads
tests = np.random.randint(2, size=(int(1e6), 3))

# number of heads in each test
test_sums = tests.sum(axis=1)

# proportion of tests that produced exactly one head
(test_sums == 1).mean()

0.374981

### 3. Three biased coin flips with P(H) = 0.6 produce exactly one head

In [4]:
# simulate 1 million tests of three biased coin flips
# hint: use np.random.choice()
# encoding: 1 = heads with P(H) = 0.6, 0 = tails with P(T) = 0.4,
# so a row sum is that test's number of heads
tests = np.random.choice([0, 1], size=(int(1e6), 3), p=[0.4, 0.6])

# number of heads in each test
test_sums = tests.sum(axis=1)

# proportion of tests that produced exactly one head
(test_sums == 1).mean()

0.288433

### 4. A die rolls an even number

In [5]:
# one million rolls of a single die, drawn uniformly from the faces 1 through 6
tests = np.random.choice(np.arange(1, 7), size=10**6)

# fraction of rolls that landed on an even face
np.mean(tests % 2 == 0)

0.500004

### 5. Two dice roll a double

In [6]:
# simulate the first million die rolls (faces 1 through 6)
first = np.random.choice(np.arange(1, 7), size=int(1e6))

# simulate the second million die rolls (faces 1 through 6)
second = np.random.choice(np.arange(1, 7), size=int(1e6))

# proportion of tests where the 1st and 2nd die rolled the same number
(first == second).mean()

0.166611

# Binomial Distributions
Use NumPy to create simulations and compute proportions for the following outcomes. The first one is done for you.

In [7]:
# import numpy
import numpy as np

### 1. A fair coin flip produces heads

In [8]:
# one million experiments, each a single fair coin flip;
# every entry is the number of successes (heads) in that experiment
tests = np.random.binomial(n=1, p=0.5, size=10**6)

# fraction of experiments that came up heads
np.mean(tests == 1)

0.499624

### 2. Five fair coin flips produce exactly one head

In [9]:
# one million experiments of five fair coin flips each;
# every entry is the number of heads in that experiment
tests = np.random.binomial(n=5, p=0.5, size=10**6)

# fraction of experiments with exactly one head
np.mean(tests == 1)

0.156045

### 3. Ten fair coin flips produce exactly four heads

In [10]:
# one million experiments of ten fair coin flips each;
# every entry is the number of heads in that experiment
tests = np.random.binomial(n=10, p=0.5, size=10**6)

# fraction of experiments with exactly four heads
np.mean(tests == 4)

0.205062

### 4. Five biased coin flips with P(H) = 0.8 produce exactly five heads

In [11]:
# one million experiments of five coin flips each, with P(H) = 0.8;
# every entry is the number of heads in that experiment
tests = np.random.binomial(n=5, p=0.8, size=10**6)

# fraction of experiments in which all five flips were heads
np.mean(tests == 5)

0.327933

### 5. Ten biased coin flips with P(H) = 0.15 produce at least 3 heads

In [12]:
# one million experiments of ten coin flips each, with P(H) = 0.15;
# every entry is the number of heads in that experiment
tests = np.random.binomial(n=10, p=0.15, size=10**6)

# fraction of experiments with three or more heads
np.mean(tests >= 3)

0.179698

# Cancer Test Results

In [13]:
import pandas as pd

# read the cancer test results from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='cancer_test_data.csv')

# preview the first five rows
df.head(n=5)

Unnamed: 0,patient_id,test_result,has_cancer
0,79452,Negative,False
1,81667,Positive,True
2,76297,Negative,False
3,36593,Negative,False
4,53717,Negative,False


In [14]:
# dimensions of the dataset as (rows, columns)
df.shape

(2914, 3)

In [15]:
# count of patients whose has_cancer flag is True
df['has_cancer'].sum()

306

In [16]:
# count of patients whose has_cancer flag is False
df['has_cancer'].eq(False).sum()

2608

In [17]:
# share of all patients who have cancer, P(cancer)
df['has_cancer'].mean()

0.10501029512697323

In [18]:
# share of all patients who do not have cancer, computed as 1 - P(cancer)
1 - df['has_cancer'].mean()

0.8949897048730268

In [19]:
# among patients with cancer, the share whose test was positive,
# i.e. P(Positive | cancer)
(
    df.query('has_cancer')['test_result']
    .eq('Positive')
    .mean()
)

0.9052287581699346

In [20]:
# among patients with cancer, the share whose test was negative,
# i.e. P(Negative | cancer)
(
    df.query('has_cancer')['test_result']
    .eq('Negative')
    .mean()
)

0.09477124183006536

In [21]:
# among patients without cancer, the share whose test was positive,
# i.e. P(Positive | no cancer)
(
    df.query('has_cancer == False')['test_result']
    .eq('Positive')
    .mean()
)

0.2036042944785276

In [22]:
# among patients without cancer, the share whose test was negative,
# i.e. P(Negative | no cancer)
(
    df.query('has_cancer == False')['test_result']
    .eq('Negative')
    .mean()
)

0.7963957055214724

# Conditional Probability & Bayes Rule Quiz

In [23]:
# load dataset
import pandas as pd
df = pd.read_csv(filepath_or_buffer='cancer_test_data.csv')

# preview the first five rows
df.head(n=5)

Unnamed: 0,patient_id,test_result,has_cancer
0,79452,Negative,False
1,81667,Positive,True
2,76297,Negative,False
3,36593,Negative,False
4,53717,Negative,False
