In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Lecture 15

### The Monty Hall Problem ###

In [None]:
# The two goat labels; `monty_hall` draws from this array when Monty must pick a goat at random.
goats = make_array('first goat', 'second goat')
# All three doors: the two goats plus the car.
doors = np.append(goats, 'car')
doors

In [None]:
def other_goat(x):
    """Return the label of the goat that is not `x`.

    'first goat' maps to 'second goat' and vice versa. Any other value
    yields None.
    """
    first, second = 'first goat', 'second goat'
    return second if x == first else (first if x == second else None)

In [None]:
[other_goat('first goat'), other_goat('second goat')]

In [None]:
def monty_hall():
    """Simulate one round of the Monty Hall game.

    Returns a three-element list:
    [contestant's guess, what Monty reveals, what remains behind the other door]
    """
    guess = np.random.choice(doors)

    if guess == 'car':
        # Both other doors hide goats, so Monty reveals one of them at random
        # and the other goat is what remains.
        revealed = np.random.choice(goats)
        remaining = other_goat(revealed)
    elif guess == 'first goat':
        # Monty reveals the other goat, leaving the car.
        revealed, remaining = 'second goat', 'car'
    elif guess == 'second goat':
        revealed, remaining = 'first goat', 'car'

    return [guess, revealed, remaining]

In [None]:
monty_hall()

In [None]:
# Simulate 10,000 independent games, one table row per game.
# All rows are generated first and added with a single `with_rows` call,
# instead of calling `append` once per game, which rebuilds every column
# on each of the 10,000 iterations.
results = Table(['Guess', 'Revealed', 'Remaining']).with_rows(
    [monty_hall() for _ in np.arange(10000)]
)


In [None]:
results.show(3)

In [None]:
results.group('Remaining').barh('Remaining')

In [None]:
results.group('Guess').barh('Guess')

## Random Sampling ##

We load a dataset of all United Airlines domestic flights from 6/1/15 to 8/9/15, giving each flight's destination and how long it was delayed, in minutes.

In [None]:
united = Table.read_table('united.csv')
# Add a 'Row' column holding 0, 1, ..., num_rows - 1 and move it to the front of the table.
united = united.with_column('Row', np.arange(united.num_rows)).move_to_start('Row')
united

Some deterministic samples:

In [None]:
united.where('Destination', 'JFK') 

In [None]:
united.take(np.arange(0, united.num_rows, 1000))

In [None]:
united.take(make_array(34, 6321, 10040))

A random sample (a systematic sample that starts at a randomly chosen row):

In [None]:
# Choose a random starting row among 0..999, then take every 1000th row from there.
# The start is the only random part; it determines every other row taken.
start = np.random.choice(np.arange(1000))
systematic_sample = united.take(np.arange(start, united.num_rows, 1000))
systematic_sample.show()

## Distributions ##

In [None]:
die = Table().with_column('Face', np.arange(1, 7))
die

In [None]:
die.sample(10)

In [None]:
die.hist()

In [None]:
# Bin edges 0.5, 1.5, ..., 6.5: each integer face 1-6 sits in the middle of its own unit-width bin.
roll_bins = np.arange(0.5, 6.6, 1)

In [None]:
die.hist(bins=roll_bins)

In [None]:
die.sample(10).hist(bins=roll_bins)

In [None]:
die.sample(1000).hist(bins=roll_bins)

In [None]:
die.sample(100000).hist(bins=roll_bins)

## Large Random Samples ##

In [None]:
united 

In [None]:
# Bin edges every 5 from -20 through 200 (the stop value 201 is exclusive, so 200 is the last edge).
united_bins = np.arange(-20, 201, 5)
united.hist('Delay', bins = united_bins)

In [None]:
min(united.column('Delay'))

In [None]:
max(united.column('Delay'))

In [None]:
np.average(united.column('Delay'))

In [None]:
united.sample(10).hist('Delay', bins = united_bins)

In [None]:
united.sample(1000).hist('Delay', bins = united_bins)

## Simulating Statistics ##

In [None]:
np.median(united.column('Delay'))

In [None]:
np.median(united.sample(10).column('Delay'))

In [None]:
def sample_median(size):
    """Return the median of the 'Delay' column in `united.sample(size)`.

    `size` is passed straight through to `Table.sample`.
    """
    sampled_delays = united.sample(size).column('Delay')
    return np.median(sampled_delays)

In [None]:
sample_median(10)

In [None]:
# Medians of 1,000 separately drawn samples, each of size 10.
# The array is built once from all the medians; growing it with np.append
# inside the loop would copy the whole array on every iteration.
sample_medians = np.array([sample_median(10) for _ in np.arange(1000)])

In [None]:
Table().with_column('Sample medians', sample_medians).hist(bins = np.arange(-10,31))

In [None]:
# Medians of 1,000 separately drawn samples, each of size 1000.
# The array is built once from all the medians; growing it with np.append
# inside the loop would copy the whole array on every iteration.
sample_medians = np.array([sample_median(1000) for _ in np.arange(1000)])

In [None]:
Table().with_column(
    'Sample medians', sample_medians).hist(bins = np.arange(-10,31))