In [None]:
from datascience import *
import numpy as np

import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
%matplotlib inline

## Sampling

In [None]:
# Load the movies table and add an explicit 'Row Index' column (0, 1, 2, ...)
# as the first column, so row positions are visible in the cells below.
top = Table.read_table('top_movies.csv')
top = (
    top
    .with_column('Row Index', np.arange(top.num_rows))
    .move_to_start('Row Index')
)
top

In [None]:
# Deterministic selection: pick the rows at positions 3, 5 and 7.
top.take([3, 5, 7])

In [None]:
# Another deterministic selection: rows whose Title contains the text 'and the'.
top.where('Title', are.containing('and the'))

In [None]:
# Pick a random starting row among 0..9, then show the 5 consecutive rows
# beginning there. Only the starting position is random.
start = np.random.choice(np.arange(10))
top.take(start + np.arange(5))

In [None]:
# Draw 5 rows at random. Per the datascience docs, Table.sample draws with
# replacement unless told otherwise, so the same row can appear more than once.
top.sample(5)

In [None]:
# Draw 5 rows at random without replacement, so no row can repeat.
top.sample(5, with_replacement=False)

## Dice

In [None]:
# A one-column table holding the six faces of a die: 1, 2, ..., 6.
die = Table().with_column('face', np.arange(1, 7))
die

In [None]:
def face_hist(t):
    """Draw a histogram of the 'face' column of table `t`.

    The bin edges are 0.5, 1.5, ..., 6.5, so each integer face 1..6 sits in
    the middle of its own unit-wide bin. The x-axis is labelled 'Face'.
    """
    face_bins = np.arange(0.5, 7, 1)
    t.hist('face', bins=face_bins, unit='face')
    plots.xlabel('Face')
    
# Histogram of the `die` table itself, in which each face appears exactly once.
face_hist(die)

In [None]:
# Histogram of 10 rows drawn at random from `die`, i.e. 10 simulated rolls.
face_hist(die.sample(10))

## Flights

In [None]:
# Load the flights table from united.csv and display it.
united = Table.read_table('united.csv')
united

In [None]:
# Histogram of the 'Delay' column using the library's default bins.
united.hist('Delay')

In [None]:
def delay_hist(t):
    """Draw a histogram of the 'Delay' column of table `t`.

    Uses bins of width 10 with edges running from -20 up to 300, and
    'minute' as the unit passed to the histogram.
    """
    bin_edges = np.arange(-20, 301, 10)
    t.hist('Delay', bins=bin_edges, unit='minute')
    
# Delay histogram for the full `united` table.
delay_hist(united)

In [None]:
# Proportion of all rows in `united` whose Delay is selected by
# are.between(10, 20). Per the datascience docs this keeps values >= 10 and
# < 20, which matches one bar of the delay_hist histogram.
united.where('Delay', are.between(10, 20)).num_rows / united.num_rows

In [None]:
# Draw a random sample of 1000 rows from `united` and plot its delay histogram
# for comparison with the histogram of the full table.
s = united.sample(1000)
delay_hist(s)

In [None]:
# The same proportion as computed for `united`, but within the sample `s`.
s.where('Delay', are.between(10, 20)).num_rows / s.num_rows

## Calculation vs Simulation

Roll a die 4 times. What's P(at least one 6)?

In [None]:
# Complement rule: P(at least one 6) = 1 - P(no 6 in 4 rolls) = 1 - (5/6)^4.
1 - (5/6) ** 4

In [None]:
# Exact chance of at least one 6 in n rolls, for n = 1, 2, ..., 50, using the
# complement rule 1 - (5/6)^n.
# The counts are named `roll_counts` rather than `rolls` because later cells
# use `rolls` for an array of simulated die outcomes; sharing one name for
# both would let a stale value leak between cells run out of order.
roll_counts = np.arange(1, 51)
at_least_one = Table().with_columns(
    'Rolls', roll_counts,
    'Chance of at least one 6', 1 - (5/6) ** roll_counts
)
# Display the chance column (column index 1) as a percentage with 3 decimals.
at_least_one.set_format(1, PercentFormatter(3))

In [None]:
# Scatter plot of the chance of at least one 6 against the number of rolls.
at_least_one.scatter('Rolls')

In [None]:
# Look up the exact chance for 20 rolls, the case simulated in the cells below.
at_least_one.where('Rolls', 20)

In [None]:
# Simulate 20 rolls of a fair die: 20 independent draws from the faces 1..6.
rolls = np.random.choice(np.arange(1, 7), size=20)
rolls

In [None]:
# Number of 6s among the simulated rolls.
np.count_nonzero(rolls==6)

In [None]:
# Estimate P(at least one 6 in 20 rolls) by simulation: repeat the 20-roll
# experiment `trials` times and report the fraction of experiments in which
# a 6 appeared.
trials = 1000
successes = 0

for i in range(trials):
    rolls = np.random.choice(np.arange(1, 7), size=20)
    # The experiment succeeds when any of its 20 rolls shows a 6.
    if (rolls == 6).any():
        successes += 1

successes / trials

## Estimation

In [None]:
def roll_20(trials=1000, num_rolls=20, target=6):
    """Estimate by simulation the chance of at least one `target` in `num_rolls` rolls.

    Simulates `trials` independent experiments, each made of `num_rolls` rolls
    of a fair six-sided die, and returns the fraction of experiments in which
    the face `target` appeared at least once. Called with no arguments it
    estimates the chance of at least one 6 in 20 rolls from 1000 experiments.

    Args:
        trials: number of simulated experiments; must be at least 1.
        num_rolls: number of die rolls in each experiment.
        target: the face whose appearance counts as a success.

    Returns:
        float: proportion of experiments containing at least one `target`.

    Raises:
        ValueError: if `trials` is less than 1 (the proportion is undefined).
    """
    if trials < 1:
        raise ValueError('trials must be at least 1')

    faces = np.arange(1, 7)
    # A single (trials x num_rolls) draw replaces `trials` separate calls to
    # np.random.choice; each row of the result is one experiment.
    rolls = np.random.choice(faces, size=(trials, num_rolls))
    # An experiment succeeds when any roll in its row shows the target face.
    hit = (rolls == target).any(axis=1)
    return int(np.count_nonzero(hit)) / trials

# Repeat the whole simulation 1000 times to see how much the estimate varies
# from run to run. All estimates are collected first and the table is built
# in one step, instead of growing it one appended row at a time.
estimates = Table().with_column(
    'estimate', np.array([roll_20() for k in np.arange(1000)])
)

In [None]:
# Histogram of the estimates; normed=False asks for counts on the vertical axis.
# NOTE(review): `normed` is the older keyword name for this option - confirm
# the installed datascience version still accepts it.
estimates.hist(normed=False)

In [None]:
# Average of all the estimates (column 0 is the 'estimate' column).
np.average(estimates.column(0))