In [None]:
import matplotlib
from datascience import *
%matplotlib inline
import matplotlib.pyplot as plots
import numpy as np
plots.style.use('fivethirtyeight')

## Distribution of Sample Means ##

In [None]:
# Read the delay data from united.csv (the relative path means the file is
# expected alongside the notebook's working directory).
united = Table.read_table('united.csv')

In [None]:
# Histogram of the 'Delay' column with bin edges -20, -10, ..., 290
# (np.arange excludes the stop value 300).
united.hist('Delay', bins = np.arange(-20, 300, 10))

In [None]:
# Population parameters: the mean and SD of every value in the 'Delay' column.
delays = united.column('Delay')
population_mean, population_sd = np.mean(delays), np.std(delays)

# Show both values as the cell's output.
population_mean, population_sd

In [None]:
def one_sample_mean(sample_size):
    """Return the mean 'Delay' of one random sample of rows from `united`.

    sample_size: number of rows to draw with `united.sample`.
    NOTE(review): datascience's Table.sample is documented to draw with
    replacement by default -- confirm against the installed version.
    """
    sample_delays = united.sample(sample_size).column('Delay')
    return np.mean(sample_delays)

In [None]:
def ten_thousand_sample_means(sample_size):
    """Simulate 10,000 sample means for samples of the given size.

    Calls one_sample_mean(sample_size) 10,000 times and returns the results
    as a one-dimensional float array, in the order they were generated.
    """
    repetitions = 10000
    # Fill a preallocated array instead of calling np.append in the loop:
    # np.append builds a fresh copy of the whole array on every call, so
    # growing an array one element at a time is quadratic in `repetitions`.
    means = np.empty(repetitions)
    for i in range(repetitions):
        means[i] = one_sample_mean(sample_size)
    return means

In [None]:
"""Empirical distribution of random sample means"""

def plot_sample_means(sample_size):
    """Plot and summarize the empirical distribution of sample means.

    Simulates sample means with ten_thousand_sample_means(sample_size), draws
    their histogram, and prints the sample size together with the mean and SD
    of both the 'Delay' column of `united` and the simulated sample means.
    """
    simulated_means = ten_thousand_sample_means(sample_size)
    population_delays = united.column('Delay')

    # Histogram on fixed bin edges 0, 0.5, ..., 40.5, so the same edges are
    # used whatever sample size is passed in.
    Table().with_column('Sample Means', simulated_means).hist(bins = np.arange(0, 41, 0.5))
    plots.xlabel('Sample Means')
    plots.title('Sample Size ' + str(sample_size))

    # Population quantities next to their simulated counterparts.
    print("Sample size: ", sample_size)
    print("Population mean:", np.mean(population_delays))
    print("Average of sample means: ", np.mean(simulated_means))
    print("Population SD:", np.std(population_delays))
    print("SD of sample means:", np.std(simulated_means))

In [None]:
# Simulate and plot the distribution of sample means for samples of size 25.
plot_sample_means(25)

In [None]:
# Simulate and plot the distribution of sample means for samples of size 100.
plot_sample_means(100)

In [None]:
# Simulate and plot the distribution of sample means for samples of size 400.
plot_sample_means(400)

In [None]:
# Simulate and plot the distribution of sample means for samples of size 900.
plot_sample_means(900)

## SD of the Sample Mean ##

In [None]:
# Warning: this cell will take a long time to run (a few minutes)! Every
# sample size below triggers a fresh call to ten_thousand_sample_means.
# It may be best to run it after class; an image of the output for this
# section is included below.

sample_sizes = np.arange(100, 950, 50)

# One SD of simulated sample means per sample size, stored in matching order.
sample_mean_sds = np.empty(len(sample_sizes))
for i, n in enumerate(sample_sizes):
    sample_means = ten_thousand_sample_means(n)
    sample_mean_sds[i] = np.std(sample_means)

In [None]:
# Put the simulated SDs side by side with population_sd / sqrt(sample size),
# one row per entry of sample_sizes, and display the resulting table.
sd_table = Table().with_columns(
    'Sample size', sample_sizes,
    'SD of simulated sample means', sample_mean_sds,
    'Pop SD / sqrt(sample size)', population_sd / np.sqrt(sample_sizes),
)
sd_table

In [None]:
# Scatter plot with 'Sample size' on the horizontal axis.
# NOTE(review): with no y column given, datascience presumably plots each
# remaining column against it -- confirm.
sd_table.scatter('Sample size')

<img src="SD_of_sample_mean_output.png"/> 

## SD of 0/1 Population - Population Size ##

In [None]:
# Population of size 10: number_of_ones ones followed by zeros.

number_of_ones = 4
zero_one_population_10 = np.concatenate([np.ones(number_of_ones), np.zeros(10 - number_of_ones)])
zero_one_population_10

In [None]:
# SD of the size-10 zero/one population.
np.std(zero_one_population_10)

In [None]:
# Population of size 100: number_of_ones ones followed by zeros.

number_of_ones = 40
zero_one_population_100 = np.concatenate([np.ones(number_of_ones), np.zeros(100 - number_of_ones)])
# Not displayed: a notebook cell only shows its last expression.
zero_one_population_100

# SD of the size-100 population (this is the cell's displayed output).
np.std(zero_one_population_100)

In [None]:
# Population of size 1000: number_of_ones ones followed by zeros.

number_of_ones = 400
zero_one_population_1000 = np.concatenate([np.ones(number_of_ones), np.zeros(1000 - number_of_ones)])
# Not displayed: a notebook cell only shows its last expression.
zero_one_population_1000

# SD of the size-1000 population (this is the cell's displayed output).
np.std(zero_one_population_1000)

## SD of 0/1 Population - Population Proportion ##

In [None]:
# Let's make a graph with proportion of ones on the x axis and SD on the y axis

In [None]:
def sd_of_zero_one_population_100(number_of_ones):
    """SD of a population of 100 values made of ones and zeros.

    number_of_ones: how many of the 100 values are ones; the remaining
    (100 - number_of_ones) values are zeros.
    Returns np.std of that population.
    """
    ones = np.ones(number_of_ones)
    zeros = np.zeros(100 - number_of_ones)
    return np.std(np.concatenate([ones, zeros]))

In [None]:
# Every possible count of ones, 0 through 100, paired with that count
# divided by 100 as the proportion of ones.
poss_ones = np.arange(101)
zero_one_pop = Table().with_columns(
    'Number of ones', poss_ones,
    'Proportion of ones', poss_ones / 100
)

zero_one_pop 

In [None]:
# Apply sd_of_zero_one_population_100 to each 'Number of ones' value and
# attach the results as an 'SD' column, then display the table.
sds = zero_one_pop.apply(sd_of_zero_one_population_100, 'Number of ones')
zero_one_pop = zero_one_pop.with_column('SD', sds)

zero_one_pop

In [None]:
# Scatter plot of SD (vertical axis) against the proportion of ones (horizontal axis).
zero_one_pop.scatter('Proportion of ones', 'SD')

## Election Polls ##

In [None]:
# Vote counts 300 through 307, each also expressed as a percentage of 660.
# NOTE(review): 660 is presumably the number of people polled -- confirm.
poss_num_of_ones = np.arange(300, 308)

Table().with_columns(
    'Possible Cal Votes', poss_num_of_ones,
    'Percentage of Cal Votes', 100 * (poss_num_of_ones / 660)
)

In [None]:
# Square of (0.5 / 0.0075).
# NOTE(review): presumably a required-sample-size calculation, with 0.5 the
# largest possible SD of a 0/1 population and 0.0075 the target SD of the
# sample proportion (e.g. 0.03 / 4) -- confirm against the lecture notes.
(0.5 / 0.0075) ** 2

In [None]:
# The same quantity rounded up to the next whole number (np.ceil returns a float).
np.ceil((0.5 / 0.0075) ** 2)