In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Population Distribution and Parameter

In [None]:
# Histogram bin edges for durations: 0, 25, ..., 775.
music_bins = np.arange(0, 800, 25)
# The full table read from music.csv is treated as the population.
music = Table.read_table('music.csv')

In [None]:
# (Population) Probability Distribution:
# histogram of the 'duration' column of the whole music table, using music_bins.
music.hist('duration', bins=music_bins)

In [None]:
# (Population) Parameter:
# the median of the 'duration' column over every row of the music table.
np.median(music.column('duration'))

## Statistics

In [None]:
# (Sample) Statistic:
# the median 'duration' of a sample of 10 rows drawn from music.
# The sample is redrawn on every run, so this value changes between runs.
np.median(music.sample(10).column('duration'))

In [None]:
# (Sample) Statistic:
# same computation as the size-10 cell, but on a sample of 100 rows.
np.median(music.sample(100).column('duration'))

### Empirical Distributions of a Statistic

In [None]:
def sample_median(size):
    """Return the median 'duration' of a sample of `size` rows drawn from `music`."""
    sampled_durations = music.sample(size).column('duration')
    return np.median(sampled_durations)

In [None]:
# Try the helper once: the median duration of a single sample of size 10.
sample_median(10)

In [None]:
# Number of repetitions used when simulating the sample median.
num_simulations = 5_000

In [None]:
# Simulate `num_simulations` medians, each from a fresh sample of size 10.
# The array is built in a single pass: calling np.append inside the loop
# copies the whole array on every iteration, making the loop quadratic.
sample_medians_array = np.array(
    [sample_median(10) for _ in np.arange(num_simulations)]
)

In [None]:
# Empirical distribution of the simulated size-10 sample medians.
Table().with_column('Sample medians', sample_medians_array).hist(
    bins=np.arange(120, 350, 10)
)

#### Empirical Distributions Overlaid

In [None]:
num_simulations = 5000

# Preallocate one result array per sample size. Growing them with np.append
# inside the loop would copy each array on every iteration (quadratic cost).
sample_medians_array_10 = np.empty(num_simulations)
sample_medians_array_100 = np.empty(num_simulations)
sample_medians_array_1000 = np.empty(num_simulations)

# Each iteration draws one sample of every size, in the order 10, 100, 1000,
# and stores the resulting median at position i.
for i in np.arange(num_simulations):
    sample_medians_array_10[i] = sample_median(10)
    sample_medians_array_100[i] = sample_median(100)
    sample_medians_array_1000[i] = sample_median(1000)

In [None]:
# One column of simulated medians per sample size, so they can be overlaid.
sample_medians = Table().with_columns(
    'Size 10', sample_medians_array_10,
    'Size 100', sample_medians_array_100,
    'Size 1000', sample_medians_array_1000,
)

In [None]:
# Overlaid histograms of the three columns, with unit-width bins from 170 to 299.
sample_medians.hist(bins=np.arange(170, 300))

## Mendel and Pea Flowers ##

In [None]:
# Mendel had 929 plants, of which 709 had purple flowers.
purple_count, plant_count = 709, 929
# Observed proportion of purple-flowering plants.
observed_purples = purple_count / plant_count
observed_purples

In [None]:
# Observed statistic: distance, in percentage points, between the observed
# percent of purple flowers and the 75 percent used as the model's value.
abs(100 * observed_purples - 75)

In [None]:
# Model proportions for the two categories: 0.75 for the first, 0.25 for the second.
predicted_proportions = make_array(0.75, 0.25)
# One simulated sample of 929 plants under those proportions.
sample_proportions(929, predicted_proportions)

In [None]:
def purple_flowers():
    """Simulate one sample of 929 plants from `predicted_proportions`.

    Returns the first category's share of the sample as a percent
    (the category given proportion 0.75 in the model).
    """
    simulated = sample_proportions(929, predicted_proportions)
    first_category_share = simulated.item(0)
    return first_category_share * 100

In [None]:
# Try the simulation once: the percent of purple flowers in one simulated sample.
purple_flowers()

In [None]:
# Simulate the percent of purple flowers 10,000 times under the model.
# The array is built in a single pass: calling np.append inside the loop
# copies the whole array on every iteration, making the loop quadratic.
purples = np.array([purple_flowers() for _ in np.arange(10000)])

In [None]:
# Distribution of the simulated percents of purple flowers.
Table().with_column(
    'Percent of purple flowers in sample of 929', purples
).hist()
# Red vertical line at the observed percent.
plots.plot([observed_purples * 100] * 2, [0, 0.3], color='red', lw=2);

In [None]:
# Distribution of |simulated percent - 75| across the simulations.
Table().with_column(
    'Discrepancy in sample of 929 if the model is true', np.abs(purples - 75)
).hist()
# Red vertical line at the observed discrepancy.
plots.plot([abs(observed_purples * 100 - 75)] * 2, [0, 0.6], color='red', lw=2);

In [None]:
# Simulated discrepancies |percent - 75|, one row per simulation.
Discrepancy = Table().with_column(
    'Discrepancy in sample of 929 if the model is true', abs(purples - 75)
)
# Fraction of simulations whose discrepancy is at least the observed one.
# The denominator is the table's own row count rather than a hard-coded
# 10000, so the fraction stays correct if the number of simulations changes.
Discrepancy.where(
    'Discrepancy in sample of 929 if the model is true',
    are.above_or_equal_to(abs(observed_purples * 100 - 75))
).num_rows / Discrepancy.num_rows

## Swain vs. Alabama ##

In [None]:
# Proportions of the two categories in the eligible population: 0.26 and 0.74.
population_proportions = make_array(0.26, 0.74)
population_proportions

In [None]:
# One simulated panel of 100 drawn according to population_proportions.
sample_proportions(100, population_proportions)

In [None]:
def panel_proportion():
    """Simulate one panel of 100 from `population_proportions`.

    Returns the first category's proportion in the simulated panel
    (the category given proportion 0.26 in the population).
    """
    simulated_panel = sample_proportions(100, population_proportions)
    return simulated_panel.item(0)

In [None]:
# Try the simulation once: the first category's proportion in one simulated panel.
panel_proportion()

In [None]:
num_simulations = 10000

# Simulate the count (out of 100) for `num_simulations` panels.
# - Built in a single pass: np.append inside the loop copies the whole array
#   on every iteration, making the loop quadratic.
# - proportion * 100 can produce values such as 28.999999999999996, so each
#   result is rounded to give the whole-number count it represents.
panels = np.array(
    [np.round(panel_proportion() * 100) for _ in np.arange(num_simulations)]
)

In [None]:
# Distribution of simulated panel counts; bin edges at half-integers 5.5, 6.5, ..., 39.5.
Table().with_column(
    'Number of Black Men on Panel of 100', panels
).hist(bins=np.arange(5.5, 40.0))
# Red vertical line at x = 8 (presumably the observed count on the actual panel; confirm).
plots.plot([8] * 2, [0, 0.1], color='red', lw=2);

In [None]:
# Simulated panel counts, one row per simulation.
Black_men_on_panel = Table().with_column(
    'Number of Black Men on Panel of 100', panels
)
# Fraction of simulated panels with a count below 9.
Black_men_on_panel.where(
    'Number of Black Men on Panel of 100', are.below(9)
).num_rows / num_simulations

In [None]:
# Smallest count seen across all simulated panels.
panels.min()