In [None]:
from datascience import *
import numpy as np

import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
%matplotlib inline
np.set_printoptions(legacy='1.13')

## Survey analysis ##

We asked: "Rate your agreement with the following statement: The climate at UC Berkeley prevents some people from saying things they believe because others might find them offensive."  97 said "strongly agree" and 152 said "somewhat agree", out of 290 responses.  In comparison, a [2016 Gallup survey](https://www.knightfoundation.org/media/uploads/publication_pdfs/FreeSpeech_campus.pdf) found that, among the college population at large in the US, 54% said "strongly agree" or "somewhat agree".  Let's compare our respondents to the national population.

In [None]:
# Proportion answering "strongly agree" or "somewhat agree" in the 2016 Gallup
# survey of US college students; used as the null-hypothesis proportion.
predicted_yes = 0.54

In [None]:
def test_statistic(observed_yes):
    """Return how far an observed "agree" proportion is from the predicted one.

    observed_yes: proportion of respondents who agreed.

    The result is the absolute difference between `observed_yes` and the
    global `predicted_yes`, so larger values are further from the prediction
    in either direction.
    """
    gap = observed_yes - predicted_yes
    return abs(gap)

In [None]:
# 97 "strongly agree" plus 152 "somewhat agree" answers out of 290 responses.
num_agree = 97 + 152
num_responses = 290
observed_stat = test_statistic(num_agree / num_responses)
observed_stat

In [None]:
# Simulate one survey of 290 respondents under the null hypothesis.
# The category proportions are derived from `predicted_yes` so that the
# simulation and `test_statistic` always use the same null proportion.
null_proportions = make_array(predicted_yes, 1 - predicted_yes)
simulated_yes = sample_proportions(290, null_proportions).item(0)
test_statistic(simulated_yes)

In [None]:
# Build the empirical null distribution of the test statistic.
num_simulations = 10000

# Derive the category proportions from `predicted_yes` so the simulation and
# `test_statistic` cannot drift apart.
null_proportions = make_array(predicted_yes, 1 - predicted_yes)

# Preallocate the result array: growing it with np.append would copy the whole
# array on every iteration.
simulated_stats = np.zeros(num_simulations)

for i in np.arange(num_simulations):
    # One simulated survey of 290 respondents; item(0) is the "agree" proportion.
    simulated_yes = sample_proportions(290, null_proportions).item(0)
    new_stat = test_statistic(simulated_yes)
    simulated_stats[i] = new_stat

In [None]:
# Histogram of the simulated statistics.
null_distribution = Table().with_column('Null distribution', simulated_stats)
null_distribution.hist()

# Vertical line marking the observed statistic.
line_x = [observed_stat, observed_stat]
line_y = [0, 1.4]
_ = plots.plot(line_x, line_y)

In [None]:
# Empirical p-value: the fraction of simulated statistics at least as large as
# the observed one. Dividing by the actual number of simulations keeps this
# correct if the repetition count is ever changed.
num_as_extreme = np.count_nonzero(simulated_stats >= observed_stat)
empirical_pval = num_as_extreme / len(simulated_stats)
empirical_pval

## Deflategate

In [None]:
# Read the football measurements and discard the original 'Team' column.
football_url = 'http://inferentialthinking.com/notebooks/football.csv'
football = Table.read_table(football_url).drop('Team')
football.show()

In [None]:
# Estimated starting values: 12.5 for the first 11 rows and 13 for the last 4.
# NOTE(review): this assumes the table lists one team's 11 balls first and the
# other team's 4 balls last -- confirm against the displayed table.
initials = np.append(np.ones(11) * 12.5, np.ones(4) * 13)

# Halftime weight of each ball: the mean of the two recorded measurements.
halftime_wts = (football.column('Blakeman') + football.column('Prioleau')) / 2

# Several columns are added at once, so use with_columns (plural);
# with_column is the single label/values form.
football = football.with_columns(
    # Strip spaces and digits from both ends of the 'Ball' label to get the team.
    'Team', np.char.strip(football.column('Ball'), ' 1234567890'),
    'Weight at Halftime', halftime_wts,
    'Estimate at Start', initials,
    'Drop', initials - halftime_wts
)
football.show()

In [None]:
def difference_in_average_drop(t):
    """Return the difference between the two teams' average 'Drop'.

    t: a table with 'Team' and 'Drop' columns.

    The rows are grouped by 'Team' and 'Drop' is averaged within each group.
    The result is the second group's average minus the first group's, in the
    order in which `group` lists the teams.
    """
    team_and_drop = t.select('Team', 'Drop')
    averages = team_and_drop.group('Team', np.average).column(1)
    first_avg, second_avg = averages.item(0), averages.item(1)
    return second_avg - first_avg

# Observed test statistic: the difference in average drop computed on the
# actual (unshuffled) football table.
observed = difference_in_average_drop(football)
observed

In [None]:
# Separate the team labels from the drops so the drops can be shuffled
# independently of the labels.
group_labels, drops = football.select('Team'), football.select('Drop')

In [None]:
# One permutation: reorder the drops at random (sampling every row without
# replacement) and pair them with the original team labels.
shuffled_rows = drops.sample(with_replacement=False)
shuffled_drops = shuffled_rows.column(0)
shuffled_tbl = group_labels.with_column('Drop', shuffled_drops)
difference_in_average_drop(shuffled_tbl)

In [None]:
# Build the null distribution of the statistic by repeatedly shuffling the
# drops against the fixed team labels.
num_shuffles = 10000

# Preallocate the result array: growing it with np.append would copy the whole
# array on every iteration.
sampled_stats = np.zeros(num_shuffles)

for i in np.arange(num_shuffles):
    shuffled_drops = drops.sample(with_replacement=False).column(0)
    shuffled_tbl = group_labels.with_column('Drop', shuffled_drops)
    new_diff = difference_in_average_drop(shuffled_tbl)
    sampled_stats[i] = new_diff

In [None]:
# Histogram of the shuffled statistics.
shuffled_distribution = Table().with_column('Null distribution', sampled_stats)
shuffled_distribution.hist()

# Vertical line marking the observed difference.
line_x = [observed, observed]
line_y = [0, 1.4]
_ = plots.plot(line_x, line_y)

In [None]:
# Empirical p-value: the fraction of shuffled statistics at least as large as
# the observed difference.
num_at_least_observed = np.count_nonzero(sampled_stats >= observed)
num_at_least_observed / len(sampled_stats)

## The Toast Myth

We saw the Mythbusters crew do an experiment with 48 pieces of toast, where 29 landed butter side up and 19 butter side down.  Let's see if we can figure out how likely this outcome would be if toast were equally likely to land on either side.  In particular, we'll play a "what-if" game: what if each piece of toast were equally likely to land butter side up or butter side down?  Let's simulate what would happen under that assumption.

In [None]:
# The two possible outcomes for one piece of toast.
sides = make_array('Butter Side Up', 'Butter Side Down')

In [None]:
# One-column table with one row per possible outcome; the simulated
# experiments are drawn from this table.
possible_outcomes = Table().with_column('Outcome', sides)

In [None]:
# Display the table of possible outcomes.
possible_outcomes

In [None]:
# Simulate one experiment: draw 48 rows from the two possible outcomes.
# Sampling is with replacement, which drawing 48 rows from 2 requires.
simulated_experiment = possible_outcomes.sample(48, with_replacement=True)

In [None]:
# Display the simulated experiment.
simulated_experiment

In [None]:
# Tally the simulated pieces of toast by outcome.
simulated_experiment.group('Outcome')

In [None]:
def count_up(sample):
    """Return how many rows of `sample` have the outcome 'Butter Side Up'.

    sample: a table with an 'Outcome' column.

    The rows are counted directly from the column rather than read from a
    grouped table: grouping produces no row for an outcome that never occurs,
    so a sample with no 'Butter Side Up' rows would raise an IndexError there.
    Counting directly returns 0 in that case.
    """
    landed_up = sample.column('Outcome') == 'Butter Side Up'
    # int(...) keeps the return value a plain Python int.
    return int(np.count_nonzero(landed_up))

In [None]:
# Number of butter-side-up pieces in the simulated experiment above.
count_up(simulated_experiment)

## Simulation

Above we saw how to simulate an episode of the TV show (i.e., one experiment), under the "what-if" assumption that toast is equally likely to land on either side.  Now we're going to repeat the simulation 10000 times, and keep track of the statistic (the number of pieces of toast that landed butter-side-up) we get from each simulated TV episode.

In [None]:
# Repeat the 48-piece experiment many times, recording the number of
# butter-side-up pieces from each repetition.
num_repetitions = 10000

# Preallocate the result array: growing it with np.append would copy the whole
# array on every iteration.
counts = np.zeros(num_repetitions)
for i in np.arange(num_repetitions):
    one_simulated_episode = possible_outcomes.sample(48)
    number_up = count_up(one_simulated_episode)
    counts[i] = number_up
results = Table().with_column('Number that landed butter-side-up', counts)

In [None]:
# Display the table of simulated counts.
results

In [None]:
# Histogram of the simulated counts, with bin edges at every integer from 12
# through 35.
results.hist(bins=np.arange(12, 36))

In [None]:
# Fraction of simulated episodes in which at least 29 pieces landed
# butter-side-up (the count reported from the actual experiment). Dividing by
# the table's row count keeps this correct if the number of repetitions changes.
at_least_29_up = results.where('Number that landed butter-side-up',
                               are.above_or_equal_to(29))
at_least_29_up.num_rows / results.num_rows