In [None]:
from datascience import *
import numpy as np
import math

import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
%matplotlib inline

## The Toast Myth

The Mythbusters TV show did an experiment with 48 pieces of toast, where 29 landed butter side up and 19 butter side down.  Let's see if we can figure out how likely this outcome would be if toast were equally likely to land on either side.  In particular, we'll play a "what-if" game: what if toast were equally likely to land on either side?  Let's simulate what would happen under that assumption.

In [None]:
# First, list the two possible results of a single toast drop.
# Each label appears exactly once, so a uniform draw from this array
# models toast that is equally likely to land on either side.
sides = make_array('Butter Side Up', 'Butter Side Down')

In [None]:
# Make that into a one-column table: the 'Outcome' column holds the two side labels,
# one row per possible result.
possible_outcomes = Table().with_column('Outcome', sides)

In [None]:
# Display the table of possible outcomes (one row per side).
possible_outcomes

In [None]:
# Ask for 48 cases where the output is sampled (chosen) from those two possibilities.
# Drawing 48 rows from a 2-row table only works because sample() draws with replacement,
# so each simulated drop is an independent 50/50 pick.
simulated_experiment = possible_outcomes.sample(48)

In [None]:
# Display the 48 simulated drops.
simulated_experiment

In [None]:
# Group the rows by outcome, which also counts them:
# the result has one row per outcome that occurred, with its tally in a 'count' column.
simulated_experiment.group('Outcome')

In [None]:
# To make this a bit more automatic, define a function that provides the butter-side-up count
def count_up(sample):
    """Count the 'Butter Side Up' entries in the Outcome column.

    Parameters
    ----------
    sample : Table
        A table with an 'Outcome' column holding one side label per row.

    Returns
    -------
    int
        The number of rows whose Outcome is 'Butter Side Up'; 0 when no row
        landed butter side up.
    """
    # Compare the column directly rather than going through group().where().item(0):
    # grouping only produces rows for labels that actually occur, so a sample with
    # no butter-side-up rows used to raise IndexError instead of returning 0.
    outcomes = np.asarray(sample.column('Outcome'))
    return int(np.count_nonzero(outcomes == 'Butter Side Up'))

In [None]:
# Always test things!
# The value should equal the 'Butter Side Up' count in the grouped table of simulated_experiment.
count_up(simulated_experiment)

## Simulation

Above we saw how to simulate an episode of the TV show (i.e., one experiment), under the "what-if" assumption that toast is equally likely to land on either side.  Now we're going to repeat the simulation 10000 times, and keep track of the statistic (the number of times the toast landed butter-side-up) we get from each simulated TV episode.

In [None]:
# Simulate many episodes of 48 drops each under the 50/50 assumption,
# recording the butter-side-up count from every episode.
num_repetitions = 10000
counts = np.zeros(num_repetitions)  # one float slot per simulated episode
for rep in range(num_repetitions):
    episode = possible_outcomes.sample(48)
    counts[rep] = count_up(episode)
results = Table().with_column('Number that landed butter-side-up', counts)

In [None]:
# Display the simulated butter-side-up counts, one row per simulated episode.
results

In [None]:
# An alternate form of plotting: hand hist() explicit bin edges.
# Edges at every integer from 12 through 35 give close control over the binning;
# note that this method plots per unit (a density) rather than raw counts.
unit_width_bins = np.arange(12, 36, 1)
results.hist(bins=unit_width_bins)

In [None]:
# With this data, what's the chance of the value they saw or higher?
# This is known as the p-value
observed_up = 29  # butter-side-up landings in the actual 48-drop experiment
at_least_as_extreme = results.where(results['Number that landed butter-side-up'] >= observed_up)
# Divide by the number of simulated episodes actually in the table rather than a
# hard-coded 10000, so the proportion stays correct if the repetition count changes.
at_least_as_extreme.num_rows / results.num_rows

In [None]:
# Quick, before looking at the numbers below:
# what do you expect the mean and std dev of that distribution to be?
up_counts = results.column(0)
np.mean(up_counts), np.std(up_counts)

In [None]:
# Many expect it to be sqrt(24), by analogy with Gaussian or Poisson counting statistics
# (std dev = sqrt(mean), and the mean here is 24).
# But this is actually a binomial distribution with p = 1/2, whose std dev is
# sqrt(n*p*(1-p)) = sqrt(48/4) = sqrt(24)/sqrt(2): smaller, because each drop picks one of two.
math.sqrt(24), math.sqrt(24)/math.sqrt(2)

In [None]:
# try simulating the British school study:
# 9821 waist-high drops with 6101 butter down landings
# With just a B written on the toast: 9748 drops with 5663 B-down
# from 2.5m: 2038 with 953 B-side down (sign reversed!)

# is there something going on?



In [None]:
# Repeat the experiment for the 9821-drop case, using a reasonable number of tries to check p > 0.01
num_repetitions = 300
counts = np.zeros(num_repetitions)  # one float slot per simulated episode
for rep in range(num_repetitions):
    episode = possible_outcomes.sample(9821)
    counts[rep] = count_up(episode)
results = Table().with_column('Number that landed butter-side-up', counts)
results.hist()

In [None]:
# 9821 - 6101 = 3720 butter up is far off the plot - very unlikely under our 50/50 null!

# Repeat for the butter-less "B" test (9748 drops per simulated episode)
num_repetitions = 300
counts = np.zeros(num_repetitions)  # one float slot per simulated episode
for rep in range(num_repetitions):
    episode = possible_outcomes.sample(9748)
    counts[rep] = count_up(episode)
results = Table().with_column('Number that landed butter-side-up', counts)
results.hist()

In [None]:
# 9748 - 5663 = 4085 is again very unlikely: They're landing face down even without butter, so it's not the butter!

# repeat for higher test (the 2.5 m drops: 2038 drops, 953 landing B-side down)
num_repetitions = 2000
observed_up = 2038 - 953  # 1085 landed B-side up in the real experiment
counts = make_array()
for i in np.arange(num_repetitions):
    one_simulated_episode = possible_outcomes.sample(2038)
    number_up = count_up(one_simulated_episode)
    counts = np.append(counts, number_up)
results = Table().with_column('Number that landed butter-side-up', counts)
results.hist()
# Empirical p-value: the fraction of simulated episodes with at least as many "up" landings
# as observed. The threshold must be this experiment's 1085 (not the 4085 from the 9748-drop
# test), and the denominator must be the number of repetitions run here.
results.where(results['Number that landed butter-side-up'] >= observed_up).num_rows / num_repetitions

In [None]:
# now 2038 - 953 = 1085 butter up is closer to the peak, but still inconsistent with 50/50;
# something important has changed with height.  What is it? Time to rotate while it falls?  
# Can this experiment test that?