In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Review 

- Located at https://pollev.com/jeremysanchez

## New material

### Jury selection in Alameda County

The `Panels` proportions in this table are based on a total of 1423 jurors.

In [None]:
# Jury data for Alameda County: one row per ethnicity, with its proportion
# in the 'Eligible' column and its proportion in the 'Panels' column.
jury = (
    Table()
    .with_column('Ethnicity', make_array('Asian', 'Black', 'Latino', 'White', 'Other'))
    .with_column('Eligible', make_array(0.15, 0.18, 0.12, 0.54, 0.01))
    .with_column('Panels', make_array(0.26, 0.08, 0.08, 0.54, 0.04))
)

jury

In [None]:
# Bar chart of the 'Eligible' and 'Panels' proportions, grouped by ethnicity.
jury.barh('Ethnicity')

Below is the distribution of the eligible population. Under the model we are testing, the 1423 jurors were randomly sampled from this distribution.

In [None]:
# Category proportions used as the model for the simulations below.
# These are the same values, in the same order, as the 'Eligible' column of `jury`.
model = make_array(0.15, 0.18, 0.12, 0.54, 0.01)

In [None]:
# One simulated panel: draw a sample of size 1423 according to `model`.
# sample_proportions (datascience library) is expected to return the
# proportion of the sample that falls in each category.
simulated = sample_proportions(1423, model)
simulated

In [None]:
# Put the simulated proportions next to the eligible and panel proportions,
# storing the result as a separate table.
jury_with_simulated = jury.with_column('Simulated', simulated)
jury_with_simulated

In [None]:
# Bar chart comparing 'Eligible', 'Panels', and 'Simulated' for each ethnicity.
jury_with_simulated.barh('Ethnicity')

### We need a new statistic!

In [None]:
# Per-ethnicity difference: panel proportion minus eligible proportion.
# Positive values mean the group's share of the panels exceeds its eligible share.
diffs = jury.column('Panels') - jury.column('Eligible')
jury_with_difference = jury.with_column('Difference', diffs)
jury_with_difference

In [None]:
# Add up only the positive differences (rows where 'Difference' is above 0).
sum(jury_with_difference.where('Difference', are.above(0)).column('Difference'))

In [None]:
# Half of the sum of the absolute differences over all categories.
sum(abs(jury_with_difference.column('Difference')))/2

### The Total Variation Distance (**TVD**)

In [None]:
def tvd(dist1, dist2):
    """Return the total variation distance (TVD) between two distributions.

    The TVD is half the sum of the absolute differences between
    corresponding entries of the two distributions.

    Parameters
    ----------
    dist1, dist2 : array-like of numbers
        Proportions over the same categories, listed in the same order.
        NumPy arrays, lists, and tuples are all accepted.

    Returns
    -------
    float
        Half of the summed absolute element-wise differences.
    """
    # Convert up front so plain Python sequences subtract element-wise
    # instead of raising TypeError on `list - list`. Arrays pass through
    # unchanged, so existing callers see the same result as before.
    dist1 = np.asarray(dist1)
    dist2 = np.asarray(dist2)
    return sum(abs(dist1 - dist2))/2

In [None]:
# Observed statistic: TVD between the 'Panels' and 'Eligible' distributions.
obsvd_tvd = tvd(jury.column('Panels'), jury.column('Eligible'))
obsvd_tvd

In [None]:
# One simulated value of the statistic: the TVD between a simulated panel of
# 1423 and the eligible distribution.
# Stored under its own name so that re-running this cell can never overwrite
# the `simulated_tvd` function that the simulation loop calls.
one_simulated_tvd = tvd(sample_proportions(1423, model), jury.column('Eligible'))
one_simulated_tvd

In [None]:
def simulated_tvd():
    """Simulate one sample of size 1423 under `model` and return its TVD from `model`."""
    sample_distribution = sample_proportions(1423, model)
    return tvd(sample_distribution, model)

num_simulations = 10000

# Simulate the statistic `num_simulations` times and collect the results.
# The array is built once at the end: np.append copies the whole array on
# every call, which makes an append-in-a-loop quadratic in the number of
# simulations.
tvds = np.array([simulated_tvd() for _ in range(num_simulations)])

In [None]:
# Histogram of the simulated TVDs, with the observed TVD marked for comparison.
title = 'Simulated TVDs (if model is true)'
# Bin edges start at 0 and step by 0.005, nominally stopping short of 0.05.
bins = np.arange(0, .05, .005)

Table().with_column(title, tvds).hist(bins = bins)
print('The observed TVD of ' + str(obsvd_tvd) + ' is nowhere close to the simulated TVDs!')

# Plotting details; ignore this code
# Sets the y-axis limits, then draws a red point at (obsvd_tvd, 0)
# on the current axes.
plots.ylim(-2, 55)
plots.scatter(obsvd_tvd, 0, color='red', s=30);