# Random samples

In [None]:
# numpy is a widely used third-party Python library for numerical computing
import numpy as np

In [None]:
coin = ['heads','tails']

In [None]:
np.random.choice(coin)

In [None]:
def flipcoin():
    """Simulate a single coin toss.

    Returns:
        One entry of the module-level ``coin`` list, selected by
        ``np.random.choice``.
    """
    outcome = np.random.choice(coin)
    return outcome

In [None]:
flipcoin()

If we want to flip a coin n times:

In [None]:
def flips(n):
    """Toss the coin ``n`` times and collect the outcomes.

    Args:
        n: Number of tosses; handed directly to ``range``.

    Returns:
        A list with the result of each ``flipcoin()`` call, in the order
        the tosses were made.
    """
    return [flipcoin() for _ in range(n)]

In [None]:
print(flips(20))

Computers are convenient because we can get them to do our boring, repetitive work thousands and thousands (and millions!) of times.

We can also make a function to count the number of times 'heads' came up in our trial:

In [None]:
def countheads(a):
    """Count how many entries of ``a`` equal the string ``'heads'``.

    Args:
        a: Iterable of flip outcomes.

    Returns:
        The number of entries that compare equal to ``'heads'`` (0 when
        there are none).
    """
    return sum(1 for outcome in a if outcome == 'heads')

In [None]:
countheads(flips(500))

This enables us to generalize to the distribution of heads in a given number of samples, with each sample having a given number of coin flips.

In [None]:
def coinsamples(numsamples, numflips):
    """Run several coin-flip samples and record the heads count of each.

    Args:
        numsamples: How many independent samples to generate.
        numflips: How many coin flips make up one sample.

    Returns:
        A list of length ``numsamples``; each entry is the number of
        ``'heads'`` found by ``countheads`` in one ``flips(numflips)`` run.
    """
    return [countheads(flips(numflips)) for _ in range(numsamples)]

In [None]:
coinsamples(10, 20)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Histogram of the heads count per sample: 100 samples of 300 flips each.
numsamples = 100
numflips = 300
headcounts = coinsamples(numsamples, numflips)
plt.hist(headcounts)
# Show the whole range of possible head counts, from 0 up to numflips.
plt.xlim(0, numflips)

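The histogram looks like a Gaussian (normal) distribution. The number of heads in a fixed number of flips follows a binomial distribution, which is well approximated by a Gaussian when the number of flips is large.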

## Gaussians

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

In [None]:
xnums = np.linspace(-3, 3, 10000)

In [None]:
xnums

In [None]:
ynums = stats.norm.pdf(xnums, 0, 1)

In [None]:
ynums

In [None]:
plt.plot(xnums, ynums)

In [None]:
def makedist(mymean=0,mystddev=1):
    """Plot the normal probability density for a given mean and standard deviation.

    The density is evaluated at 1000 evenly spaced points on [-3, 3] and the
    y-axis is fixed to [0, 0.5], so the axes stay the same from call to call.

    Args:
        mymean: Mean of the normal distribution (default 0).
        mystddev: Standard deviation of the normal distribution (default 1).
    """
    grid = np.linspace(-3, 3, 1000)
    density = stats.norm.pdf(grid, loc=mymean, scale=mystddev)
    plt.plot(grid, density)
    plt.ylim(0, 0.5)
    plt.show()

In [None]:
makedist(0.5)

### Random fun: we can make this a bit more interactive to get a better conceptual understanding

In [None]:
import ipywidgets

In [None]:
ipywidgets.interact(makedist,mymean=(-3,3,0.2),mystddev=(0.1,3,0.2));

In [None]:
np.random.seed(1)

In [None]:
xnums = np.random.randn(10000)

In [None]:
plt.hist(xnums);

In [None]:
def makenorm(n):
    """Draw ``n`` samples with ``np.random.randn`` and show a 100-bin histogram.

    Args:
        n: Number of samples to draw.
    """
    plt.hist(np.random.randn(n), bins=100)
    plt.show()

In [None]:
ipywidgets.interact(makenorm,n=(0,10000,100))

In [None]:
np.random.choice([0,1])

In [None]:
def makenorm(n,flips):
    """Histogram the number of successes in repeated runs of biased coin flips.

    Each flip yields 1 with probability 0.2 and 0 with probability 0.8. The
    successes of ``flips`` consecutive flips are summed to give one trial, and
    the totals of ``n`` trials are plotted.

    Args:
        n: Number of trials (one histogram entry per trial).
        flips: Number of flips in each trial.
    """
    xnums = []
    for _ in range(n):
        successes = 0
        for _ in range(flips):
            if np.random.choice([1,0],p=[0.2,0.8]):
                successes += 1
        xnums.append(successes)
    # One unit-wide bin for every possible total 0..flips. np.arange leaves
    # out its stop value, so the stop must be flips + 1.5 for the last edge to
    # land at flips + 0.5. The earlier hard-coded 40.5 stopped at 39.5 and
    # ignored the ``flips`` argument altogether.
    plt.hist(xnums, bins=np.arange(-0.5, flips + 1.5, 1), width=0.8)
    plt.xlim(-0.5,0.5+flips)
    plt.show()
makenorm(100,40)

In [None]:
def makenorm(n,flips,probheads):
    """Histogram the number of heads in repeated runs of biased coin flips.

    Each flip yields 1 with probability ``probheads`` and 0 otherwise. The
    heads of ``flips`` consecutive flips are summed to give one trial, and the
    totals of ``n`` trials are plotted.

    Args:
        n: Number of trials (one histogram entry per trial).
        flips: Number of flips in each trial.
        probheads: Probability that a single flip yields 1.
    """
    xnums = []
    for _ in range(n):
        heads = 0
        for _ in range(flips):
            if np.random.choice([1,0],p=[probheads, 1-probheads]):
                heads += 1
        xnums.append(heads)
    # One unit-wide bin for every possible total 0..flips. np.arange leaves
    # out its stop value, so the stop must be flips + 1.5; with flips + 0.5 the
    # last edge was flips - 0.5 and a total equal to ``flips`` fell outside
    # every bin (an empty plot when probheads is 1.0).
    plt.hist(xnums, bins=np.arange(-0.5, flips + 1.5, 1))
    plt.xlim(-0.5,0.5+flips)
    plt.show()
makenorm(100,40,0.3)

In [None]:
ipywidgets.interact(makenorm,n=(1,1000,10),flips=(1,100,10),probheads=(0.0,1.0,0.1))

In [None]:
def makenorm(n,m,s):
    """Draw ``n`` normal samples shifted by ``m`` and scaled by ``s``, then plot them.

    The sample mean and standard deviation are printed, and the samples are
    shown as a 100-bin histogram on fixed axes (x in [-10, 10], y in [0, 200]).

    Args:
        n: Number of samples to draw.
        m: Offset added to every sample (the target mean).
        s: Factor multiplying every sample (the target standard deviation).
    """
    samples = m + s*np.random.randn(n)
    sample_mean = np.mean(samples)
    sample_std = np.std(samples)
    print('mean = ',sample_mean)
    print('stddev = ',sample_std)
    plt.hist(samples, bins=100)
    # Fixed axes so that changes in n, m and s are easy to compare.
    plt.xlim(-10,10)
    plt.ylim(0,200)
    plt.show()
    
ipywidgets.interact(makenorm,n=(0,10000,100),m=(-3,3),s=(1,5));

In [None]:
x = np.random.randint(1,7,50)

In [None]:
x

In [None]:
np.mean(x)

In [None]:
plt.hist(x);

In [None]:
def diceroll(n):
    """Roll a six-sided die ``n`` times and plot how often each face came up.

    Args:
        n: Number of rolls; each roll is an integer from 1 to 6 drawn with
            ``np.random.randint(1, 7, n)``.
    """
    x = np.random.randint(1,7,n)
    # One unit-wide bin centred on each face 1..6. The default of 10 bins
    # gives 0.5-wide bins over [1, 6], which leaves gaps between faces and
    # draws the bars for 5 and 6 directly next to each other.
    plt.hist(x, bins=np.arange(0.5, 7.5, 1))
    plt.show()
    
ipywidgets.interact(diceroll,n=(0,10000,100));

## Racial sampling

[The following is based on an example from Berkeley's Data8 JupyterBook, [linked here](https://inferentialthinking.com/chapters/11/1/Assessing_a_Model.html)]

Calculating random numbers from a distribution can be more than just a mathematical exercise. Statistics and coding can be used to expose racism and inequality.

### Amendment VI of the United States Constitution
“In all criminal prosecutions, the accused shall enjoy the right to a speedy and public trial, by an impartial jury of the State and district wherein the crime shall have been committed.”
 
### The Supreme Court case of Robert Swain
Robert Swain was a Black man convicted in Talladega County, Alabama, in 1962. His case was appealed all the way up to the U.S. Supreme Court based on the claim that Black people were systematically excluded from juries in Talladega County.  (This case also involved issues related to peremptory challenges -- I recommend reading the above link and associated references if you are interested).

### A few details
* In Talladega County, 26% of men were Black.
* Only 8 men among the 100-member jury panel in Robert Swain's case were Black.
* Robert Swain also pointed out that this county's jury panels over the past 10 years had only contained a small percentage of Black panelists.
* The U.S. Supreme Court wrote that “the overall percentage disparity has been small,” and Robert Swain was later sentenced to life in prison.

### Our question
* Is it reasonable to expect that a jury panel in this county could have only 8% Black membership?

### The model
* A hypothesis about the world
* The panel was selected at random
* Having only 8 Black members on the 100-member panel was just due to chance

We can assess this model with code
* Simulate data based on the model
* Show what the data would be like if panel members were selected at random
* Compare the simulated data with the real data
* If they're not consistent, reject the model

In [None]:
elems = ['b','w']
elems_perc = [0.26, 0.74]

In [None]:
np.random.choice(elems, p=elems_perc)

In [None]:
# Draw one panel of 100 members, each picked from elems with weights elems_perc.
panelsize = 100
panel = [np.random.choice(elems, p=elems_perc) for _ in range(panelsize)]
print('Number of Black members on the panel: ', panel.count('b'))

In [None]:
print(panel)

In [None]:
numpanels = 10000
panelsize = 100
numblackmembers = []

# Simulate many random panels and record how many 'b' draws each one contains.
for panel_idx in range(numpanels):
    panel = [np.random.choice(elems, p=elems_perc) for _ in range(panelsize)]
    numblackmembers.append(panel.count('b'))

# Unit-wide bins; the y-axis is capped at 5 so that only the rarely
# occurring counts remain distinguishable.
plt.hist(numblackmembers, bins=np.arange(5.5, 46.5, 1), width=0.75)
plt.xlim(5.5,46.5)
plt.ylim(0,5)

In [None]:
# Same histogram of numblackmembers as the previous plot, but with the
# y-axis cap left off so the full bar heights are visible.
plt.hist(numblackmembers, bins=np.arange(5.5, 46.5, 1), width=0.75)
plt.xlim(5.5,46.5)
#plt.ylim(0,5)