In [None]:
#!pip install datascience
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Random choice

In [None]:
# A two-element array holding the two faces of a coin.
coin = make_array('Heads', 'Tails')
coin

In [None]:
# One flip: pick a single element of coin at random.
np.random.choice(coin)

In [None]:
# Ten flips: the second argument is how many elements to draw
# (np.random.choice samples with replacement by default).
np.random.choice(coin, 10)

In [None]:
# Randomly assign 10 individuals to one of two groups.
two_groups = make_array('treatment', 'control')
outcomes = np.random.choice(two_groups, 10)
outcomes

In [None]:
# Comparing an array to a value compares each element, giving an array of booleans.
outcomes == 'control'

In [None]:
# sum(outcomes)  # left commented out: summing an array of strings raises a TypeError

In [None]:
# True counts as 1 and False as 0 when summed, so this is the number of 'control' outcomes.
sum(outcomes == 'control')

In [None]:
# Same counting trick for the 'treatment' outcomes.
sum(outcomes == 'treatment')

## Conditional Statements

In [None]:
# Only the first branch whose condition is true runs; `else` catches everything left over.
# Change x and re-run the cell to see the other branches.
x = 0
if x > 0:
    print('positive')
elif x < 0:
    print('negative')
else:
    print('zero')

In [None]:
def sign(x):
    """Describe the sign of a number.

    Returns the string 'positive' when x is greater than zero, 'negative'
    when x is less than zero, and 'zero' otherwise.
    """
    # Each return leaves the function immediately, so no elif/else is needed.
    if x > 0:
        return 'positive'
    if x < 0:
        return 'negative'
    return 'zero'

In [None]:
# Call the function on a negative number.
sign(-3)

In [None]:
# One negative, one positive and one zero value to sample from.
numbers=make_array(-1,1,0)
numbers

In [None]:
# Build a one-column table of 30 random draws from numbers.
random_numbers=Table().with_column("RN",np.random.choice(numbers, 30))
# apply calls sign on each entry of the "RN" column; the table with the extra
# "Signs" column is displayed here but not assigned to a name.
random_numbers.with_column("Signs",random_numbers.apply(sign,"RN"))

## For Statements

In [None]:
# The integers 0, 1, 2, 3 (the end value 4 is excluded).
np.arange(4)

In [None]:
# The indented body runs once per element of the array, with i bound to each element in turn.
for i in np.arange(4):
    print('iteration', i)

In [None]:
# A for loop can walk over any sequence; here the elements are strings, a number and a list.
for things in ['a',"b",'c', 1, [1,2,3]]:
    print('This time through the for loop * things * is ', things, '.')

In [None]:
# Reminder of what coin contains.
coin

In [None]:
# One experiment: flip 100 times and count how many flips came up 'Heads'.
sum(np.random.choice(coin, 100) == 'Heads')

That was one experiment (flipping a coin 100 times). What if we wanted to repeat the experiment 10,000 times and produce a table of results?

In [None]:
# Start an array that holds the result of a single experiment.
num_heads = make_array(sum(np.random.choice(coin, 100) == 'Heads'))
num_heads

In [None]:
# np.append returns a new array with the value added at the end, so the result
# must be assigned back to num_heads. Each re-run of this cell adds one more result.
num_heads = np.append(num_heads, sum(np.random.choice(coin, 100) == 'Heads'))
num_heads

In [None]:
# Repeat the 100-flip experiment 10,000 times, collecting the number of heads each time.
# Starting from an empty array makes the cell safe to re-run: earlier results are discarded.
# NOTE(review): the empty array is presumably float-typed, so the counts are
# likely stored as floats (e.g. 52.0) — confirm if integer display matters.
num_heads = make_array()
for i in np.arange(10000):
    num_heads = np.append(num_heads, sum(np.random.choice(coin, 100) == 'Heads'))
num_heads

In [None]:
# Sanity check: one entry per experiment.
len(num_heads)

In [None]:
# Put the simulated counts in a table and draw their histogram,
# using unit-width bins with edges 30, 31, ..., 69.
t = Table().with_column('Heads in 100 coin flips', num_heads)
t.hist(bins=np.arange(30, 70, 1))

In [None]:
# Count how many experiments produced each number of heads, then keep the counts near 50.
# NOTE(review): per the datascience docs, are.between(x, y) includes x but excludes y,
# so this keeps 40 through 59; are.between_or_equal_to(40, 60) would give a symmetric range.
most = t.group('Heads in 100 coin flips')\
        .where('Heads in 100 coin flips', are.between(40, 60))
most.show()

In [None]:
# Percentage of all experiments whose number of heads fell in the selected range.
sum(most.column('count')) / t.num_rows * 100

In [None]:
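# No accident that it's about 95%: the number of heads in 100 fair flips has
# mean 50 and SD 5, so this range is roughly the mean plus or minus 2 SDs.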

## Monty Hall

In [None]:
# The three door numbers.
doors = make_array(1, 2, 3)
doors

In [None]:
# The three prizes; the goats are named so they can be told apart.
prizes = make_array('car', 'first goat', 'second goat')
prizes

In [None]:
# Drawing all 3 prizes with replace=False (no repeats) gives them in a random order.
np.random.choice(prizes, 3, replace=False)

In [None]:
def setup():
    """Return a random game of Let's Make a Deal.

    The game is a two-column table: 'Door' holds the door numbers from
    `doors`, and 'Prize' holds the three `prizes` in a random order.
    """
    # Sampling all three prizes without replacement is a random shuffle.
    shuffled_prizes = np.random.choice(prizes, 3, replace=False)
    return Table().with_columns('Door', doors, 'Prize', shuffled_prizes)

# Create one example game to use in the cells that follow.
a_game = setup()
a_game

In [None]:
def behind(game, door):
    """Return the prize behind a door.

    Looks up the row of `game` whose 'Door' value equals `door` and
    returns that row's 'Prize'.
    """
    matching_rows = game.where('Door', door)
    prizes_found = matching_rows.column('Prize')
    # item(0) takes the first (here: only) matching entry.
    return prizes_found.item(0)

# Reveal what is behind every door of the example game.
for door in doors:
    print('Behind door number', door, 'is the', behind(a_game, door))

In [None]:
def location(game, prize):
    """Return the door that hides a prize.

    Looks up the row of `game` whose 'Prize' value equals `prize` and
    returns that row's 'Door'.
    """
    matching_rows = game.where('Prize', prize)
    doors_found = matching_rows.column('Door')
    # item(0) takes the first (here: only) matching entry.
    return doors_found.item(0)

# The reverse lookup: report the door hiding each prize in the example game.
for prize in prizes:
    print('The', prize, 'is behind door number', location(a_game, prize))

In [None]:
# The contestant's initial pick: one of the three doors, chosen at random.
a_guess = np.random.choice(doors)
a_guess

In [None]:
def expose(game, guess):
    """Return the door that Monty Hall opens to expose a goat.

    If the contestant's guess hides a goat, Monty must open the door with
    the other goat. If the guess hides the car, Monty opens one of the two
    goat doors chosen at random.
    """
    picked_prize = behind(game, guess)
    if picked_prize == 'first goat':
        return location(game, 'second goat')
    if picked_prize == 'second goat':
        return location(game, 'first goat')
    # The contestant picked the car, so either goat may be shown.
    random_goat = np.random.choice(['first goat', 'second goat'])
    return location(game, random_goat)

# Play Monty's move for the example game and narrate what happened.
an_exposed_door = expose(a_game, a_guess)
print('The contestant picked door', a_guess, 'which hides the', behind(a_game, a_guess))
print('Monty Hall opens door', an_exposed_door, 'to expose the', behind(a_game, an_exposed_door))

In [None]:
def other(game, guess, exposed):
    """Return the door that is neither the guess nor the exposed door.

    The doors are numbered 1, 2 and 3, so taking the two known door numbers
    away from the total of all three leaves the remaining door. The `game`
    argument is accepted but not used.
    """
    all_doors_total = 1 + 2 + 3
    known_doors_total = guess + exposed
    return all_doors_total - known_doors_total

# The door the contestant could switch to in the example game.
the_remaining_door = other(a_game, a_guess, an_exposed_door)
print('The remaining door', the_remaining_door, 'hides the', behind(a_game, the_remaining_door))

In [None]:
# Simulate 10,000 independent games and record the outcome of each one.
# A different way to create a table: provide the labels as a list
trials = Table(['trial', 'guess', 'exposed', 'remaining'])
for i in np.arange(1, 10000+1):
    # Each trial is a fresh game: new prize arrangement, new random guess.
    game = setup()
    guess = np.random.choice(doors)
    exposed = expose(game, guess)
    remaining = other(game, guess, exposed)
    # Append one row to the table
    # Note the row stores the PRIZE behind each door (via behind), not the door number.
    trials.append([i, behind(game, guess), behind(game, exposed), behind(game, remaining)])
    
# Show the table of simulated games.
trials

In [None]:
# How often each prize was behind the contestant's original guess.
trials.group('guess')

In [None]:
# How often each prize was behind the door Monty opened (expose only ever picks a goat).
trials.group('exposed')

In [None]:
# How often each prize was behind the remaining door, i.e. what switching would win.
trials.group('remaining')

In [None]:
# Put the two sets of counts side by side, one row per prize:
# count the prizes behind the original guess, join with the counts of prizes behind
# the remaining door (matching on the prize name), and give the columns readable labels.
compare = trials.group('guess')\
                .relabeled('count', 'Guess Count')\
                .join('guess', trials.group('remaining'), 'remaining')\
                .relabeled('count', 'Remaining Count')\
                .relabeled('guess', 'Prize')
compare

In [None]:
# Horizontal bar chart of both counts, with one group of bars per prize.
compare.barh('Prize')

## Probability

#### 3 Cards: Red, Green, Blue

What's the chance of GR (first G, second R) when sampling two cards without replacement?



A. 1/6

B. 1/3

C. 1/2

D. Something else


<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>




 

The six equally likely outcomes are: RB RG BR BG GR GB.
Exactly one of them is GR, so the probability is 1/6 (answer A).

What's the probability of drawing the red card either first or second when drawing without replacement?

#### Multiplication rule

P(GR) = P(drew G first)  ×  P(drew R second given that drew G first)

P(GR) = 1/3  ×  1/2 = 1/6

#### Addition rule

What's the chance of one R and one G in any order when sampling two cards without replacement?













P(one R and one G in any order) = P(GR) + P(RG) = 1/6 + 1/6 = 1/3

#### Example: Flipping coin ten times 

P(at least one H) = 1 - P(no H) = 1 - P(TTTTTTTTTT) = 1 - (1/2)**10

In [None]:
# Complement rule: 1 minus the chance that all ten flips are tails.
1-(1/2)**10

Roll a six-sided die 20 times. What's P(at least one 6)?

<br/>
<br/>
<br/>
<br/>
<br/>
<br/>

### Calculation

In [None]:
# Complement rule: 1 minus the chance that none of the 20 rolls is a 6.
1 - (5/6) ** 20

In [None]:
# Tabulate the chance of at least one 6 for every number of rolls from 1 to 50.
rolls = np.arange(1, 51, 1)
at_least_one = Table().with_columns(
    'Rolls', rolls,
    # Array arithmetic: the exponent is applied to each number of rolls at once.
    'Chance of at least one 6', 1 - (5/6)**rolls
)
# Show column 1 (the chances) as percentages.
# NOTE(review): the 3 is presumably the number of decimal places — confirm.
at_least_one.set_format(1, PercentFormatter(3))

In [None]:
# Plot the chance against the number of rolls.
at_least_one.scatter('Rolls')

In [None]:
# Look up the row for 20 rolls to compare with the direct calculation.
at_least_one.where('Rolls', 20)