In [None]:
from datascience import *
import numpy as np

import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
%matplotlib inline

## Monty Hall

In [None]:
# The three door numbers a contestant can choose from.
doors = make_array(1, 2, 3)
doors

In [None]:
# One car and two goats; the goats carry distinct labels so every prize is unique.
prizes = make_array('car', 'first goat', 'second goat')
prizes

In [None]:
# Drawing all three prizes without replacement returns them in a random order.
np.random.choice(prizes, 3, replace=False)

In [None]:
def setup():
    """Build one randomly arranged game of Let's Make a Deal.

    Returns a table with a 'Door' column holding the door numbers and a
    'Prize' column holding the three prizes in a random order (all three
    prizes are drawn without replacement, so each appears exactly once).
    """
    shuffled_prizes = np.random.choice(prizes, 3, replace=False)
    game = Table().with_columns('Door', doors, 'Prize', shuffled_prizes)
    return game

# Create one example game to use in the demonstrations that follow.
a_game = setup()
a_game

In [None]:
def behind(game, door):
    """Look up which prize a given door conceals in ``game``.

    Keeps only the rows whose 'Door' value equals ``door`` and returns the
    first 'Prize' value among them.
    """
    matching_rows = game.where('Door', door)
    prize_column = matching_rows.column('Prize')
    return prize_column.item(0)

# Walk through every door and report the prize it conceals in the example game.
for door in doors:
    prize_here = behind(a_game, door)
    print('Behind door number', door, 'is the', prize_here)

In [None]:
def location(game, prize):
    """Find the door number that conceals ``prize`` in ``game``.

    Keeps only the rows whose 'Prize' value equals ``prize`` and returns the
    first 'Door' value among them.
    """
    matching_rows = game.where('Prize', prize)
    door_column = matching_rows.column('Door')
    return door_column.item(0)

# Walk through every prize and report which door conceals it in the example game.
for prize in prizes:
    door_number = location(a_game, prize)
    print('The', prize, 'is behind door number', door_number)

In [None]:
# The contestant's example guess: one door chosen at random from `doors`.
a_guess = np.random.choice(doors)
a_guess

In [None]:
def expose(game, guess):
    """Pick the goat door that Monty Hall opens after the contestant guesses.

    When the guessed door hides a goat, the other goat is revealed. When it
    hides anything else (the car), one of the two goats is chosen at random.
    Returns the door number that hides the revealed goat.
    """
    # Maps the goat the contestant picked to the goat Monty must show instead.
    other_goat = {'first goat': 'second goat', 'second goat': 'first goat'}

    guessed_prize = behind(game, guess)
    if guessed_prize in other_goat:
        reveal = other_goat[guessed_prize]
    else:
        reveal = np.random.choice(['first goat', 'second goat'])
    return location(game, reveal)

# Demonstrate Monty's reveal for the example game and guess.
an_exposed_door = expose(a_game, a_guess)
guessed_prize = behind(a_game, a_guess)
exposed_prize = behind(a_game, an_exposed_door)
print('The contestant picked door', a_guess, 'which hides the', guessed_prize)
print('Monty Hall opens door', an_exposed_door, 'to expose the', exposed_prize)

In [None]:
def other(game, guess, exposed):
    """Return the door that is neither the guess nor the exposed door.

    The doors are numbered 1, 2 and 3, so removing the two known door numbers
    from their total leaves the number of the remaining door. ``game`` is
    accepted but not used.
    """
    door_total = sum((1, 2, 3))
    return door_total - (guess + exposed)

# Identify the one door left over in the example game and show what it hides.
the_remaining_door = other(a_game, a_guess, an_exposed_door)
remaining_prize = behind(a_game, the_remaining_door)
print('The remaining door', the_remaining_door, 'hides the', remaining_prize)

In [None]:
# Simulate many independent games and record, for each one, the prize behind
# the contestant's guess, the door Monty exposes, and the remaining door.
#
# Results are collected in plain lists and the table is built once at the end.
# Appending to a Table row by row re-copies every column on each call, which
# makes a loop of this size needlessly slow.
num_games = 10000
guess_prizes, exposed_prizes, remaining_prizes = [], [], []

for _ in np.arange(num_games):
    game = setup()
    guess = np.random.choice(doors)
    exposed = expose(game, guess)
    remaining = other(game, guess, exposed)
    guess_prizes.append(behind(game, guess))
    exposed_prizes.append(behind(game, exposed))
    remaining_prizes.append(behind(game, remaining))

trials = Table().with_columns(
    'trial', np.arange(num_games) + 1,  # trials are numbered from 1
    'guess', guess_prizes,
    'exposed', exposed_prizes,
    'remaining', remaining_prizes,
)
trials

In [None]:
# Count the trials by the prize behind the contestant's original guess.
trials.group('guess')

In [None]:
# Count the trials by the prize behind the door Monty Hall exposed.
trials.group('exposed')

In [None]:
# Count the trials by the prize behind the remaining door.
trials.group('remaining')

In [None]:
# Put the per-prize counts for the original guess and for the remaining door
# side by side, then draw them as a horizontal bar chart keyed on the prize.
guess_counts = trials.group('guess')
remaining_counts = trials.group('remaining')
compare = guess_counts.join('guess', remaining_counts, 'remaining')

labeled = compare.relabeled(0, 'Prize')
labeled = labeled.relabeled(1, 'Guess Count')
labeled = labeled.relabeled(2, 'Remaining Count')
labeled.barh(0)

## Probability

#### Tickets

Three tickets: Red (R), Green (G), Blue (B).

What's the chance of drawing G then R (written GR) when sampling two tickets without replacement?

The six equally likely ordered outcomes: RB RG BR BG GR GB

P(GR) = 1/6

P(one R and one G) = P(GR) + P(RG) = 1/6 + 1/6 = 1/3


In [None]:
# Give every door the same weight; the 'chance' column is displayed using
# DistributionFormatter.
(
    Table()
    .with_column('door', doors)
    .with_column('chance', 1)
    .set_format('chance', DistributionFormatter)
)

In [None]:
# Count the trials for each distinct (guess, exposed, remaining) combination.
outcomes = trials.group(['guess', 'exposed', 'remaining'])
outcomes

In [None]:
# Attach a theoretical chance to each row of `outcomes` so it can be compared
# with the simulated counts. The four values are 1/6, 1/6, 1/3 and 1/3.
# NOTE(review): the list is positional. It assumes `outcomes` has exactly four
# rows and that the two rows where the guess is the car come first (where
# Monty presumably has two goats to choose between, hence the factor 1/2).
# Confirm against the displayed `outcomes` table.
changes = outcomes.with_column(
    'chance', 
    [1/3 * 1 * 1/2, 
     1/3 * 1 * 1/2, 
     1/3 * 1 * 1,
     1/3 * 1 * 1]).set_format(['count', 'chance'], DistributionFormatter)
changes

Roll a die 4 times. What's P(at least one 6)?

In [None]:
# P(at least one 6 in 4 rolls) = 1 - P(no 6 on any of the 4 rolls) = 1 - (5/6)^4
1 - (5/6) ** 4

In [None]:
# Tabulate the chance of seeing at least one 6 for every number of rolls
# from 1 through 50, using the complement rule 1 - (5/6)^n.
rolls = np.arange(1, 51, 1)
chance_of_a_six = 1 - (5/6)**rolls

results = Table().with_columns(
    'Rolls', rolls,
    'Chance of at least one 6', chance_of_a_six,
)
results.set_format(1, PercentFormatter(3))

In [None]:
# Plot the chance of at least one 6 against the number of rolls.
results.scatter('Rolls')

In [None]:
# Show the row for 50 rolls.
results.where('Rolls', 50)

In [None]:
# Simulate 50 rolls of a die with faces 1 through 6.
# Note: this rebinds `rolls`, which previously held the roll counts 1..50.
rolls = np.random.choice(np.arange(6)+1, 50)
rolls

In [None]:
# Number of 6s among the simulated rolls.
np.count_nonzero(rolls==6)

In [None]:
# Estimate by simulation the chance of at least one 6 in 50 rolls of a die.
#
# Distinct names are used here so that the `trials` table and the `rolls`
# array from earlier cells are not overwritten; reusing those names would make
# the earlier cells fail if they were re-run after this one.
num_trials = 300000
die_faces = np.arange(6) + 1  # faces 1..6; built once instead of on every iteration
successes = 0

for _ in np.arange(num_trials):
    sample = np.random.choice(die_faces, 50)
    # A trial succeeds when any of its 50 rolls shows a 6.
    if np.any(sample == 6):
        successes = successes + 1

print(PercentFormatter(3).format_value(successes/num_trials))

In [None]:
# Load the movie table and add a leading 'Row Index' column numbering the
# rows from 0.
movies = Table.read_table('top_movies.csv')
row_indices = np.arange(movies.num_rows)
top = movies.with_column('Row Index', row_indices).move_to_start('Row Index')
top

In [None]:
# Select the rows at indices 3, 5 and 7.
top.take([3, 5, 7])

In [None]:
# Keep only the movies whose 'Title' contains the text 'and the'.
top.where('Title', are.containing('and the'))