In [1]:
import numpy as np
import pandas as pd

### How likely is it that you roll doubles when rolling two dice?

In [2]:
# Simulate rolling a pair of six-sided dice: every row is one simulated roll
# of the pair and every column is one die.
outcomes = list(range(1, 7))
n_simulations = 100_000
n_trials = 2  # dice per simulation

rolls = np.random.choice(outcomes, size=(n_simulations, n_trials))
rolls

array([[4, 4],
       [1, 3],
       [1, 6],
       ...,
       [4, 4],
       [4, 3],
       [4, 3]])

In [3]:
# A roll counts as "doubles" when both dice (columns 0 and 1) show the same face.
# Comparing the two columns directly is vectorized, so there is no need to wrap
# the array in a DataFrame and call a Python lambda once per row.
# The mean of the boolean mask is the simulated probability of doubles.
(rolls[:, 0] == rolls[:, 1]).mean()

0.16595

### If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [8]:
# Encode every flip as an integer so that a row sum counts heads:
# heads = 1, tails = 0.
outcomes = [1, 0]
n_simulations = 100_000
n_trials = 8  # coins flipped per simulation

flips = np.random.choice(outcomes, size=(n_simulations, n_trials))
# Preview only the first ten simulations.
flips[:10]

array([[1, 0, 0, 0, 1, 0, 0, 1],
       [0, 1, 1, 1, 0, 1, 0, 0],
       [1, 0, 1, 1, 1, 1, 0, 0],
       [0, 1, 0, 1, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 1, 0],
       [1, 1, 0, 0, 0, 1, 1, 0],
       [0, 1, 1, 1, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 1, 1, 0],
       [0, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0]])

In [11]:
# Probability of exactly 3 heads.
# Heads are encoded as 1, so each row sum is the number of heads in that
# simulation. Summing and comparing on the array is vectorized, which avoids
# building a DataFrame and running a Python lambda for every row.
(flips.sum(axis=1) == 3).mean()

0.21874

In [12]:
# Probability of more than 3 heads.
# Each row sum is the number of heads (heads = 1); the comparison is done on
# the whole array at once rather than through a per-row DataFrame.apply.
(flips.sum(axis=1) > 3).mean()

0.63546

### There are approximately 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alum to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [14]:
# Three web development entries for every data science entry, so a uniform
# draw picks data science one time in four.
# data science = 1, web development = 0
outcomes = [0] * 3 + [1]
n_simulations = 100_000
n_trials = 2  # billboards per simulation

choices = np.random.choice(outcomes, size=(n_simulations, n_trials))
# Preview only the first ten simulations.
choices[:10]

array([[0, 0],
       [0, 0],
       [1, 1],
       [0, 0],
       [0, 1],
       [0, 1],
       [0, 0],
       [0, 0],
       [1, 0],
       [1, 0]])

In [15]:
# A row sum of 2 means both billboards drew a 1 (data science).
np.mean(choices.sum(axis=1) == 2)

0.06262

In [16]:
# Same probability as the NumPy version, computed through pandas instead.
# DataFrame.sum(axis=1) and Series.eq are vectorized, so this avoids calling a
# Python lambda once per row via DataFrame.apply.
pd.DataFrame(choices).sum(axis=1).eq(2).mean()

0.06262

### Codeup students buy, on average, 3 poptart packages (±1.5) a day from the snack vending machine. If on Monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [17]:
# Daily poptart purchases drawn from a normal distribution (mean 3, sd 1.5);
# one row per simulation, one column per day.
# NOTE(review): the draws are continuous and can be negative, while packages
# are whole and non-negative — confirm whether rounding/clipping is wanted.
n_simulations = 100_000
n_trials = 5  # presumably Monday through Friday — confirm

poptarts = np.random.normal(loc=3, scale=1.5, size=(n_simulations, n_trials))
poptarts

array([[ 5.49791481,  3.25105683,  3.39565659,  2.55897498,  2.99659436],
       [ 2.93086179,  2.34147031,  5.2737142 ,  2.11887872,  3.45104041],
       [ 7.00905427,  4.74858522,  0.62369791,  1.74356126,  0.42928376],
       ...,
       [ 2.81332871,  3.41356613,  1.20171401,  5.7028992 ,  4.3675177 ],
       [ 2.8264138 ,  4.46482791,  2.07187242,  1.44772245,  2.47896394],
       [ 3.96418063, -0.46989248,  4.23958121,  2.0875942 ,  0.95872726]])

In [18]:
# Share of simulations in which the total bought across all simulated days
# stays below the 17 packages that were stocked.
# NOTE(review): `< 17` also counts totals between 16 and 17, i.e. less than one
# whole package left — confirm whether `<= 16` is the intended threshold.
np.mean(poptarts.sum(axis=1) < 17)

0.72285

### Compare Heights
    - Men have an average height of 178 cm and standard deviation of 8cm.
    - Women have a mean of 170, sd = 6cm.
    - Since you have means and standard deviations, you can use np.random.normal to 
        generate observations.
    - If a man and a woman are chosen at random, what is P(woman taller than man)?

In [19]:
n_simulations = 100_000

# One simulated man's height (cm) per simulation: mean 178, sd 8.
men_heights = np.random.normal(loc=178, scale=8, size=n_simulations)
men_heights

array([183.784776  , 182.63582221, 168.72475462, ..., 175.90146164,
       185.15628831, 169.54637074])

In [20]:
# One simulated woman's height (cm) per simulation: mean 170, sd 6.
women_heights = np.random.normal(loc=170, scale=6, size=n_simulations)
women_heights

array([175.04544051, 166.27227274, 170.206257  , ..., 162.96772269,
       170.11314161, 184.05558536])

In [21]:
# Pair the i-th woman with the i-th man; the mean of the boolean comparison is
# the simulated probability that the woman is taller.
np.mean(women_heights > men_heights)

0.21114

### When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

In [24]:
# Encode each installation as 0 = success, 1 = failure, so that a row sum
# counts the failures in that simulation.
outcomes = [0, 1]
n_simulations = 100_000
n_trials = 50  # students per simulation

# Probabilities are listed in the same order as `outcomes`: success, failure.
installations = np.random.choice(
    outcomes,
    size=(n_simulations, n_trials),
    p=[(1 - 1 / 250), (1 / 250)],
)
installations


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [25]:
# 50 installations: a row sum of 0 means no installation in that simulation failed.
np.mean(installations.sum(axis=1) == 0)

0.8182

In [26]:
# 100 installations: redraw with 100 students per simulation, reusing
# `outcomes` and `n_simulations` from the earlier cell.
n_trials = 100
installations = np.random.choice(
    outcomes,
    size=(n_simulations, n_trials),
    p=[(1 - 1 / 250), (1 / 250)],
)
installations

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [27]:
# Share of simulations in which no installation failed (row sum of 0).
np.mean(installations.sum(axis=1) == 0)

0.67153

### What is the probability that we observe an installation issue within the first 150 students that download anaconda?

In [28]:
# Redraw with 150 students per simulation, reusing `outcomes` and
# `n_simulations` from the earlier cell.
n_trials = 150

installations = np.random.choice(
    outcomes,
    size=(n_simulations, n_trials),
    p=[(1 - 1 / 250), (1 / 250)],
)
installations

array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [29]:
# Share of simulations with at least one failed installation (row sum >= 1).
np.mean(installations.sum(axis=1) >= 1)

0.45321

### How likely is it that 450 students all download anaconda without an issue?

In [30]:
# Redraw with 450 students per simulation, reusing `outcomes` and
# `n_simulations` from the earlier cell.
n_trials = 450

installations = np.random.choice(
    outcomes,
    size=(n_simulations, n_trials),
    p=[(1 - 1 / 250), (1 / 250)],
)
installations

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [31]:
# Share of simulations in which no installation failed (row sum of 0).
np.mean(installations.sum(axis=1) == 0)

0.16684

### There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [32]:
# Encode each day as 1 = a food truck showed up, 0 = no food truck, so that a
# row sum counts the days with a truck.
outcomes = [1, 0]
n_simulations = 100_000
n_trials = 3  # days per simulation

# Probabilities are listed in the same order as `outcomes`: truck, no truck.
trucks = np.random.choice(
    outcomes,
    size=(n_simulations, n_trials),
    p=[.7, .3],
)
trucks

array([[1, 0, 0],
       [0, 1, 1],
       [1, 0, 0],
       ...,
       [1, 0, 0],
       [0, 0, 1],
       [1, 1, 1]])

In [33]:
# Likelihood that it happens: share of simulations with no truck on any day
# (row sum of 0).
np.mean(trucks.sum(axis=1) == 0)

0.02746

In [34]:
# Likelihood that it does not happen: complement of the share of simulations
# with no truck on any day.
1 - np.mean(trucks.sum(axis=1) == 0)

0.97254

### How likely is it that a food truck will show up sometime this week?

In [35]:
# The week is taken to be 7 days here, not 5. `outcomes` and `n_simulations`
# are reused from the earlier food-truck cell.
n_trials = 7

trucks = np.random.choice(
    outcomes,
    size=(n_simulations, n_trials),
    p=[.7, .3],
)
trucks

array([[1, 1, 0, ..., 1, 1, 1],
       [0, 1, 1, ..., 1, 0, 0],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [1, 0, 0, ..., 1, 0, 1],
       [1, 0, 0, ..., 0, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]])

In [36]:
# Share of simulations with a truck on at least one day (row sum >= 1).
np.mean(trucks.sum(axis=1) >= 1)

0.99976

### If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [66]:
n_simulations = 100_000
n_trials = 23  # people in the room

# Birthdays are drawn as day-of-year integers; randint's upper bound is
# exclusive, so the values range over 1..365.
birthdays = np.random.randint(low=1, high=366, size=(n_simulations, n_trials))
# Preview only the first ten simulated rooms.
birthdays[:10]

array([[284, 240, 119, 209, 107, 282, 278,  42, 178,  27, 233, 200, 331,
         54,  26, 311, 172, 125, 110, 144,  34, 107, 241],
       [167, 316, 160,  33, 195, 316, 303, 208, 169,  32, 326,  89, 186,
        166,   7, 279,  51,  26, 141, 323, 206,  68,  32],
       [192, 160, 328,  61, 214, 102, 201, 308, 271, 122, 213, 294,  91,
         68, 194, 226, 306, 261, 105, 167, 230, 260, 217],
       [189, 281, 170,  10, 351, 151, 318, 153,  62, 132, 210, 134,  29,
         55, 234, 194, 149,  39, 158, 159, 184, 306, 328],
       [320, 290,  26,  49,  15,  94, 110, 301,  59, 250, 262, 321, 191,
         12,  28, 157, 347, 269, 221, 294, 263, 327, 322],
       [343, 353, 246, 168, 364, 107,  72, 201, 207, 159, 186,  38, 137,
         49, 143,  97, 256,  64, 334, 192, 262, 307, 161],
       [ 73, 313, 345,  28, 337, 281, 133, 216, 225, 324, 350, 344, 173,
         12,  71, 120,  42, 256, 130,  89, 209, 216, 198],
       [ 53, 149, 115, 185, 270, 351, 254,  71,  67, 253, 123, 276, 317,
   

In [67]:
# A room has a shared birthday when it holds fewer distinct birthdays than
# people, i.e. the per-row unique count is below n_trials.
pd.DataFrame(birthdays).nunique(axis=1).lt(n_trials).mean()

0.5086

In [68]:
# For 20 people: redraw the rooms, reusing `n_simulations` from the earlier cell.
n_trials = 20

# randint's upper bound is exclusive, so the values range over 1..365.
birthdays = np.random.randint(low=1, high=366, size=(n_simulations, n_trials))
birthdays

array([[352,   1,  86, ..., 305,  47, 110],
       [ 74, 121, 122, ..., 311, 148, 225],
       [ 87,  97, 249, ..., 365,  66, 238],
       ...,
       [201, 197,   6, ..., 263, 231,  32],
       [  1,  39, 222, ..., 201, 180, 332],
       [155,  17, 150, ..., 345, 168, 299]])

In [69]:
# A room has a shared birthday when it holds fewer distinct birthdays than
# people, i.e. the per-row unique count is below n_trials.
pd.DataFrame(birthdays).nunique(axis=1).lt(n_trials).mean()

0.40924

In [70]:
# For 40 people: redraw the rooms, reusing `n_simulations` from the earlier cell.
n_trials = 40

# randint's upper bound is exclusive, so the values range over 1..365.
birthdays = np.random.randint(low=1, high=366, size=(n_simulations, n_trials))
birthdays

array([[175,  72, 226, ..., 177,   8, 223],
       [358,  38, 241, ..., 184, 128, 189],
       [ 89, 163, 322, ..., 318,  65, 158],
       ...,
       [304, 290,  78, ..., 106, 177, 210],
       [244,  36, 284, ...,  62,  46, 215],
       [151, 288,   1, ..., 345, 244,  96]])

In [71]:
# A room has a shared birthday when it holds fewer distinct birthdays than
# people, i.e. the per-row unique count is below n_trials.
pd.DataFrame(birthdays).nunique(axis=1).lt(n_trials).mean()

0.8925