# Simulation Exercises

> How likely is it that you roll doubles when rolling two dice?

In [1]:
import numpy as np

# set the number simulations to run 
num_simulations = 1000000

# generate a matrix of random integers representing rolls of the dice
rolls = np.random.randint(1, 7, size=(num_simulations, 2))

# counts the number of doubles in a simulated roll, 
# rolls contains the results of the sim rolls in a array
# rolls[:] selects all values in the column specified, 
# creates a boolean array that evaluate True when both columns have equal values
num_doubles = np.count_nonzero(rolls[:, 0] == rolls[:, 1])

# calculates the likelihood(probability)
likelihood = num_doubles / num_simulations

In [2]:
likelihood

0.166225

> If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [9]:
# define parameters for n(coin flips), k(probability of exactly 3 heads), and p(probability of a single flip)

n = 8
k = 3
p = 0.5

# simulate 1 million trials of 8 coin flips with probability of 0.5
outcomes = np.random.binomial(n, p, size=10**6)

# calculates the proportion of trials that have exactly 'k' head using np.mean
# creates a boolean arrray
prob_3_heads = np.mean(outcomes == k)

In [10]:
prob_3_heads

0.218614

In [11]:
# define parameters for n(coin flips), k(probability of more than 3 heads), and p(probability of a head on a single flip)

n = 8
k = 3
p = 0.5

# simulate 1 million trials of 8 coin flips with probability of 0.5
outcomes = np.random.binomial(n, p, size=10**6)

# calculates the proportion of trials that have outcome > 'k' head using np.mean
# creates a boolean arrray
prob_more_than_3_heads = np.mean(outcomes > k)

In [12]:
prob_more_than_3_heads

0.636788

> There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [42]:
import numpy as np
import pandas as pd

# define the alumni cohorts
cohorts = ['Web Dev', 'Web Dev', 'Web Dev', 'Data Science']

# set the number of trials
num_trials = 100000

# randomly select alumni cohorts for two billboards
billboards = np.random.choice(cohorts, size=(num_trials, 2))

# count the number of times we see two data science billboards
num_data_science = np.sum(np.all(billboards == 'Data Science', axis=1))

# calculate the probability and odds of seeing two data science billboards
#odds = probability / (1 - probability)
probability = num_data_science / num_trials

In [41]:
probability

0.0612

> Codeup students buy, on average, 3 poptart packages with a standard deviation of 1.5 a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon? (Remember, if you have mean and standard deviation, use the np.random.normal) You'll need to make a judgement call on how to handle some of your values

In [24]:
import numpy as np

# parameters
mean = 3
std_dev = 1.5
restocked = 17
days = 5

# simulate the number of poptart packages bought each day
poptarts_per_day = np.round(np.random.normal(mean, std_dev, size=(100000, days)))

# calculate the total number of poptart packages bought during the week
total_poptarts_bought = np.mean(poptarts_per_day, axis=1) * days

# set any negative values to zero
total_poptarts_bought[total_poptarts_bought < 0] = 0

# calculate the probability of being able to buy some poptarts on Friday afternoon
prob = np.mean((restocked - total_poptarts_bought) > 0)

In [25]:
poptarts_per_day 

array([[ 3.,  1.,  5.,  4.,  5.],
       [ 4., -1.,  2.,  4.,  6.],
       [ 6.,  2.,  2.,  2.,  3.],
       ...,
       [ 1.,  3.,  1.,  2.,  1.],
       [ 2.,  1.,  4.,  3.,  3.],
       [ 3.,  2.,  4.,  3.,  2.]])

In [26]:
prob

0.66842

> Compare Heights: Men have an average height of 178 cm and standard deviation of 8cm.
Women have a mean of 170, sd = 6cm. Since you have means and standard deviations, you can use np.random.normal to generate observations. If a man and woman are chosen at random, what is the likelihood the woman is taller than the man?

In [15]:
# simulate men and women heights 
# draw random samples from a normal (Gaussian) distribution.
men_heights = np.random.normal(178, 8, size=100000)
women_heights = np.random.normal(170, 6, size=100000)

# probability of taller women based on the mean of data where women heights > men heights
prob_taller_woman = np.mean(women_heights > men_heights)

In [16]:
prob_taller_woman

0.21288

> When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

In [43]:
import numpy as np

# number of students()
n = 150
# probability of failed installation
p = .004 # 1 in 250 chance
# number of successful installations
k = 0
# number of simulations
num_sims = 10000

# generate array of random integers representing installation status for each student
installations = np.random.binomial(1, p, size=(num_sims, n))

# calculate the number of successful installations for each simulation
successes = np.sum(installations, axis=1)

# calculate the probability of all successes in n trials for each simulation
prob_success = np.power(1-p, n)

# calculate the probability of no installation issue in n trials for each simulation
prob_no_issue = np.round(1 - prob_success, 6)

# calculate the probability of successful installation for each simulation
prob_install_success = np.power(p, successes)

# calculate the probability of failed installation for each simulation
prob_no_issue  = np.power(1-p, n-successes)

# calculate the probability of at least one failed installation for each simulation
prob_at_least_one_fail = 1 - prob_install_success

# calculate the overall probability of no installation issue
overall_prob_no_issue = np.round(np.mean(prob_no_issue), 6)

# calculate the overall probability of at least one failed installation
overall_prob_at_least_one_fail = np.round(np.mean(prob_at_least_one_fail), 6)

In [44]:
overall_prob_no_issue 

0.451848

In [45]:
overall_prob_at_least_one_fail

0.45974

> There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?How likely is it that a food truck will show up sometime this week?

In [47]:
import numpy as np
import pandas as pd

# Define the probability of seeing a food truck on any given day
prob_truck_per_day = 0.7

# Define the number of simulations/trials
num_sims = 100

# Simulate the number of days before seeing a food truck
days_to_truck = np.random.geometric(p=prob_truck_per_day, size=num_sims)

# Calculate the probability of seeing a food truck at least once in a week
prob_truck_per_week = 1 - np.power(1 - prob_truck_per_day, 7)

# Convert the days to a pandas Series for ease of manipulation
days_to_truck = pd.Series(days_to_truck)

# Calculate the probability of not seeing a food truck in 3 days
prob_no_truck_3_days = np.power(1 - prob_truck_per_day, 3)

# Calculate the probability of seeing a food truck within the next 5 days
prob_truck_next_2_days = 1 - np.power(1 - prob_truck_per_day, 5)

In [48]:
prob_truck_per_week 

0.9997813

In [49]:
prob_no_truck_3_days

0.027000000000000014

In [50]:
prob_truck_next_2_days 

0.99757

In [51]:
days = ['food truck', 'no food truck']
p_truck = [0.7, 0.3] # Probability of having a food truck and not having a food truck
num_weeks = 10000 # Number of weeks to simulate
no_truck_weeks = np.sum(np.random.choice(days, size=(num_weeks, 7), p=p_truck) == 'no food truck', axis=1) # Number of weeks with no food trucks
prob_no_trucks = np.sum(no_truck_weeks == 7)/num_weeks # Probability of no food trucks in a week

In [52]:
prob_no_trucks

0.0004

> If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [53]:
num_sims = 10000
num_people = 35

birthdays = np.random.randint(0, 366, size=(num_sims, num_people))
unique_birthdays = np.unique(birthdays, axis=1)
prob_shared_birthday = 1 - np.prod(1 - np.arange(num_people) / 365)

In [54]:
prob_shared_birthday

0.8143832388747152

In [56]:
birthdays = pd.DataFrame(birthdays)

In [57]:
birthdays 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,107,99,9,273,364,355,14,61,93,200,...,349,32,296,118,224,3,62,198,144,79
1,182,162,342,60,337,357,55,314,246,13,...,264,331,169,290,153,137,31,15,248,341
2,102,141,178,269,158,206,171,240,21,282,...,36,103,116,12,357,312,329,350,274,152
3,215,316,26,201,51,88,111,289,69,283,...,278,229,283,88,5,318,175,43,59,302
4,130,152,32,130,53,330,197,2,327,321,...,56,199,323,23,219,349,168,88,217,158
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,365,113,159,265,254,35,145,40,59,255,...,13,251,134,102,234,124,110,67,279,178
9996,152,245,170,234,63,108,309,254,287,100,...,272,60,57,146,122,215,151,293,89,268
9997,322,334,264,72,363,14,172,20,350,126,...,105,60,157,177,221,251,295,341,182,73
9998,265,167,68,359,251,135,58,120,231,110,...,197,255,336,238,193,178,244,17,112,141
