# Simulations

In [1]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#can use any random seed- lets preset this
np.random.seed(1349) 

#preset the number of simulations we'll have
n_simulations =100_000

<hr style="border:2px solid gray">

### 1. How likely is it that you roll doubles when rolling two dice?
#### Expected?

In [3]:
# Expected probability
2/12

0.16666666666666666

#### Simulate!

In [8]:
#number of dice
number_of_dice = 2 
outcomes = [1,2,3,4,5,6]

In [9]:
rolls = np.random.choice(outcomes, size=(n_simulations, number_of_dice))
rolls

array([[3, 2],
       [5, 3],
       [4, 4],
       ...,
       [3, 5],
       [2, 4],
       [4, 5]])

In [10]:
#let's turn this into a dataframe so we can see our outcomes more clearly
rolls =pd.DataFrame(rolls)

#call our new dataframe
rolls.head()

Unnamed: 0,0,1
0,3,2
1,5,3
2,4,4
3,6,2
4,1,1


In [11]:
#the average likelyhood that roll in index 0 will equal roll in index 1
(rolls[0] == rolls[1]).mean()

0.16445

<hr style="border:1.5px solid black">

### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

#### Expected 

In [22]:
3/16

0.1875

#### Simulated

In [14]:
#number of coins
n_coins = 8 

#possible outcomes
outcomes = ['H', 'T']

In [15]:
#us random choice to get the simulation
coinflips = np.random.choice(outcomes, size=(n_simulations, n_coins))

#call the variable
coinflips

array([['T', 'H', 'T', ..., 'H', 'T', 'T'],
       ['T', 'H', 'T', ..., 'H', 'H', 'H'],
       ['T', 'T', 'T', ..., 'H', 'T', 'T'],
       ...,
       ['T', 'T', 'H', ..., 'H', 'H', 'T'],
       ['H', 'H', 'H', ..., 'T', 'H', 'T'],
       ['T', 'H', 'H', ..., 'H', 'H', 'T']], dtype='<U1')

In [16]:
#let's look at the possibility of the coinflip being Heads
flips_by_sim = (coinflips == 'H').sum(axis=1)

#call that variable
flips_by_sim

array([3, 4, 1, ..., 3, 4, 5])

##### Landing on Heads exactly 3 times

In [17]:
#what is the possibility of the flips being heads exactly 3 times
flips_by_sim == 3

array([ True, False, False, ...,  True, False, False])

##### Landing on Heads more than 3 times

In [18]:
#using data from above
(flips_by_sim == 3).mean()

0.21747

In [21]:
#completely redone
outcomes = ['H','T']
n_coins = 8

coinflips = np.random.choice(outcomes, size=(n_simulations, n_coins))
flips_by_sim = (coinflips == 'H').sum(axis=1)
(flips_by_sim == 3).mean()

0.21848

<hr style="border:1.5px solid black">

### 3 There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

##### First, let's look at this without imports and answer using probability
- 3 webdev per 1 dS == 1/4 classes is DS == 25% of classes are DS
- 2 billboards
- odds of both having a data science student on it
- 25% chance per billboard * 2 billboards = 1/8

In [23]:
(1/4) * (1/4)

0.0625

In [66]:
# theoretical prob
0.25 * 0.25

0.0625

##### Now, let's answer with simulation

In [62]:
#possible outcomes with 4 classes
outcomes = ['wd', 'wd', 'wd', 'ds']

#number of billboards
n_billboards = 2

#using random choice, let's see what the outcomes will be
billboards = np.random.choice(outcomes, size=(n_simulations, n_billboards))

#call the variable
billboards

array([['wd', 'ds'],
       ['wd', 'wd'],
       ['wd', 'wd'],
       ...,
       ['wd', 'ds'],
       ['ds', 'wd'],
       ['wd', 'ds']], dtype='<U2')

In [25]:
# our only options are...
outcomes = ['wd','ds']

#using the probability of 1/4 classes being DS, and 3/4 classes being WD
billboards = np.random.choice(outcomes, size=(n_simulations, n_billboards), 
                p=[0.75, 0.25])

#let's see our outcomes
billboards

array([['wd', 'wd'],
       ['wd', 'wd'],
       ['wd', 'wd'],
       ...,
       ['wd', 'wd'],
       ['ds', 'ds'],
       ['wd', 'wd']], dtype='<U2')

In [26]:
#what are the possibilities that the billboard will be exactly data science
(billboards == 'ds').sum(axis=1)

array([0, 0, 0, ..., 0, 2, 0])

In [63]:
#what is the probability that TWO billboards will be exactly data science
probability = ((billboards == 'ds').sum(axis=1) == 2).mean()

#call that variable
probability

0.06222

##### Converting from probability to odds:

In [64]:
# Probability of seeing anything else
1- probability

0.9377800000000001

In [65]:
#odds of seeing a billboard that is not WD
odds = prob/(1-prob)
odds

0.06665528901025056

<hr style="border:1.5px solid black">

### 4. Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

##### Let's break this down

- 1.5-4.5 puchases a day (average 3)
- 17 packages every monday
- probability to buy one on Friday
- Mon:17 - (1.5-4.5) = 15.5-12.5 =day 1 prob = 100%
- Tues: 15-12.5 - (1.5-4.5)= 13.5-8 = day 2 prob 100% 
- Wed:13.5-8 - (1.5-4.5)= 12-3.5 = day 3 prob 100%
- Thurs: 12-3.5 - (1.5-4.5)= 10.5-(-1) = day 4, could have 10.5 or -1 left / 50% chance
- Friday: 10.5 -(-1) - (1.5-4.5) = 9-(-5.5)= day 4, could have 9 or -5.5/ 50% chance

##### Now, let's use simulation

In [68]:
#  3 poptarts per 1.5
mean = 3 
daily_snack = 1.5

number_of_days = 5

In [72]:
#let's use random normal becuase we have the mean and sd already
#see what are chances are that poptarts are bought that day
poptarts_bought_day = np.random.normal(mean, daily_snack, size=(number_of_days))

#take a look at what we get
poptarts_bought_day

array([6.45675137, 2.42458795, 3.3278966 , 2.14820293, 2.18566272])

In [None]:
#we can visualize these popart daily purchases if we want to
plt.hist(np.random.normal(3, 1.5, size = (10000)))
plt.title('Distribution of Poptart Packages Purchased Each Day')
plt.vlines(3, 0, 3000, color='black')
plt.text(3.1, 3000, 'Mean')
plt.vlines(4.5, 0, 3000, color='black', ls='--', alpha=0.5)
plt.text(4.6, 3000, '1σ')
plt.vlines(6.0, 0, 3000, color='black', ls='--', alpha=0.5)
plt.text(6.1, 3000, '2σ')
plt.vlines(1.5, 0, 3000, color='black', ls='--', alpha=0.5)
plt.text(1.6, 3000, '-1σ')
plt.vlines(0, 0, 3000, color='black', ls='--', alpha=0.5)
plt.text(0.1, 3000, '-2σ')
plt.ylabel('Count')
plt.xlabel('Number of Packages Purchased')
plt.show()

In [73]:
#see what are chances are that poptarts are bought per week
poptarts_bought_per_week = poptarts_bought_day.sum(axis=1)

#call that variable
poptarts_bought_per_week

AxisError: axis 1 is out of bounds for array of dimension 1

In [None]:
#probability that less than 17 poptarts are bought that week
(poptarts_bought_per_week <= 16).mean()

<hr style="border:1.5px solid black">

### 5. Compare Heights
- Men have an average height of 178 cm and standard deviation of 8cm.
- Women have a mean of 170, sd = 6cm.
- If a man and woman are chosen at random, P(woman taller than man)?

In [34]:
#men's average height in cm
men_avg = 178
#men's standard deviation in cm
men_std = 8

#women's average height in cm
women_avg = 170
#women's standard deviation in cm
women_std = 6

In [38]:
#men avg sim
men = np.random.normal(178, 8, size = n_simulations)
men

array([181.30494255, 178.67652296, 173.77270595, ..., 174.86483176,
       170.16563794, 186.77891105])

In [39]:
#women avg sim
women = np.random.normal(170, 6, size = n_simulations)
women

array([165.84128909, 179.76583246, 156.73865439, ..., 164.82203397,
       171.90800698, 169.96102568])

In [77]:
#probability that women are taller then men on average
(women > men).mean()

0.21187

<hr style="border:1.5px solid black">

### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails.
- What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?
- What is the probability that we observe an installation issue within the first 150 students that download anaconda?
- How likely is it that 450 students all download anaconda without an issue?

##### Let's break it down
- prob_fail = 1/250
- 50 students == 0 fails
- 100 students == 0 fails
- 150 students >= 1 fail
- 450 students == 0 fails

##### 50 installs with no issues

In [78]:
outcomes = ['S', 'F']

number_of_students = 50

In [92]:
downloads = np.random.random((n_simulations, number_of_students)) 
p = 1/250
downloads

array([[0.52345138, 0.03077982, 0.2613347 , ..., 0.24818141, 0.17705971,
        0.78900088],
       [0.45584809, 0.77003348, 0.52528605, ..., 0.46514095, 0.84201336,
        0.60021125],
       [0.40628104, 0.95790566, 0.87141532, ..., 0.545892  , 0.46030235,
        0.4008099 ],
       ...,
       [0.07349499, 0.74510922, 0.13432688, ..., 0.29016113, 0.39627559,
        0.38992984],
       [0.20703787, 0.6947583 , 0.87245435, ..., 0.42736372, 0.49593723,
        0.30045504],
       [0.84243199, 0.42543965, 0.6277653 , ..., 0.44415875, 0.76757581,
        0.01055467]])

In [93]:
failure = downloads < p

In [80]:
(downloads == 'S').sum(axis=1)

array([19, 19, 20, ..., 20, 20, 20])

In [97]:
(failure.sum(axis=1) == 0).mean()

0.81844

<hr style="border:0.5px solid black">

##### 100 installs

In [98]:
outcomes = ['S', 'F']
number_of_students = 100

downloads = np.random.random((n_simulations, number_of_students)) 
p = 1/250
downloads
(downloads == 'S').sum(axis=1)
failure = downloads < p
(failure.sum(axis=1) == 0).mean()

  (downloads == 'S').sum(axis=1)


AttributeError: 'bool' object has no attribute 'sum'

<hr style="border:0.5px solid black">

##### 150 installs at least 1 fail

In [51]:
outcomes = ['S', 'F'] 
n_students = 150

downloads = np.random.choice(outcomes, size=(n_simulations, n_students), 
                p = [249/250, 1/250])

((downloads == 'S').sum(axis=1) < n_students).mean()

0.44986

<hr style="border:0.5px solid black">

##### 450 installs

In [None]:
outcomes = ['S', 'F']
n_students = 450

downloads = np.random.choice(outcomes, size=(n_simulations, n_students), 
                p = [249/250, 1/250])

((downloads == 'S').sum(axis=1) == n_students).mean()

<hr style="border:1px solid black">

### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [None]:
n_days = 3

In [None]:
# 0 = no truck, 1 = truck shows up
trucks = np.random.choice([0, 1], size = (n_simulations, n_days), p = [0.3, 0.7])

#call the variable
trucks

##### probability that no truck shows up in 3 days

In [None]:
#axis 1 is 0 (or no truck shows up)
(trucks.sum(axis = 1) == 0).mean()

##### How likely is it that a food truck will show up sometime this week

In [None]:
#likelyhood that in axis 1 there is a 1 (or a truck DOES show up) one or more days
(trucks.sum(axis = 1) >= 1).mean()

<hr style="border:1px solid black">

### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?


Let's break it down:
- 365 days in a year
- what's the probability of same birthday

#### 23 students in the class

In [None]:
#number of days in a year
outcomes = range(0, 365) 

#number of students
n_students= 23 

In [None]:
#using random choice to get the possible birthdays
birthdays= np.random.choice(outcomes, size=(n_simulations, n_students))

#call that variable
birthdays

In [None]:
#turn it into a DF so it's easier to read
birthdays = pd.DataFrame(birthdays)

#call our new DF
birthdays.head()

In [None]:
#what are the chances that on average we will have a unique birthday
(birthdays.nunique(axis=1) < 23).mean()

<hr style="border:0.5px solid black">

#### 20 students per class

In [60]:
#number of days in a year
outcomes = range(0, 365) 

#number of students
n_students= 20 

In [61]:
#let's see what the possibility of birhtdays is with 20 students
birthdays= np.random.choice(outcomes, size=(n_simulations, n_students))

#turn that into a DF
birthdays = pd.DataFrame(birthdays)

#call the DF
birthdays

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,80,231,258,136,318,55,304,162,54,203,290,332,146,101,301,187,115,80,316,295
1,41,31,155,92,327,26,199,119,218,64,49,297,231,187,243,230,276,323,37,5
2,3,72,5,355,192,158,130,49,67,256,343,291,253,205,83,230,134,170,126,69
3,347,170,94,291,240,183,14,246,111,339,163,275,70,196,99,83,242,197,242,310
4,203,36,142,34,320,29,196,202,266,57,359,39,117,363,342,31,259,207,112,105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,132,32,24,335,179,137,291,189,256,271,214,346,109,10,306,245,6,323,170,78
99996,89,50,316,362,100,152,22,84,129,281,360,194,312,87,13,56,100,348,205,45
99997,32,130,157,15,359,154,312,70,11,189,142,357,149,169,291,338,291,210,362,292
99998,270,347,36,181,285,0,231,267,78,82,174,60,147,108,8,131,70,256,194,329


In [58]:
#get the probability
(birthdays.nunique(axis=1) = n_students).mean()

SyntaxError: cannot assign to function call here. Maybe you meant '==' instead of '='? (3799527546.py, line 2)

<hr style="border:0.5px solid black">

##### 40 students per class

In [57]:
#number of days in a year
outcomes = range(0, 365) 

#number of students
n_students = 40

#let's see what the possibility of birhtdays is with 40 students
birthdays = np.random.choice(outcomes, size=(n_simulations, n_students))

#call the DF
birthdays = pd.DataFrame(birthdays)
birthdays

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,56,169,347,124,18,13,267,81,22,95,...,322,58,297,364,236,331,28,87,178,269
1,62,88,9,299,344,244,109,108,164,82,...,302,331,160,330,66,61,1,69,318,32
2,147,171,170,165,315,204,321,77,129,49,...,283,85,335,344,292,285,80,129,99,95
3,225,34,96,210,117,222,360,119,22,39,...,345,184,191,256,200,280,213,42,143,342
4,162,124,102,15,248,224,129,345,236,236,...,193,88,123,289,210,154,106,254,198,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,189,343,246,167,4,339,154,9,316,350,...,51,107,152,44,325,38,219,352,215,347
99996,324,295,177,108,146,222,230,326,151,330,...,293,23,242,232,324,193,341,323,239,171
99997,90,252,194,358,87,123,361,266,121,267,...,340,246,260,10,55,278,41,183,138,102
99998,104,20,277,63,276,110,186,66,48,308,...,191,22,275,6,83,227,20,201,364,335


In [53]:
#get the probability
(birthdays.nunique(axis=1) < n_students).mean()

0.88993