# $Simulation$

In [124]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'
import viz # curriculum example visualizations

np.random.seed(9)

### 1. How likely is it that you roll doubles when rolling two dice?

In [14]:
# Simulation grid: one row per trial, one column per die.
nrows = 10_000
n_trials = nrows
ncols = 2
n_dice = ncols

# Draw every die face (1 through 6, equally likely) in a single vectorized call.
rolls = np.random.choice([1, 2, 3, 4, 5, 6], size=(n_trials, n_dice))
rolls

array([[1, 1],
       [6, 5],
       [2, 6],
       ...,
       [6, 6],
       [5, 5],
       [4, 1]])

In [34]:
# One labelled column per die; each row is a single simulated throw of both dice.
df = pd.DataFrame({'roll_1': rolls[:, 0], 'roll_2': rolls[:, 1]})
df.shape

(10000, 2)

In [27]:
# True for every trial in which both dice show the same face (doubles).
bool_mask = df['roll_1'].to_numpy() == df['roll_2'].to_numpy()

In [37]:
# Share of trials that were doubles: count of True values over the number of trials.
bool_mask.sum() / len(bool_mask)

0.1636

In [54]:
# Probability of rolling doubles: the mean of a boolean mask is the fraction of True values.
np.mean(bool_mask)

0.1636

### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?


In [42]:
# Simulation grid: one row per trial, one column per coin.
nrows = 10_000
n_trials = nrows
ncols = 8
n_coins = ncols

# Each flip is encoded as 1 (heads) or 0 (tails), both equally likely.
flips = np.random.choice([0, 1], size=(n_trials, n_coins))
flips

array([[1, 0, 0, ..., 1, 1, 1],
       [1, 0, 1, ..., 0, 1, 1],
       [1, 0, 1, ..., 0, 1, 1],
       ...,
       [1, 0, 0, ..., 0, 0, 1],
       [1, 1, 0, ..., 0, 1, 0],
       [1, 1, 0, ..., 1, 1, 1]])

In [47]:
# Label each column by the coin's position within the 8-flip trial.
flips_df = pd.DataFrame(flips, columns='1st 2nd 3rd 4th 5th 6th 7th 8th'.split())
flips_df.head()

Unnamed: 0,1st,2nd,3rd,4th,5th,6th,7th,8th
0,1,0,0,1,1,1,1,1
1,1,0,1,0,0,0,1,1
2,1,0,1,0,0,0,1,1
3,0,0,0,1,0,1,1,0
4,1,0,1,1,0,1,0,1


In [50]:
# Heads are coded as 1, so the row-wise total is the number of heads in each trial.
total_heads = flips_df.sum(axis='columns')
total_heads.head()

0    6
1    4
2    4
3    3
4    5
dtype: int64

In [52]:
# Probability of getting exactly 3 heads in 8 flips.
total_heads.eq(3).mean()

0.2188

In [55]:
# Probability of getting more than 3 heads in 8 flips.
total_heads.gt(3).mean()

0.6337

### 3. There are approximately 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alum to put on each billboard, what are the odds that the two billboards I drive past both have data science students on them?


In [81]:
# Simulation grid: one row per drive, one column per billboard passed.
nrows = 10_000
n_trials = nrows
ncols = 2
n_students = ncols

# 0 = web development student, 1 = data science student.
# Three 0s to one 1 in the pool encodes the 3:1 cohort ratio.
billboard = np.random.choice([0, 0, 0, 1], size=(n_trials, n_students))


In [86]:
# One column per billboard, in the order they are passed.
billboard_df = pd.DataFrame({'1st': billboard[:, 0], '2nd': billboard[:, 1]})
billboard_df.tail()

Unnamed: 0,1st,2nd
9995,0,1
9996,0,0
9997,1,0
9998,0,0
9999,1,0


In [95]:
# Probability that both billboards show a data science student:
# a row total of 2 means both columns hold a 1.
billboard_df.sum(axis=1).eq(2).mean()


0.0643

### 4. Codeup students buy, on average, 3 poptart packages a day, with a standard deviation of 1.5, from the snack vending machine. If on Monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon? (Remember, if you have a mean and standard deviation, use np.random.normal.) You'll need to make a judgement call on how to handle some of your values.


In [145]:
# Number of simulated weeks and the stock level after Monday's restock.
nrows = 10_000
monday = 17

# Daily poptart purchases: normal with mean 3 and standard deviation 1.5,
# one column per day (5 columns per simulated week).
# NOTE: a normal draw can be negative; the values are used as drawn (no clipping).
array_pop = np.random.normal(3, 1.5, (nrows, 5))

In [147]:
# Total packages bought per simulated week: row-wise sum over every day column
# of array_pop (generated with 5 columns - presumably Monday through Friday).
sum_pop = np.sum(array_pop, axis=1)

In [148]:
# Display the per-trial purchase totals.
sum_pop

array([13.88757223,  8.16134546, 16.62591946, ..., 17.76262174,
       11.8781606 ,  9.75049894])

In [158]:
# True for trials where total purchases stayed below the Monday restock level,
# i.e. some poptarts are still left. Uses the `monday` constant from the setup
# cell instead of repeating the literal 17.
bool_mask = sum_pop < monday
                   
                      

In [161]:
# Share of simulated weeks with stock remaining: True count over number of trials.
bool_mask.sum() / bool_mask.size

0.7259

In [162]:
# Probability that poptarts are still available on Friday
# (the mean of a boolean mask is the fraction of True values).
np.mean(bool_mask)

0.7259

### 5. Compare Heights
* Men have an average height of 178 cm and standard deviation of 8cm.
* Women have a mean of 170, sd = 6cm.
* Since you have means and standard deviations, you can use np.random.normal to generate observations.
* If a man and woman are chosen at random, what is the likelihood the woman is taller than the man?

In [95]:
# Simulated heights for men in cm: normal with mean 178 and standard deviation 8,
# stored as a single column with one row per trial.
nrows = 10_000
men_random = np.random.normal(178, 8, (nrows, 1))
men_random

array([[176.08074274],
       [183.71805179],
       [192.88500337],
       ...,
       [190.96350518],
       [181.90303759],
       [165.46937575]])

In [96]:
# Simulated heights for women in cm: normal with mean 170 and standard deviation 6,
# stored as a single column with one row per trial.
women_random = np.random.normal(170, 6, (nrows, 1))
women_random

array([[173.57922802],
       [175.92704053],
       [174.17293867],
       ...,
       [171.42521312],
       [163.34052833],
       [158.81020739]])

In [97]:
# Place the two height columns side by side: column 0 = women, column 1 = men.
combine = np.hstack((women_random, men_random))

In [98]:
# Convert the array to a DataFrame.
# The labels must be an ordered list that matches the column order of `combine`
# (women first, then men). A set literal has no guaranteed order, so it could
# swap the labels, and recent pandas versions reject a set for `columns`.
combine_df = pd.DataFrame(combine, columns=['women', 'men'])


In [99]:
# Inspect the dtype of each column in the combined heights frame.
combine_df.dtypes

women    float64
men      float64
dtype: object

In [100]:
# Boolean Series, one entry per simulated pair: True where the woman is taller than the man.
bool_mask = combine_df['women'].gt(combine_df['men'])
bool_mask

0       False
1       False
2       False
3       False
4       False
        ...  
9995    False
9996     True
9997    False
9998    False
9999    False
Length: 10000, dtype: bool

In [101]:
# Probability that a randomly chosen woman is taller than a randomly chosen man.
np.mean(bool_mask)

0.2074

### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails.

* What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?
* What is the probability that we observe an installation issue within the first 150 students that download anaconda?
* How likely is it that 450 students all download anaconda without an issue?


In [92]:
def p_corruption(n_students, n_trials=10_000, p_fail=1/250):
    """Estimate the probability that at least one download is corrupted.

    Simulates ``n_trials`` independent groups of ``n_students`` downloads,
    where each download fails independently with probability ``p_fail``.

    Parameters
    ----------
    n_students : int
        Number of downloads in each simulated group.
    n_trials : int, optional
        Number of simulated groups (default 10,000).
    p_fail : float, optional
        Chance that a single download is corrupted (default 1 in 250).

    Returns
    -------
    float
        Fraction of simulated groups with one or more corrupted downloads.
    """
    # Uniform draws in [0, 1): a draw below p_fail marks a corrupted download.
    draws = np.random.random((n_trials, n_students))
    corrupted = draws < p_fail
    # A group has an installation issue if any of its downloads is corrupted.
    return corrupted.any(axis=1).mean()


In [94]:
# Estimated chance that at least one of 50 downloads is corrupted.
p_corruption(50)

0.1811

In [83]:
# Per-download corruption probability and the simulation grid:
# one row per trial, one column per student.
p_corrupted = 1 / 250
nrows = 10_000
ncols = 450
n_students = ncols


In [84]:
# Run the random trials: one uniform draw in [0, 1) per student per trial.
data = np.random.random(size=(nrows, ncols))
data.size


4500000

In [85]:
# A draw below the corruption probability marks that download as corrupted.
corruption = np.less(data, p_corrupted)
corruption

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [86]:
# Number of corrupted downloads in each trial.
np.sum(corruption, axis=1)

array([1, 2, 3, ..., 3, 6, 1])

In [66]:
# Probability of at least one corrupted download among 250 students.
# Simulated with its own cohort size through p_corruption: the shared
# `corruption` matrix is built for n_students = 450, so reading it here
# would silently answer the question for 450 students instead.
p_corruption(250)

0.6249

In [76]:
# Probability that none of 50 students hits a corrupted download.
# Simulated with its own cohort size through p_corruption: the shared
# `corruption` matrix is built for n_students = 450, so reading it here
# would silently answer the question for 450 students instead.
1 - p_corruption(50)

0.8189

In [82]:
# Probability that none of the first 150 students hits a corrupted download;
# the complement, p_corruption(150), is the chance of observing an issue.
# Simulated with its own cohort size through p_corruption: the shared
# `corruption` matrix is built for n_students = 450, so reading it here
# would silently answer the question for 450 students instead.
1 - p_corruption(150)

0.5445

In [88]:
# At 450 students there is a .8358 probability of corruption
# and a .1642 probability of no corruption.
1 - corruption.any(axis=1).mean()

0.1642

### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park.
* However, you haven't seen a food truck there in 3 days. How unlikely is this?
* How likely is it that a food truck will show up sometime this week?

In [137]:
# Daily chance of at least one food truck, and the simulation grid:
# one row per trial, one column per day observed.
p_truck = 0.7

nrows = 10_000
ncols = 3


In [138]:
# One uniform draw in [0, 1) per day per trial.
data = np.random.random(size=(nrows, ncols))
data


array([[0.53997185, 0.03615257, 0.55414766],
       [0.94911184, 0.92170487, 0.87540019],
       [0.56237953, 0.41947643, 0.34387037],
       ...,
       [0.69158201, 0.69089866, 0.53110345],
       [0.26026031, 0.83061181, 0.29446877],
       [0.84194359, 0.89789462, 0.90961934]])

In [139]:
# True on days where the draw falls within the food-truck probability, i.e. a truck showed up.
food_truck = np.less_equal(data, p_truck)


In [144]:
# Probability of no food truck on any of the 3 days: share of trials with no True in the row.
(~food_truck.any(axis=1)).mean()

0.0223

### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?