# MATH252 Probability and Statistics II
# Week 4 - Sampling Distributions and Probability Plots
Rochester Institute of Technology

Fall 2020

Will Clifford

In [1]:
import math as m
import scipy.stats as st
import matplotlib.pyplot as plt

## Sampling Distributions

### Example 1

The time it takes a randomly selected rat to find its way through a maze is a normally distributed random variable with $\mu = 1.5$ minutes and 
$\sigma = 0.35$ minutes. Suppose five rats are randomly selected and their times are denoted by $X_1$, $X_2$, $X_3$, $X_4$, $X_5$.

Determine the probability that the sample average time $\bar{X}$ is at most $1.8$ minutes.

In [2]:
def Z(xbar, mu, sig):
    """Return the z-score of ``xbar`` relative to mean ``mu``.

    Args:
        xbar: Observed value to standardize.
        mu: Mean of the distribution.
        sig: Standard deviation of the distribution (must be non-zero).

    Returns:
        The distance of ``xbar`` from ``mu`` measured in units of ``sig``.
    """
    deviation = xbar - mu
    return deviation / sig

def Z_sd(xbar, mu, s, n):
    """Return the z-score of a sample mean, scaled by the standard error.

    Args:
        xbar: Sample mean to standardize.
        mu: Mean of the sampling distribution.
        s: Standard deviation of a single observation.
        n: Sample size (must be positive).

    Returns:
        ``xbar - mu`` divided by the standard error ``s / sqrt(n)``.
    """
    standard_error = s / m.sqrt(n)
    return (xbar - mu) / standard_error

In [3]:
# Maze-time population mean and standard deviation (minutes), and the number
# of rats sampled.
mu, sig, n = 1.5, 0.35, 5

# Upper bound on the sample mean whose probability is wanted.
x_bar = 1.8

# Standardize with the standard error, then read P(X-bar <= x_bar) off the
# standard normal CDF.
z_score = Z_sd(x_bar, mu, sig, n)
round(st.norm.cdf(z_score), 6)


0.972358

### Example 2

When a batch of a certain chemical product is prepared, the amount of a particular impurity in the batch is a random variable with mean $4.0$ g and standard deviation $1.5$ g. If $50$ batches are independently prepared, what is the probability that the sample average amount of impurity is between $3.5$ g and $3.8$ g?

In [4]:
# Impurity population mean and standard deviation (grams), and the number of
# independently prepared batches.
mu, sd, n = 4.0, 1.5, 50

# Lower and upper bounds for the sample mean.
a, b = 3.5, 3.8

# With n >= 30 the sample mean is treated as normally distributed, so the
# interval probability is the difference of the CDF at the two bounds.
p_up_to_b = st.norm.cdf(Z_sd(b, mu, sd, n))
p_up_to_a = st.norm.cdf(Z_sd(a, mu, sd, n))
round(p_up_to_b - p_up_to_a, 6)

0.163678

### Example 3

Suppose only $40\%$ of all drivers in a certain state regularly wear a seat belt. A random sample of $500$ drivers is selected. What is the probability that the number of drivers in the sample who regularly wear a seat belt is:

1. Between $180$ and $230$

2. At most $175$

3. At least $190$

In [5]:
def assume_normal(n, pi):
    """Check the rule of thumb for approximating a binomial by a normal.

    Args:
        n: Number of trials (sample size).
        pi: Probability of success on each trial.

    Returns:
        True when both ``n*pi`` (expected successes) and ``n*(1 - pi)``
        (expected failures) are at least 10, otherwise False.
    """
    enough_successes = n * pi >= 10
    enough_failures = n * (1 - pi) >= 10
    return enough_successes and enough_failures

def samp_mu(n, pi):
    """Return the mean ``n * pi`` of a binomial count.

    Args:
        n: Number of trials (sample size).
        pi: Probability of success on each trial.

    Returns:
        The expected number of successes.
    """
    expected_successes = n * pi
    return expected_successes

def samp_sd(n, pi):
    """Return the standard deviation ``sqrt(n * pi * (1 - pi))`` of a binomial count.

    Args:
        n: Number of trials (sample size).
        pi: Probability of success on each trial.

    Returns:
        The standard deviation of the number of successes.
    """
    variance = n * pi * (1 - pi)
    return m.sqrt(variance)

In [6]:
# Number of drivers sampled and the proportion who regularly wear a seat belt.
n, pi = 500, 0.4

# Check that the sample is large enough for the normal approximation.
assume_normal(n, pi)

True

In [7]:
# Normality can be assumed, so summarize the count of seat-belt wearers by
# its mean and standard deviation.
mu, sd = samp_mu(n, pi), samp_sd(n, pi)

In [8]:
# 1. P(180 <= count <= 230): difference of the normal CDF at the two bounds.
a, b = 180, 230

p_up_to_b = st.norm.cdf(Z(b, mu, sd))
p_up_to_a = st.norm.cdf(Z(a, mu, sd))
round(p_up_to_b - p_up_to_a, 6)

0.96297

In [9]:
# 2. P(count <= 175): normal CDF at the standardized bound.
x_bar = 175

z_score = Z(x_bar, mu, sd)
round(st.norm.cdf(z_score), 6)

0.011239

In [10]:
# 3. P(count >= 190): upper-tail probability at the standardized bound.
x_bar = 190

# The survival function gives the upper tail (1 - cdf) directly, avoiding the
# precision loss of subtracting a CDF value from 1.
round(st.norm.sf(Z(x_bar, mu, sd)), 6)

0.819345

## Probability Plots

### Example 1

- See the data set `tablet_weight.csv` (the file loaded in the code cell below)
- Would you say this data comes from a Normal distribution?
- Would you say this data comes from a Weibull distribution?

In [11]:
# Data file expected to hold one numeric value per line.
filename = "tablet_weight.csv"

# Parse every non-blank line as a float. Blank lines (for example a trailing
# empty line at the end of the file) are skipped instead of raising ValueError.
with open(filename, 'r') as myFile:
    weights = [float(line) for line in myFile if line.strip()]

In [None]:
# Draw two probability plots side by side so that both questions (Normal and
# Weibull) can be judged from the figure.
fig, (ax_normal, ax_weibull) = plt.subplots(1, 2, figsize=(12, 5))

# Normal probability plot: ordered weights against standard normal quantiles.
st.probplot(weights, dist="norm", plot=ax_normal)
ax_normal.set_title("Normal probability plot")

# Weibull probability plot: if X is Weibull then ln(X) follows a smallest
# extreme value (left-skewed Gumbel) distribution, so the log weights are
# plotted against gumbel_l quantiles. This requires strictly positive weights.
log_weights = [m.log(w) for w in weights]
st.probplot(log_weights, dist=st.gumbel_l, plot=ax_weibull)
ax_weibull.set_title("Weibull probability plot (log weights)")

fig.tight_layout()
plt.show()

# Reading the plots: the closer the points follow the fitted straight line,
# the more plausible that distribution is for the data.
# Original author's note: the data looks Normal; the Weibull question had
# been left unanswered.