In [49]:
import numpy as np
from scipy.stats import norm
from scipy.stats import t 
from scipy.stats import chi2
from scipy.stats import f

In [50]:
can_data = np.array([101.8, 101.5, 102.6, 101, 101.8, 96.8, 102.4, 100, 98.8,
                    98.1, 98.8, 98, 99.4, 95.5, 100.1, 100.5, 97.4, 100.2, 101.4,
                    98.7, 101.4, 99.4, 101.7, 99, 99.7, 98.8, 99.5, 100, 99.7, 100.9,
                    99.7, 99, 98.8, 99.7, 100.9, 99.9, 97.5, 101.5, 98.2, 99.2, 98.6,
                    101.4, 102.1, 102.9, 100.8, 99.4, 103.7, 100.3, 100.2, 101.1, 101.8,
                    100, 101.2, 100.5, 101.2, 101.6, 99.9, 100.5, 100.4, 98.1, 100.1,
                    101.6, 99.3, 96.1, 100, 99.7, 99.7, 99.4, 101.5, 100.9, 101.2, 
                    99.9, 99.1, 100.7, 100.8, 100.8, 101.4, 100.3, 98.4, 97.2])

In [51]:
n = len(can_data)
can_mean = np.mean(can_data)
can_std = np.std(can_data, ddof=1)

ll = can_mean - norm(loc = 0, scale =1).ppf(0.975)*can_std/np.sqrt(n)
ul = can_mean + norm(loc = 0, scale =1).ppf(0.975)*can_std/np.sqrt(n)

print("pont estimator of the mean is %.2f " % can_mean)
print("95% confidence interval of the mean is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))


pont estimator of the mean is 100.04 
95% confidence interval of the mean is 99.7053 and 100.3722


### about the mean (a small sized sample)

In [52]:
small_can = np.random.choice(can_data, 25, replace=True)
print("___________________________________________________________")
print(small_can)
print("___________________________________________________________")

___________________________________________________________
[ 99.  100.   99.7 100.  102.1  98.  101.4 100.5  95.5 101.4  97.2 100.7
 100.9 100.   98.8 100.3 100.1 100.1 100.  101.4 100.  101.5  99.   99.7
 101.5]
___________________________________________________________


In [53]:
n = len(small_can)
df = n - 1
can_mean = np.mean(small_can)
can_std = np.std(small_can, ddof=1)

ll = can_mean - t(df).ppf(0.975)*can_std/np.sqrt(n)
ul = can_mean + t(df).ppf(0.975)*can_std/np.sqrt(n)
print("pont estimator of the mean is %.2f " % can_mean)
print("95% confidence interval of the mean is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

pont estimator of the mean is 99.95 
95% confidence interval of the mean is 99.3438 and 100.5602


### about the variance

### Find the 95% confidence interval of the variance with ‘can data’.

In [54]:
n = len(can_data)
df = n - 1
s2 = np.var(can_data, ddof = 1)
chi_dist = chi2(df)

ll = (n-1)*s2/chi_dist.ppf(0.975)
ul = (n-1)*s2/chi_dist.ppf(0.025)
print("The point estimator of the variance is", s2)
print("The 95% confidence interval of the variance is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

The point estimator of the variance is 2.3160743670886093
The 95% confidence interval of the variance is 1.7348 and 3.2494


### about the difference of two means

In [55]:
A_group =np.random.choice(can_data, 20, replace=True)
B_group =np.random.choice(can_data, 18, replace=True)

print("__________________ A group _________________________")
print(A_group)
print("__________________ B group _________________________")
print(B_group)

__________________ A group _________________________
[101.8  98.   99.4 100.7 103.7  97.4  98.1 101.5 100.8  99.5  99.7 102.4
  98.8  98.   98.4 100.9 101.1  99.7  99.9 101.8]
__________________ B group _________________________
[ 98.1  99.5  99.9 102.9 101.7 101.2 100.5  98.   97.2  98.8 102.4 101.4
  99.3  98.2  99.7 100.8 101.4 103.7]


### About the differece of two means

### Calculate the pooled estimator of the variance

In [56]:
n1 = len(A_group)
n2 = len(B_group)

pooled_s2 = ((n1-1)*np.var(A_group, ddof=1) + (n2-1)*np.var(B_group, ddof=1))/(n1+n2-2)
print("Pooled estimator of the variance is ", np.round(pooled_s2,4))

Pooled estimator of the variance is  3.0865


### About the difference of two means

### Calculate the 95% confidence interval of the difference of the means.

In [57]:
a_mean = np.mean(A_group)
b_mean = np.mean(B_group)
df = n1+n2-2

ll = (a_mean - b_mean) - t(df).ppf(0.975)*np.sqrt(pooled_s2)*np.sqrt(1/n1+1/n2)
ul = (a_mean - b_mean) + t(df).ppf(0.975)*np.sqrt(pooled_s2)*np.sqrt(1/n1+1/n2)

print("The point estimator of the difference of the means is ", np.round(a_mean - b_mean,4))
print("95% confidence interval of the mean variance is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

The point estimator of the difference of the means is  -0.1811
95% confidence interval of the mean variance is -1.3387 and 0.9765


#  About the ratio of two variances

# -> Compute the 95% confidence interval of the ratio of A over B group in the previous example.

In [58]:
n1 = len(A_group)
n2 = len(B_group)
f1_dist = f(n1-1, n2-1)
f2_dist = f(n2-1, n1-1)

A_s2 = np.var(A_group, ddof=1)
B_s2 = np.var(B_group, ddof=1)

ll = (A_s2/B_s2)*f2_dist.ppf(0.975)
ul = (A_s2/B_s2)*f1_dist.ppf(0.975)

print("95% cofidence interval of ratio of variance A over B is {} and {}".format(np.round(ll,4), np.round(ul,4)))

95% cofidence interval of ratio of variance A over B is 2.2008 and 2.2575
