You sample the weight of 30 cats out of the population of 1 million cats.
Your sample mean is 21.7 lbs and your sample standard deviation is 10.73 lbs. What is the 95% confidence interval for the population mean?

In [5]:
import pandas as pd

In [24]:
import numpy as np

In [25]:
import math 

In [7]:
# Preview of the generator used for the sample: 30 random integers from 5 up to
# (but not including) 40. This draw is separate from the one stored in cat_weight.
np.random.randint(5, 40, size=30)

array([29, 14,  7, 28, 32, 21, 38,  5, 13,  9, 33,  6, 34, 33, 16, 26, 11,
       30, 23, 33,  9, 13, 14, 13,  9, 24, 26, 16, 38, 33])

In [16]:
# Simulated sample: 30 integer cat weights drawn from 5..39
# (randint's upper bound is exclusive), stored in a single "weight" column.
# No seed is set in the visible cells, so a re-run produces different values.
cat_weight = pd.DataFrame(
    {'weight': np.random.randint(5, 40, size=30)},
    columns=["weight"],
    dtype=int,
)

In [18]:
# Display the simulated sample of cat weights.
cat_weight

Unnamed: 0,weight
0,21
1,39
2,36
3,8
4,30
5,15
6,20
7,34
8,33
9,6


In [19]:
# Summary statistics for the sample: count, mean, std, min, quartiles and max.
cat_weight.describe()

Unnamed: 0,weight
count,30.0
mean,21.7
std,10.72911
min,5.0
25%,13.0
50%,19.5
75%,32.75
max,39.0


In [54]:
# Sample mean of the weights (returned as a Series with one entry per column).
cat_mean = cat_weight.mean()
cat_mean

weight    21.7
dtype: float64

In [21]:
# Sample standard deviation of the weights
# (pandas' default is ddof=1, i.e. the n - 1 denominator).
cat_std = cat_weight.std()
cat_std

weight    10.72911
dtype: float64

In [30]:
# Manually calculating the standard error of the mean:
# the sample standard deviation divided by the square root of the sample size.
# The sample size is taken from the data rather than hard-coded, so this cell
# stays correct if the number of sampled cats changes.

cat_std/math.sqrt(len(cat_weight))

weight    1.958858
dtype: float64

In [23]:
# Alternatively, use pandas' built-in standard error of the mean method,
# which gives the same value as the manual calculation.

cat_sem = cat_weight.sem()
cat_sem

weight    1.958858
dtype: float64

In [35]:
# What is the 95% confidence interval?
# Calculated with an existing library: statsmodels' t-based confidence interval
# for the mean. alpha=0.05 is the significance level, i.e. 1 - 0.05 = 95% coverage.
# The result is a (lower, upper) pair of arrays with one entry per column.

import statsmodels.stats.api as sms
sms.DescrStatsW(cat_weight).tconfint_mean(alpha=0.05)

(array([ 17.69368463]), array([ 25.70631537]))

In [61]:
# What is the 95% confidence interval?
# With a Z score, about 1.96 standard errors on either side of the mean covers 95%.
# Use a t-score table instead for small sample sizes (n <= 30).
# For small samples you typically multiply the standard error by a little more than 2
# (2.045 for n = 30, i.e. 29 degrees of freedom).

def confidence_interval(data, t_critical=2.045):
    """Return the sample mean and a t-based confidence interval around it.

    All statistics are computed from ``data`` itself rather than from
    notebook-level globals, so the function gives the right answer for any
    sample passed in.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        The sample observations, one observation per row.
    t_critical : float, optional
        Multiplier applied to the standard error. The default, 2.045, is the
        two-sided 95% t critical value for 29 degrees of freedom (n = 30).
        Pass the value that matches your own sample size and confidence level
        when they differ.

    Returns
    -------
    tuple
        ``(mean, lower_bound, upper_bound)``, each of the type returned by
        ``data.mean()`` (a Series for a DataFrame input).
    """
    sample_mean = data.mean()
    # Standard error of the mean: sample standard deviation over sqrt(n),
    # with n taken from the data instead of a hard-coded 30.
    standard_error = data.std() / math.sqrt(len(data))
    margin = standard_error * t_critical
    return sample_mean, sample_mean - margin, sample_mean + margin

# Returns (mean, lower bound, upper bound) for the cat weight sample.
confidence_interval(cat_weight)


(weight    21.7
 dtype: float64, weight    17.694134
 dtype: float64, weight    25.705866
 dtype: float64)