# 2.3: Classical confidence intervals

In [1]:
from __future__ import print_function, division
%matplotlib inline

import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


## CI for continuous data, Pg 18

In [2]:
# import the t-distribution from scipy.stats
from scipy.stats import t

In [3]:
#R: y <- c(35,34,38,35,37)
# The five continuous observations, held as a 1-D NumPy array.
y = np.asarray([35, 34, 38, 35, 37])
y

array([35, 34, 38, 35, 37])

In [4]:
#R: n <- length(y)
# Sample size: the length of the 1-D observation array.
n = y.shape[0]
n

5

In [5]:
#R: estimate <- mean(y)
# Point estimate: the arithmetic mean of the observations.
estimate = y.mean()
estimate

35.799999999999997

By default, NumPy's `np.std()` divides by **N**, whereas R's `sd()` divides by
**N-1**. To use the **N-1** denominator (the one based on the unbiased variance
estimator) and agree with the R output, we have to pass `ddof=1` to `np.std()`:

In [6]:
#R: se <- sd(y)/sqrt(n)
# Standard error of the mean; ddof=1 selects the N-1 denominator.
se = y.std(ddof=1) / np.sqrt(n)
se

0.73484692283495334

In [7]:
#R: int.50 <- estimate + qt(c(.25,.75),n-1)*se
# 50% interval: shift the estimate by the 25th/75th percentiles of the
# t distribution with n-1 degrees of freedom, scaled by the standard error.
int50 = estimate + se * t.ppf([0.25, 0.75], df=n - 1)
int50

array([ 35.25570103,  36.34429897])

In [8]:
#R: int.95 <- estimate + qt(c(.025,.975),n-1)*se
# 95% interval: same construction, using the 2.5th/97.5th t percentiles.
int95 = estimate + se * t.ppf([0.025, 0.975], df=n - 1)
int95

array([ 33.75973786,  37.84026214])

## CI for proportions, Pg 18

In [9]:
# import the normal from scipy.stats
from scipy.stats import norm

In [10]:
#R: y <- 700
# Count that forms the numerator of the proportion estimate y/n.
y = 700
y

700

In [11]:
#R: n <- 1000
# Sample size: the denominator of the proportion estimate y/n.
n = 1000
n

1000

In [12]:
#R: estimate <- y/n
# Estimated proportion (true division; see the __future__ import at the top).
estimate = y / n
estimate

0.7

In [13]:
#R: se <- sqrt (estimate*(1-estimate)/n)
# Standard error of a proportion: sqrt(p * (1 - p) / n).
se = np.sqrt(estimate * (1 - estimate) / n)
se

0.014491376746189439

In [14]:
#R: int.95 <- estimate + qnorm(c(.025,.975))*se
# 95% interval from the standard normal 2.5th/97.5th percentiles.
int95 = estimate + se * norm.ppf([0.025, 0.975])
int95

array([ 0.67159742,  0.72840258])

## CI for discrete data, Pg 18

In [15]:
#R: y <- rep(c(0,1,2,3,4), c(600,300,50,30,20))
# Discrete sample of 1000 values: each of 0..4 repeated by its count.
y = np.repeat(np.arange(5), repeats=[600, 300, 50, 30, 20])
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0,

In [16]:
#R: n <- length(y)
# Sample size: total number of elements in the 1-D sample array.
n = y.size
n

1000

In [17]:
#R: estimate <- mean(y)
# Point estimate: the sample mean of the discrete values.
estimate = y.mean()
estimate

0.56999999999999995

See the note above about the different default denominators for the standard
deviation in Python (NumPy) and R.

In [18]:
#R: se <- sd(y)/sqrt(n)
# Standard error of the mean, with the N-1 denominator (ddof=1) to match R.
se = y.std(ddof=1) / np.sqrt(n)
se

0.027674281668470926

In [19]:
#R: int.50 <- estimate + qt(c(.25,.75),n-1)*se
# 50% interval from the 25th/75th percentiles of t with n-1 degrees of freedom.
int50 = estimate + se * t.ppf([0.25, 0.75], df=n - 1)
int50

array([ 0.55132718,  0.58867282])

In [20]:
#R: int.95 <- estimate + qt(c(.025,.975),n-1)*se
# 95% interval from the 2.5th/97.5th percentiles of t with n-1 degrees of freedom.
int95 = estimate + se * t.ppf([0.025, 0.975], df=n - 1)
int95

array([ 0.51569361,  0.62430639])