In [1]:
from collections import namedtuple
import numpy as np
import scipy.stats

In [93]:
"""

Since we very often pass the tuple (n1, s1, n2, s2)
as an argument, we give that tuple a name 'data'.

n1: number of patients in treatment 1
s1: number of successes in treatment 1
n2: number of patients in treatment 2
s2: number of successes in treatment 2

"""

data = namedtuple('data', ['n1', 's1', 'n2', 's2'])

"""

We also define a namedtuple for the configuration.

parameter: tau or psi
method: mle, parametric bootstrap, or posterior simulation
B: number of replications (for parametric bootstrap or posterior simulation)
interval_type: normal or percentile
alpha: significance level (between 0 and 1)

"""

config = namedtuple('config', ['parameter', 'method', 'B', 'interval_type', 'alpha'])

#####################
# Auxiliary functions
#####################

def mle_estimator(p1, p2, parameter):
    if parameter == 'tau':
        return p2 - p1
    if parameter == 'psi':
        return log_odds(p1, p2)

def fisher_std_err(data, parameter):
    
    p1_mle = data.s1/data.n1
    p2_mle = data.s2/data.n2
    
    if parameter == 'tau':
        return np.sqrt(
            p1_mle*(1-p1_mle)/data.n1 + p2_mle*(1-p2_mle)/data.n2
        )
        
    if parameter == 'psi':
        return np.sqrt(
            1/( data.n1 * p1_mle * (1 - p1_mle) )
            + 1/( data.n2 * p2_mle * (1 - p2_mle) )
        )
        
def log_odds(p1, p2):
    return np.log(
        ( p1/(1-p1) ) / ( p2/(1-p2) )
    )
    
def confidence_interval(center, error, replications, alpha, interval_type):
    
    if interval_type == 'normal':
        z = scipy.stats.norm.isf(alpha/2)
        lower_bound = center - z*error
        upper_bound = center + z*error
    
    if interval_type == 'percentile':
        lower_bound = np.quantile(replications, q=alpha/2)
        upper_bound = np.quantile(replications, q=1-alpha/2)
    
    return (lower_bound, upper_bound)

###############
# Main function
###############

def result(data, config):
    
    # MLE
    p1_mle = data.s1/data.n1
    p2_mle = data.s2/data.n2
    estimate = mle_estimator(p1_mle, p2_mle, config.parameter)
    
    # Standard error from the Fisher information
    if config.method == 'mle':
        std_err = fisher_std_err(data, config.parameter)
        
    # Parametric bootstrap replications
    if config.method == 'parametric bootstrap':
        bernoulli1_replications = scipy.stats.bernoulli.rvs(p=p1_mle, size=[data.n1, config.B]).mean(axis=0)
        bernoulli2_replications = scipy.stats.bernoulli.rvs(p=p2_mle, size=[data.n2, config.B]).mean(axis=0)            
    # Simulation replications
    if config.method == 'posterior simulation':
        bernoulli1_replications = scipy.stats.beta.rvs(a=data.s1, b=data.n1-data.s1, size=config.B)
        bernoulli2_replications = scipy.stats.beta.rvs(a=data.s2, b=data.n2-data.s2, size=config.B)
    # MLE replications & standard error from replications
    if config.method in ('parametric bootstrap', 'posterior simulation'):
        mle_replications = mle_estimator(bernoulli1_replications, bernoulli2_replications, config.parameter)
        if config.method == 'posterior simulation':
            estimate = mle_replications.mean()
        std_err = mle_replications.std()
       
    # Confidence interval
    if config.method not in ('parametric bootstrap', 'posterior simulation'):
        """
        Having to set mle_replications to None when config.method == 'mle'
        is admittedly a bit clunky. The upside is that the confidence_interval
        function can take a host of arguments (namely estimate, std_err, and mle_replications)
        even when it only needs some of them!
        """
        mle_replications = None
    interval = confidence_interval(
        estimate,
        std_err,
        mle_replications,
        config.alpha,
        config.interval_type
    )
        
    # Pass the results
    return config, estimate, std_err, interval

##########################
# Printing out the results
##########################

def report(result):
    config, estimate, std_err, interval = result
    lower_bound, upper_bound = interval
    report_text = (
        "--------------------------------------------------\n"
        f"Method:        {config.method.upper()}\n"
        f"Parameter:     {config.parameter}\n"
    )
    if config.method in ('parametric bootstrap', 'posterior simulation'):
        report_text += f"Interval type: {config.interval_type}\n"
    report_text += (
        "--------------------------------------------------\n"
    )
    if config.method == 'posterior simulation':
        report_text += f"Posterior mean:      {estimate:.3f}\n"
    else:
        report_text += f"MLE:                 {estimate:.3f}\n"
    report_text += (
        f"Standard error:      {std_err:.4f}\n"
        f"Confidence interval: ({lower_bound:.4f}, {upper_bound:.4f})\n"
        "--------------------------------------------------"
    )
    print(report_text)

(a) Find the MLE of $\tau$.
Find the standard error and
90 percent confidence interval using the delta method.

In [94]:
current_data = data(50, 30, 50, 40)

"""
configuration:
   parameter
   method
   B
   interval_type -- must pass 'normal' when using the MLE method
   alpha
"""

report(result(current_data, config('tau', 'mle', None, 'normal', 0.1)))

--------------------------------------------------
Method:        MLE
Parameter:     tau
--------------------------------------------------
MLE:                 0.200
Standard error:      0.0894
Confidence interval: (0.0529, 0.3471)
--------------------------------------------------


(b) Find the standard error and 90 percent confidence interval using the parametric bootstrap.

In [105]:
report(result(current_data, config('tau', 'parametric bootstrap', int(1e5), 'normal', 0.1)))
report(result(current_data, config('tau', 'parametric bootstrap', int(1e5), 'percentile', 0.1)))

--------------------------------------------------
Method:        PARAMETRIC BOOTSTRAP
Parameter:     tau
Interval type: normal
--------------------------------------------------
MLE:                 0.200
Standard error:      0.0897
Confidence interval: (0.0524, 0.3476)
--------------------------------------------------
--------------------------------------------------
Method:        PARAMETRIC BOOTSTRAP
Parameter:     tau
Interval type: percentile
--------------------------------------------------
MLE:                 0.200
Standard error:      0.0893
Confidence interval: (0.0600, 0.3400)
--------------------------------------------------


(c) Use the prior $f(p_1, p_2) = 1$.
Use simulation to find the posterior mean and posterior 90 percent interval for $\tau$.

In [114]:
report(result(current_data, config('tau', 'posterior simulation', int(1e6), 'normal', 0.1)))
report(result(current_data, config('tau', 'posterior simulation', int(1e6), 'percentile', 0.1)))

--------------------------------------------------
Method:        POSTERIOR SIMULATION
Parameter:     tau
Interval type: normal
--------------------------------------------------
Posterior mean:      0.200
Standard error:      0.0886
Confidence interval: (0.0543, 0.3458)
--------------------------------------------------
--------------------------------------------------
Method:        POSTERIOR SIMULATION
Parameter:     tau
Interval type: percentile
--------------------------------------------------
Posterior mean:      0.200
Standard error:      0.0886
Confidence interval: (0.0531, 0.3444)
--------------------------------------------------


(d) Let
$$ \psi = \log \left( \frac{p_1}{1-p_1} \div \frac{p_2}{1-p_2} \right)$$
be the log-odds ratio.
Note that $\psi = 0$ if $p_1 = p_2$.
Find the MLE of $\psi$.
Use the delta method to find a 90 percent confidence interval for $\psi$.

In [115]:
report(result(current_data, config('psi', 'mle', None, 'normal', 0.1)))

--------------------------------------------------
Method:        MLE
Parameter:     psi
--------------------------------------------------
MLE:                 -0.981
Standard error:      0.4564
Confidence interval: (-1.7316, -0.2301)
--------------------------------------------------


(e) Use simulation to find the posterior mean and posterior 90 percent interval for $\psi$.

In [116]:
report(result(current_data, config('psi', 'posterior simulation', int(1e6), 'normal', 0.1)))
report(result(current_data, config('psi', 'posterior simulation', int(1e6), 'percentile', 0.1)))

--------------------------------------------------
Method:        POSTERIOR SIMULATION
Parameter:     psi
Interval type: normal
--------------------------------------------------
Posterior mean:      -1.011
Standard error:      0.4645
Confidence interval: (-1.7750, -0.2468)
--------------------------------------------------
--------------------------------------------------
Method:        POSTERIOR SIMULATION
Parameter:     psi
Interval type: percentile
--------------------------------------------------
Posterior mean:      -1.010
Standard error:      0.4644
Confidence interval: (-1.7842, -0.2584)
--------------------------------------------------
