In [2]:
import numpy as np 
import matplotlib.pyplot as plt
import random
import csv

In [3]:
# Bag of Little Bootstraps (BLB): global settings

k = 10   # number of partitions
r = 500  # bootstrap iterations per partition

# tau: buffer of estimator values, one row per partition and one column
# per bootstrap iteration; starts out as all zeros.
tau = np.zeros((k, r))

def estimator_mean(data):
    """Return the arithmetic mean of ``data`` (thin wrapper around ``np.mean``)."""
    sample_mean = np.mean(data)
    return sample_mean

def estimator_var(data):
    """Return the variance of ``data`` as computed by ``np.var`` with its defaults."""
    sample_var = np.var(data)
    return sample_var

In [4]:
def blb(estimator_func, data, n, num_partitions=None, num_replicates=None):
    """Bag of Little Bootstraps (BLB) summary of an estimator.

    The data are randomly split into ``num_partitions`` disjoint subsets.
    Within each subset, ``num_replicates`` bootstrap replicates of size ``n``
    are drawn with replacement and ``estimator_func`` is evaluated on each.
    The per-subset mean and variance of those estimates are then averaged
    across subsets.

    Parameters
    ----------
    estimator_func : callable
        Maps a 1-D array (one bootstrap replicate) to a scalar estimate.
    data : array-like
        The sample. It is copied before shuffling, so the caller's object
        is left untouched.
    n : int
        Size of every bootstrap replicate (normally ``len(data)``).
    num_partitions : int, optional
        Number of disjoint subsets. Defaults to the module-level ``k``.
    num_replicates : int, optional
        Bootstrap replicates per subset. Defaults to the module-level ``r``.

    Returns
    -------
    (theta, sigma_sq) : tuple
        ``theta`` is the average over subsets of the mean estimate;
        ``sigma_sq`` is the average over subsets of the variance of the
        estimates.

    Raises
    ------
    ValueError
        If ``num_partitions`` or ``num_replicates`` is smaller than 1, or if
        there are fewer observations than partitions (a subset would be
        empty).
    """
    # Fall back to the notebook-wide settings only when not given explicitly.
    if num_partitions is None:
        num_partitions = k
    if num_replicates is None:
        num_replicates = r

    if num_partitions < 1 or num_replicates < 1:
        raise ValueError("num_partitions and num_replicates must both be >= 1")

    values = np.asarray(data)
    if len(values) < num_partitions:
        raise ValueError(
            "need at least as many observations as partitions "
            "(got %d observations, %d partitions)" % (len(values), num_partitions)
        )

    # Randomly partition the data into `num_partitions` disjoint subsets.
    # permutation() returns a shuffled copy, and array_split() yields exactly
    # `num_partitions` pieces whose sizes differ by at most one.
    shuffled = np.random.permutation(values)
    subsets = np.array_split(shuffled, num_partitions)

    # Estimates live in a local array (not module-level state), so repeated
    # calls never see each other's results.
    estimates = np.empty((num_partitions, num_replicates))

    for i, subset in enumerate(subsets):
        b = len(subset)
        for c in range(num_replicates):
            # Draw n points with replacement from this subset only; the upper
            # bound of randint is exclusive, so indices span 0..b-1.
            picks = np.random.randint(0, b, size=n)
            estimates[i, c] = estimator_func(subset[picks])

    # Mean and variance of the estimates within each partition (not of the data).
    theta_vec = np.mean(estimates, axis=1)
    sigma_sq_vec = np.var(estimates, axis=1)

    # Should be private; for now, a plain non-private average across partitions.
    theta = np.mean(theta_vec)
    sigma_sq = np.mean(sigma_sq_vec)

    return (theta, sigma_sq)

In [8]:
# Simulation study: run BLB on Poisson samples of increasing size and log
# one CSV row per (sample size, trial).
# Column 'T' holds the trial index t, not the total number of trials.
header = ['n', 'T', 'true_parameter', 'estimator_mean', 'estimator_variance']

T = 100 #trials per sample size
list_of_n = [1000, 2500, 5000, 10000, 15000, 20000]

true_theta = 4  # Poisson rate used to generate the data

# Seed every generator the simulation may draw from (Python's `random` and
# NumPy's global RNG) so that Restart & Run All reproduces the same CSV.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

with open('blb_poisson.csv', 'w', encoding='UTF8', newline='') as file:
    writer = csv.writer(file)

    # write the header
    writer.writerow(header)

    for n in list_of_n:
        for t in range(T):
            data = np.random.poisson(true_theta, n)
            # blb returns the mean and variance of the estimator
            theta, sigma_sq = blb(estimator_mean, data, n)
            row = [n, t, true_theta, theta, sigma_sq]

            # write the data
            writer.writerow(row)