In [1]:
####################
## Load Libraries ##
####################
import pandas as pd
import numpy as np
import math

In [2]:
##################################################################
## function to simulate data and recover Hoeffding's Inequality ##
##################################################################
def Hoeffdinger_Bounds_Analysis(distribution, m=100, n=100, delta=0.05, seed=1081):
    """Empirically check a Hoeffding-type bound on the sample mean.

    Draws ``m`` independent samples of size ``n`` from the chosen
    distribution, computes each sample mean, and reports how often the mean
    deviates from the true mean by at least the half-width ``k`` implied by
    the confidence level ``delta``.

    Supported distributions and the bound used for ``k``:

    * ``'Rademacher'`` -- values in [-1, 1], mean 0
    * ``'Beta'``       -- Beta(1, 3), values in [0, 1], mean 1/4
    * ``'Binomial'``   -- Binomial(1, 0.25), values in [0, 1], mean 0.25
    * ``'Uniform'``    -- Uniform(0, 1), values in [0, 1], mean 0.5
      (all of the above: ``k = sqrt(log(2/delta) * (b - a)**2 / (2 * n))``)
    * ``'Normal'``     -- Normal(0, 1), using
      ``k = sqrt(log(2/delta) * 2 * sigma**2 / n)``

    Parameters
    ----------
    distribution : str
        One of the names listed above (case-sensitive).
    m : int, default 100
        Number of simulated samples (replications).
    n : int, default 100
        Number of observations in each sample; must be at least 1.
    delta : float, default 0.05
        Target probability of falling outside the bound; must be in (0, 1].
    seed : int, default 1081
        Seed passed to ``np.random.seed``. Note that this reseeds numpy's
        global legacy random state.

    Returns
    -------
    None
        The results are printed: ``k`` and the fraction (not a percentage,
        despite the label) of sample means with ``|mean_hat - mean| >= k``.

    Raises
    ------
    ValueError
        If ``distribution`` is not supported, ``n < 1``, or ``delta`` is
        outside (0, 1].
    """

    def bounded(a, b):
        # Range-based bound: k**2 = log(2/delta) * (b - a)**2 / (2 * n)
        return (b - a) ** 2, 2 * n

    def sub_gaussian(sigma):
        # Variance-based bound: k**2 = log(2/delta) * 2 * sigma**2 / n
        return 2 * (sigma ** 2), n

    # name -> (sampler for one sample, true mean, (numerator, denominator) of k**2 / log(2/delta))
    specs = {
        'Rademacher': (
            lambda size: np.random.choice([-1, 1], size=size, replace=True),
            0,
            bounded(-1, 1),
        ),
        'Beta': (
            lambda size: np.random.beta(1, 3, size=size),
            1 / (1 + 3),
            bounded(0, 1),
        ),
        'Binomial': (
            lambda size: np.random.binomial(1, 0.25, size=size),
            1 * 0.25,
            bounded(0, 1),
        ),
        'Normal': (
            lambda size: np.random.normal(0, 1, size=size),
            0,
            sub_gaussian(1),
        ),
        'Uniform': (
            lambda size: np.random.uniform(0, 1, size=size),
            0.5,
            bounded(0, 1),
        ),
    }

    # Fail fast with a clear message instead of hitting an unbound `k` later.
    if distribution not in specs:
        raise ValueError(
            'unknown distribution ' + repr(distribution)
            + '; expected one of ' + ', '.join(sorted(specs))
        )
    if n < 1:
        raise ValueError('n must be at least 1, got ' + repr(n))
    if not 0 < delta <= 1:
        raise ValueError('delta must be in (0, 1], got ' + repr(delta))

    np.random.seed(seed)

    draw, mean, (numerator, denominator) = specs[distribution]
    k = np.sqrt((np.log(2) - np.log(delta)) * numerator / denominator)

    # One draw of size n per replication, in order, so the random stream is
    # consumed exactly as a per-row loop would; only the bookkeeping is
    # vectorised (no per-cell DataFrame writes).
    sample_means = np.array([draw(n).mean() for _ in range(m)], dtype=float)
    outside = np.abs(sample_means - mean) >= k
    # With no replications there is nothing to average; report NaN.
    fraction_outside = float(outside.mean()) if outside.size else float('nan')

    print('k: ' + str(k))
    print('percent outside bounds: ' + str(fraction_outside))

In [4]:
########################
## simulation results ##
########################
# (heading printed before the run, distribution name passed to the analysis)
simulation_runs = [
    ('Rademacher RVs:', 'Rademacher'),
    ('Beta RVs:', 'Beta'),
    ('Binomial RVs:', 'Binomial'),
    ('Uniform RVs:', 'Uniform'),
    ('Normal (Gaussian) RVs:', 'Normal'),
]

# Same settings for every distribution; a blank separator goes between runs
# (not before the first one and not after the last one).
for run_index, (heading, dist_name) in enumerate(simulation_runs):
    if run_index > 0:
        print('\n')
    print(heading)
    Hoeffdinger_Bounds_Analysis(distribution=dist_name, m=100000, n=1000, delta=0.2, seed=1081)

Rademacher RVs:
k: 0.06786140424415112
percent outside bounds: 0.03413


Beta RVs:
k: 0.03393070212207556
percent outside bounds: 0.0


Binomial RVs:
k: 0.03393070212207556
percent outside bounds: 0.01427


Uniform RVs:
k: 0.03393070212207556
percent outside bounds: 0.0002


Normal (Gaussian) RVs:
k: 0.06786140424415112
percent outside bounds: 0.0314
