### Independent data with randomly assigned block labels

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Two standard deviations used as `scale` arguments when the data are drawn,
# plus their combination in quadrature (the spread of the sum of two
# independent normal terms with these scales).
s_small, s_large = 1, 5
s_total = np.sqrt(s_small * s_small + s_large * s_large)

In [4]:
# 5000 independent normal draws with the combined scale `s_total`.
numbers = np.random.normal(size=5000, scale=s_total)
# Uniform draws on [0, 500) truncated to integers give each value a cluster
# label in 0..499 that is unrelated to the value itself.
# The `np.int` alias was removed in NumPy 1.24; it was only ever an alias for
# the builtin `int`, so the resulting dtype is unchanged.
labels = np.random.uniform(0, 500, size=5000).astype(int)

In [5]:
# One row per drawn value, paired with its cluster label.
df = pd.DataFrame(
    {
        'vals': numbers,
        'cluster': labels,
    }
)

In [6]:
def weight_sn(df):
    """Draw one cluster-level bootstrap replicate, expressed as row weights.

    The distinct values of ``df['cluster']`` are resampled with replacement,
    taking as many draws as there are distinct clusters. Each row's weight is
    the number of times its cluster was drawn, or 0 if it was never drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'cluster'`` column. The frame is not modified.

    Returns
    -------
    pandas.Series
        Weights named ``'cw'``, aligned to ``df.index``.
    """
    # One entry per distinct cluster label; only the index (the labels) is
    # used below, the sizes themselves are not.
    cluster_sizes = df.groupby('cluster').size()

    # Resample the labels with replacement, then count how often each label
    # came up in this replicate.
    drawn = cluster_sizes.sample(n=cluster_sizes.index.size, replace=True)
    draw_counts = drawn.groupby(level=0).size()

    # Look up every row's cluster in the draw counts; clusters that were not
    # drawn have no entry and become NaN, which is turned into weight 0.
    # The result of fillna is assigned rather than applied in place on a
    # column pulled out of a frame: that chained in-place form does not
    # update the frame under pandas copy-on-write.
    weights = df['cluster'].map(draw_counts).fillna(0)
    weights.name = 'cw'

    return weights

In [7]:
# Ordinary (row-level) bootstrap: each replicate draws 5000 rows with
# replacement and records the mean of `vals`.
standard = [
    np.mean(df.vals.sample(5000, replace=True))
    for _ in range(10000)
]

In [8]:
# Cluster-level bootstrap: each replicate gets a fresh set of per-row weights
# from weight_sn and records the weighted mean of `vals`.
block = []
for _ in range(10000):
    w = weight_sn(df)
    block.append(np.sum(df.vals*w)/np.sum(w))

In [9]:
# For each bootstrap scheme: mean and standard deviation of the replicate
# means. Last row: the sample mean of `vals` and std/sqrt(5000).
for row in (
    ('Standard:', np.mean(standard), np.std(standard)),
    ('Block:', np.mean(block), np.std(block)),
    ('Calculated:', np.mean(df.vals), np.std(df.vals)/np.sqrt(5000)),
):
    print(*row)

Standard: -0.027224918606523426 0.0731652042344793
Block: -0.026851000393951108 0.07035613453086466
Calculated: -0.027608366061573564 0.07233792541757868


### Block sampled data

In [10]:
# 500 centre values drawn with scale `s_large`.
numbers = np.random.normal(scale=s_large, size=500)
# Ten draws around every centre with scale `s_small`; the (10, 500) result is
# transposed so that row i holds the ten draws centred on the i-th value.
numbers = np.random.normal(loc=numbers, scale=s_small, size=(10, 500)).T

In [17]:
# Flatten row by row so the ten values of each row stay contiguous, and label
# them 0,0,...,0,1,1,... (each label repeated ten times) to match.
df2 = pd.DataFrame(
    {
        'vals': numbers.ravel(),
        'cluster': np.arange(500).repeat(10),
    }
)

In [18]:
# Ordinary (row-level) bootstrap on df2: each replicate draws 5000 rows with
# replacement and records the mean of `vals`.
standard = [
    np.mean(df2.vals.sample(5000, replace=True))
    for _ in range(10000)
]

In [19]:
# Cluster-level bootstrap on df2: each replicate gets a fresh set of per-row
# weights from weight_sn and records the weighted mean of `vals`.
block = []
for _ in range(10000):
    w = weight_sn(df2)
    block.append(np.sum(df2.vals*w)/np.sum(w))

In [20]:
# For each bootstrap scheme: mean and standard deviation of the replicate
# means. Last row: the sample mean of `vals` and std/sqrt(5000).
for row in (
    ('Standard:', np.mean(standard), np.std(standard)),
    ('Block:', np.mean(block), np.std(block)),
    ('Calculated:', np.mean(df2.vals), np.std(df2.vals)/np.sqrt(5000)),
):
    print(*row)

Standard: 0.09165555019627283 0.06867501296904654
Block: 0.08868715265947687 0.2143991695321291
Calculated: 0.09133251721890298 0.06947236233136976
