Using Monte Carlo approach to do hypothesis testing. Motivated by Chapter 9 of Think Stats, by Allen B. Downey.

Below we use coin toss as a demonstration.

Suppose we toss a coin 250 times, and observe 140 heads and 110 tails. We want to know if the coin is a fair coin.

In [1]:
import numpy as np
import pandas as pd

In [13]:
class CoinTest(object):
    
    def __init__(self, data):
        self.heads, self.tails = data
        self.actual_statistics = self.computeStatistics(data)

        
    def computeStatistics(self, data):        
        heads, tails = data
        return abs(heads - tails)


    def pValue(self, n_iter=100):
        stats = np.array([self.computeStatistics(self.MonteCarlo()) for _ in range(n_iter)])
        count_extremes = np.sum(stats > self.actual_statistics)
        return float(count_extremes)/float(n_iter)
        
    
    def MonteCarlo(self):
        n = self.heads + self.tails
        hist = pd.Series(np.random.choice(['H', 'T'], size=n, replace=True)).value_counts().to_dict()
        heads = hist['H'] if 'H' in hist else 0
        tails = hist['T'] if 'T' in hist else 0
        return (heads, tails)
    

In [21]:
ct = CoinTest((140, 110))
pval = ct.pValue(1000)
print(pval)

p_value_threshold = 0.05
if pval < p_value_threshold:
    print('Null hypothesis should be rejected')
else:
    print('fail to reject null hypothesis')

0.049
Null hypothesis should be rejected
