In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

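## The permutation test is a non-parametric method for testing whether two samples come from the same distribution. The test is "exact", meaning that it does not rely on large-sample approximations; in practice the permutation distribution is estimated here from B random shuffles of the pooled data.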

In [2]:
class distribution_test:
    """Two-sample permutation test on the absolute difference of sample means.

    Null hypothesis: both samples come from the same distribution, so the
    group labels are exchangeable and can be shuffled freely.

    Parameters
    ----------
    x1, x2 : array-like
        The two samples to compare. They are stored as given and only
        converted to NumPy arrays when a test is run.
    """

    def __init__(self, x1, x2):
        self.x1 = x1
        self.x2 = x2

    def __getlen__(self, x):
        """Return the number of observations in ``x`` (kept for compatibility)."""
        return len(x)

    def permutation_test(self, B, rng=None):
        """Estimate the permutation p-value from ``B`` random relabelings.

        Parameters
        ----------
        B : int
            Number of random permutations of the pooled data; must be positive.
        rng : optional
            Object exposing an in-place ``shuffle(array)`` method, e.g. a
            ``numpy.random.Generator`` or ``numpy.random.RandomState``.
            When omitted, the global ``np.random.shuffle`` is used, so
            ``np.random.seed`` still controls the result.

        Returns
        -------
        float
            Fraction of permuted statistics that are at least as large as the
            observed statistic.

        Raises
        ------
        ValueError
            If ``B`` is not positive or either sample is empty.
        """
        if B <= 0:
            raise ValueError("B must be a positive number of permutations")

        # Flatten so the split point `n` agrees with the flattened pooled array.
        x = np.asarray(self.x1).ravel()
        y = np.asarray(self.x2).ravel()
        n = self.__getlen__(x)
        m = self.__getlen__(y)
        if n == 0 or m == 0:
            raise ValueError("both samples must contain at least one observation")

        # Test statistic is a free choice; absolute mean difference is used here.
        t_obs = abs(np.mean(x) - np.mean(y))

        # Pool once; every iteration shuffles a fresh copy of the same data.
        pooled = np.concatenate((x, y))
        shuffle = np.random.shuffle if rng is None else rng.shuffle

        T = np.empty(B)
        for i in range(B):
            w_data = pooled.copy()
            shuffle(w_data)
            T[i] = abs(np.mean(w_data[:n]) - np.mean(w_data[n:n + m]))

        # ">=" rather than ">": ties with the observed statistic count as
        # "at least as extreme". A strict comparison reports p = 0 for two
        # identical samples, which is the opposite of the right conclusion.
        count = int(np.count_nonzero(T >= t_obs))
        p_value = count / B

        if p_value < 0.05:
            print("The distributions are different (enough evidence to reject the Null Hypothesis) with the p_value: ",p_value)
        else:
            print("The distributions are not different (Not enough evidence to reject the Null Hypothesis), and the p_value is: ",p_value)
        return p_value

In [21]:
# Seed NumPy's global generator so a fresh "Restart & Run All" reproduces both
# the simulated samples and the shuffles performed inside the permutation test.
np.random.seed(42)

# Two independent samples drawn from the same Binomial(20, 0.5) distribution,
# so the null hypothesis is true by construction.
x = np.random.binomial(20,0.5,1000)
y = np.random.binomial(20,0.5,1000)

per = distribution_test(x,y)
p = per.permutation_test(1000)


The distributions are not different (Not enough evidence to reject the Null Hypothesis), and the p_value is:  0.755
