In [136]:
import numpy as np
import scipy.stats as stats
import pandas as pd
import math
import seaborn as sns

In [209]:
class ChiSquareTest: 
    #Chi-squared goodness of fit test. Used to determine whether a frequency of a variable follows an expected frequency
    def __init__(self,observedCounts, expectedProps):
        
        self.expectedProps = np.array(expectedProps)
        self.observedCounts = np.array(observedCounts)
        
        #expected counts = expected proportion for each category * total # observations in sample
        self.expectedCounts = np.array([e*sum(self.observedCounts) for e in self.expectedProps])
        
    def runTest(self):
        
        #chi2 statistic = sum((observed-expected)**2/expected)
        self.chi2 = sum((self.observedCounts-self.expectedCounts)**2/self.expectedCounts)
        
        #use chi square distribution cdf function to determine probability that chi2 >= calculated chi2
        #degrees of freedom = # of categories - 1
        self.pValue = 1-stats.chi2.cdf(self.chi2, df=len(self.observedCounts)-1)
        
        return (self.chi2, self.pValue)

In [210]:
if __name__ == '__main__':
    print('running tests')

    observedCounts = [25, 35, 50, 90]
    expectedProps = [0.15,0.2,0.2,0.45]

    test = ChiSquareTest(observedCounts, expectedProps )
    testResult = test.runTest()
    print(testResult)
    
    assert round(stats.chisquare(observedCounts, test.expectedCounts)[1],5) == round(testResult[1],5)
    
    expectedProps = [0.6,0.2,0.1,0.1]
    test = ChiSquareTest(observedCounts, expectedProps )
    testResult = test.runTest()
    print(testResult)
    
    assert round(stats.chisquare(observedCounts, test.expectedCounts)[1],5) == round(testResult[1],5)
    
    print('tests succeeded')

running tests
(3.9583333333333335, 0.265998669940964)
(365.8333333333333, 0.0)
tests succeeded
