In [157]:
import numpy as np
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm

In [158]:
class ConfidenceInterval:
    """Two-sided confidence interval for a sample mean or a sample proportion.

    With ``populationProportions=False`` (the default) the interval is built
    around the sample mean using a t critical value with ``n - 1`` degrees of
    freedom. With ``populationProportions=True`` the sample is treated as a
    sequence of 0/1 outcomes and the interval is built around the sample
    proportion using a standard-normal critical value.

    Attributes set by ``__init__``:
        sampleData, alpha, populationProportions: the constructor arguments.
        n: number of observations (``len(sampleData)``).
        pHat: sample proportion, or ``None`` in mean mode.
        xBar: sample mean, or ``None`` in proportion mode.
        SE: standard error of the estimate.

    Attribute set by ``getInterval``:
        interval: ``(lower, upper)`` tuple.
    """

    def __init__(self, sampleData, alpha = 0.05, populationProportions = False):
        """Compute the point estimate and its standard error.

        Args:
            sampleData: sized collection of numeric observations (0/1 values
                are expected when ``populationProportions`` is true).
            alpha: significance level; the interval has confidence
                ``1 - alpha``.
            populationProportions: when true, estimate a proportion instead
                of a mean.

        Raises:
            ValueError: if ``sampleData`` is empty.
        """
        self.sampleData = sampleData
        self.alpha = alpha
        self.populationProportions = populationProportions
        self.n = len(self.sampleData)

        # An empty sample would otherwise surface as a ZeroDivisionError or a
        # silent NaN depending on the branch; fail early with a clear message.
        if self.n == 0:
            raise ValueError('sampleData must contain at least one observation')

        if self.populationProportions:
            self.pHat = sum(self.sampleData)/self.n
            # standard error of a proportion: sqrt(p * (1 - p) / n)
            self.SE = np.sqrt(self.pHat*(1-self.pHat)/self.n)
            self.xBar = None

        else:
            self.xBar = np.mean(self.sampleData)
            # NOTE(review): np.std is called with its default ddof, so this is
            # the population-style standard deviation. A sample-based SE
            # usually uses ddof=1; left unchanged so results stay identical
            # for existing callers -- confirm which is intended.
            self.SE = np.std(self.sampleData)/np.sqrt(self.n)
            self.pHat = None

    def getInterval(self):
        """Compute, print and return the two-sided ``1 - alpha`` interval.

        Prints a dict with the keys ``sampleEstimate`` and
        ``confidenceInterval``, stores the interval on ``self.interval`` and
        returns it.

        Returns:
            tuple: ``(lower, upper)`` bounds of the interval.
        """
        # The critical value is kept in a local variable. It must not be
        # assigned onto the imported ``stats`` module, which is shared by
        # every user of scipy.stats in the process.
        if self.populationProportions:
            # z value whose two tails together hold probability alpha
            criticalValue = np.abs(stats.norm.ppf(self.alpha/2))
            # CI = pHat +- z * standard error
            sampleEstimate = self.pHat
        else:
            # t value (inverse CDF of the t-distribution, n - 1 degrees of
            # freedom) whose two tails together hold probability alpha
            criticalValue = np.abs(stats.t.ppf((self.alpha)/2, df = self.n -1))
            # CI = xBar +- t * standard error
            sampleEstimate = self.xBar

        self.interval = (sampleEstimate - criticalValue*self.SE,
                         sampleEstimate + criticalValue*self.SE)

        # The estimate is selected by mode rather than by truthiness, so a
        # sample proportion of exactly 0.0 is reported as 0.0 and not None.
        print({'sampleEstimate': sampleEstimate,
               'confidenceInterval': self.interval
              })
        return self.interval
        

In [159]:
if __name__ == '__main__':
    # Self-check: compare ConfidenceInterval against the reference
    # implementations in scipy (mean) and statsmodels (proportion).
    print('running tests')
    ALPHA = 0.05
    POPULATION_SIZE = 1000000
    SAMPLE_SIZE = 100
    SEED = 42
    # Seeded generator so the drawn samples, and therefore the printed
    # intervals, are reproducible from run to run.
    rng = np.random.default_rng(SEED)

    #mean
    populationData = rng.normal(loc = 50, scale = 25, size = POPULATION_SIZE)
    print('true population mean: {}'.format(np.mean(populationData)))
    sampleData = rng.choice(populationData, SAMPLE_SIZE)

    # The confidence level is passed positionally: its keyword was renamed
    # from `alpha` to `confidence` in newer SciPy releases, and the positional
    # form works on both. ddof=0 matches the np.std default used by the class.
    expectedMeanInterval = stats.t.interval(1 - ALPHA,
                                            df=len(sampleData) - 1,
                                            loc=np.mean(sampleData),
                                            scale=stats.sem(sampleData, ddof=0))
    # Tolerance-based comparison: exact float equality between two different
    # code paths is brittle.
    np.testing.assert_allclose(
        ConfidenceInterval(sampleData, alpha=ALPHA).getInterval(),
        expectedMeanInterval)

    #population proportions
    populationDataProp = np.concatenate((np.zeros(int(POPULATION_SIZE*0.3)),
                                         np.ones(int(POPULATION_SIZE*0.7))))
    # np.mean instead of builtin sum(): avoids a Python-level loop over the
    # million-element array.
    print('true population proportion: {}'.format(np.mean(populationDataProp)))
    sampleDataProp = rng.choice(populationDataProp, SAMPLE_SIZE)

    expectedPropInterval = sm.stats.proportion_confint(
        count = sampleDataProp.sum(),
        nobs = len(sampleDataProp),
        alpha = ALPHA,
        method='normal')
    np.testing.assert_allclose(
        ConfidenceInterval(sampleDataProp,
                           alpha = ALPHA,
                           populationProportions = True).getInterval(),
        expectedPropInterval)

    print('tests succeeded')

running tests
true population mean: 50.01237847429673
{'sampleEstimate': 51.53452853698198, 'confidenceInterval': (46.448383687746045, 56.620673386217916)}
true population proportion: 0.7
{'sampleEstimate': 0.64, 'confidenceInterval': (0.5459217287420775, 0.7340782712579226)}
tests succeeded
