In [36]:

import os,sys,time,math
import pandas as pd
import statsmodels.stats.api as smstats
import statsmodels.api as sm
from sklearn import linear_model
from sklearn import preprocessing

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Section label for the notebook-wide display settings below (a bare string, it has no effect).
'''init notebook'''
# Let pandas show up to 500 rows and 500 columns, with a 1000-character display width,
# so wide or long frames are not elided when displayed.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Apply seaborn's default plot styling to matplotlib figures.
sns.set()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Disabled dataset-discovery snippet, kept inside a string literal so it does not execute.
# If enabled it would clone the MANB1123 repository and print every path under ./MANB1123
# whose file name contains '.xlsx'.
# NOTE: this string is the last expression of the cell, so the notebook echoes it as the cell output.
'''
!git clone https://github.com/aizatrosli/MANB1123.git
!ls
datasetarr = []
for r, d, f in os.walk('./MANB1123'):
    for file in f:
        if '.xlsx' in file:
            datasetarr.append(os.path.join(r, file))
print('\n'.join(datasetarr))
'''

"\n!git clone https://github.com/aizatrosli/MANB1123.git\n!ls\ndatasetarr = []\nfor r, d, f in os.walk('./MANB1123'):\n    for file in f:\n        if '.xlsx' in file:\n            datasetarr.append(os.path.join(r, file))\nprint('\n'.join(datasetarr))\n"

In [15]:
# Confidence level, expressed as a percentage.
cl = 95
# Two-tailed quantile for that confidence level (0.975 for 95%).
# The value is evaluated but neither stored nor displayed.
1 - (1 - cl / 100) / 2
from scipy import stats
import math

In [33]:
def pointestimate(x,n):
	'''
	Point estimate of a proportion.
		x/n	OR	X/N
	Prints a short report (banner, inputs and result) and returns the ratio.
	:param x: number of items of interest
	:param n: size of the sample or population (must be non-zero)
	:return: population proportion OR sample proportion, i.e. x/n
	:raises ZeroDivisionError: if n is 0
	'''
	pe = x/n
	# The banner used to read "Margin of Error (mean population)", copied from a sibling function.
	print("{0}Point Estimate{0}".format("="*5))
	print("="*50)
	print("x:\t{0}\nn:\t{1}\npe:\t{2}".format(x, n, pe))
	print("="*50)
	return pe

def errormeanpop(cl, std, n, tail=1):
	'''
	Margin of error for a population mean, based on the normal (z) distribution.
		z∗(σ/√n)
	Quantile handed to the inverse normal CDF, e.g. for a 95% confidence level:
		- tail == 1      : q=0.95  (one-tail)
		- any other tail : q=0.975 (two-tail)
	Prints a report of the critical value, the inputs and the result.
	:param cl: confidence level as a percentage. eg 95
	:param std: standard deviation. use std()
	:param n: sample size
	:param tail: number of tails; 1 selects the one-tailed quantile, any other value the two-tailed one. default: 1
	:return: margin of error of the mean
	'''
	confidence = cl/100
	if tail == 1:
		quantile = confidence
	else:
		# split the leftover probability evenly between the two tails
		quantile = 1-((1-confidence)/2)
	z_value = stats.norm.ppf(q=quantile)
	root_n = math.sqrt(n)
	margin = (z_value*std)/root_n
	banner = "{0}Margin of Error (mean population){0}".format("="*5)
	rule = "="*50
	report = "zcritical:\t{0}\nstd:\t{1}\nn:\t{2}\n\nmargin of error:\t{3}".format(z_value,std,n,margin)
	for line in (banner, rule, report, rule):
		print(line)
	return margin


def errorpopproportion(cl, p, n, tail=1):
	'''
	Margin of error for a population proportion, based on the normal (z) distribution.
		z∗√(p(1−p)/n)

	Quantile handed to the inverse normal CDF, e.g. for a 95% confidence level:
		- tail == 1      : q=0.95  (one-tail)
		- any other tail : q=0.975 (two-tail)
	Prints the banner first, then the critical value, the inputs and the result.
	:param cl: confidence level as a percentage. eg 95
	:param p: proportion (between 0 and 1)
	:param n: sample size
	:param tail: number of tails; 1 selects the one-tailed quantile, any other value the two-tailed one. default: 1
	:return: margin of error of the proportion
	'''
	rule = "="*50
	print("{0}Margin of Error (population proportion){0}".format("="*5))
	print(rule)
	if tail == 1:
		quantile = cl/100
	else:
		# split the leftover probability evenly between the two tails
		quantile = 1-((1-(cl/100))/2)
	z_value = stats.norm.ppf(q=quantile)
	# variance of the sample proportion: p(1-p)/n
	proportion_variance = (p * (1 - p)) / n
	margin = z_value * math.sqrt(proportion_variance)
	print("zcritical:\t{0}\np:\t{1}\nn:\t{2}\n\nmargin of error:\t{3}".format(z_value, p, n, margin))
	print(rule)
	return margin


def errorsinglepopmeans(cl, std, n, tail=1):
	'''
	Margin of error for a single population mean, based on Student's t distribution
	with n-1 degrees of freedom.
		t∗(s/√n)
	Quantile handed to the inverse t CDF, e.g. for a 95% confidence level:
		- tail == 1      : q=0.95  (one-tail)
		- any other tail : q=0.975 (two-tail)
	Prints the banner first, then the critical value, the inputs and the result.
	:param cl: confidence level as a percentage. eg 95
	:param std: sample std,s = df.std(ddof=1)
	:param n: sample size
	:param tail: number of tails; 1 selects the one-tailed quantile, any other value the two-tailed one. default: 1
	:return: margin of error of the mean
	'''
	rule = "="*50
	print("{0}Margin of Error (single population means T-test){0}".format("="*5))
	print(rule)
	if tail == 1:
		quantile = cl/100
	else:
		# split the leftover probability evenly between the two tails
		quantile = 1-((1-(cl/100))/2)
	degrees_of_freedom = n-1
	t_value = stats.t.ppf(q=quantile, df=degrees_of_freedom)
	root_n = math.sqrt(n)
	margin = (t_value * std) / root_n
	print("tcritical:\t{0}\nstd:\t{1}\nn:\t{2}\n\nmargin of error:\t{3}".format(t_value,std,n,margin))
	print(rule)
	return margin

def cinterval(pe, merror):
	'''
	confidence interval = point estimate ± margin of error
		1) population proportion
			cinterval(pe, errorpopproportion())
		2) single population means
			cinterval(pe, errorsinglepopmeans())
	Prints a report of the inputs and the resulting bounds.
	:param pe: sample mean / sample proportion / population proportion / population mean
	:param merror: margin of error, e.g. the value returned by errormeanpop,
					errorpopproportion or errorsinglepopmeans
	:return: confidence interval as a list [pe-merror, pe+merror]
	'''
	lower, upper = pe-merror, pe+merror
	ci = [lower, upper]
	print("{0}Confidence Interval{0}".format("="*5))
	print("="*50)
	# The symbolic line is written lower bound first so it lines up with the numeric bounds beneath it.
	print("point estimation:\t{0}\nmargin of error:\t{1}\n\nconfidence interval:\t[{0}-{1} , {0}+{1}]\n\t\t\t[{2} , {3}]".format(pe, merror, lower, upper))
	print("="*50)
	return ci


In [34]:
# Interval around a mean of 40.78 with std 1.26 and n 100 at the 99% level.
# NOTE(review): errormeanpop is called with its default tail=1, so the one-tailed
# quantile (q=0.99) is used; a symmetric 99% interval would normally use tail=2 — confirm intent.
cinterval(pe=40.78, merror=errormeanpop(cl=99, std=1.26, n=100))

=====Margin of Error (mean population)=====
zcritical:	2.3263478740408408
std:	1.26
n:	100

margin of error:	0.29311983212914594
=====Confidence Interval=====
point estimation:	40.78
margin of error:	0.29311983212914594

confidence interval:	[40.78+0.29311983212914594 ,40.78-0.29311983212914594]
			[40.48688016787086 , 41.073119832129144]


[40.48688016787086, 41.073119832129144]

In [35]:
# Two-tailed 95% interval for a proportion of 272 out of 800.
# The call previously used the misspelled name `errorpoppropotion`, which is not defined in
# this notebook and raises NameError on a fresh kernel.
cinterval(272/800, errorpopproportion(95, 272/800, 800, tail=2))

=====Margin of Error (population proportion)=====
zcritical:	1.959963984540054
p:	0.34
n:	800

margin of error:	0.03282573988815335
=====Confidence Interval=====
point estimation:	0.34
margin of error:	0.03282573988815335

confidence interval:	[0.34+0.03282573988815335 ,0.34-0.03282573988815335]
			[0.3071742601118467 , 0.3728257398881534]


[0.3071742601118467, 0.3728257398881534]

In [38]:
# Print the built-in documentation of statsmodels' ztest for reference.
help(smstats.ztest)

Help on function ztest in module statsmodels.stats.weightstats:

ztest(x1, x2=None, value=0, alternative='two-sided', usevar='pooled', ddof=1.0)
    test for mean based on normal distribution, one or two samples
    
    In the case of two samples, the samples are assumed to be independent.
    
    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D
        second of the two independent samples
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of x1 and
        mean of x2 under the Null hypothesis. The test statistic is
        `x1_mean - x2_mean - value`.
    alternative : string
        The alternative hypothesis, H1, has to be one of the following
    
           'two-sided': H1: difference in means not equal to value (default)
           'larger' :   H1: difference in means