# Statistical Hypothesis Testing with *statsHypo* Package

Author: Ziad Ghauch

In [1]:
import numpy as np

import sys
sys.path.append('../statsHypo/')
from sample import *
from correlation import *
from factorial_designs import *
from other import *
from single_sample import *
from time_series import *
from two_dependent_samples import *
from two_independent_samples import *
from twoormore_dependent_samples import *
from twoormore_indenpendent_samples import *

In [2]:
# Significance level used by the test cells that follow.
alpha_val = 0.05

# Seed NumPy's global generator so the three samples are reproducible.
np.random.seed(999)

# Three Gaussian samples of 100 draws each with unit standard deviation,
# centred at 20, 10 and 50. They are drawn in the order X, Y, Z so the
# random stream is consumed in the same sequence on every run.
X = np.random.normal(loc=20.0, scale=1.0, size=100)
Y = np.random.normal(loc=10.0, scale=1.0, size=100)
Z = np.random.normal(loc=50.0, scale=1.0, size=100)

### Single-Sample Z-test

In [3]:
# Single-sample inference on X at significance level alpha_val.
# The third argument carries the known population parameters; per the z_test
# docstring these are (mean, sigma) -- presumably in that order, so [20., 1.]
# matches the distribution X was drawn from.
t1 = InferenceSingleSample(X, alpha_val, [20.,1.])
# Print the method's docstring so the hypotheses are visible next to the result.
print (t1.z_test.__doc__)
# Run the z-test and keep whatever z_test() returns.
stat=t1.z_test()


 Test 1: Single-Sample Z-test 
        
        Parametric test to assess the likelihood that a sample is derived from a population 
        with known parameters (i.e. mean, sigma); Population assumed Gaussian. Perform Z-test
        only when population paramerers (mean and sigma) are known. The test statistic is 
        based on the Gaussian distribution.
        
        H0 (null hypothesis): 
        	-> mean of the sample equals the population mean 
        H1 (alternate hypothesis): 
        	-> mean of the sample does not equal (i.e. nondirectional, two-tailed
            	test) the population mean, or 
        	-> mean of the sample is less than the population mean (directional, 
            	one-tailed left test), or 
        	-> mean of the sample is greater than the population mean (directional,
            	one-tailed right test)
        
        Parameters
        ----------
        P : sample of size (N,1) from the population (mu, sigma)
        
        Return
        

### Single-Sample T-test

In [4]:
# Single-sample inference on X at significance level alpha_val.
# Only one population parameter is supplied here; per the t_test docstring the
# population mean is known and sigma is not, so [20.] is presumably that mean.
t11 = InferenceSingleSample(X, alpha_val, [20.])
# Print the method's docstring so the hypotheses are visible next to the result.
print (t11.t_test.__doc__)
# Run the t-test and keep whatever t_test() returns.
stat=t11.t_test()

 Test 2: The Single-Sample T-test  

        Parametric test to assess the likelihood that a sample is derived from a population 
        with known parameters mean but unknown sigma; Population assumed Gaussian. Perform T-test
        only when population std dev paramerer (i.e. sigma) is unknown or when sample size N is
        small. The test statistic is based on the t-distribution. 
        
        H0 (null hypothesis): 
        	-> mean of the sample equals the population mean 
        H1 (alternate hypothesis): 
        	-> mean of the sample does not equal (i.e. nondirectional, two-tailed
            	test) the population mean, or 
        	-> mean of the sample is less than the population mean (directional, 
            	one-tailed left test), or 
        	-> mean of the sample is greater than the population mean (directional,
            	one-tailed right test)
        
        Parameters
        ----------
        P : sample of size (N,1) from the population (mean mu)
     

### Single-Sample Chi-Square Test

In [5]:
# Single-sample inference on X at significance level alpha_val.
# NOTE(review): [1.] is presumably the hypothesised population variance
# (the docstring speaks of sigma^2) -- confirm whether the package expects
# the variance or the standard deviation here.
t = InferenceSingleSample(X, alpha_val, [1.])
# Print the method's docstring, then print whatever the test call returns.
print (t.chi_square_test_population_variance.__doc__)
print (t.chi_square_test_population_variance())

 Test 3: Single-Sample Chi-Square Test for a Population Variance 

        Statistical inference single-sample test to assess the likelihood that 
        a sample with variance s^2 is derived from a population 
        with variance sigma^2. The test statistic is based on the chi-square 
        distribution, with the assumptions that the population from which the 
        sample is drawn is normal. 
        
        H0 (null hypothesis): 
        	-> variance of the sample equals the population variance 
        H1 (alternate hypothesis): 
        	-> variance of the sample does not equal (i.e. nondirectional, two-tailed
            	test) the population variance, or 
        	-> variance of the sample is less than the population variance (directional, 
            	one-tailed left test), or 
        	-> variance of the sample is greater than the population variance (directional,
            	one-tailed right test)
        
        Parameters
        ----------
        P : sample of 

### Single-Sample Test for Evaluating Population Skewness

In [6]:
# Skewness test on X; no population parameters are passed to the constructor.
t = InferenceSingleSample(X, alpha_val)
# NOTE(review): in the recorded run the method prints its own report and the
# outer print() then shows "None", i.e. the call returned None -- the print()
# wrapper looks unnecessary; confirm before removing.
print (t.test_population_skewness())

~Single-Sample Test for Evaluating Population Skewness~
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 17.909
	  25%  ..........: 19.545
	  50%  ..........: 20.14
	  75%  ..........: 20.918
	  Max ...........: 22.491
	  Mean ..........: 20.19
	  Std Dev .......: 0.976
	  Skewness ......: -0.012
	  Kurtosis ......: -0.466
	|------------------------------|
stat=-0.052
None


### Single-Sample Test for Evaluating Population Kurtosis

In [7]:
# Kurtosis test on X; no population parameters are passed to the constructor.
t = InferenceSingleSample(X, alpha_val)
# NOTE(review): in the recorded run the method prints its own report and the
# outer print() then shows "None", i.e. the call returned None -- the print()
# wrapper looks unnecessary; confirm before removing.
print (t.test_population_kurtosis())

~Single-Sample Test for Evaluating Population Kurtosis~
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 17.909
	  25%  ..........: 19.545
	  50%  ..........: 20.14
	  75%  ..........: 20.918
	  Max ...........: 22.491
	  Mean ..........: 20.19
	  Std Dev .......: 0.976
	  Skewness ......: -0.012
	  Kurtosis ......: -0.466
	|------------------------------|
stat=0.994
stat=-0.965, p=0.335
None


### D’Agostino’s K² Test

In [8]:
# Normality test on X; no population parameters are passed to the constructor.
t = InferenceSingleSample(X, alpha_val)
# The recorded output shows a DESCRIPTIVE STATISTICS table ahead of the
# docstring, which appears to be emitted by this call.
t.get_sample_desciptive_statistics(X)
# Print the method's docstring so the hypotheses are visible next to the result.
print (t.dagostino_pearson_test_normality.__doc__)
# The result is unpacked into two values -- presumably the test statistic and
# its p-value; verify against the package.
stat,p=t.dagostino_pearson_test_normality()


	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 17.909
	  25%  ..........: 19.545
	  50%  ..........: 20.14
	  75%  ..........: 20.918
	  Max ...........: 22.491
	  Mean ..........: 20.19
	  Std Dev .......: 0.976
	  Skewness ......: -0.012
	  Kurtosis ......: -0.466
	|------------------------------|
 Test 5a: D’Agostino–Pearson Test of Normality

        Test for assessing goodness-of-fit for a normal distribution
        
        Null hypothesis H0: The sample is derived from a normally distributed population.
        Alternative hypothesis H1: The sample is not derived from a normally distributed population.
        
~D’Agostino–Pearson Test of Normality~
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 17.909
	  25%  ..........: 19.545
	  50%  ..........: 20.14
	  75%  ..........: 20.918
	  Max ...........


### Pearson’s Correlation Coefficient

In [9]:
# Correlation analysis between the samples X and Y at significance level alpha_val.
t2 = Correlation(X, Y, alpha_val)
# Print the method's docstring so the hypotheses are visible next to the result.
print (t2.pearson_correlation_coefficient.__doc__)
# Last expression of the cell, so the notebook displays its return value
# (a 2-tuple in the recorded run -- presumably (coefficient, p-value)).
t2.pearson_correlation_coefficient()

 Test 28: Pearson Product–Moment Correlation Coefficient
        
        Test for assessing linear relationship between two samples

        Null hypothesis H0: the correlation (rho) between the two variables equals 0
        Alternative hypothesis H1: the correlation between the two variables equals some value other than 0.
        
~Pearson Product–Moment Correlation Coefficient~
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 17.909
	  25%  ..........: 19.545
	  50%  ..........: 20.14
	  75%  ..........: 20.918
	  Max ...........: 22.491
	  Mean ..........: 20.19
	  Std Dev .......: 0.976
	  Skewness ......: -0.012
	  Kurtosis ......: -0.466
	|------------------------------|
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 7.93
	  25%  ..........: 9.304
	  50%  ..........: 10.008
	  75%  ..........: 10.618

(-0.09789590445579319, 0.33255464192464346)

### Spearman’s Rank Correlation

In [10]:
# Correlation analysis between the samples X and Y at significance level alpha_val.
t3 = Correlation(X, Y, alpha_val)
# Print the method's docstring so the hypotheses are visible next to the result.
print (t3.spearmans_correlation_coefficient.__doc__)
# NOTE(review): the recorded run of this cell ended with
# "NameError: name 'dist_stats' is not defined". Nothing in this cell refers to
# dist_stats, so the error presumably originates inside the package method.
t3.spearmans_correlation_coefficient()


 Test 29: Spearman’s Rank-Order Correlation Coefficient

        Test for assessing correlation between two samples
        
        H0 (null hypothesis): correlation (rho) between the two variables equals 0
        H1 (alternate hypothesis): the correlation between the two variables equals some value other than 0   
        
~Spearman’s Rank-Order Correlation Coefficient~
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 17.909
	  25%  ..........: 19.545
	  50%  ..........: 20.14
	  75%  ..........: 20.918
	  Max ...........: 22.491
	  Mean ..........: 20.19
	  Std Dev .......: 0.976
	  Skewness ......: -0.012
	  Kurtosis ......: -0.466
	|------------------------------|
	|------------------------------|
	  DESCRIPTIVE STATISTICS
	|------------------------------|
	  Size ..........: 100
	  Min ...........: 7.93
	  25%  ..........: 9.304
	  50%  ..........: 10.008
	  75%  ..........: 10.618
	  Max ..

NameError: name 'dist_stats' is not defined

### Kendall’s Rank Correlation

Tests whether two samples have a monotonic relationship.

Assumptions

+ Observations in each sample are independent and identically distributed (iid).
+ Observations in each sample can be ranked.

Interpretation

+ H0: the two samples are independent.
+ H1: there is a dependency between the samples.

In [None]:
# Correlation analysis between the samples X and Y at significance level alpha_val.
t4 = Correlation(X, Y, alpha_val)
# Print the method's docstring before running the test.
print (t4.kendall_tau.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t4.kendall_tau()


### Chi-Square Test for Homogeneity

In [None]:
# Two-independent-sample inference on X and Y at significance level alpha_val.
t5 = InferenceTwoIndependentSamples(X, Y, alpha_val)
# Print the method's docstring before running the test.
print (t5.chi_square_test_homogeneity.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t5.chi_square_test_homogeneity()


### Augmented Dickey-Fuller Unit Root Test

In [None]:
# Treat the sample X as a time series, tested at significance level alpha_val.
t6 = TimeSeries(X, alpha_val)
# Print the method's docstring before running the test.
print (t6.augmented_dickey_fuller_test.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t6.augmented_dickey_fuller_test()


### Kwiatkowski-Phillips-Schmidt-Shin (KPSS) Test

Tests whether a time series is trend stationary or not.

In [None]:
# Treat the sample X as a time series, tested at significance level alpha_val.
t7 = TimeSeries(X, alpha_val)
# Print the method's docstring before running the test.
print (t7.kwiatkowski_phillips_schmidt_shin_test.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t7.kwiatkowski_phillips_schmidt_shin_test()


### Student’s t-test

In [None]:
# Two-independent-sample inference on X and Y at significance level alpha_val.
t8 = InferenceTwoIndependentSamples(X, Y, alpha_val)
# Print the method's docstring before running the test.
print (t8.t_test_independent.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t8.t_test_independent()

### Paired Student’s t-test

Tests whether the means of two paired samples are significantly different.

In [None]:
# Two-dependent-sample inference: X and Y are passed as the two paired samples,
# tested at significance level alpha_val.
t9 = InferenceTwoDependentSamples(X, Y, alpha_val)
# Print the method's docstring before running the test.
print (t9.t_test_dependent.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t9.t_test_dependent()

### Analysis of Variance Test (ANOVA)

Tests whether the means of two or more independent samples are significantly different.

In [None]:
# Single-factor ANOVA across the three samples X, Y and Z.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t10 = InferenceTwoOrMoreIndenpendentSamples(X, Y, Z, alpha=alpha_val)
# Print the method's docstring before running the test.
print(t10.single_factor_anova.__doc__)
# Last expression of the cell, so the notebook displays whatever it returns.
t10.single_factor_anova()



### Mann-Whitney U Test

Tests whether the distributions of two independent samples are equal or not.

In [None]:
# Mann-Whitney U test on the two samples X and Y.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t97 = InferenceTwoIndependentSamples(X, Y, alpha=alpha_val)
# Last expression of the cell, so the notebook displays whatever it returns.
t97.mann_whitney_utest()


### Wilcoxon Signed-Rank Test

Tests whether the distributions of two paired samples are equal or not.

In [None]:
# Wilcoxon matched-pairs test with X and Y passed as the two paired samples.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t98 = InferenceTwoDependentSamples(X, Y, alpha=alpha_val)
# Last expression of the cell, so the notebook displays whatever it returns.
t98.wilcoxon_matched_pairs_test()


### Kruskal-Wallis H Test

Tests whether the distributions of two or more independent samples are equal or not.

In [None]:
# Kruskal-Wallis one-way analysis of variance across X, Y and Z.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t99 = InferenceTwoOrMoreIndenpendentSamples(X, Y, Z, alpha=alpha_val)
# Last expression of the cell, so the notebook displays whatever it returns.
t99.kruskal_wallis_oneway_analysis_variance()


### Friedman Test

Tests whether the distributions of two or more paired samples are equal or not.

In [None]:
# Friedman two-way analysis of variance with X, Y and Z passed as the
# dependent samples.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t100 = InferenceTwoOrMoreDependentSamples(X, Y, Z, alpha=alpha_val)
# Last expression of the cell, so the notebook displays whatever it returns.
t100.friedman_twoway_analysis_variance()

### The Single-Sample t Test

In [None]:
# Single-sample t-test on X using the keyword form of the constructor.
# NOTE(review): inf_parameters=[0.5] is presumably the hypothesised population
# mean; X was drawn around 20, so confirm 0.5 is the intended value.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t101 = InferenceSingleSample(X, alpha=alpha_val, inf_parameters=[0.5])
# Last expression of the cell, so the notebook displays whatever it returns.
t101.t_test()

### Chi-Square Goodness-of-Fit Test 

In [None]:
# Chi-square goodness-of-fit test on X.
# NOTE(review): the sample X itself is passed as the single inference
# parameter; confirm what the package expects here (e.g. expected frequencies).
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t102 = InferenceSingleSample(X, alpha=alpha_val, inf_parameters=[X])
# Last expression of the cell, so the notebook displays whatever it returns.
t102.chi_square_goodness_of_fit_test()

### Cramér–von Mises Goodness-of-Fit Test

In [None]:
#t103=InferenceTwoIndependentSamples(X, Y, alpha=0.05)
#t103.cramer_von_mises_goodness_of_fit_test()

### Cressie-Read Power Divergence Statistic and Goodness-of-Fit Test

In [None]:
# Cressie-Read power divergence statistic and goodness-of-fit test on X.
# NOTE(review): the sample X itself is passed as the single inference
# parameter; confirm what the package expects here (e.g. expected frequencies).
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t104 = InferenceSingleSample(X, alpha=alpha_val, inf_parameters=[X])
# Last expression of the cell, so the notebook displays whatever it returns.
t104.cressie_read_power_divergence_and_goodness_of_fit_test()

### Kolmogorov–Smirnov Goodness-of-fit Test for a Single Sample

In [None]:
# Kolmogorov-Smirnov goodness-of-fit test on X; an empty inference-parameter
# list is passed explicitly.
# The significance level comes from the shared alpha_val rather than a repeated
# literal, so it is configured in one place for the whole notebook.
t105 = InferenceSingleSample(X, alpha=alpha_val, inf_parameters=[])
# Last expression of the cell, so the notebook displays whatever it returns.
t105.kolmogorov_smirnov_goodness_of_fit_test()