In [1]:
# Third-party libraries are imported explicitly. Previously `np` and `pd` were
# only in scope because the wildcard import below happened to expose them.
import numpy as np
import pandas as pd

from src.auto_statstest import StatsTest
# Wildcard import kept on purpose: later cells call names that are not imported
# anywhere else (e.g. pearsonr_test, chisquare_test, chisquare_posthoc_test,
# print_significant_categories), so they presumably come from this module.
from src.statstest import *

# Seed NumPy's legacy global random state so that any NumPy-based randomness
# used further down is repeatable between runs.
np.random.seed(14)

In [3]:
# import datasets (both CSV files are read from the current working directory)

df = pd.read_csv("synthetic_data.csv") # synthetic dataset to showcase various cases below
# NOTE(review): df2 is not referenced by any of the example cases visible in
# this part of the notebook - confirm it is used further down.
df2 = pd.read_csv("case_control.csv")

# list column names, non-null counts and dtypes of the synthetic dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   normal1                         10000 non-null  float64
 1   normal2                         10000 non-null  float64
 2   normal3                         10000 non-null  float64
 3   non-normal                      10000 non-null  float64
 4   categorical_2_levels1           10000 non-null  object 
 5   dependent_categorical_3_levels  10000 non-null  object 
 6   categorical_2_levels2           10000 non-null  object 
 7   categorical_multi_levels        10000 non-null  object 
 8   diff_mean_normal_samples        10000 non-null  float64
 9   categorical_fishers             10000 non-null  object 
 10  categorical_nofishers           10000 non-null  object 
 11  boolean                         10000 non-null  bool   
dtypes: bool(1), float64(5), object(6)

# AUTOSTATSTEST

*1. Create class instance - StatsTest*

parameter 1 = data (a pandas DataFrame or a NumPy array)

parameter 2 = level of significance (optional)

In [4]:
# Two analysers over the same dataset: `test` keeps the default level of
# significance (0.05), while `test2` relaxes it to 0.2.
test = StatsTest(df)
test2 = StatsTest(df, alpha=0.2)

*2. Call class method - autostatstest*

parameter 1 = dependent variable 

parameter 2 = independent variable 

*3. View & retrieve stored results (if needed)*

**EXAMPLE CASES**

In [5]:
# case 1 - column does not exist in dataset
# The ValueError is the point of this example, so it is caught and displayed.
# Left uncaught, it would stop a "Restart Kernel -> Run All" at this cell.
# result1 is intentionally left unassigned when the call fails.

try:
    result1 = test.autostatstest("normal1", "normal")
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Input column 'normal' is not in the dataset. Please try again.

In [6]:
# case 2 - column has inappropriate datatype
# The ValueError is the point of this example, so it is caught and displayed.
# Left uncaught, it would stop a "Restart Kernel -> Run All" at this cell.
# result2 is intentionally left unassigned when the call fails.

try:
    result2 = test.autostatstest("normal1", "boolean")
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Input column 'boolean' is not of a valid datatype. Please try again.

In [7]:
# case 3 - continuous normal vs continuous normal (pearson's correlation)
# first argument = dependent variable, second argument = independent variable

result3 = test.autostatstest("normal1", "normal2")


Column 'normal1' is normally distributed.

Column 'normal2' is normally distributed.

----- Conducted Pearson's Correlation -----

Pearson's Correlation statistic = 0.006323, p-value = 0.527231
Level of significance = 0.05
Null hypothesis (H0) = No linear relationship between the variables
Conclusion = Fail to reject null hypothesis (H0)


*Extracting results without post-hoc tests:*

In [8]:
# display the stored result; it carries statistic, pvalue and significant fields
result3

DatkPearsonrResult(statistic=0.006323105498256652, pvalue=0.527231483649771, significant=False)

In [9]:
# individual fields of the result are read as attributes
result3.pvalue

0.527231483649771

In [10]:
# case 4 - continuous normal vs continuous non-normal (spearman's correlation)
# one non-normal column is enough for the non-parametric test to be recommended

result4 = test.autostatstest("normal1", "non-normal")


Column 'normal1' is normally distributed.

Column 'non-normal' is not normally distributed.

Non-parametric Spearman's Correlation Test is recommended as one or more columns have non-normal distribution.

----- Conducted Spearman's Correlation -----

Spearman's Correlation statistic = 0.013385, p-value = 0.180783
Level of significance = 0.05
Null hypothesis (H0) = No monotonic relationship between the variables
Conclusion = Fail to reject null hypothesis (H0)


In [11]:
# case 5 - categorical 2 levels vs categorical 2 levels (chi-square test) (results insignificant)
# the contingency table of the two columns is printed together with the test result

result5 = test.autostatstest("categorical_2_levels1", "categorical_2_levels2")


----- Conducted Chi-square Test of Independence -----

| categorical_2_levels1   |   No |   Yes |
|:------------------------|-----:|------:|
| Female                  | 2550 |  2551 |
| Male                    | 2461 |  2438 |

Chi-square Test of Independence statistic = 0.050396, p-value = 0.822376
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Fail to reject null hypothesis (H0)


In [12]:
# case 6 - categorical 2 levels vs categorical > 2 levels (chi-square test) (results insignificant)
# the independent variable here has four levels (Blue, Green, Orange, Red)

result6 = test.autostatstest("categorical_2_levels1", "categorical_multi_levels")


----- Conducted Chi-square Test of Independence -----

| categorical_2_levels1   |   Blue |   Green |   Orange |   Red |
|:------------------------|-------:|--------:|---------:|------:|
| Female                  |   1249 |    1294 |     1291 |  1267 |
| Male                    |   1189 |    1212 |     1227 |  1271 |

Chi-square Test of Independence statistic = 1.713072, p-value = 0.634032
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Fail to reject null hypothesis (H0)


In [13]:
# case 7 - categorical 2 levels vs categorical > 2 levels (chi-square test) (results significant) 
# a significant result is followed by pairwise post-hoc comparisons, and the
# returned value is then a dict with "chi2" and "post-hoc" keys

result7 = test.autostatstest("categorical_2_levels1", "dependent_categorical_3_levels")


----- Conducted Chi-square Test of Independence -----

| categorical_2_levels1   |    A |    B |    C |
|:------------------------|-----:|-----:|-----:|
| Female                  | 1574 | 2492 | 1035 |
| Male                    | 2864 | 1046 |  989 |

Chi-square Test of Independence statistic = 963.312454, p-value = 0.000000
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Reject null hypothesis (H0)

----- Conducted Post-hoc Analysis using Multiple Comparisons Correction -----

('A', 'B'): p-value = 3.482469367701404e-211, corrected p-value = 1.0447408103104213e-210, significant = True
('A', 'C'): p-value = 1.5043637150254447e-32, corrected p-value = 1.5043637150254447e-32, significant = True
('B', 'C'): p-value = 1.1063385068377032e-46, corrected p-value = 1.6595077602565549e-46, significant = True


*Extracting results with post-hoc tests:*

In [14]:
# with post-hoc tests the stored result is a dict with keys "chi2" and "post-hoc"
result7

{'chi2': DatkChi2Result(statistic=963.3124544371774, pvalue=6.597180166003396e-210, significant=True),
 'post-hoc': [DatkChi2PostHocResult(combi=('A', 'B'), pvalue=3.482469367701404e-211, significant=True, corrected_pvalue=1.0447408103104213e-210),
  DatkChi2PostHocResult(combi=('A', 'C'), pvalue=1.5043637150254447e-32, significant=True, corrected_pvalue=1.5043637150254447e-32),
  DatkChi2PostHocResult(combi=('B', 'C'), pvalue=1.1063385068377032e-46, significant=True, corrected_pvalue=1.6595077602565549e-46)]}

In [15]:
# the main test result sits under the "chi2" key
result7["chi2"].pvalue

6.597180166003396e-210

In [16]:
# "post-hoc" is a list with one entry per pair of categories; index 1 is the ('A', 'C') pair
result7["post-hoc"][1].corrected_pvalue

1.5043637150254447e-32

In [17]:
# function to print all significant categories from post-hoc test results
# (it is given the whole result dict, not just the "post-hoc" list)

print_significant_categories(result7)

('A', 'B')
('A', 'C')
('B', 'C')


In [18]:
# case 8 - categorical 2 levels vs categorical 2 levels (fisher's exact test) [chi-square assumption fails and contingency table is 2x2]

result8 = test.autostatstest("categorical_2_levels2", "categorical_fishers")


Fisher's Exact Test is recommended as assumption of Chi-square Test of Independence is violated (Each cell in contingency table should be at least 5) and contingency table is 2x2 in shape.

----- Conducted Fisher's Exact Test -----

| categorical_2_levels2   |   A |    B |
|:------------------------|----:|-----:|
| No                      |   3 | 5008 |
| Yes                     |   0 | 4989 |

Fisher's Exact Test statistic = inf, p-value = 0.249929
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Fail to reject null hypothesis (H0)


In [19]:
# case 9 - categorical 2 levels vs categorical > 2 levels (chi-square test) [chi-square assumption fails but contingency table is not 2x2]
# NOTE(review): several cells are below 5 here, yet chi-square is still run and
# no warning is printed - interpret this result with care.

result9 = test.autostatstest("categorical_2_levels1", "categorical_nofishers")


----- Conducted Chi-square Test of Independence -----

| categorical_2_levels1   |   A |    B |   C |
|:------------------------|----:|-----:|----:|
| Female                  |   2 | 5097 |   2 |
| Male                    |   3 | 4893 |   3 |

Chi-square Test of Independence statistic = 0.485564, p-value = 0.784443
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Fail to reject null hypothesis (H0)





In [20]:
# case 10 - continuous normal vs categorical 2 levels (student's independent t-test)
# dependent variable is continuous, independent variable splits it into two samples

result10 = test.autostatstest("normal1", "categorical_2_levels1")


Column 'normal1' is normally distributed.

----- Conducted Independent Student's T-Test -----

Samples have equal variance according to Levene Test
Sample 1 mean: 0.0019
Sample 2 mean: -0.0123

Independent Student's T-Test statistic = 0.696498, p-value = 0.486133
Level of significance = 0.05
Null hypothesis (H0) = Samples have identical means
Conclusion = Fail to reject null hypothesis (H0)


In [21]:
# case 11 - continuous non-normal vs categorical 2 levels (mann-whitney u test)
# the non-normal dependent variable is what triggers the non-parametric test

result11 = test.autostatstest("non-normal", "categorical_2_levels1")


Column 'non-normal' is not normally distributed.

Non-parametric Mann-Whitney U Test is recommended as dependent variable has non-normal distribution.

----- Conducted Mann-Whitney U Test -----

Sample 1 median: 0.511
Sample 2 median: 0.5009

Mann-Whitney U Test statistic = 12597213.000000, p-value = 0.478353
Level of significance = 0.05
Null hypothesis (H0) = Samples have identical medians
Conclusion = Fail to reject null hypothesis (H0)


In [22]:
# case 12 - continuous normal vs categorical > 2 levels (one-way anova) (results insignificant)
# no post-hoc analysis is run because the null hypothesis is not rejected

result12 = test.autostatstest("normal1", "categorical_multi_levels")


Column 'normal1' is normally distributed.

One-way ANOVA is recommended as independent variable has more than 2 categories.

----- Conducted One-way ANOVA -----

Samples have equal variance according to Levene Test
Sample 1 mean: -0.022268012275382745
Sample 2 mean: 0.038590753127707975
Sample 3 mean: -0.004087864528348485
Sample 4 mean: -0.03117921542210321

One-way ANOVA statistic = 2.300516, p-value = 0.075171
Level of significance = 0.05
Null hypothesis (H0) = Samples have identical means
Conclusion = Fail to reject null hypothesis (H0)


In [23]:
# case 13 - continuous normal vs categorical > 2 levels (one-way anova) (results significant)
# uses `test2` (alpha = 0.2): the result is significant only at this relaxed
# level of significance, which is what triggers the post-hoc analysis

result13 = test2.autostatstest("normal1", "dependent_categorical_3_levels")


Column 'normal1' is normally distributed.

One-way ANOVA is recommended as independent variable has more than 2 categories.

----- Conducted One-way ANOVA -----

Samples have equal variance according to Levene Test
Sample 1 mean: -0.00257382801082104
Sample 2 mean: -0.041299923115269255
Sample 3 mean: 0.012552302389702334

One-way ANOVA statistic = 1.836290, p-value = 0.159461
Level of significance = 0.2
Null hypothesis (H0) = Samples have identical means
Conclusion = Reject null hypothesis (H0)

----- Conducted post-hoc analysis using Tukey's HSD Test -----

('A', 'C'): p-value = 0.3298167019108571, significant = False
('A', 'B'): p-value = 0.7652610526926076, significant = False
('C', 'B'): p-value = 0.13763072635349416, significant = True


In [24]:
# case 14 - continuous non-normal vs categorical > 2 levels (kruskal-wallis test) (results insignificant)

result14 = test.autostatstest("non-normal", "categorical_multi_levels")


Column 'non-normal' is not normally distributed.

Non-parametric Kruskal-Wallis Test is recommended as dependent variable has non-normal distribution.

----- Conducted Kruskal-Wallis Test -----

Samples have equal variance according to Levene Test
Sample 1 median: 0.5110231902601691
Sample 2 median: 0.49787893472981626
Sample 3 median: 0.5081001249490381
Sample 4 median: 0.5068358433360468

Kruskal-Wallis Test statistic = 1.433220, p-value = 0.697767
Level of significance = 0.05
Null hypothesis (H0) = Samples have identical medians
Conclusion = Fail to reject null hypothesis (H0)


In [25]:
# case 15 - continuous non-normal vs categorical > 2 levels (kruskal-wallis test) (results significant)
# despite its name, the 'diff_mean_normal_samples' column as a whole is reported
# as not normally distributed, hence the non-parametric test

result15 = test.autostatstest("diff_mean_normal_samples", "categorical_multi_levels")


Column 'diff_mean_normal_samples' is not normally distributed.

Non-parametric Kruskal-Wallis Test is recommended as dependent variable has non-normal distribution.

----- Conducted Kruskal-Wallis Test -----

Samples have equal variance according to Levene Test
Sample 1 median: 20.040293312568075
Sample 2 median: 30.000850608980677
Sample 3 median: 10.017259230965943
Sample 4 median: 39.984323622953276

Kruskal-Wallis Test statistic = 9373.640810, p-value = 0.000000
Level of significance = 0.05
Null hypothesis (H0) = Samples have identical medians
Conclusion = Reject null hypothesis (H0)

----- Conducted post-hoc analysis using Dunn's Test -----

('Blue', 'Green'): p-value = 4.656502042359794e-199, significant = True
('Blue', 'Orange'): p-value = 1.7558624993534906e-200, significant = True
('Blue', 'Red'): p-value = 0.0, significant = True
('Green', 'Orange'): p-value = 0.0, significant = True
('Green', 'Red'): p-value = 2.765475226006906e-211, significant = True
('Orange', 'Red'): p-

# AUTOSTATSTEST_ALL

**This class method takes one column as input (the dependent variable). `autostatstest` is then conducted against every other column in the dataset, provided its datatype is appropriate. The results of the statistical tests are stored in a dictionary keyed by column name. Any errors raised are printed and stored as results as well.**

*1. Create class instance - StatsTest*

parameter 1 = data (a pandas DataFrame or a NumPy array)

parameter 2 = level of significance (optional)

*2. Call class method - autostatstest_all*

parameter 1 = dependent variable

*3. View & retrieve stored results in the dictionary (if needed)*

**EXAMPLE CASES**

In [26]:
# run autostatstest with "normal1" as the dependent variable against the other columns
result_all_1 = test.autostatstest_all("normal1")


[1mTest 1: normal1 vs normal2[0m

Column 'normal1' is normally distributed.

Column 'normal2' is normally distributed.

----- Conducted Pearson's Correlation -----

Pearson's Correlation statistic = 0.006323, p-value = 0.527231
Level of significance = 0.05
Null hypothesis (H0) = No linear relationship between the variables
Conclusion = Fail to reject null hypothesis (H0)

[1mTest 2: normal1 vs normal3[0m

Column 'normal1' is normally distributed.

Column 'normal3' is normally distributed.

----- Conducted Pearson's Correlation -----

Pearson's Correlation statistic = 0.007523, p-value = 0.451896
Level of significance = 0.05
Null hypothesis (H0) = No linear relationship between the variables
Conclusion = Fail to reject null hypothesis (H0)

[1mTest 3: normal1 vs non-normal[0m

Column 'normal1' is normally distributed.

Column 'non-normal' is not normally distributed.

Non-parametric Spearman's Correlation Test is recommended as one or more columns have non-normal distribution.

-

In [27]:
# results dictionary, keyed by the name of the column tested against
result_all_1

{'normal2': DatkPearsonrResult(statistic=0.006323105498256652, pvalue=0.527231483649771, significant=False),
 'normal3': DatkPearsonrResult(statistic=0.007523414893723428, pvalue=0.45189586823596, significant=False),
 'non-normal': DatkSpearmanrResult(statistic=0.013384566625845667, pvalue=0.18078260126597606, significant=False),
 'categorical_2_levels1': DatkTTestResult(statistic=0.6964982784951681, pvalue=0.4861330268721993, significant=False),
 'dependent_categorical_3_levels': DatkOneWayAnovaResult(statistic=1.8362901753835394, pvalue=0.15946146883768222, significant=False),
 'categorical_2_levels2': DatkTTestResult(statistic=-2.2604870701425077, pvalue=0.023812450776105082, significant=True),
 'categorical_multi_levels': DatkOneWayAnovaResult(statistic=2.3005161004505177, pvalue=0.07517058079934379, significant=False),
 'diff_mean_normal_samples': DatkSpearmanrResult(statistic=-0.00358831799988318, pvalue=0.7197536168000374, significant=False),
 'categorical_fishers': DatkTTestRes

In [28]:
# same call with a categorical dependent variable: for the continuous columns
# a logistic-regression recommendation is reported instead of a test
result_all_2 = test.autostatstest_all("categorical_2_levels1")


[1mTest 1: categorical_2_levels1 vs normal1[0m

Logistic regression is recommended instead of a statistical test.

[1mTest 2: categorical_2_levels1 vs normal2[0m

Logistic regression is recommended instead of a statistical test.

[1mTest 3: categorical_2_levels1 vs normal3[0m

Logistic regression is recommended instead of a statistical test.

[1mTest 4: categorical_2_levels1 vs non-normal[0m

Logistic regression is recommended instead of a statistical test.

[1mTest 5: categorical_2_levels1 vs dependent_categorical_3_levels[0m

----- Conducted Chi-square Test of Independence -----

| categorical_2_levels1   |    A |    B |    C |
|:------------------------|-----:|-----:|-----:|
| Female                  | 1574 | 2492 | 1035 |
| Male                    | 2864 | 1046 |  989 |

Chi-square Test of Independence statistic = 963.312454, p-value = 0.000000
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Reject null hypothesi

In [29]:
# where no test was run, the stored value is the message string itself
result_all_2

{'normal1': 'Logistic regression is recommended instead of a statistical test.',
 'normal2': 'Logistic regression is recommended instead of a statistical test.',
 'normal3': 'Logistic regression is recommended instead of a statistical test.',
 'non-normal': 'Logistic regression is recommended instead of a statistical test.',
 'dependent_categorical_3_levels': {'chi2': DatkChi2Result(statistic=963.3124544371774, pvalue=6.597180166003396e-210, significant=True),
  'post-hoc': [DatkChi2PostHocResult(combi=('A', 'B'), pvalue=3.482469367701404e-211, significant=True, corrected_pvalue=1.0447408103104213e-210),
   DatkChi2PostHocResult(combi=('A', 'C'), pvalue=1.5043637150254447e-32, significant=True, corrected_pvalue=1.5043637150254447e-32),
   DatkChi2PostHocResult(combi=('B', 'C'), pvalue=1.1063385068377032e-46, significant=True, corrected_pvalue=1.6595077602565549e-46)]},
 'categorical_2_levels2': DatkChi2Result(statistic=0.050395916502479376, pvalue=0.8223757761862471, significant=False)

# INDIVIDUAL STATISTICAL TESTS

In [30]:
# recap of the available columns before calling the individual test functions
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   normal1                         10000 non-null  float64
 1   normal2                         10000 non-null  float64
 2   normal3                         10000 non-null  float64
 3   non-normal                      10000 non-null  float64
 4   categorical_2_levels1           10000 non-null  object 
 5   dependent_categorical_3_levels  10000 non-null  object 
 6   categorical_2_levels2           10000 non-null  object 
 7   categorical_multi_levels        10000 non-null  object 
 8   diff_mean_normal_samples        10000 non-null  float64
 9   categorical_fishers             10000 non-null  object 
 10  categorical_nofishers           10000 non-null  object 
 11  boolean                         10000 non-null  bool   
dtypes: bool(1), float64(5), object(6)

In [31]:
# Pearson's correlation called directly on two columns, without going through
# a StatsTest instance; the level of significance is passed explicitly.
result_pearsons = pearsonr_test(
    var1=df["normal1"],
    var2=df["normal2"],
    alpha=0.05,
)


----- Conducted Pearson's Correlation -----

Pearson's Correlation statistic = 0.006323, p-value = 0.527231
Level of significance = 0.05
Null hypothesis (H0) = No linear relationship between the variables
Conclusion = Fail to reject null hypothesis (H0)


In [32]:
# Build the contingency table of the two categorical columns first, then run
# the chi-square test on that table.
observed = pd.crosstab(
    df["categorical_2_levels1"],
    df["dependent_categorical_3_levels"],
)
result_chi2 = chisquare_test(observed=observed, alpha=0.05)


----- Conducted Chi-square Test of Independence -----

| categorical_2_levels1   |    A |    B |    C |
|:------------------------|-----:|-----:|-----:|
| Female                  | 1574 | 2492 | 1035 |
| Male                    | 2864 | 1046 |  989 |

Chi-square Test of Independence statistic = 963.312454, p-value = 0.000000
Level of significance = 0.05
Null hypothesis (H0) = Variables are independent from each other
Conclusion = Reject null hypothesis (H0)


In [33]:
# Pairwise post-hoc comparison on the contingency table built for the
# chi-square test; note that the table is passed through the `df` keyword.
result_chi2_posthoc = chisquare_posthoc_test(df=observed, alpha=0.05)


----- Conducted Post-hoc Analysis using Multiple Comparisons Correction -----

('A', 'B'): p-value = 3.482469367701404e-211, corrected p-value = 1.0447408103104213e-210, significant = True
('A', 'C'): p-value = 1.5043637150254447e-32, corrected p-value = 1.5043637150254447e-32, significant = True
('B', 'C'): p-value = 1.1063385068377032e-46, corrected p-value = 1.6595077602565549e-46, significant = True
