In [1]:
import bz2
import numpy as np
import pandas as pd
import pickle
import scipy.stats as sp_stats
from itertools import product, combinations

sections = ['utj', 'isthmus', 'ia-junction', 'ampulla']

classes2 = {'ordering':[('narrow_end','#30a2da'), ('wide_end','#fc4f30'), ('narrow_lumen','#e5ae38'),('wide_lumen','green')]}
classes2['utj'] = {'narrow_end':[1,2,3, 7,8, 13,21], 'wide_end':[4,5,6, 9,14,19], 'narrow_lumen':[15, 16, 17,18], 'wide_lumen':[10,11,12, 20]}
classes2['isthmus'] = {'narrow_end':[1,2,3,4,5,6], 'wide_end':[7,8, 9], 'narrow_lumen':[10, 11, 12, 13, 14, 15], 'wide_lumen':[16,17,18,19,20,21]}
classes2['ia-junction'] = {'narrow_end':[1,2,3,4,5,6,7,8], 'wide_end':[9,10,11,12,13,14,15,16], 'narrow_lumen':[17,18,19,20,21,22,23,24], 'wide_lumen':[25,26,27,28,29,30,31,32]}
classes2['ampulla'] = {'narrow_end':[1,2,3,4,5,6], 'wide_end':[7,8,9,10, 11, 12], 'narrow_lumen':[13,14,15,16,17,18], 'wide_lumen':[19, 20, 21, 22, 23, 24]}
         
# cross section: [file name: replicates]
# secs = {'ampulla':['a4_aes_v3.4', 3], 'isthmus': ['a1_i80_v3.4', 3]}
secs = {'ampulla':['a4_adi_v33_3rd_28800000', 3], 'isthmus': ['a1_i90_v33_3rd_28800000', 3]}
stats = dict()
for sec in secs:
    with bz2.BZ2File('./resources/analysis/output/stats_'+sec+'_'+ secs[sec][0] +'.pickle.bz2', 'rb') as stats_file:
        stats[sec] = pickle.load(stats_file)
stats.keys()

# produce a single DataFrame with all the cross sections DataFrames
v2b_data = None
for sec in sections:
    if sec in stats and 'data_frame' in stats[sec]:
        if v2b_data is None:
            v2b_data = stats[sec]['data_frame']
        else:
            v2b_data = v2b_data.append(stats[sec]['data_frame'])

print('EVs in replicate 1 in Ampulla',v2b_data.query("section=='ampulla' & replicate==1")['radius_um'].count())
print('EVs in replicate 1 in Isthmus',v2b_data.query("section=='isthmus' & replicate==1")['radius_um'].count())

EVs in replicate 1 in Ampulla 221
EVs in replicate 1 in Isthmus 1064


|Aims| Parametric | Non-parametric |
|-|-|-|
|Compare samples from 2 independent data sets|2 sample T-Test (Student's)|Mann-Whitney U|
|Compare samples from 2 paired data sets|paired Student T-test|Wilcoxon Signed-Rank|
|Compare samples from >2 indep. data sets|one-way ANOVA|Kruskal-Wallis H Test|
|Compare samples from >2 paired data sets|repeated measures ANOVA|Friedman|

## Statistics functions follow

# Incorrect form, multiple 1-to-1 comparissons

In [4]:
"""
Kolmogorov-Smirnov (one of the most common methods?)
This method is exact with continuous distributions, less precise with discrete distributions
ks_stat, ks_p = sp_stats.ks_2samp(d1, d2)

Mann Withney U test, needs n > 25
This method is robust to violations of homogeneity of variance

Ranksums similar to mw, assumes continuous distributions. It does not handle ties between measurements in x and y, (MW does)
rank_stat, rank_p = sp_stats.ranksums(d1, d2)

Epps-Singleton does not assume a continuous distribution, needs n > 25

brunnermunzel does not test if both distributions are the same
"""

def test_significance_non_parametric(d1, n_d1, d2, n_d2, alpha=0.05, continuous_distribution=True, paired_samples=False):
    """
    Useful tests for paired and non-paired non-parametric samples.
    """
    if n_d1 < 25 or n_d2 < 25:
        print('n<25, using the Anderson-Darling test for k-samples,', end =' ')
        # returns statistc, critical values, and significance level
        ak_stat, ak_critical_values, ak_significance_level = sp_stats.anderson_ksamp([d1, d2])
        # is the test value greater than the critical value for 5%?
        if ak_stat > ak_critical_values[2]:
            print(f'significance level: {ak_significance_level:.4f}, Reject H0')
            return True, ak_significance_level
    else:
        p_normal1 = test_normality(d1)
        p_normal2 = test_normality(d2)
        
        if n_d1 > 100 and n_d2 > 100:
            print('N is sufficiently large, parametric test will take precedence')
            return test_significance_parametric(d1, n_d1, d2, n_d2, alpha, paired_samples)
        else:
            if continuous_distribution:
                if paired_samples:
                    # wilcoxon signed-rank
                    wsr_stat, wsr_p = sp_stats.wilcoxon(d1, d2)
                    print('| Wilcoxon signed-rank,', end=' ')
                    if wsr_p > alpha:
                        pass
                    else:
                        print(f'stat:{wsr_stat} p: {wsr_p:.4f}, Reject H0')
                        return True, mwu_p
                else:
                    # Mann-Whitney U
                    mwu_stat, mwu_p = sp_stats.mannwhitneyu(d1, d2)
                    print('| Mann-Whitney U,', end=' ')
                    if mwu_p > alpha:
                        pass
                    else:
                        print(f'stat:{mwu_stat} p: {mwu_p:.4f}, Reject H0')
                        return True, mwu_p
            else:
                epps_stat, epps_p = sp_stats.epps_singleton_2samp(d1, d2)
                print('| Epps singleton 2 samp,', end=' ')
                if epps_p > alpha:
                    pass # print(f'Fail to reject h0')
                else:
                    print(f'p:{epps_p:.4f}, Reject H0')
                    return True, epps_p
    
    print('N.S.')
    return False, None

def test_normality(data):
    """
    Only the result from D'Agostino and Pearson's test is reported
    """
    statistic, p = sp_stats.normaltest(data)
    
    print(f'normality test', f'p={p:.3f}' if p > 1E-3 else f'p={p:.3E}', end=' ')
    if p < 0.05:
        print('(sig. non-normal)', end=' ')
    return p

def test_variance_homogeneity(d1, d2, p_normal1, p_normal2):
    """
    Levene test is more robust for data with significant deviations from normality
    """
    if p_normal1 > 0.05 and p_normal2 > 0.05:
        bartlet_stat, p_var = sp_stats.bartlett(d1, d2)
        print('variance homogeneity test (bartlett)', f'p={p_var:.3E}' if p_var < 1E-3 else f'p={p_var:.3f}', end=' ')
    else:
        levene_stat, p_var = sp_stats.levene(d1, d2)
        print(f'variance homogeneity test (levene)', f'p={p_var:.3E}' if p_var < 1E-3 else f'p={p_var:.3f}', end=' ')
    return p_var

def test_significance_parametric(d1, n_d1, d2, n_d2, alpa=0.05, paired_samples=False, p_normal1=None, p_normal2=None):
    """
    Useful tests for paired and non-paired non-parametric samples.
    These tests assume coninuous distributions
    """
    if n_d1 < 100 and n_d2 < 100:
        if n_d1 < 20 or n_d2 < 20:
            print('WARNING Sample size is too-low, non-parametric test must be used instead.')
            return False, None
        else:
            print('Sample size is low 20 < n < 100')
    if p_normal1 is None:
        p_normal1 = test_normality(d1)
    if p_normal2 is None:
        p_normal2 = test_normality(d2)
    # test equal variances
    p_var = test_variance_homogeneity(d1, d2, p_normal1, p_normal2)

    if paired_samples:
        # paired samples t-test
        ttest_stat, ttest_p = sp_stats.ttest_rel(d1, d2)
        print('| t-test paired,', end=' ')
    else:
        # independent samples t-test, assumes identical variances
        # We need to set equal_var=False if samples have different sizes https://stackoverflow.com/a/22613361 to execute a welch's test
        ttest_stat, ttest_p = sp_stats.ttest_ind(d1, d2, equal_var=False if p_var > alpha and n_d1 != n_d2 else p_var > alpha)
        print('| t-test indep.,', end=' ')
        
    if ttest_p > alpha:
        print('N.S.')
        return False, None
    else:
        print(f'p={ttest_p:.4f}, Reject H0')
        return True, ttest_p

# Correct form, multiple samples simultaneously

## Rois in same class

In [2]:
# rois in same class, all repeats together
def differences_between_rois_in_same_class_same_region(section, alpha=0.05):
    print('=='*20, '\n','ROIs in same class in', section)
    for class_name, class_colour in classes2['ordering']:
        print('Rois in', class_name, [roi for roi in classes2[section][class_name]])
        dpr = []
        lengths = []
        for i_roi in classes2[section][class_name]:
            d = v2b_data.query(f"section=='{section}' & replicate in {[i for i in range(secs[section][1])]} & roi=={i_roi-1}")['radius_um']
            lengths.append((i_roi, len(d)))
            dpr.append(d)
        min_n = min([len(d) for d in dpr])
        # samples with less than 3 data points cannot be tested
        if min_n < 3:
            print(f'min_n ({min_n}) < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr = dpr
            dpr = [d for d in dpr if len(d)>3]
            if len(dpr) < 2:
                print('There are not sufficient data points for testing')
                break
            print('Elements skipped:', [pair for pair in lengths if pair[1] < 3], end='\n\t      ')
            min_n = min([len(d) for d in dpr])

        # one way t-test assumes homoscedasticity, we must check the samples have the same variance
        parametric = False
        if min_n > 99:
            print(f'min_n ({min_n}) > 100, Normality not essential, skipping test', end=' ')
            # same variances?
            levene_stat, levene_p = sp_stats.levene(*dpr)
            if levene_p > 0.05:
                parametric = True
                print('Fail to reject Homoscedasticity', end=' ')
            else:
                print('Significantly different variances.', end=' ')

        elif min_n > 25:
            print(f'25 < min_n ({min_n}) < 100,', end='\n\t')
            if min([test_normality(d) for d in dpr]) < alpha:
                print('\n\tAt least one distribution is significantly non-normal.', end=' ')
            else:
                print('\n\tDistributions not significantly different from normal', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
        else:
            print(f'3 < min_n ({min_n}) < 25,', end=' ')

        if parametric:
            print('\n\tParametric test.\n\t', sp_stats.f_oneway(*dpr))
        else:
            print('\n\tNon-parametric test.\n\t', sp_stats.kruskal(*dpr))
        
        dprs = dpr
        pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
        print('Pooled deviation', pooled_dev,'\n-------')

In [5]:
differences_between_rois_in_same_class_same_region('ampulla')
#differences_between_rois_in_same_class_same_region('isthmus')

 ROIs in same class in ampulla
Rois in narrow_end [1, 2, 3, 4, 5, 6]
25 < min_n (35) < 100,
	normality test p=7.706E-05 (sig. non-normal) normality test p=2.072E-05 (sig. non-normal) normality test p=0.003 (sig. non-normal) normality test p=0.058 normality test p=0.116 normality test p=0.031 (sig. non-normal) 
	At least one distribution is significantly non-normal. 
	Non-parametric test.
	 KruskalResult(statistic=2.7978747610719283, pvalue=0.7311130028398605)
Pooled deviation 0.10283165292097028 
-------
Rois in wide_end [7, 8, 9, 10, 11, 12]
3 < min_n (15) < 25, 
	Non-parametric test.
	 KruskalResult(statistic=3.4526524664824274, pvalue=0.6305637109408736)
Pooled deviation 0.0979522541535115 
-------
Rois in narrow_lumen [13, 14, 15, 16, 17, 18]
3 < min_n (9) < 25, 
	Non-parametric test.
	 KruskalResult(statistic=3.974726089122708, pvalue=0.5530605031998688)
Pooled deviation 0.10361567523296436 
-------
Rois in wide_lumen [19, 20, 21, 22, 23, 24]
min_n (2) < 3, Attempting testing a su

In [6]:
# rois in same class, per repeat
def differences_between_rois_in_same_class_same_region(section, alpha=0.05):
    print('=='*20, '\n','ROIs in same class in', section)
    for class_name, class_colour in classes2['ordering']:
        print('Rois in', class_name, [roi for roi in classes2[section][class_name]])
        
        for rep in range(secs[section][1]):
            print('    repeat',rep+1, end=') ')
            dpr = []
            lengths = []
            for i_roi in classes2[section][class_name]:
                d = v2b_data.query(f"section=='{section}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                #d = v2b_data.query(f"section=='{section}' & replicate in [0,1,2,3,4] & roi=={i_roi-1}")['radius_um']
                lengths.append((i_roi, len(d)))
                dpr.append(d)
            min_n = min([len(d) for d in dpr])
            # samples with less than 3 data points cannot be tested
            if min_n < 3:
                print(f'min_n ({min_n}) < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr = dpr
                dpr = [d for d in dpr if len(d)>3]
                if len(dpr) < 2:
                    print('There are not sufficient data points for testing')
                    break
                print('Elements skipped:', [pair for pair in lengths if pair[1] < 3], end='\n\t      ')
                min_n = min([len(d) for d in dpr])

            # one way t-test assumes homoscedasticity, we must check the samples have the same variance
            parametric = False
            if min_n > 99:
                print(f'min_n ({min_n}) > 100, Normality not essential, skipping test', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')

            elif min_n > 25:
                print(f'25 < min_n ({min_n}) < 100,', end='\n\t')
                if min([test_normality(d) for d in dpr]) < alpha:
                    print('\n\tAt least one distribution is significantly non-normal.', end=' ')
                else:
                    print('\n\tDistributions not significantly different from normal', end=' ')
                    # same variances?
                    levene_stat, levene_p = sp_stats.levene(*dpr)
                    if levene_p > 0.05:
                        parametric = True
                        print('Fail to reject Homoscedasticity', end=' ')
                    else:
                        print('Significantly different variances.', end=' ')
            else:
                print(f'3 < min_n ({min_n}) < 25,', end=' ')

            if parametric:
                print('\n\tParametric test.\n\t', sp_stats.f_oneway(*dpr))
            else:
                print('\n\tNon-parametric test.\n\t', sp_stats.kruskal(*dpr))

In [7]:
#differences_between_rois_in_same_class_same_region('ampulla')
differences_between_rois_in_same_class_same_region('isthmus')

 ROIs in same class in isthmus
Rois in narrow_end [1, 2, 3, 4, 5, 6]
    repeat 1) 25 < min_n (57) < 100,
	normality test p=0.018 (sig. non-normal) normality test p=1.303E-04 (sig. non-normal) normality test p=9.797E-11 (sig. non-normal) normality test p=5.120E-11 (sig. non-normal) normality test p=1.387E-06 (sig. non-normal) normality test p=5.085E-10 (sig. non-normal) 
	At least one distribution is significantly non-normal. 
	Non-parametric test.
	 KruskalResult(statistic=1.899771433374041, pvalue=0.8628325075141969)
    repeat 2) 25 < min_n (55) < 100,
	normality test p=0.002 (sig. non-normal) normality test p=0.008 (sig. non-normal) normality test p=4.262E-11 (sig. non-normal) normality test p=4.206E-08 (sig. non-normal) normality test p=4.642E-10 (sig. non-normal) normality test p=2.301E-06 (sig. non-normal) 
	At least one distribution is significantly non-normal. 
	Non-parametric test.
	 KruskalResult(statistic=6.638089815958316, pvalue=0.2489768895847032)
    repeat 3) 25 < min_

## Rois from the same cross-section in different classes, per repeat

In [65]:
# rois in different class
def differences_between_rois_in_different_class_same_region(section, alpha=0.05):
    """
    Test for significant differences between distinct ROI classes by using all
    the elements in each class per repeat.
    We use combinations of the classes to test them against the others
    """
    print('=='*20, '\n','ROIs in different class in', section)
    for roi_class_1, roi_class_2 in [p for p in combinations([t[0] for t in classes2['ordering']], 2)]:
        print('Rois in', roi_class_1, [roi for roi in classes2[section][roi_class_1]])
        print('Rois in', roi_class_2, [roi for roi in classes2[section][roi_class_2]])
        
        for rep in range(secs[section][1]):
            dpr1, dpr2 = [], []
            lengths1, lengths2 = [], []
            print('    repeat',rep+1, end=') ')
            for i_roi in classes2[section][roi_class_1]:
                d1 = v2b_data.query(f"section=='{section}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                lengths1.append((i_roi, len(d1)))
                dpr1.append(d1)
            for i_roi in classes2[section][roi_class_2]:
                d2 = v2b_data.query(f"section=='{section}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                lengths2.append((i_roi, len(d2)))
                dpr2.append(d2)

            # all the test must be done to both lists of data
            min_n1 = min([len(d) for d in dpr1])
            min_n2 = min([len(d) for d in dpr2])
            # samples with less than 3 data points cannot be tested
            if min_n1 < 3:
                print('min_n1 < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr1 = dpr1
                dpr1 = [d for d in dpr1 if len(d)>3]
                print('Elements skipped:', [pair for pair in lengths1 if pair[1] < 3], end='\n\t      ')
                if len(dpr1) < 2:
                    print('There are not sufficient data points for testing')
                    break
                min_n1 = min([len(d) for d in dpr1])
            if min_n2 < 3:
                print('min_n2 < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr2 = dpr2
                dpr2 = [d for d in dpr2 if len(d)>3]
                print('Elements skipped:', [pair for pair in lengths2 if pair[1] < 3], end='\n\t      ')
                if len(dpr2) < 2:
                    print('There are not sufficient data points for testing')
                    break
                min_n2 = min([len(d) for d in dpr2])

            # one way t-test assumes homoscedasticity, we must check the samples have the same variance
            parametric = False
            if min_n1 > 99 and min_n2 > 99:
                print(f'min_n1 ({min_n1}) > 100 and min_n2 ({min_n2}) > 99, Normality not highly important, skipping test', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
                    
            elif min_n1 > 25 and min_n2 > 25:
                print(f'25 < min_n1 ({min_n1}), min_n2 ({min_n2}) < 100,', end='\n\t')
                if min([test_normality(d) for d in dpr1 + dpr2]) < 0.05:
                    print('\n\tAt least one distribution is significantly non-normal.', end=' ')
                else:
                    print('\n\tDistributions not significantly different from normal', end=' ')
                    # same variances?
                    levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                    if levene_p > 0.05:
                        parametric = True
                        print('Fail to reject Homoscedasticity', end=' ')
                    else:
                        print('Significantly different variances.', end=' ')
            else:
                print(f'3 < min_n ({min_n1}, {min_n2}) < 25,', end=' ')
            
            if parametric:
                res = sp_stats.f_oneway(*dpr1, *dpr2)
                print(f'\n\tParametric test (one-way ANOVA). {res[1]:.4f}', end=' ')
            else:
                res = sp_stats.kruskal(*dpr1, *dpr2)
                print(f'\n\tNon-parametric test (Kruskal-Wallis). {res[1]:.4f}', end=' ')
            print('N.S.' if res[1] > alpha else 'Significant differences. Reject H0')
            dprs = dpr1 + dpr2
            pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
            print('Pooled deviation', pooled_dev,'\n-------')

In [105]:
differences_between_rois_in_different_class_same_region('ampulla')
differences_between_rois_in_different_class_same_region('isthmus')

 ROIs in different class in ampulla
Rois in narrow_end [1, 2, 3, 4, 5, 6]
Rois in wide_end [7, 8, 9, 10, 11, 12]
    repeat 1) 3 < min_n (7, 4) < 25, 
	Non-parametric test (Kruskal-Wallis). 0.8359 N.S.
Pooled deviation 0.10735938491217885 
-------
    repeat 2) 3 < min_n (10, 3) < 25, 
	Non-parametric test (Kruskal-Wallis). 0.7159 N.S.
Pooled deviation 0.0989568228622625 
-------
    repeat 3) min_n1 < 3, Attempting testing a subset of the data. Elements skipped: [(1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0)]
	      There are not sufficient data points for testing
Rois in narrow_end [1, 2, 3, 4, 5, 6]
Rois in narrow_lumen [13, 14, 15, 16, 17, 18]
    repeat 1) 3 < min_n (7, 3) < 25, 
	Non-parametric test (Kruskal-Wallis). 0.7115 N.S.
Pooled deviation 0.10515278379152997 
-------
    repeat 2) 3 < min_n (10, 3) < 25, 
	Non-parametric test (Kruskal-Wallis). 0.2490 N.S.
Pooled deviation 0.09847183268701627 
-------
    repeat 3) min_n1 < 3, Attempting testing a subset of the data. Eleme

## Rois from the same cross-section in different classes, pooled across repeats

In [67]:
def differences_between_rois_in_different_class_same_region_pooled(section, alpha=0.05):
    """
    Test for significant differences between distinct ROI classes by 
    pooling the elements in each class across repeats
    Combinations of the classes are used to cross test the different classes.
    """
    print('=='*20, '\n','ROIs in different class in', section)
    for roi_class_1, roi_class_2 in [p for p in combinations([t[0] for t in classes2['ordering']], 2)]:
        print('Rois in', roi_class_1, [roi for roi in classes2[section][roi_class_1]])
        print('Rois in', roi_class_2, [roi for roi in classes2[section][roi_class_2]])
        
        #for rep in range(5):
        dpr1, dpr2 = [], []
        lengths1, lengths2 = [], []
        #print('    repeat',rep+1, end=') ')
        
        for i_roi in classes2[section][roi_class_1]:
            d1 = v2b_data.query(f"section=='{section}' & replicate in {[i for i in range(secs[section][1])]} & roi=={i_roi-1}")['radius_um']
            lengths1.append((i_roi, len(d1)))
            dpr1.append(d1)
        for i_roi in classes2[section][roi_class_2]:
            d2 = v2b_data.query(f"section=='{section}' & replicate in {[i for i in range(secs[section][1])]} & roi=={i_roi-1}")['radius_um']
            lengths2.append((i_roi, len(d2)))
            dpr2.append(d2)

        # all the test must be done to both lists of data
        min_n1 = min([len(d) for d in dpr1])
        min_n2 = min([len(d) for d in dpr2])
        # samples with less than 3 data points cannot be tested
        if min_n1 < 3:
            print('min_n1 < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr1 = dpr1
            dpr1 = [d for d in dpr1 if len(d)>3]
            print('Elements skipped:', [pair for pair in lengths1 if pair[1] < 3], end='\n\t      ')
            if len(dpr1) < 2:
                print('There are not sufficient data points for testing')
                break
            min_n1 = min([len(d) for d in dpr1])
        if min_n2 < 3:
            print('min_n2 < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr2 = dpr2
            dpr2 = [d for d in dpr2 if len(d)>3]
            print('Elements skipped:', [pair for pair in lengths2 if pair[1] < 3], end='\n\t      ')
            if len(dpr2) < 2:
                print('There are not sufficient data points for testing')
                break
            min_n2 = min([len(d) for d in dpr2])

        # one way t-test assumes homoscedasticity, we must check the samples have the same variance
        parametric = False
        if min_n1 > 99 and min_n2 > 99:
            print(f'min_n1 ({min_n1}) > 100 and min_n2 ({min_n2}) > 99, Normality not highly important, skipping test', end=' ')
            # same variances?
            levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
            if levene_p > 0.05:
                parametric = True
                print('Fail to reject Homoscedasticity', end=' ')
            else:
                print('Significantly different variances.', end=' ')

        elif min_n1 > 25 and min_n2 > 25:
            print(f'25 < min_n1 ({min_n1}), min_n2 ({min_n2}) < 100,', end='\n\t')
            if min([test_normality(d) for d in dpr1 + dpr2]) < 0.05:
                print('\n\tAt least one distribution is significantly non-normal.', end=' ')
            else:
                print('\n\tDistributions not significantly different from normal', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
        else:
            print(f'3 < min_n ({min_n1}, {min_n2}) < 25,', end=' ')

        if parametric:
            res = sp_stats.f_oneway(*dpr1, *dpr2)
            print(f'\n\tParametric test. {res[1]:.4f}', end=' ')
        else:
            res = sp_stats.kruskal(*dpr1, *dpr2)
            print(f'\n\tNon-parametric test. {res[1]:.4f}', end=' ')
        print('N.S.' if res[1] > alpha else 'Significant differences. Reject H0')
        dprs = dpr1 + dpr2
        pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
        print('Pooled deviation', pooled_dev,'\n-------')

In [106]:
differences_between_rois_in_different_class_same_region_pooled('ampulla')
differences_between_rois_in_different_class_same_region_pooled('isthmus')

 ROIs in different class in ampulla
Rois in narrow_end [1, 2, 3, 4, 5, 6]
Rois in wide_end [7, 8, 9, 10, 11, 12]
3 < min_n (17, 7) < 25, 
	Non-parametric test. 0.4290 N.S.
Pooled deviation 0.1033884766534306 
-------
Rois in narrow_end [1, 2, 3, 4, 5, 6]
Rois in narrow_lumen [13, 14, 15, 16, 17, 18]
3 < min_n (17, 6) < 25, 
	Non-parametric test. 0.3056 N.S.
Pooled deviation 0.10175214822001426 
-------
Rois in narrow_end [1, 2, 3, 4, 5, 6]
Rois in wide_lumen [19, 20, 21, 22, 23, 24]
min_n2 < 3, Attempting testing a subset of the data. Elements skipped: [(20, 1), (21, 2), (22, 1)]
	      3 < min_n (17, 4) < 25, 
	Non-parametric test. 0.4102 N.S.
Pooled deviation 0.10356211792759458 
-------
Rois in wide_end [7, 8, 9, 10, 11, 12]
Rois in narrow_lumen [13, 14, 15, 16, 17, 18]
3 < min_n (7, 6) < 25, 
	Non-parametric test. 0.4789 N.S.
Pooled deviation 0.09976783842028569 
-------
Rois in wide_end [7, 8, 9, 10, 11, 12]
Rois in wide_lumen [19, 20, 21, 22, 23, 24]
min_n2 < 3, Attempting testin

## Same class - different cross-section, per repeat

In [8]:
# rois in different class
def differences_between_rois_in_same_class_different_region(alpha=0.05):
    """
    Test for significant differences between distinct ROI classes by using all the elements in each class
    In order to test each class against the others, we need to use combinations of the classes
    """
    sections = ['isthmus', 'ampulla']
    print('=='*20, '\n','ROIs in different class in', sections)
    for class_name, class_colour in classes2['ordering']:
        rois_class_1 = [roi for roi in classes2[sections[0]][class_name]]
        rois_class_2 = [roi for roi in classes2[sections[1]][class_name]]
        print(f'Rois in {sections[0]}-{class_name}', rois_class_1)
        print(f'Rois in {sections[1]}-{class_name}', rois_class_2)
        
        min_rep = secs[sections[0]][1] if secs[sections[0]][1] < secs[sections[1]][1] else secs[sections[1]][1]
        for rep in range(min_rep):
            dpr1, dpr2 = [], []
            lengths1, lengths2 = [], []
            print('    repeat',rep+1, end=') ')
            for i_roi in rois_class_1:
                d1 = v2b_data.query(f"section=='{sections[0]}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                lengths1.append((i_roi, len(d1)))
                dpr1.append(d1)
            for i_roi in rois_class_2:
                d2 = v2b_data.query(f"section=='{sections[1]}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                lengths2.append((i_roi, len(d2)))
                dpr2.append(d2)

            # all the test must be done to both lists of data
            min_n1 = min([len(d) for d in dpr1])
            min_n2 = min([len(d) for d in dpr2])
            # samples with less than 3 data points cannot be tested
            if min_n1 < 3:
                print('min_n1 < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr1 = dpr1
                dpr1 = [d for d in dpr1 if len(d)>3]
                print('Elements skipped:', [pair for pair in lengths1 if pair[1] < 3], end='\n\t      ')
                if len(dpr1) < 2:
                    print('There are not sufficient data points for testing')
                    print('-------')
                    continue
                min_n1 = min([len(d) for d in dpr1])
            if min_n2 < 3:
                print('min_n2 < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr2 = dpr2
                dpr2 = [d for d in dpr2 if len(d)>3]
                print('Elements skipped:', [pair for pair in lengths2 if pair[1] < 3], end='\n\t      ')
                if len(dpr2) < 2:
                    print('There are not sufficient data points for testing')
                    print('-------')
                    continue
                min_n2 = min([len(d) for d in dpr2])

            # one way t-test assumes homoscedasticity, we must check the samples have the same variance
            parametric = False
            if min_n1 > 99 and min_n2 > 99:
                print(f'min_n1 ({min_n1}) > 100 and min_n2 ({min_n2}) > 99, Normality not highly important, skipping test', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
                    
            elif min_n1 > 25 and min_n2 > 25:
                print(f'25 < min_n1 ({min_n1}), min_n2 ({min_n2}) < 100,', end='\n\t')
                if min([test_normality(d) for d in dpr1 + dpr2]) < 0.05:
                    print('\n\tAt least one distribution is significantly non-normal.', end=' ')
                else:
                    print('\n\tDistributions not significantly different from normal', end=' ')
                    # same variances?
                    levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                    if levene_p > 0.05:
                        parametric = True
                        print('Fail to reject Homoscedasticity', end=' ')
                    else:
                        print('Significantly different variances.', end=' ')
            else:
                print(f'3 < min_n ({min_n1}, {min_n2}) < 25,', end=' ')
            
            if parametric:
                res = sp_stats.f_oneway(*dpr1, *dpr2)
                print(f'\n\tParametric test. {res[1]:.4f}', end=' ')
            else:
                res = sp_stats.kruskal(*dpr1, *dpr2)
                print(f'\n\tNon-parametric test. {res[1]:.4f}', end=' ')
            print('N.S.' if res[1] > alpha else 'Significant differences. Reject H0')
            
            dprs = dpr1 + dpr2
            pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
            print('Pooled deviation', pooled_dev,'\n-------')

In [9]:
differences_between_rois_in_same_class_different_region()

 ROIs in different class in ['isthmus', 'ampulla']
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-narrow_end [1, 2, 3, 4, 5, 6]
    repeat 1) 3 < min_n (57, 10) < 25, 
	Non-parametric test. 0.9075 N.S.
Pooled deviation 0.10170999853732479 
-------
    repeat 2) 3 < min_n (55, 9) < 25, 
	Non-parametric test. 0.7465 N.S.
Pooled deviation 0.10268529284677572 
-------
    repeat 3) 3 < min_n (60, 12) < 25, 
	Non-parametric test. 0.6338 N.S.
Pooled deviation 0.10073822455335978 
-------
Rois in isthmus-wide_end [7, 8, 9]
Rois in ampulla-wide_end [7, 8, 9, 10, 11, 12]
    repeat 1) 3 < min_n (35, 4) < 25, 
	Non-parametric test. 0.4032 N.S.
Pooled deviation 0.10411699874737314 
-------
    repeat 2) 3 < min_n (41, 4) < 25, 
	Non-parametric test. 0.7406 N.S.
Pooled deviation 0.10334712369484578 
-------
    repeat 3) 3 < min_n (32, 7) < 25, 
	Non-parametric test. 0.1705 N.S.
Pooled deviation 0.1056506987376516 
-------
Rois in isthmus-narrow_lumen [10, 11, 12, 13, 14, 15]
Rois i

## Same class - different cross-section, pooled by repeats

In [92]:
# rois in different class
def differences_between_rois_in_same_class_different_region_pooled(alpha=0.05):
    """
    Test for significant differences between distinct ROI classes by using all the elements in each class
    In order to test each class against the others, we need to use combinations of the classes
    """
    sections = ['isthmus', 'ampulla']
    print('=='*20, '\n','ROIs in same class in', sections)
    for class_name, class_colour in classes2['ordering']:
        rois_class_1 = [roi for roi in classes2[sections[0]][class_name]]
        rois_class_2 = [roi for roi in classes2[sections[1]][class_name]]
        print(f'Rois in {sections[0]}-{class_name}', rois_class_1)
        print(f'Rois in {sections[1]}-{class_name}', rois_class_2)
        

        #for rep in range(5):
        dpr1, dpr2 = [], []
        lengths1, lengths2 = [], []

        for i_roi in rois_class_1:
            d1 = v2b_data.query(f"section=='{sections[0]}' & replicate in {[i for i in range(secs[sections[0]][1])]} & roi=={i_roi-1}")['radius_um']
            lengths1.append((i_roi, len(d1)))
            dpr1.append(d1)
        for i_roi in rois_class_2:
            d2 = v2b_data.query(f"section=='{sections[1]}' & replicate in {[i for i in range(secs[sections[1]][1])]} & roi=={i_roi-1}")['radius_um']
            lengths2.append((i_roi, len(d2)))
            dpr2.append(d2)

        # all the test must be done to both lists of data
        min_n1 = min([len(d) for d in dpr1])
        min_n2 = min([len(d) for d in dpr2])
        # samples with less than 3 data points cannot be tested
        if min_n1 < 3:
            print('min_n1 < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr1 = dpr1
            dpr1 = [d for d in dpr1 if len(d)>3]
            print('Elements skipped:', [pair for pair in lengths1 if pair[1] < 3], end='\n\t      ')
            if len(dpr1) < 2:
                print('There are not sufficient data points for testing')
                print('-------')
                continue
            min_n1 = min([len(d) for d in dpr1])
        if min_n2 < 3:
            print('min_n2 < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr2 = dpr2
            dpr2 = [d for d in dpr2 if len(d)>3]
            print('Elements skipped:', [pair for pair in lengths2 if pair[1] < 3], end='\n\t      ')
            if len(dpr2) < 2:
                print('There are not sufficient data points for testing')
                print('-------')
                continue
            min_n2 = min([len(d) for d in dpr2])

        # one way t-test assumes homoscedasticity, we must check the samples have the same variance
        parametric = False
        if min_n1 > 99 and min_n2 > 99:
            print(f'min_n1 ({min_n1}) > 100 and min_n2 ({min_n2}) > 99, Normality not highly important, skipping test', end=' ')
            # same variances?
            levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
            if levene_p > 0.05:
                parametric = True
                print('Fail to reject Homoscedasticity', end=' ')
            else:
                print('Significantly different variances.', end=' ')

        elif min_n1 > 25 and min_n2 > 25:
            print(f'25 < min_n1 ({min_n1}), min_n2 ({min_n2}) < 100,', end='\n\t')
            if min([test_normality(d) for d in dpr1 + dpr2]) < 0.05:
                print('\n\tAt least one distribution is significantly non-normal.', end=' ')
            else:
                print('\n\tDistributions not significantly different from normal', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
        else:
            print(f'3 < min_n ({min_n1},{min_n2}) < 25,', end=' ')

        if parametric:
            res = sp_stats.f_oneway(*dpr1, *dpr2)
            print(f'\n\tParametric test. {res[1]:.4f}', end=' ')
        else:
            res = sp_stats.kruskal(*dpr1, *dpr2)
            print(f'\n\tNon-parametric test. {res[1]:.4f}', end=' ')
        print('N.S.' if res[1] > alpha else 'Significant differences. Reject H0')
        
        dprs = dpr1 + dpr2
        pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
        print('Pooled deviation', pooled_dev,'-------')

In [108]:
differences_between_rois_in_same_class_different_region_pooled()

 ROIs in same class in ['isthmus', 'ampulla']
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-narrow_end [1, 2, 3, 4, 5, 6]
3 < min_n (160,17) < 25, 
	Non-parametric test. 0.7460 N.S.
Pooled deviation 0.1015466369661551 -------
Rois in isthmus-wide_end [7, 8, 9]
Rois in ampulla-wide_end [7, 8, 9, 10, 11, 12]
3 < min_n (110,7) < 25, 
	Non-parametric test. 0.6629 N.S.
Pooled deviation 0.10115568321026572 -------
Rois in isthmus-narrow_lumen [10, 11, 12, 13, 14, 15]
Rois in ampulla-narrow_lumen [13, 14, 15, 16, 17, 18]
3 < min_n (84,6) < 25, 
	Non-parametric test. 0.3961 N.S.
Pooled deviation 0.09919847147516875 -------
Rois in isthmus-wide_lumen [16, 17, 18, 19, 20, 21]
Rois in ampulla-wide_lumen [19, 20, 21, 22, 23, 24]
min_n2 < 3, Attempting testing a subset of the data. Elements skipped: [(20, 1), (21, 2), (22, 1)]
	      3 < min_n (43,4) < 25, 
	Non-parametric test. 0.0228 Significant differences. Reject H0
Pooled deviation 0.09545786089603209 -------


## Different class - different region, per repeat

In [90]:
# rois in different class
def differences_between_rois_in_different_class_different_region(sections = ['isthmus', 'ampulla'], alpha=0.05):
    """
    Test for significant differences between same ROI classes in distinct region by using all the elements in each class
    In order to test each class against the others
    """
    print('=='*20, '\n','ROIs in different class in sections', sections,'per repeat')
    
    for roi_class1, roi_class2 in [p for p in combinations([t[0] for t in classes2['ordering']], 2)]:
        # fetch all the ROis in class x in the section1
        # fetch all the ROis in class x in the section2
        # compare ROIs in both sections
        rois_class1 = [roi for roi in classes2[sections[0]][roi_class1]]
        rois_class2 = [roi for roi in classes2[sections[1]][roi_class2]]
        print(f'Rois in {sections[0]}-{roi_class1}', rois_class1)
        print(f'Rois in {sections[1]}-{roi_class2}', rois_class2)

        for rep in range(secs[sections[0]][1] if secs[sections[0]][1] < secs[sections[1]][1] else secs[sections[1]][1]):
            dpr1, dpr2 = [], []
            lengths1, lengths2 = [], []
            print('    repeat',rep+1, end=') ')
            for i_roi in rois_class1:
                d1 = v2b_data.query(f"section=='{sections[0]}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                lengths1.append((i_roi, len(d1)))
                dpr1.append(d1)
            for i_roi in rois_class2:
                d2 = v2b_data.query(f"section=='{sections[1]}' & replicate=={rep} & roi=={i_roi-1}")['radius_um']
                lengths2.append((i_roi, len(d2)))
                dpr2.append(d2)

            # all the test must be done to both lists of data
            min_n1 = min([len(d) for d in dpr1])
            min_n2 = min([len(d) for d in dpr2])
            # samples with less than 3 data points cannot be tested
            if min_n1 < 3:
                print('min_n1 < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr1 = dpr1
                dpr1 = [d for d in dpr1 if len(d)>3]
                print('Elements skipped:', [pair for pair in lengths1 if pair[1] < 3], end='\n\t      ')
                if len(dpr1) < 2:
                    print('There are not sufficient data points for testing')
                    print('-------')
                    continue
                min_n1 = min([len(d) for d in dpr1])
            if min_n2 < 3:
                print('min_n2 < 3, Attempting testing a subset of the data.', end=' ')
                original_dpr2 = dpr2
                dpr2 = [d for d in dpr2 if len(d)>3]
                print('Elements skipped:', [pair for pair in lengths2 if pair[1] < 3], end='\n\t      ')
                if len(dpr2) < 2:
                    print('There are not sufficient data points for testing')
                    print('-------')
                    continue
                min_n2 = min([len(d) for d in dpr2])

            # one way t-test assumes homoscedasticity, we must check the samples have the same variance
            parametric = False
            if min_n1 > 99 and min_n2 > 99:
                print(f'min_n1 ({min_n1}) > 100 and min_n2 ({min_n2}) > 99, Normality not highly important, skipping test', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
                    
            elif min_n1 > 25 and min_n2 > 25:
                print(f'25 < min_n1 ({min_n1}), min_n2 ({min_n2}) < 100,', end='\n\t')
                if min([test_normality(d) for d in dpr1 + dpr2]) < 0.05:
                    print('\n\tAt least one distribution is significantly non-normal.', end=' ')
                else:
                    print('\n\tDistributions not significantly different from normal', end=' ')
                    # same variances?
                    levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                    if levene_p > 0.05:
                        parametric = True
                        print('Fail to reject Homoscedasticity', end=' ')
                    else:
                        print('Significantly different variances.', end=' ')
            else:
                print(f'3 < min_n ({min_n1}, {min_n2}) < 25,', end=' ')
            
            if parametric:
                res = sp_stats.f_oneway(*dpr1, *dpr2)
                print('\n\tParametric test.\n\t', res[1], end=' ')
            else:
                res = sp_stats.kruskal(*dpr1, *dpr2)
                print('\n\tNon-parametric test.\n\t', res[1], end=' ')
            print('N.S.' if res[1] > alpha else 'Significant differences. Reject H0')
            
            dprs = dpr1 + dpr2
            pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
            print('Pooled deviation', pooled_dev,'\n-------')

In [109]:
differences_between_rois_in_different_class_different_region()

 ROIs in different class in sections ['isthmus', 'ampulla'] per repeat
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-wide_end [7, 8, 9, 10, 11, 12]
    repeat 1) 3 < min_n (57, 4) < 25, 
	Non-parametric test.
	 0.27751930065604635 N.S.
Pooled deviation 0.10161156879451386 
-------
    repeat 2) 3 < min_n (46, 3) < 25, 
	Non-parametric test.
	 0.8667086679397221 N.S.
Pooled deviation 0.10152536329758484 
-------
    repeat 3) min_n2 < 3, Attempting testing a subset of the data. Elements skipped: [(7, 0), (8, 0), (9, 0), (10, 0), (11, 0), (12, 0)]
	      There are not sufficient data points for testing
-------
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-narrow_lumen [13, 14, 15, 16, 17, 18]
    repeat 1) 3 < min_n (57, 3) < 25, 
	Non-parametric test.
	 0.34083779296387984 N.S.
Pooled deviation 0.1011394484004768 
-------
    repeat 2) 3 < min_n (46, 3) < 25, 
	Non-parametric test.
	 0.3290168646137654 N.S.
Pooled deviation 0.10123119234907195 
-------
  

## Different class - different region, pooled

In [81]:
# rois in different class
def differences_between_rois_in_different_class_different_region_pooled(alpha=0.05):
    """
    Test for significant differences between same ROI classes in distinct region by using all the elements in each class
    In order to test each class against the others
    """
    sections = ['isthmus', 'ampulla']
    print('=='*20, '\n','ROIs in different class in sections', sections, 'pooled across repeats')
    
    for roi_class1, roi_class2 in [p for p in combinations([t[0] for t in classes2['ordering']], 2)]:
        # fetch all the ROis in class x in the isthmus
        # fetch all the ROis in class x in the ampulla
        # compare ROIs in both sectinons ysung 
        rois_class1 = [roi for roi in classes2[sections[0]][roi_class1]]
        rois_class2 = [roi for roi in classes2[sections[1]][roi_class2]]
        print(f'Rois in {sections[0]}-{roi_class1}', rois_class1)
        print(f'Rois in {sections[1]}-{roi_class2}', rois_class2)
        
        #for rep in range(5):
        dpr1, dpr2 = [], []
        lengths1, lengths2 = [], []

        min_rep = secs[sections[0]][1] if secs[sections[0]][1] < secs[sections[1]][1] else secs[sections[1]][1]
        for i_roi in rois_class1:
            d1 = v2b_data.query(f"section=='{sections[0]}' & replicate in {[i for i in range(min_rep)]} & roi=={i_roi-1}")['radius_um']
            lengths1.append((i_roi, len(d1)))
            dpr1.append(d1)
        for i_roi in rois_class2:
            d2 = v2b_data.query(f"section=='{sections[1]}' & replicate in {[i for i in range(min_rep)]} & roi=={i_roi-1}")['radius_um']
            lengths2.append((i_roi, len(d2)))
            dpr2.append(d2)

        # all the test must be done to both lists of data
        min_n1 = min([len(d) for d in dpr1])
        min_n2 = min([len(d) for d in dpr2])
        # samples with less than 3 data points cannot be tested
        if min_n1 < 3:
            print('min_n1 < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr1 = dpr1
            dpr1 = [d for d in dpr1 if len(d)>3]
            print('Elements skipped:', [pair for pair in lengths1 if pair[1] < 3], end='\n\t      ')
            if len(dpr1) < 2:
                print('There are not sufficient data points for testing')
                print('-------')
                continue
            min_n1 = min([len(d) for d in dpr1])
        if min_n2 < 3:
            print('min_n2 < 3, Attempting testing a subset of the data.', end=' ')
            original_dpr2 = dpr2
            dpr2 = [d for d in dpr2 if len(d)>3]
            print('Elements skipped:', [pair for pair in lengths2 if pair[1] < 3], end='\n\t      ')
            if len(dpr2) < 2:
                print('There are not sufficient data points for testing')
                print('-------')
                continue
            min_n2 = min([len(d) for d in dpr2])

        # one way t-test assumes homoscedasticity, we must check the samples have the same variance
        parametric = False
        if min_n1 > 99 and min_n2 > 99:
            print(f'min_n1 ({min_n1}) > 100 and min_n2 ({min_n2}) > 99, Normality not highly important, skipping test', end=' ')
            # same variances?
            levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
            if levene_p > 0.05:
                parametric = True
                print('Fail to reject Homoscedasticity', end=' ')
            else:
                print('Significantly different variances.', end=' ')

        elif min_n1 > 25 and min_n2 > 25:
            print(f'25 < min_n1 ({min_n1}), min_n2 ({min_n2}) < 100,', end='\n\t')
            if min([test_normality(d) for d in dpr1 + dpr2]) < 0.05:
                print('\n\tAt least one distribution is significantly non-normal.', end=' ')
            else:
                print('\n\tDistributions not significantly different from normal', end=' ')
                # same variances?
                levene_stat, levene_p = sp_stats.levene(*dpr1, *dpr2)
                if levene_p > 0.05:
                    parametric = True
                    print('Fail to reject Homoscedasticity', end=' ')
                else:
                    print('Significantly different variances.', end=' ')
        else:
            print(f'3 < min_n ({min_n1}, {min_n2}) < 25,', end=' ')

        if parametric:
            res = sp_stats.f_oneway(*dpr1, *dpr2)
            print('\n\tParametric test.\n\t', res[1], end=' ')
        else:
            res = sp_stats.kruskal(*dpr1, *dpr2)
            print('\n\tNon-parametric test.\n\t', res[1], end=' ')
        print('N.S.' if res[1] > alpha else 'Significant differences. Reject H0')
        
        dprs = dpr1 + dpr2
        pooled_dev = np.sqrt(np.sum([(r.count()-1) * (r.mean()**2) for r in dprs]) / (np.sum([r.count() for r in dprs]) - len(dprs)))
        print('Pooled deviation', pooled_dev,'\n-------')

In [110]:
differences_between_rois_in_different_class_different_region_pooled()

 ROIs in different class in sections ['isthmus', 'ampulla'] pooled across repeats
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-wide_end [7, 8, 9, 10, 11, 12]
3 < min_n (160, 7) < 25, 
	Non-parametric test.
	 0.8966913646102965 N.S.
Pooled deviation 0.10125968217008907 
-------
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-narrow_lumen [13, 14, 15, 16, 17, 18]
3 < min_n (160, 6) < 25, 
	Non-parametric test.
	 0.7979406458495825 N.S.
Pooled deviation 0.10098086641248166 
-------
Rois in isthmus-narrow_end [1, 2, 3, 4, 5, 6]
Rois in ampulla-wide_lumen [19, 20, 21, 22, 23, 24]
min_n2 < 3, Attempting testing a subset of the data. Elements skipped: [(20, 1), (21, 2), (22, 1)]
	      3 < min_n (160, 4) < 25, 
	Non-parametric test.
	 0.9661114067855778 N.S.
Pooled deviation 0.10118772861964512 
-------
Rois in isthmus-wide_end [7, 8, 9]
Rois in ampulla-narrow_lumen [13, 14, 15, 16, 17, 18]
3 < min_n (110, 6) < 25, 
	Non-parametric test.
	 0.5450632631895989 N.S