In [1]:
import pandas as pd
import numpy as np
import scipy.stats as ss
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from PIL import Image
from decimal import Decimal
import statsmodels.api as sa
import scikit_posthocs as sp

Post-processing the histograms output by detect.py: (1) normalize, (2) threshold, (3) compute the mean of each histogram.

In [None]:
dir_of_images = 'path/to/directory/with/sobel/filtered/histograms'
other_dir_of_images = 'likely/more/than/one/path'

# Number of leading histogram bins kept when computing the mean (arbitrary; tune for your data).
HIST_THRESHOLD = 100


def histogram_mean(hist, threshold=HIST_THRESHOLD):
    """Return the mean bin index of a thresholded, normalized histogram.

    Parameters
    ----------
    hist : pandas.DataFrame
        Histogram read with ``pd.read_csv(..., header=None)``; column 0 holds
        one count per bin and the row position is the bin index.
    threshold : int, optional
        Only the first ``threshold`` bins contribute to the mean.

    Returns
    -------
    numpy.float64
        ``sum(bin_index * count / total_count)`` over the kept bins, where
        ``total_count`` is the sum of the *whole* column (not just the kept
        bins). For non-negative counts the kept weights therefore sum to at
        most 1. An all-zero histogram yields NaN (0/0).
    """
    counts = hist[0]
    # Normalize by the full histogram total first, then keep the leading bins.
    weights = counts.iloc[:threshold] / counts.sum()
    bin_index = weights.index.to_numpy()  # 0 .. len(weights) - 1 for a default RangeIndex
    # np.sum (unlike Series.sum) propagates NaN, so a degenerate histogram stays visible.
    return np.sum(weights.to_numpy() * bin_index)


titration_ls = [dir_of_images, other_dir_of_images]
container = []  # one list of per-bead means per directory (population)
for image_dir in titration_ls:
    # sorted() makes the bead order independent of filesystem enumeration order.
    bead_container = [
        histogram_mean(pd.read_csv(file, header=None))
        for file in sorted(Path(image_dir).glob("**/*[0-9].csv"))
    ]
    container.append(bead_container)

Plotting 2 conditions (positive and negative). If there is a directory for each condition, this visualization can be expanded. Bar chart works best for two conditions; categorical scatter works best for a titration.

In [None]:
# One bar per condition: the height is the mean of that population's per-bead
# means and the error bar is their standard deviation.
bar_specs = [
    ('positive condition', container[0], 'tab:orange'),
    ('negative condition', container[1], 'b'),
]
for bar_label, bead_means, bar_color in bar_specs:
    bead_means = np.array(bead_means)
    plt.bar(x=bar_label, height=bead_means.mean(), yerr=bead_means.std(), width=0.5, color=bar_color)
plt.ylabel('Mean Pixel Value (Sobel Filtered Bead)')
plt.xticks(rotation=45)

Determining statistical significance between condition populations:

In [None]:
# Map each condition label to its list of per-bead means.
d = {'condition 1': container[0], 'condition 2': container[1]}

# One column per condition. Populations of unequal size are aligned on the row
# index, so the shorter column is padded with NaN.
df = pd.concat([pd.Series(v, name=k) for k, v in d.items()], axis=1)

# Stack the columns into one long population. The explicit dropna() removes the
# NaN padding even when DataFrame.stack() itself does not drop missing values.
pops = df.stack().dropna().reset_index()

# Long-form table the statistical tests run on: one row per bead, with its
# condition label ('condition') and its mean pixel value ('means').
stats_df = pops[['level_1', 0]].rename(columns={'level_1': 'condition', 0: 'means'})

Are your conditions different? Try the Kruskal-Wallis test, the non-parametric counterpart of one-way ANOVA.

In [None]:
# scipy's Kruskal-Wallis test is fed one numpy array per population.
ls = [np.asarray(bead_means) for bead_means in container]

In [None]:
# Kruskal-Wallis H-test across all populations. A small p suggests the
# conditions are meaningfully distinct; a large p suggests they are not.
kruskal_result = ss.kruskal(*ls)
H, p = kruskal_result.statistic, kruskal_result.pvalue

In [None]:
p #display the p-value returned by ss.kruskal

If conditions are different, it's good to follow up with a post-hoc test. Here we choose Conover's test.

In [None]:
# Pairwise Conover post-hoc comparisons with Holm-adjusted p-values.
# group_col must name the stats_df column that holds the condition labels,
# which is 'condition' (stats_df has no 'dilution' column).
sp.posthoc_conover(stats_df, val_col='means', group_col='condition', p_adjust='holm')