In [1]:
import ipywidgets as widgets
import pandas as pd
from IPython.core.display import display
from IPython.display import clear_output
from ipyfilechooser import FileChooser
from scipy import stats
import numpy as np
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqplot_2samples
import matplotlib.pyplot as plt
# --- Shared module-level state -------------------------------------------
# Table loaded from the most recently selected WT file; wt_selection
# rebinds it through ``global``.
wt_dist = None
# Factor multiplied into the first data column of every table before the
# statistics run.  NOTE(review): presumably the volume of one voxel in µm³
# (the KDE x-axis is labelled 'volume µm³') -- confirm against acquisition.
voxel_size = 7.536e-6
# Output area that receives all plots and test results.
output = widgets.Output()

# --- File picker for the WT distribution ---------------------------------
# Only files matching wt_*.csv are offered.
wt = FileChooser(
    'C:/Users/LAB-ADMIN/Desktop/pipeline-program/analysis_results'
)
wt.filter_pattern = ['wt_*.csv']
wt.title = "WT distribution"

def _load_distribution(csv_path):
    """Read one distribution CSV and return it without its first column.

    The first column is dropped positionally; the data columns that follow
    are returned as a DataFrame.
    """
    return pd.read_csv(csv_path, index_col=False).iloc[:, 1:]


def _scaled_first_column(dist):
    """Return the first column of *dist* as a 1-D array times ``voxel_size``."""
    return np.array(dist)[:, 0] * voxel_size


def _mann_whitney(reference, sample):
    """Run the Mann-Whitney U test of *reference* against *sample*.

    SciPy's ``method="exact"`` applies no correction for ties, so it is only
    used when the pooled data contain no repeated values.  Otherwise the
    tie-corrected ``"asymptotic"`` method is used instead.

    Returns SciPy's result object, which unpacks as ``(U, p)``.
    """
    pooled = np.concatenate((reference, sample))
    has_ties = np.unique(pooled).size < pooled.size
    method = "asymptotic" if has_ties else "exact"
    return stats.mannwhitneyu(reference, sample, method=method)


def wt_selection(chooser):
    """Compare the selected WT distribution with its NCR1 and NPC2 siblings.

    Callback for the WT file chooser.  The sibling files are looked up in the
    same directory by replacing the first two characters of the selected file
    name with ``ncr1`` / ``npc2``.  For each table the first data column is
    scaled by ``voxel_size``.  Everything is rendered into ``output``:

    * one figure with a Gaussian KDE curve per genotype,
    * Mann-Whitney U test of WT against each mutant,
    * two-sample Kolmogorov-Smirnov test of each mutant against WT,
    * a two-sample Q-Q plot of WT against each mutant.

    Args:
        chooser: The file chooser instance that triggered the callback; its
            ``selected``, ``selected_path`` and ``selected_filename``
            attributes are read.

    Side effects:
        Rebinds the module-level ``wt_dist`` to the loaded WT table and
        clears/replaces the contents of ``output``.
    """
    global wt_dist
    mutant_labels = ("NCR1", "NPC2")

    # Load all three tables before touching the output area, WT first.
    wt_dist = _load_distribution(chooser.selected)
    suffix = chooser.selected_filename[2:]  # file name without the "wt" prefix
    mutant_dists = {
        label: _load_distribution(
            f"{chooser.selected_path}/{label.lower()}{suffix}"
        )
        for label in mutant_labels
    }

    with output:
        clear_output()
        display(f"Statistics for {chooser.selected_filename}...")

        # Plain 1-D numeric arrays, keyed by the label used in the output.
        wt_array = _scaled_first_column(wt_dist)
        mutant_arrays = {
            label: _scaled_first_column(dist)
            for label, dist in mutant_dists.items()
        }
        samples = {"WT": wt_array, **mutant_arrays}

        # KDE per genotype
        kdes = {
            label: stats.gaussian_kde(values)
            for label, values in samples.items()
        }

        # Common x grid: the overall data range padded by 10 % on each side.
        min_x = min(values.min() for values in samples.values())
        max_x = max(values.max() for values in samples.values())
        dif = max_x - min_x
        xx = np.linspace(min_x - dif / 10, max_x + dif / 10, 10000)

        # Plot each KDE on one shared axis.
        _, ax = plt.subplots(figsize=(9, 7))
        for label, values in samples.items():
            ax.plot(xx, kdes[label](xx), label=f"{label} n={len(values)}")
        ax.set_xlabel('volume µm³')
        ax.set_ylabel('density')
        ax.legend()
        ax.set_title(title_selector.value)
        plt.show()

        # Mann-Whitney U-Test
        for label, values in mutant_arrays.items():
            U, p = _mann_whitney(wt_array, values)
            display(f"Mann-Whitney U-Test ({label}) - U: {U} p-value: {p}")

        # Kolmogorov–Smirnov test (two-sample: the second argument is data)
        for label, values in mutant_arrays.items():
            KS, p = stats.kstest(values, wt_array)
            display(f"Kolmogorov–Smirnov test ({label}) - KS: {KS} p-value: {p}")

        # Q-Q plot
        for label, values in mutant_arrays.items():
            display(f"Q-Q plot ({label})")
            qqplot_2samples(
                wt_array,
                values,
                xlabel="Quantiles of WT",
                ylabel=f"Quantiles of {label}",
                line="45",
            )
            plt.show()


# Free-text field whose current value wt_selection uses as the KDE plot title.
title_selector = widgets.Text(
    value='',
    placeholder='Title to use in plot',
    description='',
)

# Run the analysis each time a WT file is picked in the chooser.
wt.register_callback(wt_selection)

# Render the UI top to bottom: file picker, title field, results area.
for _widget in (wt, title_selector, output):
    display(_widget)

FileChooser(path='C:\Users\LAB-ADMIN\Desktop\pipeline-program\analysis_results', filename='', title='WT distri…

Text(value='', placeholder='Title to use in plot')

Output()