# Boxplots of individual features

Create boxplots of selected features from confocal overviews and test pairwise differences between conditions

In [None]:
from pathlib import Path

import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import colors as mcolors
from matplotlib import pyplot as plt
import pingouin as pg

In [None]:
## 1) read feature tables

# input locations
csv_feats_texture = 'data/texture_feats.csv'
csv_feats_other = 'data/other_feats.csv'
exp_overview_csv = 'data/auto_sir_experiment_overview.csv'

# load both feature tables and combine them on the (file, label) pair
df = pd.read_csv(csv_feats_texture).set_index(['file', 'label'])
df_feats_other = pd.read_csv(csv_feats_other)
df = df.merge(df_feats_other, on=['file', 'label'])

# load the experiment overview table, keeping only the columns used below
overview_columns = ['file', 'treatment', 'replicate_technical', 'replicate_biological', 'overlapping_tiles']
df_exp_overview = pd.read_csv(exp_overview_csv, sep=';')[overview_columns]

# the overview's 'file' column is matched against the file name without
# directory and extension, so derive that key before joining
df['file_stem'] = df.reset_index()['file'].map(lambda name: Path(name).stem)
df = df.merge(
    df_exp_overview,
    left_on='file_stem',
    right_on='file',
    suffixes=(None, '_duplicate'),
)

# replicate numbers are labels rather than quantities: store them as strings
for replicate_column in ('replicate_technical', 'replicate_biological'):
    df[replicate_column] = df[replicate_column].map(str)

# last '_'-separated token of the treatment name
df['treatment_icm_grouped'] = df['treatment'].str.rsplit('_', n=1).str[-1]
# biological replicate id made unique across treatments
df['replicate_biological_with_treat'] = df['treatment'] + '_' + df['replicate_biological']

# add column that keeps 3d icm separate but pools 6d and 9d icm
treatment_map = {
    'old': 'old',
    'young': 'young',
    '3d_icm': '3d_icm',
    '6d_icm': '6d_9d_icm',
    '9d_icm': '6d_9d_icm',
}
# plain dict lookup on purpose: a treatment missing from the map raises KeyError
df['treatment_icm_grouped_2'] = df['treatment'].map(lambda name: treatment_map[name])

# remove non-overlapping overviews (run 3f2f7d32d280ce05293143834aa15a08)
df = df[df['overlapping_tiles']]

df.info()

In [None]:
## 2) plot boxplot of selected features

# condition order (left to right on the x-axis) and the color of each condition
order = ['young', 'old', '3d_icm', '6d_9d_icm']
palette_d = {
    'old': np.array([117, 109, 169])/255,
    'young': mcolors.hex2color(mcolors.XKCD_COLORS['xkcd:light orange']),
    '3d_icm': mcolors.hex2color(mcolors.XKCD_COLORS['xkcd:sky blue']),
    '6d_9d_icm': np.array([148, 212, 220])/255
}
# colors as a list, in the same sequence as `order`
palette = [palette_d[condition] for condition in order]

# what to plot & plot parameters
to_plot = (
    {'feature':'other_area', 'logy':False, 'title':'Cell area', 'ylabel':'area[px^2]'},
    {'feature':'other_eccentricity', 'logy':False, 'title':'Cell eccentricity', 'ylabel':'eccentricity'},
    {'feature':'tex_energy_4_0.0', 'logy':True, 'title':'Energy of GLCM\nat 4px distance along x-axis', 'ylabel':'GLCM Energy (log scale)'},
    {'feature':'tex_dissimilarity_2_90.0', 'logy':False, 'title':'Dissimilarity of GLCM\nat 2px distance along y-axis', 'ylabel':'GLCM Dissimilarity'},
)

# one square panel per feature, side by side
siz = 5
fig, axs = plt.subplots(ncols=len(to_plot), figsize=(siz * len(to_plot), siz))

# plt.subplots returns a bare Axes (not an array) for a single column;
# np.atleast_1d keeps the loop working when only one feature is plotted
for args, ax in zip(to_plot, np.atleast_1d(axs)):

    # look the settings up by key so the entry order inside each dict is irrelevant
    feature = args['feature']
    log_y = args['logy']
    title = args['title']
    ylab = args['ylabel']

    # drop lowest and highest .5% of data to make nicer plot
    # (strict comparisons: rows equal to a quantile bound are dropped as well)
    ql, qu = np.quantile(df[feature], (0.005, 0.995))
    df_plot = df[ (df[feature] > ql) & (df[feature] < qu) ]

    # hue_order must match `order`: a list palette is assigned to the hue levels
    # in hue order, which otherwise defaults to order of appearance in the data
    # and could give a condition the color meant for another one
    sns.boxplot(ax=ax, data=df_plot, y=feature, x='treatment_icm_grouped_2', hue='treatment_icm_grouped_2', log_scale=(False, log_y),
                order=order, hue_order=order, palette=palette,
                flierprops = {'marker': '+', 'markersize': 5, 'alpha':0.2})

    ax.set_xlabel('Condition')
    ax.set_ylabel(ylab)
    ax.set_title(title)

fig.tight_layout()

# fig.savefig('example_feature_boxplots.pdf')

In [None]:
## 3) do pairwise significance tests for plots above

# One result table per plotted feature, computed on the full `df`
# (not on the quantile-trimmed data used for plotting).
# pg.pairwise_tests does Mann-Whitney-U tests (parametric=False) with
# Bonferroni-Holm correction (padjust='holm'); it is called once per feature,
# so the correction is applied within each feature's set of comparisons.
# The compared feature is appended to each table as a 'feature' column.
res = [
    pg.pairwise_tests(
        data=df,
        dv=plot_args['feature'],
        between='treatment_icm_grouped_2',
        parametric=False,
        padjust='holm',
    ).assign(feature=plot_args['feature'])
    for plot_args in to_plot
]

# stack the per-feature tables into one table, show
df_significance = pd.concat(res)
df_significance