In [None]:
from pathlib import Path

import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from openTSNE import TSNE as oTSNE

# 1) Embedding features from STED data

In [None]:
## CSV file with features
# features from intensity normalized per image
feature_csv = '/scratch/hoerl/auto_sir_dna_comp/20220829_glcm_good95_imagenorm.csv'
# alternative: intensity normalized per replicate
# feature_csv = '/scratch/hoerl/auto_sir_dna_comp/20220829_glcm_good95_replicatenorm.csv'

## CSV with experimental conditions
exp_overview_csv = '/scratch/hoerl/auto_sir_experiment_overview.csv'

# load the feature csv
df = pd.read_csv(feature_csv)

# get file stem for merge with standardized replicate names
df['file_stem'] = df.filename.apply(lambda f: Path(f).stem)

# remove one outlying replicate (looks blurry -> air bubble/dirty objective?)
# The two columns are compared explicitly: DataFrame.isin([(a, b)]) tests every single
# cell against the tuple itself, which never equals a plain string, so no row would be dropped.
is_outlier_replicate = (df['cell_class'] == 'IMR90_young_untreated') & (df['replicate'] == '20200705_rep2')
df = df[~is_outlier_replicate]

# read and add standardized replicate/condition info
df_exp_overview = pd.read_csv(exp_overview_csv, sep=';')[['file', 'treatment', 'replicate_technical', 'replicate_biological', 'overlapping_tiles']]
# replicate ids are used as labels (and concatenated with the treatment below) -> store as strings
df_exp_overview['replicate_technical'] = df_exp_overview['replicate_technical'].apply(str)
df_exp_overview['replicate_biological'] = df_exp_overview['replicate_biological'].apply(str)

# default (inner) merge: rows without a matching entry in the overview table are dropped
df = df.merge(df_exp_overview, left_on='file_stem', right_on='file', suffixes=(None, '_duplicate') )

# auxiliary columns for grouping
df['treatment_icm_grouped'] = df.treatment.str.split('_').str[-1]
df['replicate_biological_with_treat'] = df['treatment'] + '_' + df['replicate_biological']

In [None]:
# quick counts: number of rows per treatment and biological replicate
count_keys = ['treatment', 'replicate_biological']
df.groupby(count_keys).size()

In [None]:
## get features

# Everything listed here is metadata, bookkeeping or an auxiliary grouping column;
# whatever remains after dropping is used as a feature.
columns_to_drop = [
    # merged experiment overview / file bookkeeping
    'file', 'file_stem', 'treatment', 'replicate_technical', 'replicate_biological', 'overlapping_tiles',
    'dataset_name', 'filename', 'replicate', 'cell_class', 'condition',
    # auxiliary grouping columns added after the merge
    'treatment_icm_grouped', 'replicate_biological_with_treat',
    'classification_manual', 'classification_auto',
    'img_height', 'img_width',
    # ('mask_area' is deliberately NOT dropped, i.e. kept as a feature)
    'num_blank_rows', 'num_blank_cols',
    # ('intensity_mu' and 'intensity_sigma' are deliberately NOT dropped either)
    'perc_high', 'perc_low', 'fg_mean',
    'perc_high_image', 'perc_low_image'
]

df_feats = df.drop(columns=columns_to_drop)

# we have NaNs -> impute (SimpleImputer with its default settings)
tex_values = SimpleImputer().fit_transform(df_feats.values)

# normalize features; the fitted scaler is kept in `scaler`
scaler = StandardScaler()
tex_values = scaler.fit_transform(tex_values)

In [None]:
# calculate tSNE
# A fixed random_state keeps the embedding reproducible between runs; the manually
# chosen selection rectangles further down are given in embedding coordinates and
# would otherwise not be guaranteed to point at the same cells after a re-run.
# NOTE(review): the coordinates are taken from .transform() on the fitted embedding
# rather than from the object returned by .fit() -- confirm that this is intended.
ts = oTSNE(perplexity=200, n_jobs=-1, random_state=42).fit(tex_values).transform(tex_values)

# store both embedding components next to the per-cell metadata
df[['embedding_comp0', 'embedding_comp1']] = ts

In [None]:
# One panel per treatment group: cells of the group are colored by replicate,
# all remaining cells are drawn in light gray underneath.
group_col = 'treatment_icm_grouped'
hue_col = 'replicate_biological_with_treat'
# alternative grouping:
# group_col = 'treatment'
# hue_col = 'replicate_biological'

fig, axs = plt.subplots(ncols=3, figsize=(28,8))
sns.set_palette('deep')

# zip() pairs the three axes with the groups; groups beyond the third would not be drawn
for ax, (group_name, df_group) in zip(axs, df.groupby(group_col)):

    df_others = df[df[group_col] != group_name]

    # background: everything that is not part of the current group
    sns.scatterplot(ax=ax, data=df_others, x='embedding_comp0', y='embedding_comp1', color='lightgray', alpha=1, s=25)
    # foreground: the current group, colored by replicate
    sns.scatterplot(ax=ax, data=df_group, x='embedding_comp0', y='embedding_comp1', hue=hue_col, alpha=1, s=25)

    ax.set_title(f'{group_name} ({len(df_group)} cells)')
    ax.set_xlabel('t-SNE component 1')
    ax.set_ylabel('t-SNE component 2')

# set the PDF font type before writing the figure
plt.rc('pdf', fonttype='42')
fig.savefig('/home/hoerl/ageing_dna_texture_figure_parts/tsne_per-treatment_sted.pdf')

### Plot example images from parts of embedding

In [None]:
import h5py as h5
from skimage.exposure import rescale_intensity

def load_single_image_from_h5(filename, dataset_name, norm_quantiles):
    """Read one image from an HDF5 file and rescale its intensities.

    Parameters
    ----------
    filename : path of the HDF5 file to open (read-only).
    dataset_name : name used to build the dataset path ``/experiment/<dataset_name>/0/0``.
    norm_quantiles : value passed to ``rescale_intensity`` as ``in_range``
        (the callers in this notebook pass a ``(perc_low, perc_high)`` pair).

    Returns
    -------
    The dataset content with singleton dimensions removed, cast to float32 and
    passed through ``rescale_intensity``.
    """
    dataset_path = f'/experiment/{dataset_name}/0/0'
    with h5.File(filename, 'r') as fd:
        # [...] reads the full dataset into memory, so it stays usable after the file is closed
        raw = fd[dataset_path][...]
    img = raw.squeeze().astype(np.float32)
    return rescale_intensity(img, in_range=norm_quantiles)


def plot_examples_sted(df, selection_pos, selection_size, n_imgs_to_sample = 2, plot=True):
    """Sample cells from a rectangle in the embedding and load their STED images.

    Parameters
    ----------
    df : DataFrame with columns 'embedding_comp0', 'embedding_comp1', 'filename',
        'dataset_name', 'perc_low', 'perc_high', 'treatment', 'replicate_biological'
        and 'replicate_technical'.
    selection_pos : (comp0, comp1) lower corner of the rectangle (exclusive bound).
    selection_size : (size0, size1) extent of the rectangle; the upper corner
        ``selection_pos + selection_size`` is an exclusive bound as well.
    n_imgs_to_sample : maximum number of cells to draw at random. If the rectangle
        contains fewer cells, all of them are used instead of raising.
    plot : if True, show the loaded images side by side.

    Returns
    -------
    (imgs, sample) : list of loaded images and the DataFrame rows (metadata columns
        only) they were loaded from. Both are empty if the rectangle contains no cell.
    """
    embedding = df[['embedding_comp0', 'embedding_comp1']]
    lower = np.array(selection_pos)
    upper = lower + np.array(selection_size)
    selection = (embedding < upper).all(axis=1) & (embedding > lower).all(axis=1)

    df_selection = df[selection]
    candidates = df_selection[['filename', 'dataset_name', 'perc_low', 'perc_high', 'treatment', 'replicate_biological', 'replicate_technical']]

    # never request more rows than the rectangle contains (DataFrame.sample would raise)
    n_take = min(n_imgs_to_sample, len(candidates))
    sample = candidates.sample(n_take) if n_take > 0 else candidates.iloc[:0]

    imgs = []
    for _, row in sample.iterrows():
        imgs.append(load_single_image_from_h5(row['filename'], row['dataset_name'], (row['perc_low'], row['perc_high'])))

    if plot and imgs:
        # squeeze=False: always get a 2D axes array, also for a single image
        fig, axs = plt.subplots(ncols=len(imgs), figsize=(12,6), squeeze=False)
        for ax, img_ in zip(axs.flat, imgs):
            ax.imshow(img_, cmap='gray')
            ax.axis('off')
        fig.tight_layout()

    return imgs, sample

In [None]:
from matplotlib.patches import Rectangle

# MANUAL selection of rectangles in the embedding (one lower corner per rectangle);
# alternatively define them in napari, see below
selection_positions = [
    (3, -20), # treat 1
    (20, 5), # treat 2
    (-5, -8), # young 2
    (-25, 5) # old grainy
]
# all rectangles share the same extent
selection_sizes = [(7, 7) for _ in selection_positions]

# select single index to plot
idx = 1
selection_pos, selection_size = selection_positions[idx], selection_sizes[idx]

def scatterplot_with_rectangle(df, selection_pos, selection_size):
    """Draw the whole embedding in light gray and outline one selection rectangle in red.

    Parameters
    ----------
    df : DataFrame with columns 'embedding_comp0' and 'embedding_comp1'.
    selection_pos : anchor point of the rectangle, passed to ``Rectangle`` as first argument.
    selection_size : sequence unpacked into the remaining positional ``Rectangle`` arguments.

    A new 8x8 figure is created; nothing is returned.
    """
    outline = Rectangle(selection_pos, *selection_size, fill=None, color='red')

    _, ax = plt.subplots(figsize=(8,8))
    sns.scatterplot(ax=ax, data=df, x='embedding_comp0', y='embedding_comp1', color='lightgray', alpha=1, s=25)
    ax.add_artist(outline)

# show the chosen rectangle in the embedding, then a few example images from inside it
scatterplot_with_rectangle(df, selection_pos=selection_pos, selection_size=selection_size)
plot_examples_sted(df, selection_pos=selection_pos, selection_size=selection_size);

### Save examples and tSNE with rectangle for all selections

In [None]:
from tifffile import imwrite

base_out_path = Path('/home/hoerl/ageing_dna_texture_figure_parts/')

# NOTE: the loop variables overwrite the module-level selection_pos / selection_size
selections = zip(selection_positions, selection_sizes)
for i, (selection_pos, selection_size) in enumerate(selections):

    # one output folder per selection (the parent folder has to exist already)
    outpath_i = base_out_path / f'sted_examples_selection_{i}'
    outpath_i.mkdir(exist_ok=True)

    # embedding with the selection marked; plt.savefig writes the figure that was just created
    scatterplot_with_rectangle(df, selection_pos, selection_size)
    plt.rc('pdf', fonttype='42')
    plt.savefig(outpath_i / f'tsne_selection{i}_sted.pdf')

    # example images from inside the rectangle, without plotting them
    sample_imgs, df_sample = plot_examples_sted(df, selection_pos, selection_size, n_imgs_to_sample=14, plot=False)

    # file name encodes source file, dataset and condition of each example
    for (_, row), img in zip(df_sample.iterrows(), sample_imgs):
        fname = f'{Path(row.filename).stem}_{row.dataset_name}_{row.treatment}_{row.replicate_biological}_{row.replicate_technical}.tif'
        imwrite(outpath_i / fname, img, imagej=True)

# 2) Embedding of features from overview data

In [None]:
from pathlib import Path

## 1) read both texture and other feature tables, join

# NOTE: select global norm texture feats or per cell normalized
csv_feats_texture = '/scratch/hoerl/20230507_imr90_stitching/20230507_imr90_ov_stitch_output/segmentation_cellpose_maxproj_d70_ft07/texture_feats.csv'
# csv_feats_texture = '/scratch/hoerl/20230507_imr90_stitching/20230507_imr90_ov_stitch_output/segmentation_cellpose_maxproj_d70_ft07/texture_feats_normalized_per_cell.csv'
csv_feats_other = '/scratch/hoerl/20230507_imr90_stitching/20230507_imr90_ov_stitch_output/segmentation_cellpose_maxproj_d70_ft07/other_feats.csv'

## Spinning disk data (not used atm)
# csv_feats_texture = '/Users/david/Desktop/IMR90_30112022/segmentation_cellpose_d120_ft06/texture_feats.csv'
# csv_feats_texture = '/Users/david/Desktop/IMR90_30112022/segmentation_cellpose_d120_ft06/texture_feats_normalized_per_cell.csv'
# csv_feats_other = '/Users/david/Desktop/IMR90_30112022/segmentation_cellpose_d120_ft06/other_feats.csv'

# both tables are joined on the (file, label) pair
join_keys = ['file', 'label']
df_texture = pd.read_csv(csv_feats_texture).set_index(join_keys)
df_other = pd.read_csv(csv_feats_other)
df = df_texture.merge(df_other, on=join_keys)

# file stem (name without directory and suffix) is the key for the experiment overview merge
df['file_stem'] = df.reset_index()['file'].apply(lambda f: Path(f).stem)


In [None]:
exp_overview_csv = '/scratch/hoerl/auto_sir_experiment_overview.csv'

# only these columns of the (';'-separated) overview table are needed
overview_columns = ['file', 'treatment', 'replicate_technical', 'replicate_biological', 'overlapping_tiles']
df_exp_overview = pd.read_csv(exp_overview_csv, sep=';')[overview_columns]

# replicate ids are treated as labels -> store as strings
for replicate_col in ('replicate_technical', 'replicate_biological'):
    df_exp_overview[replicate_col] = df_exp_overview[replicate_col].apply(str)

df = df.merge(df_exp_overview, left_on='file_stem', right_on='file', suffixes=(None, '_duplicate') )

# remove non-overlapping overviews (run 3f2f7d32d280ce05293143834aa15a08)
df = df[df['overlapping_tiles']]

In [None]:
# auxiliary grouping columns:
# - last '_'-separated token of the treatment name
# - treatment name combined with the biological replicate id
treatment_tokens = df['treatment'].str.split('_')
df['treatment_icm_grouped'] = treatment_tokens.str[-1]
df['replicate_biological_with_treat'] = df['treatment'] + '_' + df['replicate_biological']

In [None]:
# make sure both replicate id columns hold strings
for replicate_col in ['replicate_technical', 'replicate_biological']:
    df[replicate_col] = df[replicate_col].apply(str)

In [None]:
# set index again: each row is identified by the (file, label) pair
index_cols = ['file', 'label']
df = df.set_index(index_cols)

# display the resulting table
df

In [None]:
## 2) prepare features

# texture features are all columns whose name starts with 'tex';
# three additional non-texture columns are appended
texture_cols = [c for c in df.columns if c.startswith('tex')]
other_cols = ['other_mean_intensity', 'other_area', 'other_eccentricity']
tex_values = df[texture_cols + other_cols].values

# impute NaNs (probably not necessary here, but kept in line with the STED section)
tex_values = SimpleImputer().fit_transform(tex_values)

# normalize features; the fitted scaler is kept in `scaler`
scaler = StandardScaler()
tex_values = scaler.fit_transform(tex_values)

In [None]:
# calculate tSNE
# A fixed random_state keeps the embedding reproducible between runs; the manually
# chosen selection rectangles further down are given in embedding coordinates and
# would otherwise not be guaranteed to point at the same cells after a re-run.
# NOTE(review): the coordinates are taken from .transform() on the fitted embedding
# rather than from the object returned by .fit() -- confirm that this is intended.
ts = oTSNE(perplexity=100, n_jobs=-1, random_state=42).fit(tex_values).transform(tex_values)

# store both embedding components next to the per-cell metadata
df[['embedding_comp0', 'embedding_comp1']] = ts

In [None]:
# One panel per treatment group: cells of the group are colored by replicate,
# all remaining cells are drawn in light gray underneath.
group_col = 'treatment_icm_grouped'
hue_col = 'replicate_biological_with_treat'
# alternative grouping:
# group_col = 'treatment'
# hue_col = 'replicate_biological'

fig, axs = plt.subplots(ncols=3, figsize=(28,8))
sns.set_palette('deep')

# zip() pairs the three axes with the groups; groups beyond the third would not be drawn
for ax, (group_name, df_group) in zip(axs, df.groupby(group_col)):

    df_others = df[df[group_col] != group_name]

    # background: everything that is not part of the current group
    sns.scatterplot(ax=ax, data=df_others, x='embedding_comp0', y='embedding_comp1', color='lightgray', alpha=1, s=25)
    # foreground: the current group, colored by replicate
    sns.scatterplot(ax=ax, data=df_group, x='embedding_comp0', y='embedding_comp1', hue=hue_col, alpha=1, s=25)

    ax.set_title(f'{group_name} ({len(df_group)} cells)')
    ax.set_xlabel('t-SNE component 1')
    ax.set_ylabel('t-SNE component 2')

# save (currently disabled; only the PDF font type is set)
plt.rc('pdf', fonttype='42')
# fig.savefig('/home/hoerl/ageing_dna_texture_figure_parts/tsne_per-treatment_confocal.pdf')

### Plots split into young / old / 3d / 6d&9d

In [None]:
# add column grouping 3d icm and 6&9d icm separately
# (6d and 9d are merged into one group, every other treatment keeps its name)
treatment_map = {
    'old': 'old',
    'young': 'young',
    '3d_icm': '3d_icm',
    '6d_icm': '6d_9d_icm',
    '9d_icm': '6d_9d_icm',
}
# direct dict lookup on purpose: a treatment missing from the map raises a KeyError
df['treatment_icm_grouped_2'] = df['treatment'].apply(lambda treatment: treatment_map[treatment])

# check which groups are present
df['treatment_icm_grouped_2'].unique()

In [None]:
import colorsys
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors as mcolors

def get_colors_as_img(rgb_colors, patch_size=(50, 50), conc_axis=1):
    """Build a preview image made of one uniformly colored patch per color.

    Parameters
    ----------
    rgb_colors : iterable of colors, each convertible to an array of 3 values.
    patch_size : tuple giving the (rows, cols) size of one patch.
    conc_axis : axis along which the patches are concatenated
        (1 puts them side by side, 0 stacks them).

    Returns
    -------
    Array of the concatenated patches with a trailing axis of length 3.
    """
    patch_shape = patch_size + (3,)
    # zeros + color broadcasts the color over the whole patch
    return np.concatenate(
        [np.zeros(patch_shape) + np.array(color) for color in rgb_colors],
        conc_axis,
    )

def get_l_shades(rgb_color, n=5, extent=0.4, min_l=0.2, max_l=1):
    """Return ``n`` variants of a color that differ only in HLS lightness.

    The lightness values are evenly spaced over a window of width ``extent``
    centered on the lightness of ``rgb_color``; the window ends are clipped
    to ``min_l`` / ``max_l``. Hue and saturation are kept.

    Parameters
    ----------
    rgb_color : sequence of three RGB values, unpacked into ``colorsys.rgb_to_hls``.
    n : number of shades.
    extent : width of the lightness window.
    min_l, max_l : lower / upper clipping bound for the lightness.

    Returns
    -------
    List of ``n`` RGB tuples, ordered from the lowest to the highest lightness value.

    Raises
    ------
    ValueError : if ``extent`` is larger than ``max_l - min_l``.
    """
    if extent > max_l - min_l:
        raise ValueError(f'luminance extent {extent} is larger than max {max_l} - min {min_l} difference.')

    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb_color)
    lowest = max(min_l, lightness - extent/2)
    highest = min(max_l, lightness + extent/2)

    l_shades = []
    for shade_lightness in np.linspace(lowest, highest, n):
        l_shades.append(colorsys.hls_to_rgb(hue, shade_lightness, saturation))
    return l_shades

# one list of lightness shades per treatment group
# (keys match the values of the 'treatment_icm_grouped_2' column)
palettes = {
    # alternative base color for 'old':
    # 'old': get_l_shades(mcolors.hex2color(mcolors.XKCD_COLORS['xkcd:blue purple']), n=3, min_l=0.4, extent=0.5),
    'old': get_l_shades(np.array([117, 109, 169])/255, n=3, min_l=0.5, extent=0.3),
    'young': get_l_shades(mcolors.hex2color(mcolors.XKCD_COLORS['xkcd:light orange']), n=3, min_l=0.5, extent=0.4),
    '3d_icm': get_l_shades(mcolors.hex2color(mcolors.XKCD_COLORS['xkcd:sky blue']), n=2, max_l=0.7, extent=0.39),
    # alternative base color for '6d_9d_icm':
    # '6d_9d_icm': get_l_shades(mcolors.hex2color(mcolors.XKCD_COLORS['xkcd:sky blue']), n=5, extent=0.6, min_l=0, max_l=0.7)
    '6d_9d_icm': get_l_shades(np.array([148, 212, 220])/255, n=5, extent=0.6, min_l=0, max_l=0.7)
}

# preview: one small figure per palette, titled with the group name
for palette_name, shades in palettes.items():
    fig, ax = plt.subplots()
    ax.imshow(get_colors_as_img(shades))
    ax.set_title(palette_name)


In [None]:
group_col = 'treatment_icm_grouped_2'
hue_col = 'replicate_biological_with_treat'

# one separate figure (and PDF) per treatment group
for group_name, df_group in df.groupby(group_col):

    # sorting by the hue column fixes the order in which hue levels are encountered
    df_group = df_group.sort_values(hue_col)

    # get all cells not in current group
    df_others = df[df[group_col] != group_name]

    fig, ax = plt.subplots(figsize=(8,8))

    # plot others in gray
    sns.scatterplot(ax=ax, data=df_others, x='embedding_comp0', y='embedding_comp1', color='lightgray', alpha=1, s=25, label='others')

    # plot group in color, using the group's shade palette in reversed order
    group_palette = reversed(palettes[group_name])
    sns.scatterplot(ax=ax, data=df_group, x='embedding_comp0', y='embedding_comp1', palette=group_palette, hue=hue_col, alpha=1, s=25)

    ax.set_title(f'{group_name} ({len(df_group)} cells)')
    ax.set_xlabel('t-SNE component 1')
    ax.set_ylabel('t-SNE component 2')

    # save
    plt.rc('pdf', fonttype='42')
    fig.savefig(f'/home/hoerl/ageing_dna_texture_figure_parts/tsne_per-treatment_confocal_v2_{group_name}.pdf')



### functions for extracting examples from part of embedding

In [None]:
from nd2reader import ND2Reader
from pathlib import Path
from tifffile import memmap
from scipy.ndimage import gaussian_filter


def plot_examples_nd2(df, selection_pos, selection_size, base_path, n_imgs_to_sample = 6, plot=True):
    """Sample cells from a rectangle in the embedding and cut them out of ND2 files.

    Parameters
    ----------
    df : DataFrame whose index entries unpack into (file, label) and which has the
        columns 'embedding_comp0', 'embedding_comp1' and 'other_bbox-0' .. 'other_bbox-3'
        (used as y0, x0, y1, x1).
    selection_pos : (comp0, comp1) lower corner of the rectangle (exclusive bound).
    selection_size : (size0, size1) extent of the rectangle; the upper corner is exclusive too.
    base_path : Path-like object supporting ``/``; ``base_path / file`` is opened with ND2Reader.
    n_imgs_to_sample : maximum number of cells to draw at random. If the rectangle
        contains fewer cells, all of them are used instead of raising.
    plot : if True, show the cutouts in a grid with 3 columns.

    Returns
    -------
    (sample_cuts, df_sample) : list of cutouts and the sampled DataFrame rows.
        Both are empty if the rectangle contains no cell.
    """
    embedding = df[['embedding_comp0', 'embedding_comp1']]
    lower = np.array(selection_pos)
    upper = lower + np.array(selection_size)
    selection = (embedding < upper).all(axis=1) & (embedding > lower).all(axis=1)

    df_selection = df[selection]

    # never request more rows than the rectangle contains (DataFrame.sample would raise)
    n_take = min(n_imgs_to_sample, len(df_selection))
    df_sample = df_selection.sample(n_take) if n_take > 0 else df_selection.iloc[:0]

    bbox_cols = [f'other_bbox-{i}' for i in range(4)]
    sample_cuts = []
    for (file, _), props in df_sample.iterrows():
        with ND2Reader(str(base_path / file)) as reader:
            y0, x0, y1, x1 = props[bbox_cols].values.astype(int)
            # crop the bounding box from the first item of the reader
            sample_cuts.append(np.array(reader[0][y0:y1, x0:x1]))

    if plot and sample_cuts:
        # 3 columns, as many rows as needed (6 images -> 2 rows on a 12x9 figure)
        n_cols = 3
        n_rows = int(np.ceil(len(sample_cuts) / n_cols))
        fig, axs = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=(12, 4.5 * n_rows), squeeze=False)
        for ax in axs.flat:
            ax.axis('off')
        for ax, img_ in zip(axs.flat, sample_cuts):
            ax.imshow(img_, cmap='gray')
        fig.tight_layout()

    return sample_cuts, df_sample


def plot_examples_tiffstack(df, selection_pos, selection_size, base_path, n_imgs_to_sample = 3, plot=True, pad_bbox=0):
    """Sample cells from a rectangle in the embedding and cut them out of TIFF stacks.

    Parameters
    ----------
    df : DataFrame whose index entries unpack into (file, label) and which has the
        columns 'embedding_comp0', 'embedding_comp1' and 'other_bbox-0' .. 'other_bbox-3'
        (used as y0, x0, y1, x1).
    selection_pos : (comp0, comp1) lower corner of the rectangle (exclusive bound).
    selection_size : (size0, size1) extent of the rectangle; the upper corner is exclusive too.
    base_path : Path-like object supporting ``/``; ``base_path / file`` is memory-mapped.
    n_imgs_to_sample : maximum number of cells to draw at random. If the rectangle
        contains fewer cells, all of them are used instead of raising.
    plot : if True, show the cutouts in a grid with 3 columns.
    pad_bbox : number of pixels by which the bounding box is enlarged on every side
        (clipped to the image borders).

    Returns
    -------
    (sample_cuts, df_sample) : list of cutouts (maximum over the first stack axis,
        Gaussian filtered with sigma 0.5) and the sampled DataFrame rows.
        Both are empty if the rectangle contains no cell.
    """
    embedding = df[['embedding_comp0', 'embedding_comp1']]
    lower = np.array(selection_pos)
    upper = lower + np.array(selection_size)
    selection = (embedding < upper).all(axis=1) & (embedding > lower).all(axis=1)

    df_selection = df[selection]

    # never request more rows than the rectangle contains (DataFrame.sample would raise)
    n_take = min(n_imgs_to_sample, len(df_selection))
    df_sample = df_selection.sample(n_take) if n_take > 0 else df_selection.iloc[:0]

    bbox_cols = [f'other_bbox-{i}' for i in range(4)]
    sample_cuts = []
    for (file, _), props in df_sample.iterrows():
        # the stack is indexed as [plane, y, x] below
        img_ = memmap(base_path / file)
        y0, x0, y1, x1 = props[bbox_cols].values.astype(int)

        # cut a bit more, make sure we remain inside img
        y0 = max(y0 - pad_bbox, 0)
        x0 = max(x0 - pad_bbox, 0)
        y1 = min(y1 + pad_bbox, img_.shape[1])
        x1 = min(x1 + pad_bbox, img_.shape[2])

        # maximum projection over the first axis, then light smoothing
        img_ = img_[:, y0:y1, x0:x1].max(axis=0)
        sample_cuts.append(gaussian_filter(img_, 0.5))

    if plot and sample_cuts:
        # 3 columns, as many rows as needed (3 images -> 1 row on a 12x6 figure)
        n_cols = 3
        n_rows = int(np.ceil(len(sample_cuts) / n_cols))
        fig, axs = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=(12, 6 * n_rows), squeeze=False)
        for ax in axs.flat:
            ax.axis('off')
        for ax, img_ in zip(axs.flat, sample_cuts):
            ax.imshow(img_, cmap='gray')
        fig.tight_layout()

    return sample_cuts, df_sample


In [None]:
from matplotlib.patches import Rectangle

# MANUAL selection of rectangles in the embedding (one lower corner per rectangle);
# alternatively define them in napari, see below
selection_positions = [
    (-30, -20), # treated 1
    (-40, 5), # treated 2
    (-5, 25), # treated 3 (young-like)
    (-15, -5), # old 1
    (10, -25), # young 1
    (10, -5), # young 2
]

# all rectangles share the same extent
selection_sizes = [(10, 10) for _ in selection_positions]

# select single index to plot
idx = 1
selection_pos, selection_size = selection_positions[idx], selection_sizes[idx]

# folder that the 'file' entries of the feature table are resolved against
# base_path = Path('/Users/david/Desktop/IMR90_30112022/')
base_path = Path('/scratch/hoerl/20230507_imr90_stitching/20230507_imr90_ov_stitch_output')

def scatterplot_with_rectangle(df, selection_pos, selection_size):
    """Draw the whole embedding in light gray and outline one selection rectangle in red.

    Same definition as in the STED part of this notebook, repeated here.

    Parameters
    ----------
    df : DataFrame with columns 'embedding_comp0' and 'embedding_comp1'.
    selection_pos : anchor point of the rectangle, passed to ``Rectangle`` as first argument.
    selection_size : sequence unpacked into the remaining positional ``Rectangle`` arguments.

    A new 8x8 figure is created; nothing is returned.
    """
    outline = Rectangle(selection_pos, *selection_size, fill=None, color='red')

    _, ax = plt.subplots(figsize=(8,8))
    sns.scatterplot(ax=ax, data=df, x='embedding_comp0', y='embedding_comp1', color='lightgray', alpha=1, s=25)
    ax.add_artist(outline)

# show the chosen rectangle in the embedding, then a few padded example cutouts from inside it
scatterplot_with_rectangle(df, selection_pos=selection_pos, selection_size=selection_size)
plot_examples_tiffstack(df, selection_pos=selection_pos, selection_size=selection_size, base_path=base_path, pad_bbox=10);

In [None]:
# do multiple example plots of rectangular bboxes in embedding

from tifffile import imwrite

base_out_path = Path('/home/hoerl/ageing_dna_texture_figure_parts/')

# NOTE: the loop variables overwrite the module-level selection_pos / selection_size
selections = zip(selection_positions, selection_sizes)
for i, (selection_pos, selection_size) in enumerate(selections):

    # one output folder per selection (the parent folder has to exist already)
    outpath_i = base_out_path / f'confocal_examples_selection_{i}'
    outpath_i.mkdir(exist_ok=True)

    # embedding with the selection marked; plt.savefig writes the figure that was just created
    scatterplot_with_rectangle(df, selection_pos, selection_size)
    plt.rc('pdf', fonttype='42')
    plt.savefig(outpath_i / f'tsne_selection{i}_confocal.pdf')

    # padded example cutouts from inside the rectangle, without plotting them
    sample_imgs, df_sample = plot_examples_tiffstack(df, selection_pos, selection_size, base_path, n_imgs_to_sample=14, plot=False, pad_bbox=10)

    # file name encodes source file, cell label and condition; intensities are rescaled before writing
    for ((_, lab), row), img in zip(df_sample.iterrows(), sample_imgs):
        fname = f'{row.file_stem}_{lab}_{row.treatment}_{row.replicate_biological}_{row.replicate_technical}.tif'
        imwrite(outpath_i / fname, rescale_intensity(img.astype(np.float32)), imagej=True)

### Optional: define selections in napari

In [None]:
import napari

# close a viewer that is still open before creating a new one
open_viewer = napari.current_viewer()
if open_viewer is not None:
    open_viewer.close()

# layer 0: the embedding as points; layer 1: an empty shapes layer to draw selections into
embedding_points = df[['embedding_comp0','embedding_comp1']]
viewer = napari.view_points(embedding_points, size=3, edge_width=0.3, face_color='lightblue', edge_color='gray')
viewer.add_shapes() # add empty shape layer

In [None]:
# get rectangles (or bounding boxes of other shapes) from shape layer
# (layer 1 is the shapes layer added right after the points layer)
shape_vertices = viewer.layers[1].data

# per shape: minimum over its vertices = rectangle position,
# maximum minus minimum = rectangle size
selection_positions = [np.min(arr, axis=0) for arr in shape_vertices]
selection_sizes = [np.max(arr, axis=0) - np.min(arr, axis=0) for arr in shape_vertices]

# selection_positions, selection_sizes

### Plot embedding colored by simple features

In [None]:
sns.set_palette('bright')

# one panel per simple feature, each coloring the same embedding
simple_features = ['other_mean_intensity', 'other_area', 'other_eccentricity']

fig, axs = plt.subplots(nrows=3, figsize=(8,23))
for ax, feature in zip(axs, simple_features):
    sns.scatterplot(ax=ax, data=df, x='embedding_comp0', y='embedding_comp1', hue=feature, alpha=0.85, s=50)
# plt.xlabel('tSNE comp. 1'); plt.ylabel('tSNE comp. 2');
fig.tight_layout()

# Old/testing code

In [None]:
# for spinning disk data: get growth surface from filename
# PL-.... or glass-....
# 'file' is an index level once the (file, label) index has been set, so attribute
# access (df.file) fails in that state; read it from the columns or from the index.
if 'file' in df.columns:
    file_names = df['file']
else:
    file_names = df.index.get_level_values('file').to_series(index=df.index)
# growth surface = everything before the first '-'
df['growth_surface'] = file_names.str.split('-').str[0]

In [None]:
# select only one replicate: keep rows of biological replicate '1'
# (overwrites df; tex_values is not filtered here)
is_first_replicate = df['replicate_biological'] == '1'
df = df[is_first_replicate]

In [None]:
# distribution of 'other_area' per treatment
sns.boxplot(x='treatment', y='other_area', data=df)

In [None]:
from sklearn.cluster import KMeans

# cluster the feature vectors into 10 groups with KMeans and store the label per row as a string
# NOTE(review): this assumes tex_values still has exactly one row per row of df, in the same
# order; an earlier cell (`df = df[df['replicate_biological'] == '1']`) filters df without
# touching tex_values -- confirm both are in sync before running this cell.
cluster_pred = KMeans(10).fit_predict(tex_values)
df['cluster_pred'] = cluster_pred.astype(str)

In [None]:
# non-null counts of every column per (cluster, treatment) combination
df.groupby(by=['cluster_pred', 'treatment']).count()

In [None]:
# fraction of rows to keep, counted from the smallest feature-vector norm upwards
remove_features_above_quantile = 0.95

# row indices ordered by the norm of each row's feature vector (ascending)
row_norms = np.linalg.norm(tex_values, axis=1)
sorted_feature_magnitude = np.argsort(row_norms)

# keep the same leading part of that ordering in both the feature matrix and the table
n_keep = int(len(sorted_feature_magnitude) * remove_features_above_quantile)
keep_idx = sorted_feature_magnitude[:n_keep]
tex_values = tex_values[keep_idx]

df = df.iloc[keep_idx]
# len(tex_values), int(len(sorted_feature_magnitude) * remove_features_above_quantile)

### embedding colored by growth surface (spinning disk data only)

In [None]:
# embedding colored by the 'growth_surface' column
sns.set_palette('bright')

# two stacked panels are created, but only the second one (axs[1]) is drawn into
# NOTE(review): axs[0] stays empty -- presumably a leftover from a removed plot; confirm
fig, axs = plt.subplots(nrows=2, figsize=(8,16))

sns.scatterplot(ax=axs[1], data=df, x='embedding_comp0', y='embedding_comp1', hue='growth_surface', alpha=0.85, s=50)
# plt.xlabel('tSNE comp. 1'); plt.ylabel('tSNE comp. 2');

fig.tight_layout()

In [None]:
# whole embedding colored by treatment; rows are shuffled (sample(frac=1)) before
# plotting, which changes the order in which the points are drawn
treatment_order = ['3d_icm', 'young', 'old', '9d_icm', '6d_icm']
df_shuffled = df.reset_index().sample(frac=1)

fig, ax = plt.subplots(figsize=(8,8))
sns.scatterplot(ax=ax, data=df_shuffled, x='embedding_comp0', y='embedding_comp1',
                hue='treatment', hue_order=treatment_order, alpha=0.1, s=50)

## Make summary csv of autoSTED experiments

For manual curation of condition table

In [None]:
import glob
import os
from pathlib import Path

basepath = '/data/cooperation_data/ArgyrisPapantonis-nuclear_architecture/Simona_Nasiscionyte/STED'

# all .h5 files below basepath, searched recursively
h5_pattern = os.path.join(basepath, '**/*.h5')
h5s = glob.glob(h5_pattern, recursive=True)

# path relative to basepath, and the bare file name without suffix
paths = [os.path.relpath(h5_file, basepath) for h5_file in h5s]
files = [Path(rel_path).stem for rel_path in paths]

# NOTE: this overwrites the feature table held in df
df = pd.DataFrame({'path' : paths, 'file': files})
# the first component of the relative path is taken as the experiment group
df['experiment_group'] = df['path'].str.split('/').str[0]

df
# df.to_csv('~/auto_sir_experiment_overview.csv')