In [2]:
import os
import sys
from pathlib import Path
from typing import List

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io
from skimage.measure import regionprops_table
from skimage import exposure, measure
from tqdm.notebook import trange, tqdm


In [3]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [4]:
# Import path
# Append the parent of the current working directory to sys.path (once), so
# that modules located there can be imported below.
module_path = str(Path.cwd().parents[0])
if module_path not in sys.path:
    sys.path.append(module_path)

# NOTE(review): the star import hides which names come from `config`. Later
# cells use `data_meta` and `data_mask`, which are presumably defined there --
# confirm, and consider importing them explicitly.
from config import *

In [5]:
# Append the "src" directory next to the notebook folder (parent of the
# current working directory / "src") to sys.path, once.
module_path = str(Path.cwd().parents[0] / "src")

if module_path not in sys.path:
    sys.path.append(module_path)

# Project helper module; presumably lives in the "src" directory added above.
import utils as my_utils

In [6]:
# Load the image metadata table from "all_info.csv" inside `data_meta`.
info_csv = data_meta / "all_info.csv"

df = pd.read_csv(info_csv)

In [7]:
# Keep only the rows of location 36 whose After_bleach flag is False.
df_subset = df.loc[df["Location"].eq(36) & df["After_bleach"].eq(False)]

In [8]:
# Display the filtered metadata rows.
df_subset 

Unnamed: 0,Cycle,Location,After_bleach,Z_stack,Channel,Marker,Path,Path_corrected
2625,Cycle10,36,False,1,CH1,Hoeschst,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
2626,Cycle10,36,False,1,CH2,Concanavalin A,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
2627,Cycle10,36,False,1,CH4,PCNA,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
2628,Cycle10,36,False,2,CH1,Hoeschst,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
2629,Cycle10,36,False,2,CH2,Concanavalin A,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
...,...,...,...,...,...,...,...,...
84519,Cycle9,36,False,24,CH4,H3k4me3-647,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
84520,Cycle9,36,False,25,CH1,Hoeschst,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
84521,Cycle9,36,False,25,CH2,H3k27me3-488,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...
84522,Cycle9,36,False,25,CH3,H3k9Ac-555,Y:\coskun-lab\Thomas\Spatial signaling\dataset...,Y:\coskun-lab\Thomas\Spatial signaling\dataset...


# Get images

In [20]:
from joblib import Parallel, delayed


def read_img(path):
    """Read the image at ``path`` and return it after contrast stretching.

    The file is loaded with ``skimage.io.imread`` and passed through
    ``contrast_streching``.
    """
    return contrast_streching(skimage.io.imread(path))

def contrast_streching(img):
    """Rescale the intensities of ``img`` between its 0.5th and 99.5th percentiles.

    The two percentile values are used as the input range of
    ``exposure.rescale_intensity``; its result is returned unchanged.
    """
    lower, upper = np.percentile(img, (0.5, 99.5))
    stretched = exposure.rescale_intensity(img, in_range=(lower, upper))
    return stretched

def joblib_loop(task, pics, n_jobs=20):
    """Apply ``task`` to every item of ``pics`` through ``joblib.Parallel``.

    Parameters
    ----------
    task : callable
        Function called with a single item of ``pics``.
    pics : iterable
        Items to process (the notebook passes image paths).
    n_jobs : int, optional
        Number of workers handed to ``Parallel``. Defaults to 20, the value
        that used to be hard-coded.

    Returns
    -------
    The results produced by ``Parallel`` for the submitted calls.
    """
    return Parallel(n_jobs=n_jobs)(delayed(task)(pic) for pic in pics)

def non_border_cell(cell_mask):
    """Return the labels of regions whose bounding box stays off the image border.

    ``cell_mask`` is a 2-D label image. A region is dropped when its bounding
    box starts at row/column 0 or ends at the last row/column of the mask.
    """
    n_rows, n_cols = cell_mask.shape

    def touches_border(bbox):
        # bbox is indexed as (first row, first col, end row, end col).
        return bbox[0] == 0 or bbox[1] == 0 or bbox[2] == n_rows or bbox[3] == n_cols

    return [
        region['label']
        for region in measure.regionprops(cell_mask)
        if not touches_border(region['bbox'])
    ]

In [21]:
# Record the Z-stack values first, before any marker rows are removed.
z_stacks = df_subset["Z_stack"].unique()

# Drop the rows of markers that are excluded from the pixel table.
df_subset = df_subset.loc[
    ~df_subset["Marker"].isin(
        ['Concanavalin A', 'Phalloidin', 'Hoeschst', 'WGA', 'B-actin', 'PCNA']
    )
]

In [22]:
# Load the masks and keep only the entry stored under the key '36'
# (presumably the masks of location 36 selected above -- confirm).
masks = my_utils.get_masks(data_mask)['36']

In [23]:
df_all = []

# The masks are the same for every Z slice, so the cell label image and the
# coordinates of its labelled pixels are computed once, outside the loop.
# NOTE(review): assumes my_utils.qc_nuclei returns the same result on every
# call and does not modify its inputs -- confirm.
mask_cyto = masks["cyto"]
mask_nuclei = masks["nuclei"]
cell, _, _ = my_utils.qc_nuclei(mask_cyto, mask_nuclei)
rows, cols = np.where(cell > 0)
cell_ids = cell[rows, cols]

for z in tqdm(z_stacks, total=len(z_stacks)):
    df_z = df_subset[df_subset.Z_stack == z]

    paths = df_z.Path_corrected.tolist()
    markers = df_z.Marker.tolist()
    if not paths:
        # No marker image is left for this slice; np.stack would fail on an
        # empty list, so skip it explicitly.
        continue

    imgs = joblib_loop(read_img, paths)
    try:
        # Stack the marker images of this slice along a new first axis.
        pixels = np.stack(imgs, axis=0)

        # One column per labelled pixel, one row per marker.
        cell_pixels = pixels[:, rows, cols]

        # Create dataframe: one row per pixel, one column per marker.
        df_pixels = pd.DataFrame(cell_pixels.T, columns=markers)
        df_pixels["X"] = rows  # row index of the pixel
        df_pixels["Y"] = cols  # column index of the pixel
        df_pixels["Id"] = cell_ids
        df_pixels["Z"] = z
        df_all.append(df_pixels.dropna())
    except Exception as err:
        # Stay best-effort (a bad slice is skipped) but report it instead of
        # hiding it; KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f"Skipping Z={z}: {err!r}")
        continue

  0%|          | 0/28 [00:00<?, ?it/s]

In [24]:
# Concatenate the per-slice pixel tables row-wise. ignore_index is not set, so
# each table keeps its own row labels in the combined frame.
df_combined = pd.concat(df_all)

In [25]:
# Drop every row that still contains a missing value (presumably possible
# when the per-slice tables do not all share the same marker columns).
df_combined = df_combined.dropna()

In [26]:
# Save the combined pixel table to CSV without writing the row index.
df_combined.to_csv(data_meta / "pixel_intensity_3D.csv", index=False)

# Filter out background

In [9]:
# Load the pixel table from the CSV file "pixel_intensity_3D.csv" in
# `data_meta`; the loaded frame gets a default row index.
df_combined = pd.read_csv(data_meta / "pixel_intensity_3D.csv")

In [10]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Scale data
# Select the marker columns by name rather than by position, so the result
# does not depend on X/Y/Id/Z being the last four columns of the table.
marker_cols = df_combined.columns.drop(["X", "Y", "Id", "Z"])

# Min-max scale every marker column.
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(df_combined[marker_cols])

# Reuse df_combined's index so rows can be joined back to their coordinates.
pixels_scaled = pd.DataFrame(x_scaled, columns=marker_cols, index=df_combined.index)

In [13]:
# A pixel counts as dark when its scaled value is at most 0.4 in every marker.
pixel_dark = (pixels_scaled <= 0.4).all(axis="columns")
display(pixel_dark.value_counts())

# Keep the pixels that exceed 0.4 in at least one marker.
pixels_bright = pixels_scaled.loc[~pixel_dark]
pixels_bright

False    21448818
True      7906239
dtype: int64

Unnamed: 0,p-EGFR,Non-p-B-catenin,APC,DKK1,Cyclin E,EMMPRIN,Wnt1,p-AKT,p-B-catenin,RNF 43,...,p-mTOR,mTOR,DKK2,AXIN1,EGFR,Cyclin D1,B-tubin,H3k27me3-488,H3k9Ac-555,H3k4me3-647
67,0.000000,0.000000,0.262913,0.039445,0.000000,0.000000,0.000000,0.033051,0.0,0.000000,...,0.000000,0.453925,0.000000,0.000000,0.000000,0.000000,0.0,0.080232,0.000000,0.054536
92,0.046082,0.152102,0.000000,0.000000,0.000000,0.000000,0.000000,0.094865,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.529625,0.000000,0.0,0.176516,0.000000,0.000000
98,0.000000,0.306325,0.497475,0.122911,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.084459,0.000000,0.056184
100,0.000000,0.000000,0.714366,0.000000,0.000000,0.060288,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.059953,0.000000,0.042969
101,0.000000,0.000000,0.417395,0.062211,0.000000,0.000000,0.000000,0.034012,0.0,0.063279,...,0.000000,0.022461,0.106279,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29355050,0.000000,0.000000,0.000000,0.134112,0.000000,0.000000,0.030243,0.000000,0.0,0.542229,...,0.000000,0.434257,0.328634,0.000000,0.000000,0.116762,0.0,0.000000,0.000000,0.000000
29355051,0.000000,0.000000,0.000000,0.217304,0.208682,0.376684,0.022675,0.000000,0.0,0.204349,...,0.000000,0.049790,0.000000,0.000000,0.610895,0.260472,0.0,0.000000,0.254734,0.000000
29355052,0.104784,0.000000,0.000000,0.000000,0.183352,0.212665,0.292058,0.000000,0.0,0.389639,...,0.000000,0.264927,0.636927,0.448508,0.276356,0.000000,0.0,0.000000,0.000000,0.000000
29355054,0.058213,0.000000,0.000000,0.081483,0.000000,0.449088,0.103014,0.000000,0.0,0.302449,...,0.007782,0.000000,0.000000,0.230365,0.276356,0.255985,0.0,0.000000,0.000000,0.000000


# Clustering

In [None]:
import dask.dataframe as dd
import dask_ml.cluster

# Split the bright pixels into partitions of 100000 rows.
X = dd.from_pandas(pixels_bright, chunksize=100000)
# NOTE(review): dask-ml's KMeans appears to require an array with known chunk
# sizes rather than a dask DataFrame -- confirm. Converting explicitly is safe
# either way.
X = X.to_dask_array(lengths=True)
X = X.persist()

# Fix the seed so the cluster assignment is reproducible between runs.
km = dask_ml.cluster.KMeans(n_clusters=60, oversampling_factor=10, random_state=0)
km.fit(X)

# Materialise the per-pixel cluster labels.
labels = km.labels_
labels = labels.compute()

# Visualization

In [None]:
# Attach the pixel coordinates and cell id to the bright pixels; the join is
# done on the row index shared with df_combined. (The original referenced the
# undefined name `pixels_pixels`.)
pixels_bright_position = pixels_bright.join(df_combined[["X", "Y", "Z", "Id"]])