### Astrocyte density around blood vessels

Calculates astrocyte body density and EGFP density as a function of the distance from blood vessels (from step (e) of the block diagram of the supplementary material).

In [1]:
from pathlib import Path
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import scipy.ndimage as ndi
import pandas as pd

# Channel names as they appear inside the image file names. read_data() derives the
# vessel and GFAP file names by replacing CHANNEL_ASTRO with the other channel name.
CHANNEL_VESSEL = 'CD31'
CHANNEL_FIBER = 'GFAP'    # The GFAP file is only opened when read_data() is called with exclude_GFAP=False
CHANNEL_ASTRO = 'GFP'

In [2]:
def get_first_key(a_dict):
    '''Return the first key yielded by `a_dict.keys()`.

    Raises StopIteration when there are no keys.'''

    key_iterator = iter(a_dict.keys())
    return next(key_iterator)

def load_roi(file):
    '''Load the region-of-interest mask that accompanies a P0 sample.

    The mask path is derived from `file` by replacing 'GFP(astrocytes)' with 'masks'
    and '.tif' with '.png'. Returns None when 'P0' does not occur in the path.'''

    path_str = str(file)
    if 'P0' not in path_str:
        return None

    file_roi = path_str.replace('GFP(astrocytes)', 'masks').replace('.tif', '.png')
    img_roi = plt.imread(file_roi).astype(np.uint8)

    # Multi-channel image: only the first channel is kept
    if img_roi.ndim == 3:
        img_roi = img_roi[:, :, 0]

    # Values above 1 after the uint8 cast: map 255 to 1 and everything else to 0
    if img_roi.max() > 1:
        img_roi = 1*(img_roi == 255)

    return img_roi

def remove_small_comp(img_bin, tam_threshold=20, img_label=None, structure=None):
    """For a binary image, remove connected components smaller than `tam_threshold`. If `img_label`
    is not None, use the provided labels as components.

    Parameters
    ----------
    img_bin : ndarray
        Binary image.
    tam_threshold : int
        Size threshold for removing components. A component is kept only if the sum of
        `img_bin` over its pixels is strictly greater than this value.
    img_label : ndarray, optional
        Array containing image components. Must have the same format as the array returned
        by scipy.ndimage.label(...). Zero values are ignored.
    structure : ndarray, optional
        Structuring element used for detecting connected components. Only used when
        `img_label` is None.

    Returns
    -------
    img_bin_final : ndarray
        Binary image (uint8) with small components removed.
    """

    if img_label is None:
        img_lab, num_comp = ndi.label(img_bin, structure)
    else:
        # The caller-supplied labels must be bound to `img_lab`, which is the array
        # used below both for measuring component sizes and for building the output.
        img_lab = np.asarray(img_label)
        num_comp = int(img_lab.max())

    # Size of every component, indexed by label (index 0 is the background)
    tam_comp = ndi.sum(img_bin, img_lab, range(num_comp+1))

    mask = tam_comp>tam_threshold
    mask[0] = False    # The background is never kept

    # Look up, for each pixel, whether its component survived
    img_bin_final = mask[img_lab].astype(np.uint8)

    return img_bin_final
    
def double_distance_img(img):
    '''Distance map defined both inside and outside the objects of binary image `img`.

    Inside the objects the value is the maximum inner distance minus the Euclidean
    distance to the background. Outside it is the Euclidean distance to the nearest
    object plus that maximum, minus 1.'''

    dist_inside = ndi.distance_transform_edt(img)
    dist_outside = ndi.distance_transform_edt(1 - img)
    deepest = dist_inside.max()

    is_background = dist_outside > 0
    img_dist = np.where(is_background, dist_outside + deepest - 1, deepest - dist_inside)

    return img_dist
    
def relative_amount(img_ref, img_obj, dr=2, r_max=None, mode='smaller', use_full_distance=False, img_roi=None):
    '''Area of objects in `img_obj` as a function of distance from objects in `img_ref`.

    Parameters
    ----------
    img_ref : ndarray
        Binary image with the reference objects. Distances are measured from them.
    img_obj : ndarray
        Image whose values are summed inside each region.
    dr : number
        Step between consecutive radii.
    r_max : number, optional
        Largest radius. Defaults to the length of the image diagonal.
    mode : {'smaller', 'shell'}
        'smaller' uses every pixel with distance <= r (cumulative regions); 'shell' uses
        the pixels with r_previous < distance <= r.
    use_full_distance : bool
        If True, distances come from `double_distance_img` and radii start at `dr`.
        If False, distances are the Euclidean distance to the reference objects, radii
        start at 0 and the region at radius 0 is the reference object itself.
    img_roi : ndarray, optional
        Only pixels where `img_roi==1` are considered. Defaults to the whole image.

    Returns
    -------
    obj_amount : ndarray
        Sum of `img_obj` inside each region (NaN for regions without pixels).
    area : ndarray
        Number of pixels of each region (NaN for regions without pixels).
    r_vals : ndarray
        Radius associated with each region.

    Raises
    ------
    ValueError
        If `mode` is not 'shell' or 'smaller'.
    '''

    if r_max is None:
        r_max = np.sqrt(sum(np.array(img_ref.shape)**2))
    if mode not in ['shell', 'smaller']:
        raise ValueError("'mode' must be either 'shell' or 'smaller'")
    if img_roi is None:
        img_roi = np.ones_like(img_ref)

    if use_full_distance:
        r_vals = np.arange(dr, r_max+dr/2, dr)
        img_dist = double_distance_img(img_ref)
    else:
        r_vals = np.arange(0, r_max+dr/2, dr)   # Used when considering a simple distance transform
        img_ref_inv = 1 - img_ref
        img_dist = ndi.distance_transform_edt(img_ref_inv)

    in_roi = img_roi==1
    obj_amount = np.zeros(r_vals.size)
    area = np.zeros(r_vals.size)
    for r_idx, r_val in enumerate(r_vals):

        if r_idx==0:
            if use_full_distance:
                # The first radius has no previous radius to act as a lower bound.
                # Indexing r_vals[r_idx-1] here would wrap around to the largest radius
                # and make the first shell empty, so everything up to r_val is used.
                img_region = img_dist<=r_val
            else:
                # Radius 0 of the simple distance transform is the reference itself
                img_region = img_ref
        elif mode=='shell':
            img_region = np.logical_and(img_dist>r_vals[r_idx-1], img_dist<=r_val)
        else:
            img_region = img_dist<=r_val

        vals_in_shell = img_obj[(img_region==1) & in_roi]
        if len(vals_in_shell)==0:
            # No pixel in this region: mark as undefined instead of zero
            obj_amount[r_idx] = np.nan
            area[r_idx] = np.nan
        else:
            obj_amount[r_idx] = vals_in_shell.sum()
            area[r_idx] = len(vals_in_shell)

    return obj_amount, area, r_vals

def relative_density(img_ref, img_obj, dr=2, r_max=None, mode='smaller', use_full_distance=False, img_roi=None):
    '''Density of `img_obj` as a function of the distance from the objects in `img_ref`.

    The density of a region is the amount returned by `relative_amount` divided by the
    region area. Regions whose area is not positive keep the area value itself (NaN for
    regions without pixels). Returns `(density, obj_amount, area, r_vals)`.'''

    amounts, areas, radii = relative_amount(img_ref, img_obj, dr=dr, r_max=r_max, mode=mode,
                                            use_full_distance=use_full_distance, img_roi=img_roi)

    # Start from a copy of the areas so that undefined regions stay undefined
    density = areas.copy()
    has_pixels = areas > 0
    np.divide(amounts, areas, out=density, where=has_pixels)

    return density, amounts, areas, radii

def read_data(file_astro, input_folders, exclude_GFAP):
    '''Read the images of all channels that correspond to the astrocyte file `file_astro`.

    The vessel and GFAP file names are obtained by replacing CHANNEL_ASTRO in `file_astro`.
    Every image is integer-divided by 255. When `exclude_GFAP` is True the GFAP image is
    not read and its entry is None.

    Returns `(img_data, img_roi)`, where `img_data` is a dictionary with keys 'astro',
    'vessel', 'GFAP' and 'GFP'. `img_roi` is loaded with `load_roi` when 'P0' occurs in
    the file name; otherwise it is an array of ones with the shape of the 'astro' image.'''

    def load_scaled(folder_key, filename):
        return np.array(Image.open(input_folders[folder_key]/filename))//255

    file_vessel = file_astro.replace(CHANNEL_ASTRO, CHANNEL_VESSEL)
    file_fiber = file_astro.replace(CHANNEL_ASTRO, CHANNEL_FIBER)

    img_data = {
        'astro': load_scaled('astro', file_astro),
        'vessel': load_scaled('vessel', file_vessel),
        'GFAP': None if exclude_GFAP else load_scaled('GFAP', file_fiber),
        'GFP': load_scaled('GFP', file_astro),
    }

    if 'P0' in file_astro:
        img_roi = load_roi(input_folders['ROI']/file_astro)
    else:
        img_roi = np.ones(img_data['astro'].shape, dtype=np.uint8)

    return img_data, img_roi

def get_channel_props(img_data, img_roi, exclude_GFAP=False):
    '''Overall amount, area and density of each channel inside the ROI.

    For each channel of `img_data`, 'overall_obj_amount' is the sum of the image values
    where `img_roi==1`, 'overall_area' is the number of such pixels and 'overall_density'
    is their ratio. If `exclude_GFAP` is True, the three values of channel 'GFAP' are NaN.'''

    inside_roi = img_roi == 1

    channel_data = {}
    for channel, img in img_data.items():
        skip_channel = exclude_GFAP and channel == 'GFAP'
        if skip_channel:
            amount = area = density = np.nan
        else:
            roi_values = img[inside_roi]
            amount = roi_values.sum()
            area = len(roi_values)
            density = amount/area

        channel_data[channel] = {
            'overall_density': density,
            'overall_obj_amount': amount,
            'overall_area': area,
        }

    return channel_data

def get_colocalization_props(img_data, combinations, dr, r_max, use_full_distance, img_roi, exclude_GFAP):
    '''Main function for calculating co-localization properties.

    Each item of `combinations` has the form "channel1|channel2". Channel 1 is the
    reference and the density, amount and area of channel 2 are measured at radii spaced
    by `dr` up to `r_max`, using `relative_density`. Since no `mode` is passed, the
    default of `relative_density` ('smaller') applies, so the region of radius r contains
    every pixel at distance <= r. If `exclude_GFAP` is True, combinations that involve
    'GFAP' are filled with NaN.

    Returns `(colocalization_data, r_vals)`, where `colocalization_data[combination]` is
    a dictionary with keys 'density', 'obj_amount' and 'area'.'''

    first_radius = dr if use_full_distance else 0
    r_vals = np.arange(first_radius, r_max+dr/2, dr)

    colocalization_data = {}
    for combination in combinations:
        channel1, channel2 = combination.split('|')
        if exclude_GFAP and 'GFAP' in (channel1, channel2):
            # One independent NaN list per measurement
            density, obj_amount, area = ([np.nan]*r_vals.size for _ in range(3))
        else:
            density, obj_amount, area, _ = relative_density(img_data[channel1], img_data[channel2], dr=dr,
                                                            r_max=r_max, use_full_distance=use_full_distance,
                                                            img_roi=img_roi)
        colocalization_data[combination] = {
            'density': density,
            'obj_amount': obj_amount,
            'area': area,
        }

    return colocalization_data, r_vals

def blood_vessel_coverage(dataframes, threshold_radius):
    """Fraction of the blood vessels that is covered by GFP, for each file.

    The overall GFP density of an image influences this value: more GFP naturally leads
    to more covered vessels. The vessels cannot be used as reference for this measurement,
    because the GFP density around them would be the same whether the GFP is concentrated
    in one region or spread throughout the vessel. GFP is therefore the reference: the
    vessel amount found at radius `threshold_radius` of combination 'GFP|vessel' is
    divided by the overall vessel amount of the image.

    `threshold_radius` must be one of the column labels of the 'obj_amount' dataframe.
    """

    combination = 'GFP|vessel'
    overall_amounts = dataframes['img_props']['overall_obj_amount']
    amounts_by_radius = dataframes['coloc_props']['obj_amount'][combination]

    vessel_area = overall_amounts['vessel']
    vessel_near_gfp = amounts_by_radius[threshold_radius]

    return vessel_near_gfp/vessel_area

def create_dataframes(density_data, combinations, channels, r_vals):
    '''Converts dictionary containing co-localization properties into Pandas dataframes.

    The result has two entries:
    - 'img_props'[measurement]: one dataframe with files as rows and channels as columns.
    - 'coloc_props'[measurement][combination]: one dataframe with files as rows and the
      radii `r_vals` as columns.'''

    img_props = density_data['img_props']
    coloc_props = density_data['coloc_props']
    files_tags = list(img_props.keys())

    coloc_meas = ['density', 'obj_amount', 'area']
    dataframes = {
        'coloc_props': {mea: {} for mea in coloc_meas},
        'img_props': {},
    }

    # Whole-image properties of each channel
    for mea in ['overall_density', 'overall_obj_amount', 'overall_area']:
        values_per_channel = {
            channel: [img_props[file_tag][channel][mea] for file_tag in files_tags]
            for channel in channels
        }
        dataframes['img_props'][mea] = pd.DataFrame(data=values_per_channel, index=files_tags)

    # Properties as a function of distance, for each pair of channels
    for mea in coloc_meas:
        for combination in combinations:
            rows = [coloc_props[file_tag][combination][mea] for file_tag in files_tags]
            dataframes['coloc_props'][mea][combination] = pd.DataFrame(data=rows, columns=r_vals,
                                                                       index=files_tags)

    return dataframes

In [3]:
# Folder that holds the images of each structure
input_folders = {
    key: Path(folder)
    for key, folder in [
        ('astro', 'astrocytes/UNET_segmentations'),   # Astrocyte bodies
        ('vessel', 'vessels/UNET_segmentations'),     # Segmented blood vessels
        ('GFAP', 'GFAP/pipeline_labels'),             # GFAP signal
        ('GFP', 'astrocytes/pipeline_labels'),        # Astrocyte processes
        ('ROI', 'ROI'),                               # Regions of interest for P0
    ]
}

dr = 2                      # Width of the ROI region around structures to calculate density.
r_max = 150                 # Maximum distance to consider
threshold_radius = 2.0      # Threshold for calculating fraction of blood vessels covered by astrocyte processes
use_full_distance = False

pixel_size = (0.454, 0.454)   # microns
channels = ['astro', 'vessel', 'GFAP', 'GFP']

# Every ordered pair of distinct channels, written as "reference|object": the first channel
# sets the reference and the density is calculated for the second channel
combinations = [f'{ref}|{obj}' for ref in channels for obj in channels if ref != obj]

In [None]:
pixel_size = np.array(pixel_size)

# os.listdir returns entries in arbitrary order; sorting makes the row order of the
# resulting dataframes reproducible between runs and machines.
files = sorted(os.listdir(input_folders['astro']))
if not files:
    # Without files `r_vals` would never be defined and the code below would fail
    # with a confusing NameError.
    raise FileNotFoundError(f"No files found in {input_folders['astro']}")

density_data = {'img_props':{}, 'coloc_props':{}}
files_tags = []
for file_astro in files:

    print(file_astro)
    # The tag of a file is its name without extension and without the text after the last '@'
    filename = file_astro.split('.')[0]
    file_tag = '@'.join(filename.split('@')[:-1])
    files_tags.append(file_tag)

    # exclude_GFAP=True: the GFAP images are not read and GFAP results are filled with NaN
    img_data, img_roi = read_data(file_astro, input_folders, exclude_GFAP=True)
    density_data['img_props'][file_tag] = get_channel_props(img_data, img_roi, exclude_GFAP=True)
    density_data['coloc_props'][file_tag], r_vals = get_colocalization_props(img_data, combinations, dr,
                                                                   r_max, use_full_distance, img_roi, exclude_GFAP=True)

dataframes = create_dataframes(density_data, combinations, channels, r_vals)
df_vessel_covered = blood_vessel_coverage(dataframes, threshold_radius)

# The context manager guarantees that the file is flushed and closed after writing
with open('colocalization_data.pickle', 'wb') as pickle_file:
    pickle.dump([dataframes, df_vessel_covered], pickle_file)

### Plot data

In [8]:
def plot_band(df, color, label, pix_size):
    '''Plot the column-wise mean of `df` as a line with markers, surrounded by a shaded
    band of one standard error of the mean (standard deviation divided by the square
    root of the number of rows). The column labels multiplied by `pix_size` give the
    x coordinates.'''

    x_vals = df.columns*pix_size
    mean_curve = df.mean()
    std_error = df.std()/len(df.index)**0.5

    lower = mean_curve - std_error
    upper = mean_curve + std_error
    plt.fill_between(x_vals, lower, upper, color=color, alpha=0.2)
    plt.plot(x_vals, mean_curve, '-o', color=color, label=label)
    
def get_age_dataframe(dataframe, age):
    '''Select the rows of `dataframe` whose index label contains the text `age`.'''

    row_matches = [age in row_label for row_label in dataframe.index]
    return dataframe[row_matches]

# Ages to plot and the matplotlib color-cycle name ('C0', 'C1', ...) used for each one
ages = ['P0', 'P5', 'P7', 'P14', 'P21']
colors = [f'C{age_idx}' for age_idx in range(len(ages))]

In [None]:
# Size of a pixel, used to convert the radii (in pixels) to the units of the x axis
pix_size = 0.454

# Title of each figure, used as the name of the saved file. header[i] describes
# plot_combinations[i].
header = [
    'Astrocyte cell body density around blood vessels (normalized by astrocyte cell body density in the sample)', 
    'GFP density around blood vessels (normalized by GFP density in the sample)', 
]
plot_combinations = [
    'vessel|astro', 
    'vessel|GFP',
]

for title, combination in zip(header, plot_combinations):

    # Combinations are written as "reference|object"; only the object channel is needed
    # to look up the overall density used for normalization.
    channel_obj = combination.split("|")[1]
    df_density = dataframes['coloc_props']['density'][combination]
    df_overall_density = dataframes['img_props']['overall_density'][channel_obj]

    plt.figure(figsize=[5.1,4])
    for age, color in zip(ages, colors):
        df_density_age = get_age_dataframe(df_density, age)
        df_overall_density_age = get_age_dataframe(df_overall_density, age)
        # Divide each row (one file) by the overall density of that same file
        df_age_norm = df_density_age.div(df_overall_density_age, axis=0)
        plot_band(df_age_norm, color, label=age, pix_size=pix_size)
    plt.xlim((0, 13.62))    # 13.62 = 30 * pix_size
    # Raw string: '\m' is not a valid escape sequence in a regular string literal
    plt.xlabel(r'Distance from vessel wall ($\mu$m)', size=14)
    plt.ylabel('Normalized density', size=14)
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'../results/Shared astrocytes results/{title}.pdf')