In [1]:
import numpy as np
import pandas as pd
from scipy.stats import t
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms

from bokeh.plotting import show
from bokeh.models.annotations import Title
from bokeh.models import Plot, ColumnDataSource, Ellipse, Grid, LinearAxis, Text
from bokeh.io import output_notebook, export_png

from harrison_functions.utils.std.text import camel_to_snake_case
from harrison_functions.utils.plotting.plotly import (plot_single_scatter,
                                                      plot_multiple_scatter,
                                                      plot_violin, save_fig_as_png)

pd.options.display.max_columns = None
output_notebook()

In [2]:
def confidence_ellipse(P, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of `x` and `y`
    Source code from: https://matplotlib.org/stable/gallery/statistics/confidence_ellipse.html
    Modified for my own use
    """
    
    x, y = P
    # if x.size != y.size:
    #     raise ValueError("x and y must be the same size")  # note: this is impossible

    # dimension check
    # for a 2d image, d=2 and N is the number of points
    d, N = P.shape
    if N <= d:
        return np.nan, np.nan, np.nan, np.nan, np.nan

    cov = np.cov(x, y)
    pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1])
    
    # Using a special case to obtain the eigenvalues of this two-dimensionl dataset.
    major_axis_length = np.sqrt(1 + pearson)
    minor_axis_length = np.sqrt(1 - pearson)
    
    # Calculating the stdandard deviation of x
    scale_x = np.sqrt(cov[0, 0]) * n_std
    center_x = np.mean(x)

    # calculating the stdandard deviation of y
    scale_y = np.sqrt(cov[1, 1]) * n_std
    center_y = np.mean(y)
    
    
    # calculating orientation "angle"
    eig_vals, eig_vecs = np.linalg.eig(cov)

    if eig_vals[0] > eig_vals[1]:
        width = major_axis_length * 2
        height = minor_axis_length * 2
    else:
        width = minor_axis_length * 2
        height = major_axis_length * 2
    orientation = np.arccos(eig_vecs[0, 0])/np.pi*180  # not entire certain this is it
    
    minor_axis_length, major_axis_length = sorted((width * scale_x, height * scale_y))
    
    return center_x, center_y, minor_axis_length, major_axis_length, orientation
    
    # matplotlib
    
    # from matplotlib.patches import Ellipse
    # ellipse = Ellipse((0, 0),
    #     width=width,
    #     height=height,
    #     facecolor=facecolor,
    #     **kwargs)    

    # transf = transforms.Affine2D() \
    #     .rotate_deg(orientation) \
    #     .scale(scale_x, scale_y) \
    #     .translate(center_x, center_y)

    # ellipse.set_transform(transf + ax.transData)
    # return ax.add_patch(ellipse)

In [1]:
def find_nearest_point(point, points:list):
    """O(n^2) algorithm to find the nearest point
    Can make this faster with binary search on one of the variables
    However, since this is a small dataset (20 nuclei per image), whatever
    
    >>> find_nearest_point(
        point=(281.415801, 135.945238),
        points=[(693.094713, 59.080090), (295.184921, 118.996760), (282.528024, 182.998269)],
    )
    (295.184921, 118.99676)
    """
    
    d = np.inf
    for x, y in points:
        d_current = np.sqrt((point[0]-x)**2+(point[1]-y)**2)
        if d_current < d:
            closest_point = (x, y)
            d = d_current
        
    return closest_point

def flatten_columns(multicols):
    """Flattens a 2 level multi-index
    """
    return [f'{cols[0].lower()}_{cols[1]}'.strip('_') for cols in multicols]


In [4]:
puncta_cols = [
    'image_number', 'object_number', 'parent_manual_nuclei', 'center_x',
    'center_y', 'bounding_box_min_x', 'bounding_box_max_x',
    'bounding_box_min_y', 'bounding_box_max_y', 'bounding_box_area',
    'orientation', 'major_axis_length', 'minor_axis_length', 'area',
    'convex_area', 'perimeter', 'eccentricity', 'form_factor',
    'compactness', 'integrated_intensity', 'min_intensity', 'max_intensity',
    'mean_intensity', 'median_intensity', 'edge_integrated_intensity',
    'edge_min_intensity', 'edge_max_intensity', 'edge_mean_intensity',
]

# Data

In [5]:
# read in data
nuclei = pd.read_csv("data/nuclei_subset.csv")
puncta = pd.read_csv("data/puncta_subset.csv")

In [6]:
# ----------------------------------------------------------------------
# Reassign nuclei
puncta_centers = (
    puncta
    .groupby(["image_number", "parent_manual_nuclei"])[["center_x", "center_y"]]
    .mean()
    .reset_index()
)
puncta_centers['center'] = puncta_centers[['center_x', 'center_y']].apply(list, axis=1)


# use find_nearest_point to find the center of the closest nuclei
# there are more nuclei than puncta, so this is fine
puncta_centers[["closest_nuclei_x", "closest_nuclei_y"]] = pd.DataFrame(
    puncta_centers[['image_number', 'center']].apply(
    lambda x: find_nearest_point(
        point=x['center'],
        points=nuclei.loc[(nuclei['image_number']==x['image_number']),
                          ["center_x", "center_y"]].to_records(index=False)
    )
    , axis=1).to_list(),
    columns=["closest_nuclei_x", "closest_nuclei_y"],
)

# left join nuclei_table on closest_nuclei_x and closest_nuclei_y
puncta_centers['nuclei_object_number'] = pd.merge(
    left=puncta_centers[["closest_nuclei_x", "closest_nuclei_y", 'image_number', 'parent_manual_nuclei']],
    right=nuclei[['center_x', 'center_y', 'image_number', 'object_number']],
    left_on=["closest_nuclei_x", "closest_nuclei_y", 'image_number',],
    right_on=['center_x', 'center_y', 'image_number',],
    how='left',
    suffixes=('', '_nuclei')
)['object_number']


# add back to puncta
puncta = pd.merge(
    left=puncta[puncta_cols],
    right=puncta_centers[['image_number', 'parent_manual_nuclei', 'nuclei_object_number']],
    left_on=['image_number', 'parent_manual_nuclei'],
    right_on=['image_number', 'parent_manual_nuclei',],
    how='left',
    suffixes=('', '_')
)


# filter puncta that are too far away from the nuclei
puncta = pd.merge(
    left=puncta[list(puncta_cols)+['nuclei_object_number']],
    right=nuclei[['image_number', 'object_number', 'bounding_box_min_x', 'bounding_box_max_x', 'bounding_box_min_y', 'bounding_box_max_y']],
    left_on=['image_number', 'nuclei_object_number'],
    right_on=['image_number', 'object_number'],
    how='left',
    suffixes=('', '_nuclei')
)  # left join nuclei data

puncta = puncta[
    (puncta['center_x'] >= puncta['bounding_box_min_x_nuclei']) & 
    (puncta['center_x'] <= puncta['bounding_box_max_x_nuclei']) &
    (puncta['center_y'] >= puncta['bounding_box_min_y_nuclei']) &
    (puncta['center_y'] <= puncta['bounding_box_max_y_nuclei'])
].copy()  # filter


# regenerate puncta_centers using filtered data
puncta_centers = (
    puncta
    .groupby(["image_number", "nuclei_object_number"])[["center_x", "center_y"]]
    .mean()
    .reset_index()
)

In [7]:
# filters
nuclei_tmp = nuclei[
    (nuclei['eccentricity'] < 0.69)
    & (nuclei['major_axis_length'] < 128)
].copy()

puncta = pd.merge(
    left=nuclei_tmp[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])  # left join without duplicates

# Confidence Ellipse Around Nuclei

In [8]:
image_number = 3

In [9]:
puncta['center'] = puncta[['center_x', 'center_y']].apply(list, axis=1)
tmp = puncta.groupby(['image_number', 'object_number'])[['center']].agg(list).reset_index().copy()
P = np.transpose(np.array(tmp.loc[0, 'center']))

In [10]:
# sanity check
# center_x, center_y, min_x, max_x, min_y, max_y = min_vol_ellipse(P, tolerance=0.01)
center_x, center_y, minor_axis_length, major_axis_length, orientation = confidence_ellipse(P)
center_x, center_y, minor_axis_length, major_axis_length, orientation

(281.4158008658008,
 135.945238095238,
 15.994151874417318,
 28.428012359584674,
 36.68319660169882)

In [11]:
tmp[["center_x", "center_y", "minor_axis_length", "major_axis_length", "orientation"]] = pd.DataFrame(
    tmp["center"]
    .apply(lambda x: confidence_ellipse(np.transpose(np.array(x)), n_std=2))
    .to_list()
)
tmp

Unnamed: 0,image_number,object_number,center,center_x,center_y,minor_axis_length,major_axis_length,orientation
0,3,2,"[[275.0, 131.0], [280.0, 131.0], [285.5, 133.0...",281.415801,135.945238,10.662768,18.952008,36.683197
1,3,3,"[[310.0, 188.0], [309.25, 196.0], [306.8, 198....",308.584722,197.160417,11.300052,14.352111,165.130432
2,3,5,"[[314.875, 197.875]]",,,,,
3,3,7,"[[1103.5, 376.0], [1099.95, 377.4], [1100.7777...",1101.156032,380.154921,9.503341,12.610978,171.066963
4,3,10,"[[217.0, 431.0], [218.0, 436.5], [215.0, 438.0...",223.689128,448.006676,25.862934,44.931304,42.593092
...,...,...,...,...,...,...,...,...
92,20,49,"[[661.5, 867.5]]",,,,,
93,20,50,"[[132.0, 913.0], [140.875, 918.125], [144.1666...",142.769881,923.256310,27.231745,29.418273,27.675758
94,20,51,"[[1056.0, 899.5], [1062.33333333333, 900.33333...",1060.745804,907.430531,12.946057,24.054890,178.718460
95,20,52,"[[260.5, 866.0], [272.0, 876.0], [264.0, 877.0...",256.533333,889.666667,50.507114,51.634533,20.743831


In [12]:
image_number = 3

# subset by image
nuclei_subset = tmp[tmp['image_number']==image_number].copy()
puncta_subset = pd.merge(
    left=nuclei_subset[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])


# add nuclei
nuclei_subset['angle'] = nuclei_subset['orientation'].apply(lambda x: x/180*np.pi)
nuclei_source = ColumnDataSource(nuclei_subset.loc[
    # (nuclei_subset['object_number']==2)
    :, ["object_number", "center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).dropna().to_dict("list"))
nuclei_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', line_color='#FFFFFF', fill_color='#000fff', line_width=1.2)
text_glyph = Text(x="x", y="y", text="object_number", text_color="white", text_font_size = {'value': '13px'})


# add puncta
puncta_subset['angle'] = puncta_subset['orientation'].apply(lambda x: x/180*np.pi)

puncta_source = ColumnDataSource(puncta_subset[
    ["center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
puncta_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', fill_color='#ff2b00', line_alpha=0, )

puncta_text_source = ColumnDataSource(puncta_centers.loc[
    (puncta_centers['image_number']==image_number)
    & (puncta_centers['nuclei_object_number'].isin(nuclei_subset['object_number']))
    , ["image_number", "nuclei_object_number", "center_x", "center_y",]
].to_dict("list"))
puncta_text_glyph = Text(
    x="center_x", y="center_y", text="nuclei_object_number", text_color="orange", text_font_size = {'value': '13px'}
)


# add puncta
plot = Plot(
    title=Title(text=f"Image {image_number}"),
    width=1000, height=800,
    match_aspect=True,
    toolbar_location=None
)
plot.add_glyph(nuclei_source, nuclei_glyph)
plot.add_glyph(nuclei_source, text_glyph)
plot.add_glyph(puncta_source, puncta_glyph)
plot.add_glyph(puncta_text_source, puncta_text_glyph)


xaxis = LinearAxis()
plot.add_layout(xaxis, 'above')
plot.x_range.start = 0
plot.x_range.end = 1290

yaxis = LinearAxis()
plot.add_layout(yaxis, 'left')
plot.y_range.start = 1000
plot.y_range.end = 0

plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

show(plot)