In [1]:
# conclusions: filter on eccentricity > 0.69 and major_axis_length > 128

In [2]:
import numpy as np
import pandas as pd

from bokeh.plotting import show
from bokeh.models.annotations import Title
from bokeh.models import Plot, ColumnDataSource, Ellipse, Grid, LinearAxis, Text
from bokeh.io import output_notebook, export_png

from harrison_functions.utils.std.text import camel_to_snake_case
from harrison_functions.utils.plotting.plotly import plot_multiple_scatter, save_fig_as_png

pd.options.display.max_columns = None
output_notebook()

# troubleshooting
# from selenium import webdriver
# driver = webdriver.Firefox(executable_path='/home/harrisonized/geckodriver')

In [5]:
save=False

In [6]:
def find_nearest_point(point, points:list):
    """O(n^2) algorithm to find the nearest point
    Can make this faster with binary search on one of the variables
    However, since this is a small dataset (20 nuclei per image), whatever
    
    >>> find_nearest_point(
        point=(281.415801, 135.945238),
        points=[(693.094713, 59.080090), (295.184921, 118.996760), (282.528024, 182.998269)],
    )
    (295.184921, 118.99676)
    """
    
    d = np.inf
    for x, y in points:
        d_current = np.sqrt((point[0]-x)**2+(point[1]-y)**2)
        if d_current < d:
            closest_point = (x, y)
            d = d_current
        
    return closest_point

In [7]:
puncta_cols = [
    'image_number', 'object_number', 'parent_manual_nuclei', 'center_x',
    'center_y', 'bounding_box_min_x', 'bounding_box_max_x',
    'bounding_box_min_y', 'bounding_box_max_y', 'bounding_box_area',
    'orientation', 'major_axis_length', 'minor_axis_length', 'area',
    'convex_area', 'perimeter', 'eccentricity', 'form_factor',
    'compactness', 'integrated_intensity', 'min_intensity', 'max_intensity',
    'mean_intensity', 'median_intensity', 'edge_integrated_intensity',
    'edge_min_intensity', 'edge_max_intensity', 'edge_mean_intensity',
]

# Data

In [8]:
# read in data
nuclei = pd.read_csv("data/nuclei_subset.csv")
puncta = pd.read_csv("data/puncta_subset.csv")

In [9]:
# ----------------------------------------------------------------------
# Reassign nuclei
puncta_centers = (
    puncta
    .groupby(["image_number", "parent_manual_nuclei"])[["center_x", "center_y"]]
    .mean()
    .reset_index()
)
puncta_centers['center'] = puncta_centers[['center_x', 'center_y']].apply(list, axis=1)


# use find_nearest_point to find the center of the closest nuclei
# there are more nuclei than puncta, so this is fine
puncta_centers[["closest_nuclei_x", "closest_nuclei_y"]] = pd.DataFrame(
    puncta_centers[['image_number', 'center']].apply(
    lambda x: find_nearest_point(
        point=x['center'],
        points=nuclei.loc[(nuclei['image_number']==x['image_number']),
                          ["center_x", "center_y"]].to_records(index=False)
    )
    , axis=1).to_list(),
    columns=["closest_nuclei_x", "closest_nuclei_y"],
)

# left join nuclei_table on closest_nuclei_x and closest_nuclei_y
puncta_centers['nuclei_object_number'] = pd.merge(
    left=puncta_centers[["closest_nuclei_x", "closest_nuclei_y", 'image_number', 'parent_manual_nuclei']],
    right=nuclei[['center_x', 'center_y', 'image_number', 'object_number']],
    left_on=["closest_nuclei_x", "closest_nuclei_y", 'image_number',],
    right_on=['center_x', 'center_y', 'image_number',],
    how='left',
    suffixes=('', '_nuclei')
)['object_number']


# add back to puncta
puncta = pd.merge(
    left=puncta[puncta_cols],
    right=puncta_centers[['image_number', 'parent_manual_nuclei', 'nuclei_object_number']],
    left_on=['image_number', 'parent_manual_nuclei'],
    right_on=['image_number', 'parent_manual_nuclei',],
    how='left',
    suffixes=('', '_')
)


# filter puncta that are too far away from the nuclei
puncta = pd.merge(
    left=puncta[list(puncta_cols)+['nuclei_object_number']],
    right=nuclei[['image_number', 'object_number', 'bounding_box_min_x', 'bounding_box_max_x', 'bounding_box_min_y', 'bounding_box_max_y']],
    left_on=['image_number', 'nuclei_object_number'],
    right_on=['image_number', 'object_number'],
    how='left',
    suffixes=('', '_nuclei')
)  # left join nuclei data

puncta = puncta[
    (puncta['center_x'] >= puncta['bounding_box_min_x_nuclei']) & 
    (puncta['center_x'] <= puncta['bounding_box_max_x_nuclei']) &
    (puncta['center_y'] >= puncta['bounding_box_min_y_nuclei']) &
    (puncta['center_y'] <= puncta['bounding_box_max_y_nuclei'])
].copy()  # filter


# regenerate puncta_centers using filtered data
puncta_centers = (
    puncta
    .groupby(["image_number", "nuclei_object_number"])[["center_x", "center_y"]]
    .mean()
    .reset_index()
)

# Filter

In [10]:
# filters
nuclei_tmp = nuclei[
    (nuclei['eccentricity'] < 0.69)
    & (nuclei['major_axis_length'] < 128)
].copy()

puncta = pd.merge(
    left=nuclei[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])  # left join without duplicates

# Plot One

In [12]:
image_number = 8

# subset by image
nuclei_subset = nuclei_tmp[nuclei_tmp['image_number']==image_number].copy()
puncta_subset = pd.merge(
    left=nuclei_subset[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])

In [13]:
# Plot

# add nuclei
nuclei_subset['angle'] = nuclei_subset['orientation'].apply(lambda x: x/360*3.14159)
nuclei_source = ColumnDataSource(nuclei_subset.loc[
    # (nuclei_subset['object_number']==2)
    :, ["object_number", "center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
nuclei_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', line_color='#FFFFFF', fill_color='#000fff', line_width=1.2)
text_glyph = Text(x="x", y="y", text="object_number", text_color="white", text_font_size = {'value': '13px'})


# add puncta
puncta_subset['angle'] = puncta_subset['orientation'].apply(lambda x: x/360*3.14159)

puncta_source = ColumnDataSource(puncta_subset[
    ["center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
puncta_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', fill_color='#ff2b00', line_alpha=0, )

puncta_text_source = ColumnDataSource(puncta_centers.loc[
    (puncta_centers['image_number']==image_number)
    & (puncta_centers['nuclei_object_number'].isin(nuclei_subset['object_number']))
    , ["image_number", "nuclei_object_number", "center_x", "center_y",]
].to_dict("list"))
puncta_text_glyph = Text(
    x="center_x", y="center_y", text="nuclei_object_number", text_color="orange", text_font_size = {'value': '13px'}
)


# add puncta
plot = Plot(
    title=Title(text=f"Image {image_number}"),
    width=1000, height=800,
    match_aspect=True,
    toolbar_location=None
)
plot.add_glyph(nuclei_source, nuclei_glyph)
plot.add_glyph(nuclei_source, text_glyph)
plot.add_glyph(puncta_source, puncta_glyph)
plot.add_glyph(puncta_text_source, puncta_text_glyph)

xaxis = LinearAxis()
plot.add_layout(xaxis, 'above')
plot.x_range.start = 0
plot.x_range.end = 1280

yaxis = LinearAxis()
plot.add_layout(yaxis, 'left')
plot.y_range.start = 1024
plot.y_range.end = 0

plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

show(plot)

In [14]:
nuclei_subset

Unnamed: 0,image_number,object_number,path_name_tif,file_name_tif,center_x,center_y,bounding_box_min_x,bounding_box_max_x,bounding_box_min_y,bounding_box_max_y,bounding_box_area,orientation,major_axis_length,minor_axis_length,area,convex_area,perimeter,eccentricity,form_factor,compactness,angle
28,8,1,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,519.327838,90.822578,472,568,47,131,8064,-86.978972,97.946233,83.300009,6369,6504,300.592929,0.526029,0.885775,1.128954,-0.759034
29,8,2,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,659.799572,99.601487,602,715,43,156,12769,-49.266962,117.24601,107.057077,9819,9937,371.261977,0.407741,0.895192,1.117079,-0.429935
30,8,3,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,828.701866,102.761046,785,878,55,149,8742,38.350982,91.535448,87.974176,6269,6483,306.149278,0.276221,0.840508,1.189757,0.334675
31,8,4,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,274.411608,116.983843,229,321,71,167,8832,47.186081,93.874025,87.337133,6375,6568,306.776695,0.366634,0.851227,1.174775,0.411776
32,8,5,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,313.081578,195.742197,262,366,143,247,10816,-66.181067,103.967063,96.992177,7882,8153,344.67619,0.360103,0.833727,1.199434,-0.577538
33,8,6,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,1192.446637,234.823891,1151,1234,194,276,6806,47.978256,80.292636,75.229717,4713,4834,261.563492,0.349479,0.865671,1.155173,0.418689
35,8,8,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,963.645363,291.728664,910,1016,243,338,10070,-74.519536,106.32947,94.104865,7839,7991,338.63456,0.465532,0.859029,1.164105,-0.650305
36,8,9,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,250.555497,313.735638,212,288,269,362,7068,-3.219993,94.839017,76.120838,5640,5741,284.208153,0.596474,0.877437,1.139683,-0.0281
37,8,10,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,509.541974,311.114247,449,569,260,364,12480,84.318675,122.008916,100.890857,9637,9823,371.948268,0.562329,0.87536,1.142387,0.735819
38,8,11,/Users/sarahpyfrom/Dropbox/Mac/Desktop/Sarah_S...,sSP79_24_B6_CRE_001.tif,1169.541855,405.748176,1126,1217,363,449,7826,-70.662076,88.894001,82.651146,5758,5866,287.906638,0.368136,0.872927,1.145571,-0.616642


In [15]:
puncta_subset

Unnamed: 0,image_number,object_number,parent_manual_nuclei,center_x,center_y,bounding_box_min_x,bounding_box_max_x,bounding_box_min_y,bounding_box_max_y,bounding_box_area,orientation,major_axis_length,minor_axis_length,area,convex_area,perimeter,eccentricity,form_factor,compactness,integrated_intensity,min_intensity,max_intensity,mean_intensity,median_intensity,edge_integrated_intensity,edge_min_intensity,edge_max_intensity,edge_mean_intensity,nuclei_object_number,object_number_nuclei,bounding_box_min_x_nuclei,bounding_box_max_x_nuclei,bounding_box_min_y_nuclei,bounding_box_max_y_nuclei,angle
0,8,1,1.0,498.000000,69.000000,498.0,499.0,69.0,70.0,1.0,45.000000,0.000000,0.000000,1.0,1.0,0.000000,0.000000,inf,0.000000,0.021408,0.021408,0.021408,0.021408,0.021408,0.021408,0.021408,0.021408,0.021408,1.0,1.0,472.0,568.0,47.0,131.0,0.392699
1,8,1,1.0,499.875000,69.250000,499.0,502.0,68.0,72.0,12.0,5.392149,3.879071,3.114933,8.0,10.0,7.035534,0.595966,2.030980,0.492373,0.175387,0.020081,0.023667,0.021923,0.022263,0.153826,0.020081,0.023667,0.021975,1.0,1.0,472.0,568.0,47.0,131.0,0.047055
2,8,1,1.0,490.666667,72.333333,490.0,492.0,72.0,74.0,4.0,-45.000000,2.309401,1.333333,3.0,3.0,3.414214,0.816497,3.234073,0.309208,0.065339,0.020401,0.023178,0.021780,0.022469,0.065339,0.020401,0.023178,0.021780,1.0,1.0,472.0,568.0,47.0,131.0,-0.392699
3,8,1,1.0,496.000000,73.000000,496.0,497.0,73.0,74.0,1.0,45.000000,0.000000,0.000000,1.0,1.0,0.000000,0.000000,inf,0.000000,0.020157,0.020157,0.020157,0.020157,0.020157,0.020157,0.020157,0.020157,0.020157,1.0,1.0,472.0,568.0,47.0,131.0,0.392699
4,8,1,1.0,504.000000,76.000000,504.0,505.0,76.0,77.0,1.0,45.000000,0.000000,0.000000,1.0,1.0,0.000000,0.000000,inf,0.000000,0.020996,0.020996,0.020996,0.020996,0.020996,0.020996,0.020996,0.020996,0.020996,1.0,1.0,472.0,568.0,47.0,131.0,0.392699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,8,30,27.0,452.571429,823.428571,452.0,454.0,822.0,826.0,8.0,2.042808,4.714592,1.973594,7.0,8.0,7.207107,0.908164,1.693503,0.590492,0.149294,0.020233,0.022583,0.021328,0.021370,0.149294,0.020233,0.022583,0.021328,30.0,30.0,383.0,481.0,783.0,892.0,0.017827
428,8,30,27.0,444.000000,826.500000,444.0,445.0,826.0,828.0,2.0,0.000000,2.000000,0.000000,2.0,2.0,0.000000,1.000000,inf,0.000000,0.041108,0.020401,0.020706,0.020554,0.020706,0.041108,0.020401,0.020706,0.020554,30.0,30.0,383.0,481.0,783.0,892.0,0.000000
429,8,30,27.0,451.500000,827.500000,451.0,453.0,827.0,829.0,4.0,45.000000,2.000000,2.000000,4.0,4.0,4.000000,0.000000,3.141593,0.318310,0.085023,0.020798,0.021942,0.021256,0.021454,0.085023,0.020798,0.021942,0.021256,30.0,30.0,383.0,481.0,783.0,892.0,0.392699
430,8,30,27.0,437.000000,845.000000,437.0,438.0,845.0,846.0,1.0,45.000000,0.000000,0.000000,1.0,1.0,0.000000,0.000000,inf,0.000000,0.020310,0.020310,0.020310,0.020310,0.020310,0.020310,0.020310,0.020310,0.020310,30.0,30.0,383.0,481.0,783.0,892.0,0.392699


In [None]:
# need to turn this into histogram, etc.