In [1]:
# this is close enough for now, should put into package before continuing to develop

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import t
import matplotlib.pyplot as plt

from bokeh.plotting import show
from bokeh.models.annotations import Title
from bokeh.models import Plot, ColumnDataSource, Ellipse, Grid, LinearAxis, Text
from bokeh.io import output_notebook, export_png

from harrison_functions.utils.std.text import camel_to_snake_case
from harrison_functions.utils.plotting.plotly import (plot_single_scatter,
                                                      plot_multiple_scatter,
                                                      plot_violin, save_fig_as_png)

pd.options.display.max_columns = None
output_notebook()

# troubleshooting
# from selenium import webdriver
# driver = webdriver.Firefox(executable_path='/home/harrisonized/geckodriver')

In [2]:
save=False

In [341]:
def min_vol_ellipse(P, tolerance=0.01):
    # [A , c] = Mnp.linalg.involEllipse(P, tolerance)
    # Finds the minimum volume enclsing ellipsoid (MVEE) of a set of data
    # points stored in matrix P. The following optimization problem is solved: 
    #
    # minimize       log(det(A))
    # subject to     (P_i - c)' * A * (P_i - c) <= 1
    #                
    # in variables A and c, where P_i is the i-th column of the matrix P. 
    # The solver is based on Khachiyan Algorithm, and the final solution 
    # is different from the optimal value by the pre-spesified amount of 'tolerance'.
    #
    # inputs:
    #---------
    # P : (d x N) dimnesional matrix containing N points in R^d.
    # tolerance : error in the solution with respect to the optimal value.
    #
    # outputs:
    #---------
    # A : (d x d) matrix of the ellipse equation in the 'center form': 
    # (x-c)' * A * (x-c) = 1 
    # c : 'd' dimensional vector as the center of the ellipse. 
    # 
    # example:
    # --------
    #      P = rand(5,100)
    #      [A, c] = Mnp.linalg.involEllipse(P, .01)
    #
    #      To reduce the computation time, work with the boundary points only:
    #      
    #      K = convhulln(P')  
    #      K = unique(K(:))  
    #      Q = P(:,K)
    #      [A, c] = Mnp.linalg.involEllipse(Q, .01)
    #
    #
    # Nima Moshtagh (nima@seas.upenn.edu)
    # University of Pennsylvania
    #
    # December 2005
    # UPDATE: Jan 2009
    ##################### Solving the Dual problem###########################5
    # ---------------------------------
    # data points 
    # -----------------------------------
    
    d, N = P.shape
    if N <= d:
        return np.nan, np.nan, np.nan, np.nan, np.nan
    
    # Q = np.zeros((d+1,N))
    # Q(1:d,:) = P(1:d,1:N)
    # Q(d+1,:) = np.ones(1,N)
    Q = np.vstack([P, np.ones((1, N))])

    # initializations
    # -----------------------------------
    count = 1
    err = 1
    u = (1/N) * np.ones((N, 1))  # 1st iteration
    
    # Khachiyan Algorithm
    # -----------------------------------
    while err > tolerance:
        try:
            X = np.dot(np.dot(Q, u* np.identity(N)), np.transpose(Q))
            M = np.diag(np.dot(np.dot(np.transpose(Q), np.linalg.inv(X)), Q))  # M the np.diagonal vector of an NxN matrix

            j = np.argmax(M)
            maximum = max(M)

            step_size = (maximum - d -1)/((d+1)*(maximum-1))
            new_u = (1 - step_size)*u 
            new_u[j] = new_u[j] + step_size

            count = count + 1
            err = np.linalg.norm(new_u - u)
            u = new_u
        except:
            print(P)
            break
                    
    ################### Computing the Ellipse parameters######################
    # Finds the ellipse equation in the 'center form': 
    # (x-c)' * A * (x-c) = 1
    # It computes a dxd matrix 'A' and a d dimensional vector 'c' as the center
    # of the ellipse. 
    U = u * np.identity(N)
    # return P, u
    
    # the A matrix for the ellipse
    # A = (1/d) * inv(P * U * P' - (P * u)*(P*u)' );
    # --------------------------------------------
    A = (1/d) * np.linalg.inv(
        np.dot(np.dot(P, U), np.transpose(P)) - np.dot(np.dot(P, u), np.transpose(np.dot(P, u)))
    )
    
    # center of the ellipse 
    # --------------------------------------------
    c = np.dot(P, u)
    
    # original return value
    # return A, c
    
    center_x = c[0][0]
    center_y = c[1][0]

    u, s, vh = np.linalg.svd(A)
    # return u, s, vh  # troubleshooting
    
    try:
        items = np.sort(np.sqrt(1/s))
        minor_axis_length, major_axis_length = items[0], items[1]
    except:
        print(items)
        minor_axis_length, major_axis_length = np.nan, np.nan
    
    orientation = -np.arcsin(u[0][0])*180/np.pi
    
    return center_x, center_y, minor_axis_length, major_axis_length, orientation

    # bounding box
    inv_A = np.linalg.inv(A)  # diagonals are the important terms

    min_x = center_x - np.sqrt(inv_A[0][0])
    max_x = center_x + np.sqrt(inv_A[0][0])
    min_y = center_y - np.sqrt(inv_A[1][1])
    max_y = center_y + np.sqrt(inv_A[1][1])
    
    return center_x, center_y, min_x, max_x, min_y, max_y

In [224]:
def find_nearest_point(point, points:list):
    """O(n^2) algorithm to find the nearest point
    Can make this faster with binary search on one of the variables
    However, since this is a small dataset (20 nuclei per image), whatever
    
    >>> find_nearest_point(
        point=(281.415801, 135.945238),
        points=[(693.094713, 59.080090), (295.184921, 118.996760), (282.528024, 182.998269)],
    )
    (295.184921, 118.99676)
    """
    
    d = np.inf
    for x, y in points:
        d_current = np.sqrt((point[0]-x)**2+(point[1]-y)**2)
        if d_current < d:
            closest_point = (x, y)
            d = d_current
        
    return closest_point

def flatten_columns(multicols):
    """Flattens a 2 level multi-index
    """
    return [f'{cols[0].lower()}_{cols[1]}'.strip('_') for cols in multicols]

In [185]:
puncta_cols = [
    'image_number', 'object_number', 'parent_manual_nuclei', 'center_x',
    'center_y', 'bounding_box_min_x', 'bounding_box_max_x',
    'bounding_box_min_y', 'bounding_box_max_y', 'bounding_box_area',
    'orientation', 'major_axis_length', 'minor_axis_length', 'area',
    'convex_area', 'perimeter', 'eccentricity', 'form_factor',
    'compactness', 'integrated_intensity', 'min_intensity', 'max_intensity',
    'mean_intensity', 'median_intensity', 'edge_integrated_intensity',
    'edge_min_intensity', 'edge_max_intensity', 'edge_mean_intensity',
]

# Data

In [6]:
# read in data
nuclei = pd.read_csv("data/nuclei_subset.csv")
puncta = pd.read_csv("data/puncta_subset.csv")

In [7]:
# ----------------------------------------------------------------------
# Reassign nuclei
puncta_centers = (
    puncta
    .groupby(["image_number", "parent_manual_nuclei"])[["center_x", "center_y"]]
    .mean()
    .reset_index()
)
puncta_centers['center'] = puncta_centers[['center_x', 'center_y']].apply(list, axis=1)


# use find_nearest_point to find the center of the closest nuclei
# there are more nuclei than puncta, so this is fine
puncta_centers[["closest_nuclei_x", "closest_nuclei_y"]] = pd.DataFrame(
    puncta_centers[['image_number', 'center']].apply(
    lambda x: find_nearest_point(
        point=x['center'],
        points=nuclei.loc[(nuclei['image_number']==x['image_number']),
                          ["center_x", "center_y"]].to_records(index=False)
    )
    , axis=1).to_list(),
    columns=["closest_nuclei_x", "closest_nuclei_y"],
)

# left join nuclei_table on closest_nuclei_x and closest_nuclei_y
puncta_centers['nuclei_object_number'] = pd.merge(
    left=puncta_centers[["closest_nuclei_x", "closest_nuclei_y", 'image_number', 'parent_manual_nuclei']],
    right=nuclei[['center_x', 'center_y', 'image_number', 'object_number']],
    left_on=["closest_nuclei_x", "closest_nuclei_y", 'image_number',],
    right_on=['center_x', 'center_y', 'image_number',],
    how='left',
    suffixes=('', '_nuclei')
)['object_number']


# add back to puncta
puncta = pd.merge(
    left=puncta[puncta_cols],
    right=puncta_centers[['image_number', 'parent_manual_nuclei', 'nuclei_object_number']],
    left_on=['image_number', 'parent_manual_nuclei'],
    right_on=['image_number', 'parent_manual_nuclei',],
    how='left',
    suffixes=('', '_')
)


# filter puncta that are too far away from the nuclei
puncta = pd.merge(
    left=puncta[list(puncta_cols)+['nuclei_object_number']],
    right=nuclei[['image_number', 'object_number', 'bounding_box_min_x', 'bounding_box_max_x', 'bounding_box_min_y', 'bounding_box_max_y']],
    left_on=['image_number', 'nuclei_object_number'],
    right_on=['image_number', 'object_number'],
    how='left',
    suffixes=('', '_nuclei')
)  # left join nuclei data

puncta = puncta[
    (puncta['center_x'] >= puncta['bounding_box_min_x_nuclei']) & 
    (puncta['center_x'] <= puncta['bounding_box_max_x_nuclei']) &
    (puncta['center_y'] >= puncta['bounding_box_min_y_nuclei']) &
    (puncta['center_y'] <= puncta['bounding_box_max_y_nuclei'])
].copy()  # filter


# regenerate puncta_centers using filtered data
puncta_centers = (
    puncta
    .groupby(["image_number", "nuclei_object_number"])[["center_x", "center_y"]]
    .mean()
    .reset_index()
)

# Filter

In [217]:
# filters
nuclei_tmp = nuclei[
    (nuclei['eccentricity'] < 0.69)
    & (nuclei['major_axis_length'] < 128)
].copy()

puncta = pd.merge(
    left=nuclei_tmp[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])  # left join without duplicates

# Draw Boundaries around Puncta

In [218]:
nuclei['effective_radius_nuclei'] = nuclei['area'].apply(lambda x: np.sqrt(x/3.14159))

In [219]:
puncta_summary = puncta.groupby(["image_number", "nuclei_object_number"]).agg(
        {
            "area": [sum, "count"],
            "integrated_intensity": sum,
            "center_x": [np.mean, np.std],
            "center_y": [np.mean, np.std],
        }
    ).reset_index()
puncta_summary.columns = flatten_columns(puncta_summary.columns)

# derive effective radius
puncta_summary["center_std"] = np.sqrt(puncta_summary["center_x_std"]**2+puncta_summary["center_y_std"]**2)
puncta_summary["effective_radius_puncta"] = puncta_summary["center_std"].apply(lambda x: x*t.ppf(0.90, 2))  # 90% CI

# fillna
puncta_summary.loc[puncta_summary["effective_radius_puncta"].isna(), "effective_radius_puncta"
] = puncta_summary.loc[puncta_summary["effective_radius_puncta"].isna(), "area_sum"].apply(
    lambda x: np.sqrt(x / 3.14159)
)
puncta_summary["bounding_box_min_x"] = puncta_summary["center_x_mean"] - puncta_summary["effective_radius_puncta"]
puncta_summary["bounding_box_max_x"] = puncta_summary["center_x_mean"] + puncta_summary["effective_radius_puncta"]
puncta_summary["bounding_box_min_y"] = puncta_summary["center_y_mean"] - puncta_summary["effective_radius_puncta"]
puncta_summary["bounding_box_max_y"] = puncta_summary["center_y_mean"] + puncta_summary["effective_radius_puncta"]

puncta_summary

Unnamed: 0,image_number,nuclei_object_number,area_sum,area_count,integrated_intensity_sum,center_x_mean,center_x_std,center_y_mean,center_y_std,center_std,effective_radius_puncta,bounding_box_min_x,bounding_box_max_x,bounding_box_min_y,bounding_box_max_y
0,3,2.0,36.0,10,0.826917,281.415801,3.973904,135.945238,3.504848,5.298667,9.991262,271.424539,291.407063,125.953976,145.936500
1,3,3.0,41.0,6,1.006195,308.584722,2.340527,197.160417,4.868488,5.401874,10.185872,298.398850,318.770594,186.974545,207.346289
2,3,5.0,8.0,1,0.211475,314.875000,,197.875000,,,1.595770,313.279230,316.470770,196.279230,199.470770
3,3,7.0,62.0,5,1.647883,1101.156032,2.224032,380.154921,3.402007,4.064477,7.664051,1093.491981,1108.820083,372.490870,387.818972
4,3,10.0,286.0,28,8.031144,223.689128,9.258510,448.006676,8.897830,12.841005,24.213231,199.475897,247.902359,423.793446,472.219907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,20,49.0,4.0,1,0.091157,661.500000,,867.500000,,,1.128380,660.371620,662.628380,866.371620,868.628380
93,20,50.0,37.0,10,0.870710,142.769881,7.776392,923.256310,6.621799,10.213740,19.259213,123.510668,162.029094,903.997096,942.515523
94,20,51.0,175.0,16,4.513405,1060.745804,3.282882,907.430531,5.931118,6.779046,12.782693,1047.963111,1073.528497,894.647838,920.213224
95,20,52.0,19.0,15,0.433860,256.533333,14.516575,889.666667,11.482388,18.508814,34.900554,221.632779,291.433887,854.766113,924.567221


In [220]:
# get nuclei boundaries
shapes = list(
    puncta_summary.loc[
        (puncta_summary['image_number']==3)
        , ["bounding_box_min_x",
         "bounding_box_max_x",
         "bounding_box_min_y",
         "bounding_box_max_y",]
    ]
    .rename(columns={
                "bounding_box_min_x": "x0",
                "bounding_box_max_x": "x1",
                "bounding_box_min_y": "y0",
                "bounding_box_max_y": "y1",}
           )
    .apply(lambda x: {**{"type": "circle", 'xref':"x", 'yref':"y", 'line':{'width':1.5}}, **dict(x)}, axis=1)
)

In [221]:
# plot puncta
fig = plot_single_scatter(
    puncta[puncta['image_number']==3].copy(),
    x='center_x',
    y='center_y',
    title='Puncta',
    xlabel='x',
    ylabel='y'
)

fig.layout.update(
    xaxis = {'range': [-50, 1250], 'constrain': "domain"},
    yaxis = {'range': [1050, -50], 'scaleanchor': 'x', 'scaleratio': 1},
    shapes=shapes,
    height=700,
)
fig.update_traces(
    marker=dict(size=3)
)

if save:
    save_fig_as_png(fig, 'figures/bounding_boxes/puncta_bounding_box_3.png', height=800, scale=1)

fig

# SVD Around Puncta

In [186]:
puncta['center'] = puncta[['center_x', 'center_y']].apply(list, axis=1)
tmp = puncta.groupby(['image_number', 'object_number'])[['center']].agg(list).reset_index().copy()
P = np.transpose(np.array(tmp.loc[0, 'center']))

In [363]:
P = np.array(tmp[(tmp['image_number']==image_number) & (tmp['object_number']==3)]['center'].iloc[0]).T
P

array([[310.        , 309.25      , 306.8       , 311.33333333,
        309.25      , 304.875     ],
       [188.        , 196.        , 198.4       , 198.66666667,
        200.33333333, 201.5625    ]])

In [364]:
# sanity check
# center_x, center_y, min_x, max_x, min_y, max_y = min_vol_ellipse(P, tolerance=0.01)
center_x, center_y, minor_axis_length, major_axis_length, orientation = min_vol_ellipse(P, tolerance=0.01)
center_x, center_y, minor_axis_length, major_axis_length, orientation

(308.7364972773019,
 196.13880627214218,
 3.2598210352610546,
 8.435214186404822,
 74.04746228573099)

In [348]:
tmp[["center_x", "center_y", "minor_axis_length", "major_axis_length", "orientation"]] = pd.DataFrame(
    tmp["center"]
    .apply(lambda x: min_vol_ellipse(np.transpose(np.array(x)), tolerance=0.001))
    .to_list()
)

In [358]:
tmp[(tmp['image_number']==image_number) & (tmp['object_number']==3)]

Unnamed: 0,image_number,object_number,center,center_x,center_y,minor_axis_length,major_axis_length,orientation,center_x2,center_y2
1,3,3,"[[310.0, 188.0], [309.25, 196.0], [306.8, 198....",308.737698,196.07284,3.279052,8.523844,74.001168,308.584722,197.160417


In [344]:
tmp[["center_x2", "center_y2"]] = pd.DataFrame(tmp['center'].apply(lambda x: np.mean(np.array(x), axis=0)).to_list())

In [386]:
image_number = 3

# subset by image
nuclei_subset = tmp[tmp['image_number']==image_number].copy()
puncta_subset = pd.merge(
    left=nuclei_subset[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])


# add nuclei
nuclei_subset['angle'] = nuclei_subset['orientation'].apply(lambda x: x/180*np.pi)
nuclei_source = ColumnDataSource(nuclei_subset.loc[
    # (nuclei_subset['object_number']==2)
    :, ["object_number", "center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
nuclei_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', line_color='#FFFFFF', fill_color='#000fff', line_width=1.2)
text_glyph = Text(x="x", y="y", text="object_number", text_color="white", text_font_size = {'value': '13px'})


# add puncta
puncta_subset['angle'] = puncta_subset['orientation'].apply(lambda x: x/180*np.pi)

puncta_source = ColumnDataSource(puncta_subset[
    ["center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
puncta_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', fill_color='#ff2b00', line_alpha=0, )

puncta_text_source = ColumnDataSource(puncta_centers.loc[
    (puncta_centers['image_number']==image_number)
    & (puncta_centers['nuclei_object_number'].isin(nuclei_subset['object_number']))
    , ["image_number", "nuclei_object_number", "center_x", "center_y",]
].to_dict("list"))
puncta_text_glyph = Text(
    x="center_x", y="center_y", text="nuclei_object_number", text_color="orange", text_font_size = {'value': '13px'}
)


# add puncta
plot = Plot(
    title=Title(text=f"Image {image_number}"),
    width=1000, height=800,
    match_aspect=True,
    toolbar_location=None
)
plot.add_glyph(nuclei_source, nuclei_glyph)
plot.add_glyph(nuclei_source, text_glyph)
plot.add_glyph(puncta_source, puncta_glyph)
plot.add_glyph(puncta_text_source, puncta_text_glyph)


xaxis = LinearAxis()
plot.add_layout(xaxis, 'above')
plot.x_range.start = 0
plot.x_range.end = 1290

yaxis = LinearAxis()
plot.add_layout(yaxis, 'left')
plot.y_range.start = 1000
plot.y_range.end = 0

plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

show(plot)

In [385]:
image_number = 3

# subset by image
nuclei_subset = nuclei[nuclei['image_number']==image_number].copy()
puncta_subset = pd.merge(
    left=nuclei_subset[["image_number", 'object_number']],
    right=puncta.loc[:, puncta.columns != 'object_number'],
    left_on=["image_number", 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    how="left",
).dropna(subset=['nuclei_object_number'])


# add nuclei
nuclei_subset['angle'] = nuclei_subset['orientation'].apply(lambda x: x/180*np.pi)
nuclei_source = ColumnDataSource(nuclei_subset.loc[
    # (nuclei_subset['object_number']==2)
    :, ["object_number", "center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
nuclei_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', line_color='#FFFFFF', fill_color='#000fff', line_width=1.2)
text_glyph = Text(x="x", y="y", text="object_number", text_color="white", text_font_size = {'value': '13px'})


# add puncta
puncta_subset['angle'] = puncta_subset['orientation'].apply(lambda x: x/360*np.pi)

puncta_source = ColumnDataSource(puncta_subset[
    ["center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
).to_dict("list"))
puncta_glyph = Ellipse(x="x", y="y", width="w", height="h", angle='angle', fill_color='#ff2b00', line_alpha=0, )

puncta_text_source = ColumnDataSource(puncta_centers.loc[
    (puncta_centers['image_number']==image_number)
    & (puncta_centers['nuclei_object_number'].isin(nuclei_subset['object_number']))
    , ["image_number", "nuclei_object_number", "center_x", "center_y",]
].to_dict("list"))
puncta_text_glyph = Text(
    x="center_x", y="center_y", text="nuclei_object_number", text_color="orange", text_font_size = {'value': '13px'}
)


# add puncta
plot = Plot(
    title=Title(text=f"Image {image_number}"),
    width=1000, height=800,
    match_aspect=True,
    toolbar_location=None
)
plot.add_glyph(nuclei_source, nuclei_glyph)
plot.add_glyph(nuclei_source, text_glyph)
plot.add_glyph(puncta_source, puncta_glyph)
plot.add_glyph(puncta_text_source, puncta_text_glyph)

xaxis = LinearAxis()
plot.add_layout(xaxis, 'above')
plot.x_range.start = 625
plot.x_range.end = 680

yaxis = LinearAxis()
plot.add_layout(yaxis, 'left')
plot.y_range.start = 540
plot.y_range.end = 440

plot.add_layout(Grid(dimension=0, ticker=xaxis.ticker))
plot.add_layout(Grid(dimension=1, ticker=yaxis.ticker))

show(plot)

In [127]:
puncta_subset[
    ["center_x", "center_y", "major_axis_length", "minor_axis_length", "angle"]
].rename(
    columns={
        "center_x": "x",
        "center_y": "y",
        "major_axis_length": "h",
        "minor_axis_length": "w"
    }
)

Unnamed: 0,x,y,h,w,angle
1,275.000000,131.000000,0.000000,0.000000,0.392699
2,280.000000,131.000000,0.000000,0.000000,0.392699
3,285.500000,133.000000,2.000000,0.000000,0.785397
4,280.142857,134.785714,5.195207,3.772838,-0.378292
5,277.000000,136.000000,0.000000,0.000000,0.392699
...,...,...,...,...,...
356,813.080000,932.240000,6.719555,5.297507,0.198468
357,75.400000,871.000000,3.200000,2.529822,0.785397
358,77.000000,876.000000,0.000000,0.000000,0.392699
359,83.800000,877.400000,3.098387,1.788854,-0.624522


In [125]:
puncta_source

# Scatter Plots

In [14]:
summary = pd.merge(
    left=nuclei[['image_number', 'object_number', 'area', 'effective_radius_nuclei']],
    right=puncta_summary,
    left_on=['image_number', 'object_number'],
    right_on=['image_number', 'nuclei_object_number'],
    suffixes=('_nuclei', '_puncta'),
    how='left'
).fillna({'area_puncta': 0, 'integrated_intensity': 0})

# fillna
summary['nuclei_object_number'] = summary['object_number']
summary.loc[
    (summary["effective_radius_puncta"].isna()), "effective_radius_puncta"
] = summary.loc[
    (summary["effective_radius_puncta"].isna()), "effective_radius_nuclei"
] 

summary['pct_puncta'] = (
    summary['effective_radius_puncta'] / summary['effective_radius_nuclei']
).fillna(0)

summary.loc[summary['integrated_intensity_sum'].isna(), 'integrated_intensity_sum'] = 0

summary['image_number_str'] = summary['image_number'].astype(str)

In [15]:
fig = plot_multiple_scatter(
    summary,
    x='area_sum',
    y='integrated_intensity_sum',
    c='image_number_str',
    xlabel='Total Area',
    ylabel='Total Integrated Intensity',
    title='Puncta Total Intensity vs. Total Area'
)

if save:
    save_fig_as_png(fig, 'figures/scatter/scatter_intensity_area.png', height=800, scale=1)

fig

In [16]:
fig = plot_multiple_scatter(
    summary,
    x='pct_puncta',
    y='integrated_intensity_sum',
    c='image_number_str',
    xlabel='Effective Radius Ratio Puncta / Nucleus',
    ylabel='Total Integrated Intensity',
    title='Puncta Total Intensity vs. Effective Radius Ratio'
)

if save:
    save_fig_as_png(fig, 'figures/scatter/scatter_intensity_radius_ratio.png',
                    height=800, scale=1)

fig

In [17]:
# check why some are gt 1
summary[(summary['effective_radius_puncta'] > summary['effective_radius_nuclei'])]

Unnamed: 0,image_number,object_number,area,effective_radius_nuclei,nuclei_object_number,area_sum,area_count,integrated_intensity_sum,center_x_mean,center_x_std,center_y_mean,center_y_std,center_std,effective_radius_puncta,bounding_box_min_x,bounding_box_max_x,bounding_box_min_y,bounding_box_max_y,pct_puncta,image_number_str,color
70,18,11,4201,36.568032,11,11.0,6.0,0.322499,317.0,27.299166,345.138889,20.251863,33.990917,64.093887,252.906113,381.093887,281.045002,409.232776,1.75273,18,#2ca02c
73,18,14,7615,49.233442,14,6.0,5.0,0.175021,221.4,20.305172,395.1,17.096783,26.544303,50.052417,171.347583,271.452417,345.047583,445.152417,1.016635,18,#2ca02c
98,20,10,4872,39.380288,10,17.0,9.0,0.386191,670.185185,15.198786,208.851852,20.511082,25.528564,48.137121,622.048064,718.322306,160.714731,256.988973,1.222366,20,#d62728
115,20,27,4565,38.119363,27,46.0,21.0,1.051164,621.704762,13.251852,565.461905,17.551588,21.992495,41.469446,580.235315,663.174208,523.992458,606.931351,1.087884,20,#d62728
116,20,28,5749,42.778091,28,15.0,9.0,0.341527,319.518519,21.523799,550.592593,13.758864,25.54565,48.16934,271.349178,367.687859,502.423253,598.761933,1.126028,20,#d62728
135,20,47,7351,48.372492,47,17.0,7.0,0.395056,490.452381,18.939649,846.588435,19.741262,27.357408,51.585623,438.866758,542.038004,795.002813,898.174058,1.066425,20,#d62728


In [19]:
fig = plot_violin(
    summary,
    x='image_number_str',
    y='pct_puncta',
    xlabel='Image Number',
    ylabel='Effective Radius Ratio Puncta / Nucleus',
    title='Distribution of Puncta / Nucleus Ratio'
)

fig.update_xaxes(type='category')

if save:
    save_fig_as_png(fig, 'figures/scatter/violin_radius_ratio.png',
                    height=800, scale=1)
fig