In [1]:
import numpy as np
import pandas as pd
import scipy as sp
from scipy.spatial.distance import cdist
from matplotlib import pyplot as plt
from typing import List, Tuple
from tqdm import tqdm

In [2]:
# Only dots whose "magnification" column equals this value are used for matching.
selected_magnification = "60x"
# Minimum "Value2" a dot must reach to be kept as a matching candidate; raw and
# dw images are thresholded separately (raw_thr for "raw", dw__thr for "dw").
raw_thr, dw__thr = 0.15, 5.3

# Selecting reference dots

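Reference dots are selected by matching dots between `60x_dw` and `60x_raw`: a raw dot and a dw dot are paired when each is the other's nearest neighbor.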

In [3]:
# Tab-separated table of dots; the cells below read its "sid", "magnification",
# "image_type", "Value2", "x" and "y" columns.
dots_data_path = "/mnt/data/Imaging/202105-Deconwolf/data_210726/dots_data.clean.tsv.gz"
dots_data = pd.read_csv(dots_data_path, sep="\t")

In [4]:
# Match raw and dw dots series by series, at the selected magnification only.
# Within a series, raw dots are visited by decreasing "Value2"; a raw dot and a dw
# dot are paired when each is the other's nearest neighbour among the dots that are
# still unmatched. Matched rows/columns are blanked with NaN so they cannot be reused.
match_output: List[pd.DataFrame] = []  # one table of matched pairs per series
match_counts: List[Tuple[int, int, int, int]] = []  # (series, n_matched, n_raw, n_dw)
for series_id in tqdm(np.unique(dots_data["sid"])):
    dots_data2 = dots_data.loc[series_id == dots_data["sid"], :].copy()
    dots_data2 = dots_data2.loc[selected_magnification == dots_data2["magnification"], :].copy()

    raw = dots_data2.loc["raw" == dots_data2["image_type"], :].copy(
        ).sort_values("Value2", ascending=False).reset_index(drop=True)
    dw_ = dots_data2.loc[ "dw" == dots_data2["image_type"], :].copy(
        ).sort_values("Value2", ascending=False).reset_index(drop=True)

    # raw_id / dw__id below are row positions in these thresholded, re-indexed tables.
    selected_raw = raw.loc[raw_thr <= raw["Value2"], ["x", "y"]].copy().reset_index(drop=True)
    selected__dw = dw_.loc[dw__thr <= dw_["Value2"], ["x", "y"]].copy().reset_index(drop=True)

    if selected_raw.empty or selected__dw.empty:
        # Nothing to pair: keep a correctly shaped (possibly zero-sized) matrix so
        # the loop below simply finds no match.
        pdist = np.full((selected_raw.shape[0], selected__dw.shape[0]), np.nan)
    else:
        # Pairwise Euclidean distances, rows = raw dots, columns = dw dots.
        pdist = cdist(selected_raw, selected__dw)

    matched: List[Tuple[int, int, float]] = []
    for raw_id in range(pdist.shape[0]):
        # np.nanargmin raises ValueError on an empty or all-NaN slice, which happens
        # once every dw dot has been consumed (or none passed the threshold).
        if np.isnan(pdist[raw_id, :]).all():
            continue
        dw__id = np.nanargmin(pdist[raw_id, :])
        # Accept the pair only if the raw dot is also the closest one to the dw dot.
        if np.nanargmin(pdist[:, dw__id]) == raw_id:
            matched.append((raw_id, dw__id, pdist[raw_id, dw__id]))
            pdist[raw_id, :] = np.nan
            pdist[:, dw__id] = np.nan

    # reshape keeps the array 2-D (0 x 3) when a series has no match, so the
    # column slicing below stays valid.
    matched_a = np.array(matched, dtype=float).reshape(-1, 3)
    match_output.append(pd.DataFrame(dict(
        series=series_id,
        raw_id=matched_a[:, 0].astype("i"),
        dw__id=matched_a[:, 1].astype("i"),
        eudist=matched_a[:, 2]
    )))
    match_counts.append((series_id, matched_a.shape[0], selected_raw.shape[0], selected__dw.shape[0]))

100%|██████████| 7/7 [00:04<00:00,  1.42it/s]


In [5]:
# Sum the per-series counters column-wise. Columns follow the tuples appended to
# match_counts: (series id, matched pairs, selected raw dots, selected dw dots).
match_counts_a = np.array(match_counts)
column_totals = match_counts_a.sum(axis=0)
n_matched, n_raw, n_dw = column_totals[1], column_totals[2], column_totals[3]

# Share of selected dots, over all series, that ended up in a matched pair.
p_raw = n_matched / n_raw * 100
p__dw = n_matched / n_dw * 100
print(f"{p_raw:.2f}% raw dots matched to dw")
print(f"{p__dw:.2f}% dw dots matched to raw")

92.36% raw dots matched to dw
92.00% dw dots matched to raw


### Remove dots with distance >= 3 px and export

In [6]:
# Pool the per-series match tables. Each of them is indexed from 0, so
# ignore_index=True is needed to give every pooled row a unique label.
matched_dots = pd.concat(match_output, ignore_index=True)

# Keep only pairs closer than 3 px. A boolean mask filters row by row; dropping by
# index label on the non-unique concatenated index would also discard rows of other
# series that merely share a label with a distant pair.
max_eudist = 3
matched_dots = matched_dots.loc[matched_dots["eudist"] < max_eudist, :].reset_index(drop=True)

In [7]:
# Write the retained pairs as a tab-separated file, without the index column.
matched_dots_path = "../../data/60x_reference/matched_dots.tsv"
matched_dots.to_csv(matched_dots_path, sep="\t", index=False)