In [1]:
import numpy as np
import pandas as pd
import scipy as sp
from scipy.spatial.distance import cdist
from matplotlib import pyplot as plt
from typing import List, Tuple
from tqdm import tqdm

In [2]:
# Magnification label kept when filtering the `magnification` column of the dots table.
selected_magnification = "60x"
# Minimum `Value2` a dot from a "raw" image must reach to be considered for matching.
raw_thr = 0.15
# Minimum `Value2` a dot from a "dw" image must reach to be considered for matching.
dw__thr = 5.3
# Largest raw-to-dw Euclidean distance for a matched pair to be kept
# (same units as the x/y columns -- presumably pixels; TODO confirm).
max_dist = 3

# Selecting reference dots

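Reference dots are selected by matching dots between `60x_dw` and `60x_raw` within each series: dots passing the `Value2` thresholds are paired as mutual nearest neighbours, and only pairs at most `max_dist` apart are kept.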

In [3]:
# Load the cleaned dots table (tab-separated, gzip-compressed).
# Columns used further down: sid, magnification, image_type, Value2, x, y.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere unless the file is available at the same location.
dots_data = pd.read_csv("/mnt/data/Imaging/202105-Deconwolf/data_210726/dots_data.clean.tsv.gz", sep="\t")

In [4]:
def get_unassigned_count(pdist: np.ndarray) -> int:
    """Count the rows of ``pdist`` that are not entirely NaN.

    A row that is NaN in every column is treated as already assigned (or as
    having no candidate left); every other row still has at least one usable
    distance and is counted.

    Args:
        pdist: 2-D distance matrix in which consumed entries are set to NaN.

    Returns:
        Number of rows holding at least one non-NaN value.
    """
    row_has_candidate = (~np.isnan(pdist)).any(axis=1)
    return np.sum(row_has_candidate)

def match_dots(dotsA: np.ndarray, dotsB: np.ndarray) -> List[Tuple[int, int, float]]:
    """Pair up points of ``dotsA`` and ``dotsB`` that are mutual nearest neighbours.

    The full Euclidean distance matrix is computed once. The rows are then
    swept repeatedly: a row is paired with its closest remaining column only
    if that column's closest remaining row is the same row. A paired row and
    column are blanked with NaN so they cannot be used again. Sweeping stops
    when no row has a candidate left or when a sweep produced no new pair.

    Progress ("Iteration #...1...2") is printed to stdout as a side effect.

    Args:
        dotsA: Coordinates of the first point set, one point per row.
        dotsB: Coordinates of the second point set, one point per row.

    Returns:
        ``(row_in_dotsA, row_in_dotsB, distance)`` tuples in the order the
        pairs were found. No distance cut-off is applied here.
    """
    distances = cdist(dotsA, dotsB)
    n_rows = distances.shape[0]

    pairs: List[Tuple[int, int, float]] = []
    # Start above any reachable count so the first sweep is always attempted.
    open_before = n_rows + 1
    sweep = 0
    print("Iteration #", end="")
    while True:
        open_now = get_unassigned_count(distances)
        # Stop when everything is consumed or the last sweep made no progress.
        if open_now == 0 or open_now >= open_before:
            break
        sweep += 1
        print(f"...{sweep}", end="")
        open_before = open_now

        for row in range(n_rows):
            if np.isnan(distances[row, :]).all():
                # Row already paired, or no candidate column left for it.
                continue
            col = np.nanargmin(distances[row, :])
            if np.nanargmin(distances[:, col]) != row:
                # The column prefers another row; retry in a later sweep.
                continue
            pairs.append((row, col, distances[row, col]))
            distances[row, :] = np.nan
            distances[:, col] = np.nan
    print("")
    return pairs

In [5]:
# Per-series results.
#   match_output: one DataFrame per series holding the matches kept after the
#                 distance cut-off (columns: series, raw_id, dw__id, eudist).
#   match_counts: one row per series,
#                 (series_id, kept matches, matches beyond max_dist,
#                  selected raw dots, selected dw dots).
match_output: List[pd.DataFrame] = []
match_counts: List[Tuple[int, int, int, int, int]] = []
for series_id in tqdm(np.unique(dots_data["sid"])):
    # Dots of the current series at the selected magnification.
    dots_data2 = dots_data.loc[
        (series_id == dots_data["sid"])
        & (selected_magnification == dots_data["magnification"]), :].copy()

    # Split by image type, ordered by decreasing Value2.
    raw = dots_data2.loc["raw" == dots_data2["image_type"], :].sort_values(
        "Value2", ascending=False).reset_index(drop=True)
    dw_ = dots_data2.loc["dw" == dots_data2["image_type"], :].sort_values(
        "Value2", ascending=False).reset_index(drop=True)

    # Keep the coordinates of dots passing the per-type threshold. After the
    # index reset, the ids returned by match_dots are row positions in these
    # two frames.
    selected_raw = raw.loc[raw_thr <= raw["Value2"], ["x", "y"]].reset_index(drop=True)
    selected__dw = dw_.loc[dw__thr <= dw_["Value2"], ["x", "y"]].reset_index(drop=True)

    matched: List[Tuple[int, int, float]] = match_dots(selected_raw, selected__dw)

    # Build the table from the tuples with explicit column names so that a
    # series without any match yields an empty, correctly shaped frame instead
    # of failing on 2-D indexing of an empty array.
    matched_field = pd.DataFrame(matched, columns=["raw_id", "dw__id", "eudist"]).astype(
        {"raw_id": "i", "dw__id": "i", "eudist": "f8"})
    matched_field.insert(0, "series", np.full(matched_field.shape[0], series_id))

    # Only pairs at most max_dist apart are kept as reference dots.
    matched_field_within_dist = matched_field[matched_field["eudist"] <= max_dist]
    match_output.append(matched_field_within_dist)
    match_counts.append((series_id,
                         matched_field_within_dist.shape[0],
                         matched_field.shape[0]-matched_field_within_dist.shape[0],
                         selected_raw.shape[0],
                         selected__dw.shape[0]))

  0%|          | 0/7 [00:00<?, ?it/s]

Iteration #...1...2...3

 14%|█▍        | 1/7 [00:00<00:04,  1.28it/s]

...4...5...6
Iteration #...1...2...3...4...5...6...7...8...9

 29%|██▊       | 2/7 [00:02<00:04,  1.05it/s]


Iteration #...1...2...3...4...5...6

 43%|████▎     | 3/7 [00:02<00:03,  1.14it/s]

...7...8
Iteration #...1...2...3...4...5...6...7...8

 57%|█████▋    | 4/7 [00:03<00:02,  1.08it/s]

...9...10...11
Iteration #...1...2...3...4...5...6

 71%|███████▏  | 5/7 [00:05<00:02,  1.13s/it]

...7


 86%|████████▌ | 6/7 [00:06<00:01,  1.00s/it]

Iteration #...1...2...3...4


100%|██████████| 7/7 [00:06<00:00,  1.02it/s]

Iteration #...1...2...3...4...5...6





In [7]:
# Stack the per-series count rows into a 2-D array with columns
# (series_id, matched, matched_over_dist, raw_total, dw_total).
match_counts_a = np.array(match_counts)
# Totals over all series; the leading series_id column is left out.
n_total_matched, n_total_over_dist, n_total_raw, n_total__dw = match_counts_a[:, 1:5].sum(axis=0)
print("\n".join([
    f"{n_total_matched}/{n_total_raw} ({n_total_matched/n_total_raw*100:.2f}%) raw dots matched to dw",
    f"{n_total_matched}/{n_total__dw} ({n_total_matched/n_total__dw*100:.2f}%) dw dots matched to raw",
    f"{n_total_over_dist} ({n_total_over_dist/n_total_raw*100:.2f}% raw, {n_total_over_dist/n_total__dw*100:.2f}% dw) dots matched with distance > {max_dist}",
]))

9440/10292 (91.72%) raw dots matched to dw
9440/10333 (91.36%) dw dots matched to raw
743 (7.22% raw, 7.19% dw) dots matched with distance > 3


In [8]:
# Per-series breakdown of the matching statistics.
# Each row of match_counts_a is
# (series_id, matched, matched_over_dist, raw_total, dw_total); the label is
# taken from the recorded series id rather than from the row position, so it
# stays correct when series ids do not start at 0 or are not contiguous.
for series_id, n_matched, n_over_dist, n_raw, n__dw in match_counts_a:
    print(f"""Field #{series_id}
    {n_matched}/{n_raw} ({n_matched/n_raw*100:.2f}%) raw dots matched to dw
    {n_matched}/{n__dw} ({n_matched/n__dw*100:.2f}%) dw dots matched to raw
    {n_over_dist} ({n_over_dist/n_raw*100:.2f}% raw, {n_over_dist/n__dw*100:.2f}% dw) dots matched with distance > {max_dist}
    """)

Field #0
    1308/1411 (92.70%) raw dots matched to dw
    1308/1398 (93.56%) dw dots matched to raw
    90 (6.38% raw, 6.44% dw) dots matched with distance > 3
    
Field #1
    1921/2069 (92.85%) raw dots matched to dw
    1921/2076 (92.53%) dw dots matched to raw
    148 (7.15% raw, 7.13% dw) dots matched with distance > 3
    
Field #2
    1045/1138 (91.83%) raw dots matched to dw
    1045/1115 (93.72%) dw dots matched to raw
    70 (6.15% raw, 6.28% dw) dots matched with distance > 3
    
Field #3
    1548/1679 (92.20%) raw dots matched to dw
    1548/1684 (91.92%) dw dots matched to raw
    131 (7.80% raw, 7.78% dw) dots matched with distance > 3
    
Field #4
    2404/2648 (90.79%) raw dots matched to dw
    2404/2575 (93.36%) dw dots matched to raw
    171 (6.46% raw, 6.64% dw) dots matched with distance > 3
    
Field #5
    691/759 (91.04%) raw dots matched to dw
    691/809 (85.41%) dw dots matched to raw
    68 (8.96% raw, 8.41% dw) dots matched with distance > 3
    
Field

### Export

In [9]:
# Stack the per-series tables of kept matches into a single frame.
# Row labels are carried over from each per-series table (no ignore_index),
# so labels can repeat across series.
matched_dots = pd.concat(match_output)

In [10]:
# Write the kept matches as a tab-separated file, without the row index.
# The path is relative to the notebook's working directory.
matched_dots.to_csv("../../data/60x_reference/matched_dots.global_thr.tsv", sep="\t", index=False)

In [11]:
# Write the per-series counts as a tab-separated file, without the row index.
# The column names follow the order in which each row of match_counts was
# assembled: series id, kept matches, matches beyond max_dist, and the number
# of selected raw and dw dots.
pd.DataFrame(match_counts_a, columns=["series_id", "matched", "matched_over_dist", "raw_total", "dw_total"]
            ).to_csv("../../data/60x_reference/matched_dots.global_thr.counts.tsv", sep="\t", index=False)