Decon + mirror plot

In [6]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Root folder: searched recursively for "*runA_mass.txt" / "*runB_mass.txt"
# files, and the summary CSV is written directly into it.
DIR = Path(r"F:\tissue\embedding_distance_nodrop\decon")

def list_runs(directory: Path):
    """Find the run-A and run-B mass files anywhere below *directory*.

    Returns:
        A ``(a_files, b_files)`` tuple of sorted path lists matching
        ``*runA_mass.txt`` and ``*runB_mass.txt`` respectively.
    """
    found = {
        tag: sorted(directory.rglob(f"*run{tag}_mass.txt"))
        for tag in ("A", "B")
    }
    return found["A"], found["B"]

def make_pair_key(p: Path):
    """Return the file name of *p* with every run tag replaced by ``{RUN}``.

    The run-A and run-B files of one sample therefore share the same key.
    """
    key = p.name
    for run_tag in ("runA", "runB"):
        key = key.replace(run_tag, "{RUN}")
    return key

def load_mass_file(p: Path) -> pd.DataFrame:
    """Read a whitespace-separated two-column (mass, intensity) text file.

    Args:
        p: Path of the text file to read.

    Returns:
        A DataFrame with numeric ``mass`` and ``intensity`` columns that
        contains only the rows where both values are finite.
    """
    df = pd.read_csv(p, sep=r"\s+", header=None, names=["mass", "intensity"], engine="python")
    # A header or annotation line turns the whole column into object dtype,
    # on which np.isfinite raises TypeError. Coerce first so such rows simply
    # become NaN and are dropped by the finite filter below.
    for column in ("mass", "intensity"):
        df[column] = pd.to_numeric(df[column], errors="coerce")
    finite = np.isfinite(df["mass"]) & np.isfinite(df["intensity"])
    return df[finite].copy()

def _mean_by_mass(df: pd.DataFrame, key: str = "mass") -> pd.DataFrame:
    """Collapse rows that share the same *key* value to their mean intensity."""
    return df.groupby(key, as_index=False)["intensity"].mean()


def align_runs(dfA: pd.DataFrame, dfB: pd.DataFrame,
               min_overlap: float = 0.5, round_decimals: int = 0) -> pd.DataFrame:
    """Put two runs on a common mass axis.

    The runs are first joined on exactly equal mass values. If that keeps fewer
    than ``min_overlap`` of the shorter run's points, the masses are rounded to
    ``round_decimals`` decimals, intensities are averaged per rounded mass, and
    the runs are joined on the rounded mass instead.

    Args:
        dfA: Run A with ``mass`` and ``intensity`` columns.
        dfB: Run B with ``mass`` and ``intensity`` columns.
        min_overlap: Minimum fraction of the shorter run that the exact join
            must retain to be accepted.
        round_decimals: Decimals kept when binning masses for the fallback join.

    Returns:
        A DataFrame sorted by ``mass`` with columns ``mass``, ``intensity_A``
        and ``intensity_B``; empty when the runs share no mass.
    """
    # Repeated masses would make the inner join many-to-many (a row cross
    # product), inflating the point count and skewing the cosine. Average them
    # first, as the rounded fallback already does. Inputs without duplicates
    # are passed through untouched.
    a = _mean_by_mass(dfA) if dfA["mass"].duplicated().any() else dfA
    b = _mean_by_mass(dfB) if dfB["mass"].duplicated().any() else dfB

    merged = pd.merge(a, b, on="mass", how="inner", suffixes=("_A", "_B")).sort_values("mass")
    if len(merged) >= min(len(a), len(b)) * min_overlap:
        return merged

    # Too few exact matches: bin both runs on rounded mass and join the bins.
    binned_a = a.assign(mass_r=a["mass"].round(round_decimals))
    binned_b = b.assign(mass_r=b["mass"].round(round_decimals))
    merged = pd.merge(
        _mean_by_mass(binned_a, "mass_r").rename(columns={"intensity": "intensity_A"}),
        _mean_by_mass(binned_b, "mass_r").rename(columns={"intensity": "intensity_B"}),
        on="mass_r", how="inner",
    ).rename(columns={"mass_r": "mass"}).sort_values("mass")
    return merged

def cosine_similarity(u: np.ndarray, v: np.ndarray) -> float:
    """Return the cosine of the angle between vectors *u* and *v*.

    The result is NaN when the product of the two norms is not positive
    (for example when either vector is all zeros).
    """
    dot_product = float(np.dot(u, v))
    norm_product = float(np.linalg.norm(u) * np.linalg.norm(v))
    if norm_product > 0:
        return dot_product / norm_product
    return np.nan

def auto_ylim_from_raw(a: np.ndarray, b: np.ndarray) -> float:
    """
    Symmetric y-limit from RAW intensities:
    the largest finite |value| across A and B plus 10% headroom, so the
    tallest peak is never clipped.

    Non-finite entries (NaN/inf) are ignored because a non-finite axis limit
    is rejected by matplotlib. Returns 1.0 when no finite value is available;
    the limit is never smaller than 1e-6 * 1.10 so the axis keeps a non-zero
    height for all-zero data.
    """
    # The earlier 99th-percentile term could never win against the maximum
    # (percentile <= max), so only the maximum is computed here.
    vals = np.abs(np.concatenate([np.ravel(a), np.ravel(b)]).astype(float))
    vals = vals[np.isfinite(vals)]
    if vals.size == 0:
        return 1.0
    return float(max(vals.max(), 1e-6) * 1.10)

def plot_mirror(merged: pd.DataFrame, title: str, cos: float, out_png: Path):
    """Draw a mirror bar plot of two aligned runs and save it as a PNG.

    Run A is drawn upwards and run B downwards around a shared zero line,
    using the raw (un-normalized) intensities.

    Args:
        merged: Aligned runs with ``mass``, ``intensity_A`` and ``intensity_B``
            columns.
        title: Figure title.
        cos: Cosine similarity shown inside the axes; NaN is printed as "NaN".
        out_png: Destination path of the image file.
    """
    # --- NO NORMALIZATION: use raw intensities directly ---
    a_raw = merged["intensity_A"].to_numpy()
    b_raw = merged["intensity_B"].to_numpy()

    # Bar width from median spacing (fixed fallback when there is one point)
    spacing = np.median(np.diff(merged["mass"])) if len(merged) >= 2 else 10.0
    bar_width = max(float(spacing) * 0.8, 0.1)

    # Dark colors
    color_A = "#8B0000"  # dark red
    color_B = "#000000"  # black

    fig, ax = plt.subplots(figsize=(12, 6))
    # This function is called once per pair in a batch loop: close the figure
    # even when drawing or saving fails, otherwise open figures pile up.
    try:
        ax.bar(merged["mass"], a_raw,  width=bar_width, align="center",
               color=color_A, alpha=1.0, linewidth=0, label="RunA")
        ax.bar(merged["mass"], -b_raw, width=bar_width, align="center",
               color=color_B, alpha=1.0, linewidth=0, label="RunB")

        # Auto symmetric y-limits from RAW intensities
        ymax = auto_ylim_from_raw(a_raw, b_raw)
        ax.set_ylim(-ymax, ymax)

        # Cosmetics
        ax.axhline(0, linewidth=1, color="#666666")
        ax.set_xlabel("Mass (Da)")
        ax.set_ylabel("Intensity (raw)")
        ax.set_title(title)
        ax.legend(frameon=False)

        # Cosine similarity INSIDE the figure
        text = f"Cosine Similarity = {cos:.4f}" if not np.isnan(cos) else "Cosine Similarity = NaN"
        ax.text(0.98, 0.9, text, transform=ax.transAxes, fontsize=12, color="#0b62a4",
                ha="right", va="center", bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.7))

        fig.savefig(out_png, dpi=300, bbox_inches="tight")
    finally:
        plt.close(fig)

# Process all pairs: match every run-A file with its run-B counterpart, plot
# each pair, and collect one summary row per pair.
SUMMARY_COLUMNS = [
    "pair_key", "runA", "runB", "n_points", "cosine_similarity", "status", "plot_png",
]


def _index_by_pair_key(paths):
    """Map pair key -> path, warning when two files produce the same key."""
    index = {}
    for path in paths:
        pair_key = make_pair_key(path)
        # list_runs searches recursively but the key is the file name only, so
        # same-named files in different folders collide. The last path still
        # wins (as before), but the collision is no longer silent.
        if pair_key in index:
            print(f"WARNING: duplicate pair key {pair_key!r}: {index[pair_key]} replaced by {path}")
        index[pair_key] = path
    return index


a_files, b_files = list_runs(DIR)
a_dict = _index_by_pair_key(a_files)
b_dict = _index_by_pair_key(b_files)

unpaired = sorted(set(a_dict) ^ set(b_dict))
if unpaired:
    print(f"WARNING: {len(unpaired)} file(s) have no counterpart run and were skipped.")

rows = []
for key in sorted(set(a_dict) & set(b_dict)):
    a_path, b_path = a_dict[key], b_dict[key]
    # Per-pair boundary: one unreadable or malformed file must not stop the
    # batch, so any failure is recorded in the summary instead of raised.
    try:
        dfA, dfB = load_mass_file(a_path), load_mass_file(b_path)
        merged = align_runs(dfA, dfB)
        base = key.replace("{RUN}", "A_vs_B")
        out_png = a_path.with_name(f"mirror__{base}.png")
        if merged.empty:
            cos, status, plot_png = np.nan, "no_overlap", None
        else:
            # Cosine from RAW intensities
            cos = cosine_similarity(merged["intensity_A"].to_numpy(), merged["intensity_B"].to_numpy())
            status = "ok"
            plot_mirror(merged, f"{a_path.name} vs {b_path.name}", cos, out_png)
            plot_png = str(out_png)
        rows.append({
            "pair_key": key,
            "runA": str(a_path),
            "runB": str(b_path),
            "n_points": int(len(merged)),
            "cosine_similarity": None if np.isnan(cos) else float(cos),
            "status": status,
            "plot_png": plot_png
        })
    except Exception as e:
        rows.append({
            "pair_key": key,
            "runA": str(a_path),
            "runB": str(b_path),
            "n_points": 0,
            "cosine_similarity": None,
            "status": f"error: {type(e).__name__}: {e}",
            "plot_png": None
        })

# Explicit columns keep the CSV header present even when no pair was found.
summary = pd.DataFrame(rows, columns=SUMMARY_COLUMNS)
out_csv = DIR / "mirror_plot_summary_raw.csv"
summary.to_csv(out_csv, index=False)

n_failed = sum(row["status"].startswith("error") for row in rows)
if n_failed:
    print(f"WARNING: {n_failed} of {len(rows)} pair(s) failed; see the 'status' column.")
print(f"✅ Done. Summary saved to {out_csv}")


  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="tight")
  fig.savefig(out_png, dpi=300, bbox_inches="t

✅ Done. Summary saved to F:\tissue\embedding_distance_nodrop\decon\mirror_plot_summary_raw.csv
