In [None]:
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# === User-defined parameters ===
# Everything below is read as module-level configuration by the loader and
# plotting functions in this cell.

# Legend label for each mouse -> CSV file with that mouse's measurements.
# Dictionary order is the order used for colours and legend entries.
MOUSE_FILES = {
    "AL47 - CON": "/Users/alexlawson/Masters-Data-Final/Live-imaging/Results/Statistics/Combined Stats/CON/Unknown/AL47/Track_Length.csv",
    "AL51 - CON": "/Users/alexlawson/Masters-Data-Final/Live-imaging/Results/Statistics/Combined Stats/CON/Unknown/AL51/Track_Length.csv",
    # "AL52 - COLD": "/path/to/al52.csv",
}
ID_COL          = "MicrogliaID"         # column identifying each cell within a mouse
VALUE_COL       = "Track Length"        # column that is plotted, e.g. "AvgPrimaryBranchLength"
# NOTE(review): the recorded run of this cell failed with
# KeyError "Missing required column 'Frame'": the Track_Length.csv files only
# contain 'Track Length', 'Unit', 'Category' and 'MicrogliaID'. Point
# MOUSE_FILES at a per-frame export or change FRAME_COL before re-running.
FRAME_COL       = "Frame"               # column holding the frame index of each row
TIME_PER_FRAME  = 8.0                  # minutes per frame; converts frame index to time
XTICK_LABEL     = "Control"             # x tick label under the single bar of figure 1
Y_LABEL         = "Track Length (µm)"  # y-axis label; alternatives: "Average Length (µm)", or µm³ for volumes
PLOT_TITLE      = "Total Distance (track) Travelled (µm)"       # title prefix, e.g. "Average Primary Branch Length"
SAVE_PATH       = None                 # e.g. "/tmp/soma_volume.png" to save both figures, or None to display them

# Seed NumPy's global RNG so the jittered dot positions are reproducible (optional).
np.random.seed(42)

def _load_concat_with_mouse(mouse_files: dict) -> pd.DataFrame:
    """Load every mouse's CSV into one cleaned, mouse-tagged table.

    Args:
        mouse_files: Mapping of mouse label -> path of that mouse's CSV file.

    Returns:
        A DataFrame with the rows of all files, a "Mouse" column holding the
        mapping key, VALUE_COL and FRAME_COL coerced to numbers, rows with a
        missing "Mouse", ID_COL, VALUE_COL or FRAME_COL removed, and two
        derived columns: "FrameInt" (frame rounded to int) and "TimeMin"
        (FrameInt * TIME_PER_FRAME).

    Raises:
        ValueError: If ``mouse_files`` is empty.
        KeyError: If ID_COL, VALUE_COL or FRAME_COL is absent from every file.
    """
    if not mouse_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError("mouse_files is empty: at least one mouse CSV is required.")

    frames = []
    for mouse, path in mouse_files.items():
        df = pd.read_csv(path)
        # Strip header whitespace per file, *before* concatenating. Stripping
        # afterwards lets "Frame" and "Frame " from different files become two
        # columns that then share one label, which breaks the coercion below.
        df.columns = [c.strip() for c in df.columns]
        df["Mouse"] = mouse
        frames.append(df)
    df_all = pd.concat(frames, ignore_index=True)

    # Validate against the union of all files' columns.
    for col in (ID_COL, VALUE_COL, FRAME_COL):
        if col not in df_all.columns:
            raise KeyError(f"Missing required column '{col}'. Found: {list(df_all.columns)}")

    # Non-numeric entries become NaN and are dropped together with other gaps.
    df_all[VALUE_COL] = pd.to_numeric(df_all[VALUE_COL], errors="coerce")
    df_all[FRAME_COL] = pd.to_numeric(df_all[FRAME_COL], errors="coerce")
    df_all = df_all.dropna(subset=["Mouse", ID_COL, VALUE_COL, FRAME_COL]).copy()

    # Integer frame is the time bin; minutes are derived from it.
    df_all["FrameInt"] = df_all[FRAME_COL].round().astype(int)
    df_all["TimeMin"] = df_all["FrameInt"] * float(TIME_PER_FRAME)
    return df_all

def _rainbow_colors_in_order(mouse_files: dict) -> dict:
    """Assign each mouse one colour from the "rainbow" colormap.

    Args:
        mouse_files: Mapping whose keys are the mouse labels; key order is
            preserved so colours follow the user's ordering.

    Returns:
        Dict of mouse label -> RGBA tuple; empty when there are no mice.
    """
    mouse_names = list(mouse_files)  # preserve user order
    if not mouse_names:
        return {}
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap takes the same (name, lut) arguments.
    cmap = plt.get_cmap("rainbow", len(mouse_names))
    # Integer arguments index the resampled lookup table directly.
    return {name: cmap(i) for i, name in enumerate(mouse_names)}

def _mouse_legend_handles(mouse_colors: dict) -> list:
    """Build legend proxies: one grey square for the bar, then one dot per mouse."""
    bar_handle = plt.Line2D([0], [0], marker="s", color="grey", label="Mean ± SEM",
                            alpha=0.25, markersize=10, linestyle="None")
    mouse_handles = [
        plt.Line2D([0], [0], marker="o", color=color, label=mouse,
                   markersize=6, linestyle="None")
        for mouse, color in mouse_colors.items()
    ]
    return [bar_handle] + mouse_handles


def plot_microglia():
    """Draw two summary figures of VALUE_COL and save or show them.

    Figure 1 is a single bar (mean ± SEM of the per-cell means) with one
    jittered dot per cell. Figure 2 has one bar per frame (x axis in minutes,
    mean ± SEM over all observations of that frame) with one jittered dot per
    observation. Dots are coloured by mouse in both figures.

    All inputs come from the module-level configuration (MOUSE_FILES, ID_COL,
    VALUE_COL, TIME_PER_FRAME, labels, SAVE_PATH). When SAVE_PATH is set the
    figures are written to "<base>_mean<ext>" and "<base>_time<ext>"
    (".png" if SAVE_PATH has no extension); otherwise they are shown.
    """
    def _sem(values):
        # Standard error of the mean; undefined (NaN) for fewer than two values.
        return values.std(ddof=1) / np.sqrt(len(values)) if len(values) > 1 else np.nan

    def _style_axes(ax):
        # Shared cosmetics: open top/right, faint horizontal grid behind the data.
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)
        ax.yaxis.grid(True, linestyle="-", alpha=0.2)
        ax.set_axisbelow(True)

    df_all = _load_concat_with_mouse(MOUSE_FILES)
    mouse_colors = _rainbow_colors_in_order(MOUSE_FILES)

    # ---------------- Plot 1: overall mean±SEM of per-cell means + jittered dots (color=mouse) ----------------
    # One value per cell: its mean over all frames.
    per_cell = (df_all
                .groupby(["Mouse", ID_COL], as_index=False)[VALUE_COL]
                .mean()
                .rename(columns={VALUE_COL: "CellMean"}))

    overall_mean = per_cell["CellMean"].mean()
    overall_sem = _sem(per_cell["CellMean"])

    fig1, ax1 = plt.subplots(figsize=(5, 6))
    bar_center = 1.0
    bar_width = 0.32

    ax1.bar(
        bar_center,
        overall_mean,
        width=bar_width,
        # No error bar when the SEM is undefined (single cell).
        yerr=(overall_sem if np.isfinite(overall_sem) else None),
        capsize=5,
        alpha=0.25,
        color="grey",
        edgecolor="grey",
        linewidth=1.0,
        label="Mean ± SEM",
        zorder=1,
    )

    # All per-cell dots in one scatter call instead of one call per row.
    jitter = np.random.uniform(-bar_width / 14, bar_width / 14, size=len(per_cell))
    if len(per_cell):
        ax1.scatter(
            bar_center + jitter,
            per_cell["CellMean"].to_numpy(),
            s=60, alpha=0.9,
            color=[mouse_colors[mouse] for mouse in per_cell["Mouse"]],
            edgecolor="white", linewidth=0.7, zorder=2,
        )

    ax1.set_xticks([bar_center])
    ax1.set_xticklabels([XTICK_LABEL])
    ax1.set_ylabel(Y_LABEL)
    ax1.set_title(f"{PLOT_TITLE} Across Time")
    _style_axes(ax1)
    ax1.legend(handles=_mouse_legend_handles(mouse_colors),
               bbox_to_anchor=(1.04, 1), loc="upper left", frameon=False)
    fig1.tight_layout()

    # ---------------- Plot 2: per-timepoint mean±SEM bars + jittered individual points (color=mouse) ----------------
    # FrameInt is the time bin; x positions are minutes derived from it, which
    # avoids grouping on floating-point times.
    summary = (df_all
               .groupby("FrameInt", as_index=False)[VALUE_COL]
               .agg(mean="mean", sem=_sem))
    summary["TimeMin"] = summary["FrameInt"] * float(TIME_PER_FRAME)

    # NOTE(review): bars are two frame intervals wide, so bars of consecutive
    # frames overlap - confirm the data are sampled sparsely enough for this.
    bar_width2 = 2.0 * float(TIME_PER_FRAME)

    fig2, ax2 = plt.subplots(figsize=(8, 6))
    ax2.bar(
        summary["TimeMin"],
        summary["mean"],
        width=bar_width2,
        yerr=summary["sem"],
        capsize=5,
        alpha=0.25,
        color="grey",
        edgecolor="grey",
        linewidth=1.0,
        zorder=1,
        label="Mean ± SEM",
    )

    # Every observation, jittered around its time bin, so all mice share the
    # same bar at each timepoint.
    for mouse, color in mouse_colors.items():
        rows = df_all[df_all["Mouse"] == mouse]
        times = rows["TimeMin"].to_numpy()
        jittered = times + np.random.uniform(-bar_width2 / 4, bar_width2 / 4, size=len(times))
        ax2.scatter(
            jittered, rows[VALUE_COL].to_numpy().astype(float),
            s=22, alpha=0.7, color=color,
            edgecolor="white", linewidth=0.5, zorder=2,
        )

    ax2.set_xlabel("Time (min)")
    ax2.set_ylabel(Y_LABEL)
    ax2.set_title(f"{PLOT_TITLE} Over Time")
    _style_axes(ax2)
    ax2.legend(handles=_mouse_legend_handles(mouse_colors),
               bbox_to_anchor=(1.04, 1), loc="upper left", frameon=False)
    fig2.tight_layout()

    # ---- Save or show ----
    if SAVE_PATH:
        base, ext = os.path.splitext(SAVE_PATH)
        ext = ext if ext else ".png"
        fig1.savefig(f"{base}_mean{ext}", dpi=450, bbox_inches="tight")
        fig2.savefig(f"{base}_time{ext}", dpi=450, bbox_inches="tight")
        print(f"Saved: {base}_mean{ext} and {base}_time{ext}")
    else:
        plt.show()

# Produce the figures only when this code is executed directly (script or
# notebook cell), not when it is imported.
if __name__ == "__main__":
    plot_microglia()


KeyError: "Missing required column 'Frame'. Found: ['Track Length', 'Unit', 'Category', 'MicrogliaID', 'Mouse']"

In [89]:
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import colorsys
from itertools import cycle

# === User-defined parameters ===
# Everything below is read as module-level configuration by the loader,
# colour helpers and plotting function in this cell.

# Legend label for each mouse -> CSV file with that mouse's measurements.
# Dictionary order decides which base hue each mouse receives.
MOUSE_FILES = {
    "AL47": "/Users/alexlawson/Masters-Data-Final/Live-imaging/Results/Statistics/Combined Stats/CON/Unknown/AL47/Track_Length.csv",
    "AL51": "/Users/alexlawson/Masters-Data-Final/Live-imaging/Results/Statistics/Combined Stats/CON/Unknown/AL51/Track_Length.csv",
    # "AL52 - COLD": "/path/to/al52.csv",
}
ID_COL          = "MicrogliaID"    # column identifying each cell within a mouse
VALUE_COL       = "Track Length"   # column that is plotted, e.g. "AvgPrimaryBranchLength"
# NOTE(review): the recorded run of this cell failed with
# KeyError "Missing required column 'Frame'": the Track_Length.csv files only
# contain 'Track Length', 'Unit', 'Category' and 'MicrogliaID'. Point
# MOUSE_FILES at a per-frame export or change FRAME_COL before re-running.
FRAME_COL       = "Frame"          # column holding the frame index of each row
TIME_PER_FRAME  = 8.0                       # minutes per frame; converts frame index to time
XTICK_LABEL     = "Control"                 # x tick label under the single bar of figure 1
# NOTE(review): Y_LABEL and PLOT_TITLE describe soma sphericity while
# VALUE_COL and the input files are track length - confirm which is intended.
Y_LABEL         = "Soma Sphericity"         # y-axis label; alternatives: "Average Length (µm)", or µm³ for volumes
PLOT_TITLE      = "Soma Sphericity of Microglia"         # title prefix, e.g. "Average Primary Branch Length"
SAVE_PATH       = None                      # e.g. "/tmp/soma_volume.png" to save both figures, or None to display them

# Seed NumPy's global RNG so the jittered dot positions are reproducible (optional).
np.random.seed(42)

# -------------------- I/O & preprocessing --------------------
def _load_concat_with_mouse(mouse_files: dict) -> pd.DataFrame:
    """Load every mouse's CSV into one cleaned, mouse-tagged table.

    Files are decoded as Latin-1, so byte values such as 0xB5 ("µ") in unit
    columns do not raise a decoding error.

    Args:
        mouse_files: Mapping of mouse label -> path of that mouse's CSV file.

    Returns:
        A DataFrame with the rows of all files, a "Mouse" column holding the
        mapping key, VALUE_COL and FRAME_COL coerced to numbers, rows with a
        missing "Mouse", ID_COL, VALUE_COL or FRAME_COL removed, and two
        derived columns: "FrameInt" (frame rounded to int) and "TimeMin"
        (FrameInt * TIME_PER_FRAME).

    Raises:
        ValueError: If ``mouse_files`` is empty.
        KeyError: If ID_COL, VALUE_COL or FRAME_COL is absent from every file.
    """
    if not mouse_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError("mouse_files is empty: at least one mouse CSV is required.")

    frames = []
    for mouse, path in mouse_files.items():
        df = pd.read_csv(path, encoding="latin1")
        # Strip header whitespace per file, *before* concatenating. Stripping
        # afterwards lets "Frame" and "Frame " from different files become two
        # columns that then share one label, which breaks the coercion below.
        df.columns = [c.strip() for c in df.columns]
        df["Mouse"] = mouse
        frames.append(df)
    df_all = pd.concat(frames, ignore_index=True)

    # Validate against the union of all files' columns.
    for col in (ID_COL, VALUE_COL, FRAME_COL):
        if col not in df_all.columns:
            raise KeyError(f"Missing required column '{col}'. Found: {list(df_all.columns)}")

    # Non-numeric entries become NaN and are dropped together with other gaps.
    df_all[VALUE_COL] = pd.to_numeric(df_all[VALUE_COL], errors="coerce")
    df_all[FRAME_COL] = pd.to_numeric(df_all[FRAME_COL], errors="coerce")
    df_all = df_all.dropna(subset=["Mouse", ID_COL, VALUE_COL, FRAME_COL]).copy()

    # Integer frame is the time bin; minutes are derived from it.
    df_all["FrameInt"] = df_all[FRAME_COL].round().astype(int)
    df_all["TimeMin"] = df_all["FrameInt"] * float(TIME_PER_FRAME)
    return df_all

# -------------------- Color helpers --------------------
def _distinct_mouse_colors(mouse_files: dict) -> dict:
    """Fallback palette: one colormap colour per mouse, in key order.

    Up to 20 mice are coloured from "tab20" resampled to the number of mice;
    larger groups use "hsv".

    Args:
        mouse_files: Mapping whose keys are the mouse labels.

    Returns:
        Dict of mouse label -> RGBA tuple; empty when there are no mice.
    """
    mice = list(mouse_files)
    n = len(mice)
    if n == 0:
        return {}
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap takes the same (name, lut) arguments.
    if n <= 20:
        cmap = plt.get_cmap("tab20", n)
    else:
        # hsv starts and ends on almost the same red. Resample to n + 1 steps
        # and use only the first n so the first and last mouse stay apart.
        cmap = plt.get_cmap("hsv", n + 1)
    # Integer arguments index the resampled lookup table directly.
    return {mouse: cmap(i) for i, mouse in enumerate(mice)}

def _very_distinct_mouse_colors(mouse_files: dict, light=0.55, sat=0.95) -> dict:
    """Assign each mouse a well-separated base hue, in key order.

    The first ten mice receive fixed hues in this order: red, green, blue,
    magenta, yellow, cyan, violet, orange, azure, rose. Each further mouse
    gets the midpoint of the widest gap left on the hue circle, so no hue is
    ever handed out twice.

    Args:
        mouse_files: Mapping whose keys are the mouse labels.
        light: HLS lightness applied to every hue.
        sat: HLS saturation applied to every hue.

    Returns:
        Dict of mouse label -> (r, g, b, 1.0); empty when there are no mice.
    """
    mice = list(mouse_files)
    base_hues = [
        0.00,        # red
        1/3,         # green
        2/3,         # blue
        5/6,         # magenta
        1/6,         # yellow
        0.50,        # cyan
        0.75,        # violet
        1/12,        # orange
        7/12,        # azure
        11/12,       # rose
    ]
    hues = base_hues[:len(mice)]

    # Evenly spaced extras starting at 0.0 would repeat red/green/blue/cyan,
    # so fill the widest remaining gap on the hue circle instead.
    while len(hues) < len(mice):
        ring = sorted(hues)
        gaps = [(nxt - cur, cur) for cur, nxt in zip(ring, ring[1:] + [ring[0] + 1.0])]
        # Widest gap wins; ties go to the smallest starting hue. Rounding
        # keeps float noise from deciding ties.
        width, start = max(gaps, key=lambda gap: (round(gap[0], 9), -gap[1]))
        hues.append((start + width / 2.0) % 1.0)

    # colorsys takes hue, lightness, saturation in that order (HLS, not HSL).
    return {
        mouse: (*colorsys.hls_to_rgb(hue, light, sat), 1.0)
        for mouse, hue in zip(mice, hues)
    }

def _rgb_to_hls(rgb):
    """Convert an RGB(A) colour to (hue, lightness, saturation); alpha is ignored."""
    red, green, blue = rgb[:3]
    return colorsys.rgb_to_hls(red, green, blue)

def _hls_to_rgb(h, l, s):
    """Convert hue/lightness/saturation to an opaque RGBA tuple (alpha = 1.0)."""
    return (*colorsys.hls_to_rgb(h, l, s), 1.0)

def _microglia_shades_by_mouse(df_all: pd.DataFrame, mouse_colors: dict,
                               light_min=0.35, light_max=0.78, sat=0.85) -> dict:
    """Give every cell its own shade of its mouse's hue.

    The hue comes from the mouse's base colour; cells of one mouse, taken in
    sorted order of their string IDs, are spread evenly in lightness from
    ``light_min`` to ``light_max`` (a lone cell gets lightness 0.55), always
    at saturation ``sat``.

    Returns:
        Dict mapping (mouse, MicrogliaID as str) -> RGBA tuple.
    """
    result = {}
    for mouse_name, rgba in mouse_colors.items():
        hue = _rgb_to_hls(rgba)[0]
        in_mouse = df_all["Mouse"] == mouse_name
        cell_ids = sorted(df_all.loc[in_mouse, ID_COL].astype(str).unique())
        n_cells = len(cell_ids)
        # A single cell sits at a fixed mid lightness; otherwise span the range.
        lightness_steps = [0.55] if n_cells == 1 else np.linspace(light_min, light_max, n_cells)
        result.update(
            ((mouse_name, cell_id), _hls_to_rgb(hue, float(lightness), float(sat)))
            for lightness, cell_id in zip(lightness_steps, cell_ids)
        )
    return result

def _coerce_microglia_id_str(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose ID_COL values are strings.

    String IDs make the (mouse, id) colour-lookup keys match reliably; the
    input frame is left untouched.
    """
    return df.assign(**{ID_COL: df[ID_COL].astype(str)})

def _maybe_make_mouse_legend(ax, mouse_colors: dict, title="Mouse", outside=True):
    """Add a framed legend with one dot per mouse, sorted by mouse label.

    Does nothing when ``mouse_colors`` is empty. With ``outside=True`` the
    legend is anchored just beyond the axes' upper-right corner; otherwise
    Matplotlib chooses the position inside the axes.
    """
    if not mouse_colors:
        return

    entries = []
    for mouse in sorted(mouse_colors.keys()):
        entries.append(
            plt.Line2D([0], [0], marker="o", linestyle="None",
                       color=mouse_colors[mouse], label=str(mouse), markersize=7)
        )

    if outside:
        placement = {"bbox_to_anchor": (1.02, 1), "loc": "upper left"}
    else:
        placement = {"loc": "best"}
    ax.legend(handles=entries, title=title, frameon=True, **placement)


# -------------------- Plotting --------------------
def plot_microglia():
    """Draw two summary figures of VALUE_COL, save or show them, and print the colour mapping.

    Figure 1 is a single bar (mean ± SEM of the per-cell means) with one
    jittered dot per cell. Figure 2 has one bar per frame (x axis in minutes,
    mean ± SEM over all observations of that frame) with one jittered dot per
    observation. Every dot uses the shade assigned to its (mouse, cell) pair;
    the legend lists mice only.

    All inputs come from the module-level configuration (MOUSE_FILES, ID_COL,
    VALUE_COL, TIME_PER_FRAME, labels, SAVE_PATH). When SAVE_PATH is set the
    figures are written to "<base>_mean<ext>" and "<base>_time<ext>"
    (".png" if SAVE_PATH has no extension); otherwise they are shown.
    """
    def _sem(values):
        # Standard error of the mean; undefined (NaN) for fewer than two values.
        return values.std(ddof=1) / np.sqrt(len(values)) if len(values) > 1 else np.nan

    def _style_axes(ax):
        # Shared cosmetics: open top/right, faint horizontal grid behind the data.
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)
        ax.yaxis.grid(True, linestyle="-", alpha=0.2)
        ax.set_axisbelow(True)

    def _shades_for(frame):
        # One colour per row, looked up by (mouse, cell id as string).
        return [microglia_shades[(mouse, str(cell))]
                for mouse, cell in zip(frame["Mouse"], frame[ID_COL])]

    df_all = _coerce_microglia_id_str(_load_concat_with_mouse(MOUSE_FILES))

    # === Colors ===
    mouse_colors = _very_distinct_mouse_colors(MOUSE_FILES)
    microglia_shades = _microglia_shades_by_mouse(df_all, mouse_colors)

    # ---------------- Plot 1: overall mean±SEM of per-cell means + jittered dots ----------------
    per_cell = (df_all
                .groupby(["Mouse", ID_COL], as_index=False)[VALUE_COL]
                .mean()
                .rename(columns={VALUE_COL: "CellMean"}))

    overall_mean = per_cell["CellMean"].mean()
    overall_sem = _sem(per_cell["CellMean"])

    # Constrained layout makes room for the legend placed outside the axes.
    # fig.tight_layout() must not be called on these figures: it would swap
    # the layout engine (with a warning) and undo that.
    fig1, ax1 = plt.subplots(figsize=(4, 6), layout="constrained")
    bar_center, bar_width = 1.0, 0.32
    ax1.bar(
        bar_center, overall_mean, width=bar_width,
        # No error bar when the SEM is undefined (single cell).
        yerr=(overall_sem if np.isfinite(overall_sem) else None),
        capsize=5, alpha=0.25, color="grey", edgecolor="grey",
        linewidth=1.0, label="Mean ± SEM", zorder=1,
    )

    # All per-cell dots in one scatter call instead of one call per row.
    jitter = np.random.uniform(-bar_width / 8, bar_width / 8, size=len(per_cell))
    if len(per_cell):
        ax1.scatter(
            bar_center + jitter, per_cell["CellMean"].to_numpy(),
            s=60, alpha=0.9,
            color=_shades_for(per_cell),
            edgecolor="white", linewidth=0.7, zorder=2,
        )

    ax1.set_xticks([bar_center])
    ax1.set_xticklabels([XTICK_LABEL])
    ax1.set_ylabel(Y_LABEL)
    ax1.set_title(f"{PLOT_TITLE} Across Time")
    _style_axes(ax1)

    # Legend: mouse-only, placed outside the axes.
    _maybe_make_mouse_legend(ax1, mouse_colors, title="Mouse", outside=True)

    # ---------------- Plot 2: per-timepoint bars + jittered observations ------------------------
    summary = (df_all
               .groupby("FrameInt", as_index=False)[VALUE_COL]
               .agg(mean="mean", sem=_sem))
    summary["TimeMin"] = summary["FrameInt"] * float(TIME_PER_FRAME)
    # NOTE(review): bars are four frame intervals wide, so bars of nearby
    # frames overlap - confirm the data are sampled sparsely enough for this.
    bar_width2 = 4.0 * float(TIME_PER_FRAME)

    fig2, ax2 = plt.subplots(figsize=(9, 6), layout="constrained")
    ax2.bar(
        summary["TimeMin"], summary["mean"], width=bar_width2,
        yerr=summary["sem"], capsize=5, alpha=0.25, color="grey",
        edgecolor="grey", linewidth=1.0, zorder=1, label="Mean ± SEM",
    )

    # Every observation in one scatter call; a call per row is very slow on
    # large tables.
    x_jitter = np.random.uniform(-bar_width2 / 6, bar_width2 / 6, size=len(df_all))
    if len(df_all):
        ax2.scatter(
            df_all["TimeMin"].to_numpy() + x_jitter,
            df_all[VALUE_COL].to_numpy().astype(float),
            s=50, alpha=0.85,
            color=_shades_for(df_all),
            edgecolor="white", linewidth=0.5, zorder=2,
        )

    ax2.set_xlabel("Time (min)")
    ax2.set_ylabel(Y_LABEL)
    ax2.set_title(f"{PLOT_TITLE} Over Time")
    _style_axes(ax2)

    # Legend: mouse-only, placed outside the axes.
    _maybe_make_mouse_legend(ax2, mouse_colors, title="Mouse", outside=True)

    # ---- Save or show ----
    if SAVE_PATH:
        base, ext = os.path.splitext(SAVE_PATH)
        ext = ext if ext else ".png"
        fig1.savefig(f"{base}_mean{ext}", dpi=450, bbox_inches="tight")
        fig2.savefig(f"{base}_time{ext}", dpi=450, bbox_inches="tight")
        print(f"Saved: {base}_mean{ext} and {base}_time{ext}")
    else:
        plt.show()

    # Optional mapping printouts (useful for methods)
    print("\nMouse → Base color (RGBA):")
    for m, c in mouse_colors.items():
        print(f"  {m}: {tuple(round(v,3) for v in c)}")
    print("\n(Mouse, MicrogliaID) → Shade assigned")
    # Sorted so the listing is identical from run to run.
    mids = sorted(set(zip(df_all["Mouse"], df_all[ID_COL].astype(str))))
    # Long listings are abbreviated to the first 15 and last 14 entries.
    preview = mids if len(mids) <= 30 else mids[:15] + [("...", "...")] + mids[-14:]
    for key in preview:
        if key == ("...", "..."):
            print("  ...")
        else:
            print(f"  {key}: {tuple(round(v,3) for v in microglia_shades[key])}")

# Produce the figures only when this code is executed directly (script or
# notebook cell), not when it is imported.
if __name__ == "__main__":
    plot_microglia()


KeyError: "Missing required column 'Frame'. Found: ['Track Length', 'Unit', 'Category', 'MicrogliaID', 'Mouse']"