# TCR Repertoire Analysis

This notebook analyzes TCR sequencing data. Primary alignment and assembly were performed with MiXCR; the notebook summarizes clonotype statistics and chain usage and produces the figures.

**Reproducibility & usage**
- Run cells top-to-bottom.
- Python ≥3.10. Install dependencies from `environment.yml` or `requirements.txt`.
- Random seeds are fixed for NumPy and Python's `random` module; figure output paths are centralized (`FIG_DIR`, `savefig`).

In [None]:
# ---- Standard imports & config ----
import os, sys
from pathlib import Path
import json, math, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Reproducibility: seed the global RNGs of NumPy and Python's `random`.
np.random.seed(1337)
random.seed(1337)

# Project paths (edit as needed).
# NOTE(review): when a ".here" marker file exists in the working directory,
# the root resolves to the *grandparent* of the working directory; otherwise
# it is the working directory itself. Confirm this matches the repo layout.
if (Path.cwd() / ".here").exists():
    PROJ_ROOT = Path("..").resolve().parent
else:
    PROJ_ROOT = Path(".").resolve()

DATA_DIR, RESULTS_DIR, FIG_DIR = (
    PROJ_ROOT / sub for sub in ("data", "results", "figures")
)
# Create the output tree up front so later cells can write without checks.
for out_dir in (DATA_DIR, RESULTS_DIR, FIG_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Matplotlib defaults for publication: tight bounding box on save, no top or
# right spines, and a faint background grid.
plt.rcParams.update(
    {
        "figure.dpi": 150,
        "savefig.bbox": "tight",
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.2,
    }
)

def savefig(name, ext="png"):
    """Save the current matplotlib figure under FIG_DIR and report the path.

    Parameters
    ----------
    name : file name without extension.
    ext : file extension appended after a dot (default "png").
    """
    destination = FIG_DIR / f"{name}.{ext}"
    plt.savefig(destination)
    print(f"[saved] {destination}")

In [None]:
import os, re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------
# CONFIG
# -----------------------
# Folder with one sub-folder per mouse; each is expected to hold "clones_TRB.tsv".
base_dir = "WN"
# Mouse ID -> treatment condition. Only mice listed here are loaded.
condition_map = {
    "m9":  "aCD3 + AAV6",
    "m10": "VSVG-WT + Ark312",
    "m11": "VSVG-WT + Ark312",
    "m12": "VSVG-WT + Ark312",
    "m13": "aCD3 + Ark312",
    "m14": "aCD3 + Ark312",
    "m15": "aCD3 + Ark312",
}
# confidence filters (adjust as you like); a clone must pass BOTH thresholds
MIN_READCOUNT    = 3
MIN_READFRACTION = 0.001

# order rows in this sequence
condition_order = ["aCD3 + AAV6", "VSVG-WT + Ark312", "aCD3 + Ark312"]

# -----------------------
# LOAD
# -----------------------
dfs = []
missing = []  # expected per-mouse files that were not found on disk
for mouse, condition in condition_map.items():
    fp = os.path.join(base_dir, mouse, "clones_TRB.tsv")
    if not os.path.exists(fp):
        missing.append(fp)
        continue
    df = pd.read_csv(fp, sep="\t")
    df["Mouse"] = mouse
    df["Sample"] = mouse  # keep per-mouse pies
    df["Condition"] = condition
    dfs.append(df)

if not dfs:
    # Build the message from base_dir so it stays correct when the config changes.
    raise FileNotFoundError(f"No 'clones_TRB.tsv' found under {base_dir}/m*/")

if missing:
    # Missing samples are tolerated, but say so: otherwise a mouse silently
    # disappears from the figure.
    print(f"[skip] {len(missing)} sample file(s) not found: {', '.join(missing)}")

combined_df = pd.concat(dfs, ignore_index=True)

# robustly pick count/fraction columns: take the first candidate name that is
# present ("read*" preferred over "clone*")
count_col = next(
    (c for c in ("readCount", "cloneCount") if c in combined_df.columns), None
)
frac_col = next(
    (c for c in ("readFraction", "cloneFraction") if c in combined_df.columns), None
)
if count_col is None or frac_col is None:
    raise KeyError(f"Need count/fraction columns; have: {combined_df.columns.tolist()}")

# -----------------------
# FILTER (confident clones)
# -----------------------
is_confident = (
    (combined_df[count_col] >= MIN_READCOUNT)
    & (combined_df[frac_col] >= MIN_READFRACTION)
)
# .copy() so later column assignments do not hit a view of combined_df
filtered_df = combined_df[is_confident].copy()

# -----------------------
# LAYOUT HELPERS
# -----------------------
def mouse_key(m):
    """Sort key for mouse IDs: first run of digits as an int, 0 if none.

    Gives numeric ordering (m9 < m10 < m11) instead of lexicographic.
    """
    first_number = re.search(r"\d+", m)
    if first_number is None:
        return 0
    return int(first_number.group())

# mice present per condition (taken from filtered_df), in numeric mouse order;
# conditions without any rows are left out so they do not produce an empty row
mice_by_cond = {}
for cond in condition_order:
    present = filtered_df.loc[filtered_df["Condition"] == cond, "Mouse"].unique()
    if len(present) > 0:
        mice_by_cond[cond] = sorted(present, key=mouse_key)

# Fail with an explicit message instead of the bare
# "max() arg is an empty sequence" that the grid computation would raise.
if not mice_by_cond:
    raise ValueError(
        "No mice to plot: filtered_df contains no rows for any condition in "
        "condition_order (check the filter thresholds and condition labels)."
    )

# grid size: one row per condition, as many columns as the largest condition
rows = len(mice_by_cond)
cols = max(len(ms) for ms in mice_by_cond.values())

In [None]:
import itertools

def make_color_palette(n):
    """
    Return a list of n RGBA colors taken from several matplotlib
    qualitative colormaps (tab20, Set3, Pastel1, Pastel2, Accent).

    Every colormap is sampled at cmap.N evenly spaced points and the
    samples are concatenated in that order. When n is larger than this
    pool the sequence starts over, so colors repeat.
    """
    pool = []
    for cmap in (plt.cm.tab20, plt.cm.Set3, plt.cm.Pastel1, plt.cm.Pastel2, plt.cm.Accent):
        # calling a colormap with an array yields one RGBA row per sample
        pool.extend(cmap(np.linspace(0, 1, cmap.N)))
    pool_size = len(pool)
    # modular indexing cycles through the pool once it is exhausted
    return [pool[i % pool_size] for i in range(n)]

# -----------------------
# PLOTTING
# -----------------------
# Grid of pies: one row per condition, one column per mouse of that condition.
# squeeze=False makes `axes` a 2-D array for every rows x cols combination,
# so axes[row, col] works without special-casing single rows or columns.
fig, axes = plt.subplots(rows, cols, figsize=(4.8*cols, 4.8*rows), squeeze=False)

# mice_by_cond was built by walking condition_order, so its insertion order
# is the intended row order.
for row_idx, (cond, mice) in enumerate(mice_by_cond.items()):
    for col_idx in range(cols):
        ax = axes[row_idx, col_idx]
        if col_idx >= len(mice):
            # this condition has fewer mice than the widest row: blank the cell
            ax.axis("off")
            continue

        mouse = mice[col_idx]
        in_sample = (filtered_df["Condition"] == cond) & (filtered_df["Sample"] == mouse)
        # largest clone first, so wedges are drawn in descending size
        sample_df = filtered_df[in_sample].sort_values(by=frac_col, ascending=False)

        values = sample_df[frac_col].values
        if len(values) == 0:
            ax.axis("off")
            continue

        # one color per wedge
        colors = make_color_palette(len(values))

        # NOTE(review): `values` are the fractions of clones that passed the
        # filter; presumably ax.pie rescales them to fill the whole circle, so
        # wedges show shares among retained clones, not of the full repertoire.
        # Confirm this is the intended reading of the figure.
        ax.pie(values,
               labels=None,
               autopct=None,
               colors=colors,
               startangle=90,
               wedgeprops={"linewidth": 0.0})

        # title and n= (number of clones drawn in this pie)
        ax.set_title(f"{mouse} ({cond})", fontsize=11, pad=10)
        ax.text(0.5, 1.08, f"n = {len(values)}",
                ha="center", va="bottom", transform=ax.transAxes, fontsize=11)

    # label the row with its condition on the left-most axes
    axes[row_idx, 0].set_ylabel(cond, rotation=90, labelpad=35, fontsize=12)

plt.tight_layout()
plt.show()
