In [None]:
import os
import subprocess
from pathlib import Path
import pandas as pd
import numpy as np
import anndata as ad

In [None]:
# Experiment (brain-region) codes iterated over by the submission cells.
experiments = "PU CAH CAB CAT NAC STH MGM GP".split()

### add_metadata

In [None]:
# `ll` is normally an interactive-shell alias; the non-interactive shell that
# subprocess starts does not load aliases, so the original call produced no
# listing. Invoke `ls -l` directly (no shell needed) and show its output.
ret = subprocess.run(["ls", "-l"], capture_output=True)
ret.stdout.decode()

In [None]:
# Render the add_metadata SLURM template once per experiment, write it under
# <run_dir>/<exp>/slurm_scripts and submit it with sbatch.
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/add_metadata.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/00_add_metadata.ipynb"
SUFFIX = "CPSfilt" # "cellpose_SAM_filt" # "proseg_fv38_filt"

with open(template, "r") as f:
    bash_content = f.read()

# exp -> raw stdout returned by sbatch
add_metadata_ret = {}
for exp in experiments:
    execute_out_dir = run_dir / exp / "notebooks"
    script_out_dir = run_dir / exp / "slurm_scripts"
    execute_out_dir.mkdir(exist_ok=True, parents=True)
    script_out_dir.mkdir(exist_ok=True, parents=True)
    out_script = script_out_dir / "add_metadata_cp.sh"

    with open(out_script, "w") as f:
        content = bash_content.format(EXP=exp, SUFFIX=SUFFIX)
        f.write(content)
    ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
    add_metadata_ret[exp] = ret.stdout.decode()
    if ret.returncode != 0:
        # Previously a failed submission was still reported as submitted.
        print(f"sbatch failed for {exp}: {ret.stderr.decode().strip()}")
        continue
    print(f"Submitted job for {exp}")

### Subclass scVIVA

In [None]:
# subclass_scviva_ret['PU'].strip().split(" ")[-1]

In [None]:
# Render the subclass_scviva SLURM template for every experiment, submit it,
# and remember the job id reported by sbatch.
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/subclass_scviva.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/00_subclass_scviva.ipynb"

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

# exp -> last whitespace-separated token of sbatch's stdout (the job id)
subclass_scviva_ret = {}
for exp in experiments:
    execute_out_dir = run_dir / exp / "notebooks"
    script_out_dir = run_dir / exp / "slurm_scripts"
    execute_out_dir.mkdir(exist_ok=True, parents=True)
    script_out_dir.mkdir(exist_ok=True, parents=True)
    out_script = script_out_dir / "subclass_scviva.sh"

    with open(out_script, "w") as f:
        content = bash_content.format(EXP=exp)
        f.write(content)
    ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
    if ret.returncode != 0:
        # Without this check a failed submission stored "" as the job id and
        # was still announced as submitted.
        print(f"sbatch failed for {exp}: {ret.stderr.decode().strip()}")
        continue
    sbatch_message = ret.stdout.decode().strip()
    subclass_scviva_ret[exp] = sbatch_message.split(" ")[-1]
    print(sbatch_message)
    print(f"Submitted job for {exp}")

### Subclass Integration (wubin) 

In [None]:
# Experiment code -> reference name substituted for {REF_EXP} in the templates.
# The three CA* experiments share the "CA" reference.
EXP_TO_REF_PATH = dict(
    PU="PU",
    CAH="CA",
    CAB="CA",
    CAT="CA",
    GP="GP",
    STH="STH",
    NAC="NAC",
    MGM="MGM",
)

In [None]:
# Render the subclass_integration SLURM template for every experiment and
# write it to disk. Submission is currently disabled (see the commented-out
# os.system call), so this cell only prepares directories and scripts.
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/subclass_integration.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

# Output sub-directories created under every experiment directory.
integration_subdirs = [
    "notebooks/base_r1_subclass",
    "notebooks/base_r2_subclass",
    "notebooks/scviva_r1_subclass",
    "notebooks/scviva_r2_subclass",
]

for exp in experiments:
    ref_exp = EXP_TO_REF_PATH[exp]
    for _execute_dir in integration_subdirs:
        execute_out_dir = run_dir / exp / _execute_dir
        execute_out_dir.mkdir(exist_ok=True, parents=True)
    script_out_dir = run_dir / exp / "slurm_scripts"
    script_out_dir.mkdir(exist_ok=True, parents=True)
    out_script = script_out_dir / "subclass_integration.sh"

    with open(out_script, "w") as f:
        content = bash_content.format(EXP=exp, REF_EXP=ref_exp)
        f.write(content)
    # os.system(f"sbatch {out_script}")
    # Nothing is submitted while the line above is commented out, so the
    # message reports what actually happened instead of "Submitted job".
    print(f"Wrote script for {exp} (not submitted): {out_script}")

## Neighborhood Annot (SPIDA)

In [None]:
# Experiment code -> reference name used for {REF_EXP}; note MGM maps to "MGM1" here.
EXP_TO_REF_PATH = dict(
    zip(
        ("PU", "CAH", "CAB", "CAT", "GP", "STH", "NAC", "MGM"),
        ("PU", "CA", "CA", "CA", "GP", "STH", "NAC", "MGM1"),
    )
)

In [None]:
# Render the neighborhood-annotation SLURM template for every experiment,
# submit it, and remember the job id reported by sbatch.
# NOTE(review): the template file name is spelled "neihborhood_annot.sh" while
# the generated script is "neighborhood_annot.sh" - confirm the template path
# matches the file on disk.
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/neihborhood_annot.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/01_neighborhood_annot.ipynb"

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

# exp -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    execute_out_dir = run_dir / exp / "notebooks"
    script_out_dir = run_dir / exp / "slurm_scripts"
    execute_out_dir.mkdir(exist_ok=True, parents=True)
    script_out_dir.mkdir(exist_ok=True, parents=True)
    out_script = script_out_dir / "neighborhood_annot.sh"

    with open(out_script, "w") as f:
        content = bash_content.format(EXP=exp, REF_EXP=EXP_TO_REF_PATH[exp])
        f.write(content)
    ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
    if ret.returncode != 0:
        # A failed submission used to be stored as an empty job id and still
        # reported as submitted.
        print(f"sbatch failed for {exp}: {ret.stderr.decode().strip()}")
        continue
    sbatch_message = ret.stdout.decode().strip()
    nb_annot_ret[exp] = sbatch_message.split(" ")[-1]
    print(sbatch_message)
    print(f"Submitted job for {exp}")

## Subclass Integration (SPIDA)

In [None]:
# experiments = ["PU"]

In [None]:
# Experiment code -> reference name used for {REF_EXP} (MGM uses "MGM1" in this section).
EXP_TO_REF_PATH = dict(
    [
        ("PU", "PU"),
        ("CAH", "CA"),
        ("CAB", "CA"),
        ("CAT", "CA"),
        ("GP", "GP"),
        ("STH", "STH"),
        ("NAC", "NAC"),
        ("MGM", "MGM1"),
    ]
)

In [None]:
# Render the subclass-annotation SLURM template for every experiment and
# submit it with sbatch.
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/subclass_annot.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/01_subclass_annot.ipynb"

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

for exp in experiments:
    execute_out_dir = run_dir / exp / "notebooks"
    script_out_dir = run_dir / exp / "slurm_scripts"
    execute_out_dir.mkdir(exist_ok=True, parents=True)
    script_out_dir.mkdir(exist_ok=True, parents=True)
    out_script = script_out_dir / "subclass_annot.sh"

    with open(out_script, "w") as f:
        content = bash_content.format(EXP=exp, REF_EXP=EXP_TO_REF_PATH[exp])
        f.write(content)
    # os.system returns the command's exit status; a non-zero value means the
    # submission failed and must not be reported as submitted.
    status = os.system(f"sbatch {out_script}")
    if status != 0:
        print(f"sbatch failed for {exp} (exit status {status})")
        continue
    print(f"Submitted job for {exp}")

## Group Annotations (SPIDA)

In [None]:
# Experiment code -> reference name used for {REF_EXP}; MGM maps to "MGM" in this section.
_ca_refs = {code: "CA" for code in ("CAH", "CAB", "CAT")}
EXP_TO_REF_PATH = {
    "PU": "PU",
    **_ca_refs,
    "GP": "GP",
    "STH": "STH",
    "NAC": "NAC",
    "MGM": "MGM",
}
del _ca_refs

In [None]:
# Shared SLURM header template and the root directory generated scripts are written under.
bash_template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/bash_template.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation") / "execute"

In [None]:
# Load the shared header template text into memory.
bash_content = Path(bash_template).read_text()

In [None]:
# Display the template text currently held in bash_content.
print(bash_content)

In [None]:
# unknown_template = """
# pixi run \\
#     papermill \\
#     /home/x-aklein2/projects/aklein/BICAN/spida_dev/analysis/annotations/template/cluster_unknown.ipynb \\
#     {run_dir}/{EXP}/cluster_unknown.ipynb \\
#     -p EXPERIMENT {EXP}
# """

In [None]:
# Shell fragment appended to the job script once per subclass: a papermill run
# of 02_group_annot.ipynb. Filled with str.format; placeholders are {run_dir},
# {EXP}, {REF_EXP}, {SUBCLASS} and {subclass_filename}. Each "\\" in this
# non-raw string becomes a single backslash (shell line continuation).
subclass_template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_group_annot.ipynb \\
    {run_dir}/{EXP}/notebooks/group/group_annot_{subclass_filename}.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_value "{SUBCLASS}"
"""

In [None]:
# Per-region experiment codes for the group-annotation script builder.
experiments = "PU CAH CAB CAT NAC STH MGM GP".split()

In [None]:
# Overrides the per-region list from the previous cell: when this cell is run,
# only "ALL_RNA" is processed until `experiments` is reassigned.
experiments = ["ALL_RNA"]

In [None]:
# For every experiment, build one job script that starts with the formatted
# bash template and then runs the group-annotation notebook once per subclass
# found in the experiment's AnnData, and write it to
# <run_dir>/<exp>/slurm_scripts/group_annot.sh (not submitted here).
for exp in experiments:
    # Experiments without an entry in EXP_TO_REF_PATH (e.g. "ALL_RNA") used to
    # raise KeyError here; fall back to the experiment's own name.
    # NOTE(review): confirm this is the intended reference name for such runs.
    ref_exp = EXP_TO_REF_PATH.get(exp, exp)
    read_path = f"/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{exp}/{exp}.h5ad"
    adata = ad.read_h5ad(read_path)

    run = bash_content.format(EXP=exp)
    # run += unknown_template.format(EXP=exp, run_dir=run_dir)

    subclasses = adata.obs['c2c_allcools_label_Subclass'].unique().tolist()
    for subclass in subclasses:
        # Skip missing labels as well as "unknown": a NaN entry has no
        # .replace method and would abort the whole loop.
        if pd.isna(subclass) or subclass == "unknown":
            continue
        # Make the label usable as a file name component.
        subclass_filename = subclass.replace(" ", "_").replace("/", "-")
        subclass_formatted = subclass_template.format(
            run_dir=run_dir,
            EXP=exp,
            REF_EXP=ref_exp,
            SUBCLASS=subclass,
            subclass_filename=subclass_filename
        )
        run += subclass_formatted

    mk_path = run_dir / exp / "notebooks/group"
    mk_path.mkdir(parents=True, exist_ok=True)
    write_path = run_dir / exp / "slurm_scripts/group_annot.sh"
    write_path.parent.mkdir(parents=True, exist_ok=True)
    with open(write_path, "w") as f:
        f.write(run)

In [None]:
# Display the script text held in `run` (the last one built by the loop above).
print(run)

## Group Annotations SPIDA (ALL) 

In [None]:
# Locations for the pooled ("ALL") group-annotation run, plus the shared
# header template text.
bash_template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/bash_template.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")
bash_content = Path(bash_template).read_text()

In [None]:
# Per-subclass shell fragment: a papermill run of 02_group_annot.ipynb with
# explicit reference/spatial inputs and output paths. Filled with str.format;
# placeholders are {run_dir}, {EXP}, {SUBCLASS}, {SUBSET_LEVEL},
# {REF_ADATA_PATH}, {SPATIAL_ADATA_PATH} and {subclass_filename}.
# The same {SUBCLASS} value is used for both the rna_ and qry_ subset values.
subclass_template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_group_annot.ipynb \\
    {run_dir}/{EXP}/notebooks/group/group_annot_{subclass_filename}.ipynb \\
    -p EXP {EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Combined.Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path "{REF_ADATA_PATH}" \\
    -p spatial_adata_path "{SPATIAL_ADATA_PATH}" \\
    -p save_path "/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{EXP}/{EXP}_{SUBSET_LEVEL}_{subclass_filename}.h5ad" \\
    -p integrator_path "/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{EXP}/{EXP}_{SUBSET_LEVEL}_{subclass_filename}_joint.h5ad" \\
    -p top_deg_genes 50 \\
    -k preprocessing
"""

In [None]:
# Build a single job script for the "ALL_RNA" run: the formatted bash template
# followed by one papermill invocation per 'Combined.Subclass' value found in
# the spatial AnnData. The script is written, not submitted.
rna_ref_path = "/home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/all_downsampled_50pct_nonneurons.h5ad"
exp = "ALL_RNA"
# `ref` is the AnnData that is read here and passed as SPATIAL_ADATA_PATH.
ref = "/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_nn.h5ad"
subset_level = "Subclass"
spatial_subset_col = "Combined.Subclass"
# subset_name = "Neurons"
# read_path = f"/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{exp}/{exp}.h5ad"
adata = ad.read_h5ad(ref)
run = bash_content.format(EXP=exp)

subclasses = adata.obs[spatial_subset_col].unique().tolist()
for subclass in subclasses:
    # Skip missing labels too (as the region-donor cells do): NaN has no
    # .replace method and would otherwise raise AttributeError.
    if pd.isna(subclass) or subclass == "unknown":
        continue
    # Make the label usable as a file name component.
    subclass_filename = subclass.replace(" ", "_").replace("/", "-")
    subclass_formatted = subclass_template.format(
        run_dir=run_dir,
        EXP=exp,
        SUBCLASS=subclass,
        SUBSET_LEVEL=subset_level,
        REF_ADATA_PATH=rna_ref_path,
        SPATIAL_ADATA_PATH=ref,
        subclass_filename=subclass_filename,
    )
    run += subclass_formatted

mk_path = run_dir / exp / "notebooks/group"
mk_path.mkdir(parents=True, exist_ok=True)
write_path = run_dir / exp / "slurm_scripts/group_annot_nn.sh"
write_path.parent.mkdir(parents=True, exist_ok=True)
with open(write_path, "w") as f:
    f.write(run)

## mC Group Annotations (ALL)

In [None]:
# Locations for the methylation (mC) group-annotation run, plus the shared
# header template text.
bash_template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/bash_template.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute")
bash_content = Path(bash_template).read_text()

In [None]:
# Per-subclass shell fragment: a papermill run of 02_group_methyl_annot.ipynb.
# Filled with str.format; placeholders are {run_dir}, {EXP}, {SUBCLASS},
# {SUBSET_LEVEL}, {REF_ADATA_PATH}, {SPATIAL_ADATA_PATH} and
# {subclass_filename}. The spatial subset column is hard-coded to
# "Combined.Subclass" in the string itself.
# Disabled extra option kept for reference:
# -r mc_query " CellClass == 'Nonneuron' " \\
subclass_template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_group_methyl_annot.ipynb \\
    {run_dir}/{EXP}/notebooks/group/group_annot_{subclass_filename}.ipynb \\
    -p EXP {EXP} \\
    -p mc_subset_col "Subclass" \\
    -p mc_subset_value "{SUBCLASS}" \\
    -p spatial_subset_col "Combined.Subclass" \\
    -p spatial_subset_value "{SUBCLASS}" \\
    -p mc_adata_path "{REF_ADATA_PATH}" \\
    -p spatial_adata_path "{SPATIAL_ADATA_PATH}" \\
    -p integrator_path "/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{EXP}/{EXP}_{SUBSET_LEVEL}_{subclass_filename}_joint.h5ad" \\
    -p cell_mapping_out_path /home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{EXP}/{EXP}_{SUBSET_LEVEL}_{subclass_filename}_cell_mapping.tsv \\
    -p topn 50 \\
    -k preprocessing
"""

In [None]:
# Build a single job script for the "ALL_MC" run: the formatted bash template
# followed by one papermill invocation per 'Combined.Subclass' value found in
# the spatial AnnData. The script is written, not submitted.
# Despite its name, rna_ref_path holds the mC reference passed as REF_ADATA_PATH.
rna_ref_path = "/home/x-aklein2/projects/aklein/BICAN/data/reference/mC/BG_BICAN.gene-CHN.h5ad"
exp = "ALL_MC"
# `ref` is the AnnData that is read here and passed as SPATIAL_ADATA_PATH.
ref = "/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_neu.h5ad"
subset_level = "Subclass"
spatial_subset_col = "Combined.Subclass"
# subset_name = "Neurons"
# read_path = f"/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{exp}/{exp}.h5ad"
adata = ad.read_h5ad(ref)
run = bash_content.format(EXP=exp)

subclasses = adata.obs[spatial_subset_col].unique().tolist()
for subclass in subclasses:
    # Skip missing labels too (as the region-donor cells do): NaN has no
    # .replace method and would otherwise raise AttributeError.
    if pd.isna(subclass) or subclass == "unknown":
        continue
    # Make the label usable as a file name component.
    subclass_filename = subclass.replace(" ", "_").replace("/", "-")
    subclass_formatted = subclass_template.format(
        run_dir=run_dir,
        EXP=exp,
        SUBCLASS=subclass,
        SUBSET_LEVEL=subset_level,
        REF_ADATA_PATH=rna_ref_path,
        SPATIAL_ADATA_PATH=ref,
        # Not referenced by the template defined in this section; str.format
        # ignores unused keyword arguments.
        SPATIAL_SUBCLASS=spatial_subset_col,
        subclass_filename=subclass_filename,
    )
    run += subclass_formatted

mk_path = run_dir / exp / "notebooks/group"
mk_path.mkdir(parents=True, exist_ok=True)
write_path = run_dir / exp / "slurm_scripts/group_annot_neu.sh"
write_path.parent.mkdir(parents=True, exist_ok=True)
with open(write_path, "w") as f:
    f.write(run)

## Alignment - Ren Data

In [None]:
# Experiment codes for the Ren alignment section (uses "SUBTH" rather than "STH").
experiments = "PU CAH CAB CAT NAC SUBTH MGM GP".split()

# Experiment code -> value substituted for {REF_EXP}.
EXP_TO_REF_PATH = dict(
    PU="PU",
    CAH="CA",
    CAB="CA",
    CAT="CA",
    GP="GP",
    SUBTH="STH",
    NAC="NAC",
    MGM="MGM",
)

# Experiment code -> value substituted for {REN_EXP} (the region name matched
# in the reference query and used in output directory names).
EXP_TO_REN_NAME = dict(
    PU="Pu",
    CAH="CaH",
    CAB="CaB",
    CAT="CaT",
    GP="GP",
    SUBTH="STH",
    NAC="NAC",
    MGM="MGM1",
)

In [None]:
# Complete SLURM job script for one Ren-alignment run, filled with str.format.
# Placeholders: {EXP} (job name, log file, spatial_query), {REF_EXP} and
# {REN_EXP} (ref_query and output directory). stdout and stderr (-o / -e) are
# sent to the same log file. Each "\\" becomes a single backslash in the
# rendered script.
bash_content = """#!/bin/bash
# FILENAME = ren_{EXP}_subclass_integration.sh 

#SBATCH -A mcb130189
#SBATCH -J REN_{EXP}_subclass_integration
#SBATCH -p wholenode
#SBATCH --time=1:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem=128gb
#SBATCH -o /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/alignment/ren_{EXP}_subclass_integration.out
#SBATCH -e /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/alignment/ren_{EXP}_subclass_integration.out
#SBATCH --export=ALL

export PATH="/home/x-aklein2/.pixi/bin:$PATH"
cd /anvil/projects/x-mcb130189/aklein/SPIDA

pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/integration/seurat_integration_v2.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/alignment/Subclass_Ren_{REN_EXP}/seurat_integration.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/REN/BICAN_BGC_RNA_fliterV2_20250806.h5ad \\
    -p ref_query ' region == "{REN_EXP}" ' \\
    -p ref_cell_type Subclass_name \\
    -p ref_downsample 3000 \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_PFV8_annotated_v4.h5ad \\
    -p spatial_query ' brain_region == "{EXP}" ' \\
    -p spatial_cell_type Subclass \\
    -p spatial_downsample 3000 \\
    -p topn 50 \\
    -p outdir /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/alignment/Subclass_Ren_{REN_EXP}/ \\
    

echo "Done"
"""

In [None]:
# Write one Ren-alignment job script per experiment into
# <run_dir>/Subclass_Ren_<ren name>/ and submit it with sbatch.
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/alignment")

for exp in experiments:
    ref_exp = EXP_TO_REF_PATH[exp]
    ren_exp = EXP_TO_REN_NAME[exp]
    execute_out_dir = run_dir / f"Subclass_Ren_{ren_exp}"
    execute_out_dir.mkdir(exist_ok=True, parents=True)
    out_script = execute_out_dir / "subclass_integration.sh"

    with open(out_script, "w") as f:
        content = bash_content.format(EXP=exp, REF_EXP=ref_exp, REN_EXP=ren_exp)
        f.write(content)
    # os.system returns the command's exit status; a non-zero value means the
    # submission failed and must not be reported as submitted.
    status = os.system(f"sbatch {out_script}")
    if status != 0:
        print(f"sbatch failed for {exp} (exit status {status})")
        continue
    print(f"Submitted job for {exp}")

# Region Donor

In [None]:
# Experiment codes and donor ids iterated as (exp, donor) pairs; pairs listed
# in `exclude` are skipped by the submission loops.
experiments = "PU CAH CAB CAT NAC STH MGM GP".split()
donors = "UCI2424 UCI4723 UCI5224 UWA7648".split()
exclude = [("CAT", "UWA7648")]

In [None]:
# Experiment code -> reference name used for {REF_EXP} (MGM maps to "MGM1").
EXP_TO_REF_PATH = dict(
    PU="PU",
    CAH="CA",
    CAB="CA",
    CAT="CA",
    GP="GP",
    STH="STH",
    NAC="NAC",
    MGM="MGM1",
)

In [None]:
# template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/region_donor_annot.sh"
# run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor")
# input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/01_region_donor_annot.ipynb"

# with open(template, "r") as f: 
#     bash_content = f.read()
# print(bash_content)

In [None]:
# experiments = ['GP']

In [None]:
# Render the region/donor annotation SLURM template (CPSAM variant) for every
# non-excluded (exp, donor) pair, submit it, and remember the job id.
# template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/region_donor_annot.sh"
# run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor")
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/region_donor_annot_cpsam.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/01_region_donor_annot.ipynb"

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

# "<exp>_<donor>" -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        out_dir = run_dir / f"{exp}_{donor}"
        out_dir.mkdir(exist_ok=True, parents=True)
        out_script = out_dir / "region_donor_annot.sh"
        out_nb = out_dir / "region_donor_annot.ipynb"

        with open(out_script, "w") as f:
            content = bash_content.format(EXP=exp, REF_EXP=EXP_TO_REF_PATH[exp], DONOR=donor)
            f.write(content)
        ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
        if ret.returncode != 0:
            # A failed submission used to be stored as an empty job id (later
            # handed to scancel) and still reported as submitted.
            print(f"sbatch failed for {exp} {donor}: {ret.stderr.decode().strip()}")
            continue
        sbatch_message = ret.stdout.decode().strip()
        nb_annot_ret[f"{exp}_{donor}"] = sbatch_message.split(" ")[-1]
        print(sbatch_message)
        print(f"Submitted job for {exp} {donor}")
    # break
        

In [None]:
# Cancel every job currently recorded in nb_annot_ret.
for job_label, job_id in nb_annot_ret.items():
    if not job_id:
        # An empty id comes from a submission whose stdout was empty; there is
        # nothing to cancel for it.
        print(f"No job id recorded for {job_label}; skipping")
        continue
    cancel_ret = subprocess.run(f"scancel {job_id}", shell=True, capture_output=True)
    if cancel_ret.returncode != 0:
        # Output is captured, so surface failures explicitly.
        print(f"scancel {job_id} ({job_label}) failed: {cancel_ret.stderr.decode().strip()}")

# Region Donor Group

In [None]:
# Configuration for the region/donor group section: experiment codes, donor
# ids, (exp, donor) pairs to skip, and the experiment -> reference-name map.
experiments = "PU CAH CAB CAT NAC STH MGM GP".split()
donors = "UCI2424 UCI4723 UCI5224 UWA7648".split()
exclude = [("CAT", "UWA7648")]
EXP_TO_REF_PATH = dict(
    zip(
        ("PU", "CAH", "CAB", "CAT", "GP", "STH", "NAC", "MGM"),
        ("PU", "CA", "CA", "CA", "GP", "STH", "NAC", "MGM1"),
    )
)

In [None]:
# SLURM header placed at the top of each region/donor group job script.
# Filled with str.format; placeholders are {EXP} and {DONOR} (job name and log
# file). stdout and stderr (-o / -e) share one log file.
bash_header = """#!/bin/bash
# FILENAME = group_annot.sh 

#SBATCH -A mcb130189
#SBATCH -J {EXP}_{DONOR}_group_annot
#SBATCH -p wholenode
#SBATCH --time=2:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem=64gb
#SBATCH -o /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor/{EXP}_{DONOR}_group.out
#SBATCH -e /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor/{EXP}_{DONOR}_group.out
#SBATCH --export=ALL

export PATH="/home/x-aklein2/.pixi/bin:$PATH"
cd /anvil/projects/x-mcb130189/aklein/SPIDA
"""

In [None]:
# Per-subclass shell fragment: a papermill run of
# 02_region_donor_group_annot.ipynb with all inputs and outputs under
# execute/region_donor/{EXP}_{DONOR}/. Filled with str.format; placeholders are
# {EXP}, {DONOR}, {REF_EXP}, {SUBCLASS} and {subclass_filename}.
template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_region_donor_group_annot.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor/{EXP}_{DONOR}/groups/{subclass_filename}/integrate.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/AIT_{REF_EXP}_filtered.h5ad \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor/{EXP}_{DONOR}/final.h5ad \\
    -p save_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor/{EXP}_{DONOR}/groups/{subclass_filename}/final.h5ad \\
    -p integrator_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor/{EXP}_{DONOR}/groups/{subclass_filename}/joint.h5ad \\
    -p image_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor/{EXP}_{DONOR}/groups/{subclass_filename}/figures \\
    -p label_transfer_k 30 \\
    -p top_deg_genes 50 \\
    -k preprocessing
"""

In [None]:
# For every non-excluded (exp, donor) pair: read its final.h5ad, build a job
# script with one papermill run per 'Subclass' value, write it to
# <run_dir>/<exp>_<donor>/groups/group_annot.sh and submit it.
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor")

# "<exp>_<donor>" -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        ad_file = run_dir / f"{exp}_{donor}/final.h5ad"
        adata = ad.read_h5ad(ad_file)
        subclasses = adata.obs['Subclass'].unique().tolist()
        run = bash_header.format(EXP=exp, DONOR=donor)
        for subclass in subclasses:
            if subclass == "unknown" or pd.isna(subclass):
                continue
            # Make the label usable as a directory name.
            subclass_filename = subclass.replace(" ", "_").replace("/", "-")
            subclass_formatted = template.format(
                EXP=exp,
                DONOR=donor,
                REF_EXP=EXP_TO_REF_PATH[exp],
                SUBCLASS=subclass,
                subclass_filename=subclass_filename
            )
            run += subclass_formatted
            out_dir = run_dir / f"{exp}_{donor}/groups" / subclass_filename
            out_dir.mkdir(parents=True, exist_ok=True)
        out_script = run_dir / f"{exp}_{donor}/groups/group_annot.sh"
        # The per-subclass mkdir above is the only thing that creates
        # "groups"; make sure it exists even when every subclass was skipped.
        out_script.parent.mkdir(parents=True, exist_ok=True)
        with open(out_script, "w") as f:
            f.write(run)
        ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
        if ret.returncode != 0:
            # A failed submission used to be stored as an empty job id and
            # still reported as submitted.
            print(f"sbatch failed for {exp} {donor}: {ret.stderr.decode().strip()}")
            continue
        sbatch_message = ret.stdout.decode().strip()
        nb_annot_ret[f"{exp}_{donor}"] = sbatch_message.split(" ")[-1]
        print(sbatch_message)
        print(f"Submitted job for {exp} {donor}")
    # break
        

### CPSAM

In [None]:
# SLURM header for the CPSAM region/donor group job scripts. Filled with
# str.format; placeholders are {EXP} and {DONOR}. Logs go to the
# region_donor_cpsam log directory; stdout and stderr share one file.
bash_header = """#!/bin/bash
# FILENAME = group_annot.sh 

#SBATCH -A mcb130189
#SBATCH -J {EXP}_{DONOR}_cp_group_annot
#SBATCH -p wholenode
#SBATCH --time=2:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem=64gb
#SBATCH -o /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_cpsam/{EXP}_{DONOR}_group.out
#SBATCH -e /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_cpsam/{EXP}_{DONOR}_group.out
#SBATCH --export=ALL

export PATH="/home/x-aklein2/.pixi/bin:$PATH"
cd /anvil/projects/x-mcb130189/aklein/SPIDA
"""

In [None]:
# Per-subclass shell fragment for the CPSAM variant: same notebook as the
# region_donor template, but all paths live under execute/region_donor_cpsam/
# and the kernel passed to -k is "preprocessing_bp". Filled with str.format;
# placeholders are {EXP}, {DONOR}, {REF_EXP}, {SUBCLASS}, {subclass_filename}.
template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_region_donor_group_annot.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam/{EXP}_{DONOR}/groups/{subclass_filename}/integrate.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/AIT_{REF_EXP}_filtered.h5ad \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam/{EXP}_{DONOR}/final.h5ad \\
    -p save_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam/{EXP}_{DONOR}/groups/{subclass_filename}/final.h5ad \\
    -p integrator_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam/{EXP}_{DONOR}/groups/{subclass_filename}/joint.h5ad \\
    -p image_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam/{EXP}_{DONOR}/groups/{subclass_filename}/figures \\
    -p label_transfer_k 30 \\
    -p top_deg_genes 50 \\
    -k preprocessing_bp
"""

In [None]:
# CPSAM variant: for every non-excluded (exp, donor) pair, read its
# final.h5ad, build a job script with one papermill run per 'Subclass' value,
# write it to <run_dir>/<exp>_<donor>/groups/group_annot.sh and submit it.
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam")

# "<exp>_<donor>" -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        ad_file = run_dir / f"{exp}_{donor}/final.h5ad"
        adata = ad.read_h5ad(ad_file)
        subclasses = adata.obs['Subclass'].unique().tolist()
        run = bash_header.format(EXP=exp, DONOR=donor)
        for subclass in subclasses:
            if subclass == "unknown" or pd.isna(subclass):
                continue
            # Make the label usable as a directory name.
            subclass_filename = subclass.replace(" ", "_").replace("/", "-")
            subclass_formatted = template.format(
                EXP=exp,
                DONOR=donor,
                REF_EXP=EXP_TO_REF_PATH[exp],
                SUBCLASS=subclass,
                subclass_filename=subclass_filename
            )
            run += subclass_formatted
            out_dir = run_dir / f"{exp}_{donor}/groups" / subclass_filename
            out_dir.mkdir(parents=True, exist_ok=True)
        out_script = run_dir / f"{exp}_{donor}/groups/group_annot.sh"
        # The per-subclass mkdir above is the only thing that creates
        # "groups"; make sure it exists even when every subclass was skipped.
        out_script.parent.mkdir(parents=True, exist_ok=True)
        with open(out_script, "w") as f:
            f.write(run)
        ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
        if ret.returncode != 0:
            # A failed submission used to be stored as an empty job id and
            # still reported as submitted.
            print(f"sbatch failed for {exp} {donor}: {ret.stderr.decode().strip()}")
            continue
        sbatch_message = ret.stdout.decode().strip()
        nb_annot_ret[f"{exp}_{donor}"] = sbatch_message.split(" ")[-1]
        print(sbatch_message)
        print(f"Submitted job for {exp} {donor}")
    # break
        

### scancel

In [None]:
# Cancel every job currently recorded in nb_annot_ret.
for job_label, job_id in nb_annot_ret.items():
    if not job_id:
        # An empty id comes from a submission whose stdout was empty; there is
        # nothing to cancel for it.
        print(f"No job id recorded for {job_label}; skipping")
        continue
    cancel_ret = subprocess.run(f"scancel {job_id}", shell=True, capture_output=True)
    if cancel_ret.returncode != 0:
        # Output is captured, so surface failures explicitly.
        print(f"scancel {job_id} ({job_label}) failed: {cancel_ret.stderr.decode().strip()}")

# Region Donor Lab

In [None]:
# Configuration for the region/donor/lab section.
# `experiments` is assigned twice; the second assignment wins, so only "GP"
# is active when this cell is run as written.
experiments = "PU CAH CAB CAT NAC".split()  # "MGM", "GP", "STH" left out
# experiments = ["STH"]
experiments = ["GP"]
donors = "UCI2424 UCI4723 UCI5224 UWA7648".split()
labs = "salk ucsd".split()
exclude = [("CAT", "UWA7648")]
# Experiment code -> reference name used for {REF_EXP}.
EXP_TO_REF_PATH = dict(
    PU="PU",
    CAH="CA",
    CAB="CA",
    CAT="CA",
    GP="GP",
    STH="STH",
    NAC="NAC",
    MGM="MGM1",
)

## Proseg

In [None]:
# Render the region/donor/lab annotation SLURM template for every non-excluded
# (exp, donor) pair and submit one job per pair. The template receives both
# labs at once (LAB1 / LAB2).
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/region_donor_lab_annot_gp.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/01_region_donor_lab_annot.ipynb"

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

# "<exp>_<donor>_<last lab>" -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        # Create an output directory for each lab.
        for lab in labs:
            out_dir = run_dir / f"{exp}_{donor}_{lab}"
            out_dir.mkdir(exist_ok=True, parents=True)
            out_script = out_dir / "region_donor_lab_annot.sh"
            out_nb = out_dir / "region_donor_lab_annot.ipynb"

        # The write/submit below is outside the `for lab` loop: a single script
        # per (exp, donor) is written into the LAST lab's directory, and `lab`
        # in the dictionary key is likewise the last lab.
        # NOTE(review): confirm one submission per donor (not per lab) is intended.
        with open(out_script, "w") as f:
            content = bash_content.format(EXP=exp, REF_EXP=EXP_TO_REF_PATH[exp], DONOR=donor, LAB1=labs[0], LAB2=labs[1])
            f.write(content)
        ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
        if ret.returncode != 0:
            # A failed submission used to be stored as an empty job id and
            # still reported as submitted.
            print(f"sbatch failed for {exp} {donor}: {ret.stderr.decode().strip()}")
            continue
        sbatch_message = ret.stdout.decode().strip()
        nb_annot_ret[f"{exp}_{donor}_{lab}"] = sbatch_message.split(" ")[-1]
        print(sbatch_message)
        print(f"Submitted job for {exp} {donor}")

    #     break
    # break
        
    #     break
    # break

        

## CPSAM

In [None]:
# CPSAM variant: render the region/donor/lab annotation SLURM template for
# every non-excluded (exp, donor) pair and submit one job per pair. The
# template receives both labs at once (LAB1 / LAB2).
template = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/slurm_scripts/region_donor_lab_cpsam_annot_gp.sh"
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2")
input_notebook = "/home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/01_region_donor_lab_cpsam_annot.ipynb"

with open(template, "r") as f:
    bash_content = f.read()
# print(bash_content)

# "<exp>_<donor>_<last lab>" -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        # Create an output directory for each lab.
        for lab in labs:
            out_dir = run_dir / f"{exp}_{donor}_{lab}"
            out_dir.mkdir(exist_ok=True, parents=True)
            out_script = out_dir / "region_donor_lab_annot.sh"
            out_nb = out_dir / "region_donor_lab_annot.ipynb"

        # The write/submit below is outside the `for lab` loop: a single script
        # per (exp, donor) is written into the LAST lab's directory, and `lab`
        # in the dictionary key is likewise the last lab.
        # NOTE(review): confirm one submission per donor (not per lab) is intended.
        with open(out_script, "w") as f:
            content = bash_content.format(EXP=exp, REF_EXP=EXP_TO_REF_PATH[exp], DONOR=donor, LAB1=labs[0], LAB2=labs[1])
            f.write(content)
        ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
        if ret.returncode != 0:
            # A failed submission used to be stored as an empty job id and
            # still reported as submitted.
            print(f"sbatch failed for {exp} {donor}: {ret.stderr.decode().strip()}")
            continue
        sbatch_message = ret.stdout.decode().strip()
        nb_annot_ret[f"{exp}_{donor}_{lab}"] = sbatch_message.split(" ")[-1]
        print(sbatch_message)
        print(f"Submitted job for {exp} {donor}")
        
        

## scancel

In [None]:
# Cancel every job currently recorded in nb_annot_ret.
for job_label, job_id in nb_annot_ret.items():
    if not job_id:
        # An empty id comes from a submission whose stdout was empty; there is
        # nothing to cancel for it.
        print(f"No job id recorded for {job_label}; skipping")
        continue
    cancel_ret = subprocess.run(f"scancel {job_id}", shell=True, capture_output=True)
    if cancel_ret.returncode != 0:
        # Output is captured, so surface failures explicitly.
        print(f"scancel {job_id} ({job_label}) failed: {cancel_ret.stderr.decode().strip()}")

# Region Donor Lab Group

In [None]:
# Configuration for the region/donor/lab group section.
experiments = "PU CAH CAB CAT NAC".split()  # "MGM", "GP", "STH" left out
# experiments = ["PU", "NAC"]
# experiments = ["STH", "GP"]
donors = "UCI2424 UCI4723 UCI5224 UWA7648".split()
labs = "salk ucsd".split()
exclude = [("CAT", "UWA7648")]
# Experiment code -> reference name used for {REF_EXP}.
EXP_TO_REF_PATH = dict(
    [
        ("PU", "PU"),
        ("CAH", "CA"),
        ("CAB", "CA"),
        ("CAT", "CA"),
        ("GP", "GP"),
        ("STH", "STH"),
        ("NAC", "NAC"),
        ("MGM", "MGM1"),
    ]
)

## Proseg

In [None]:
# SLURM header for the region/donor/lab group job scripts. Filled with
# str.format; placeholders are {EXP}, {DONOR} and {LAB}. stdout and stderr
# share one log file.
# NOTE(review): the header requests --nodes=4 together with --ntasks=1 on the
# "shared" partition; confirm that four nodes (rather than, e.g., four CPUs
# for the single task) is what is intended.
bash_header = """#!/bin/bash
# FILENAME = rdl_group_annot.sh 

#SBATCH -A mcb130189
#SBATCH -J {EXP}_{DONOR}_{LAB}_group_annot
#SBATCH -p shared
#SBATCH --time=2:30:00
#SBATCH --nodes=4
#SBATCH --ntasks=1
#SBATCH --mem=46gb
#SBATCH -o /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_lab/{EXP}_{DONOR}_{LAB}_group.out
#SBATCH -e /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_lab/{EXP}_{DONOR}_{LAB}_group.out
#SBATCH --export=ALL

export PATH="/home/x-aklein2/.pixi/bin:$PATH"
cd /anvil/projects/x-mcb130189/aklein/SPIDA
"""

In [None]:
# Per-subclass shell fragment for the region/donor/lab runs: a papermill run
# of 02_region_donor_group_annot.ipynb with all paths under
# execute/region_donor_lab_gp/{EXP}_{DONOR}_{LAB}/. Filled with str.format;
# placeholders are {EXP}, {DONOR}, {LAB}, {REF_EXP}, {SUBCLASS} and
# {subclass_filename}.
template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_region_donor_group_annot.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/integrate.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/AIT_{REF_EXP}_filtered.h5ad \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp/{EXP}_{DONOR}_{LAB}/final.h5ad \\
    -p save_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/final.h5ad \\
    -p integrator_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/joint.h5ad \\
    -p image_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/figures \\
    -p label_transfer_k 30 \\
    -p top_deg_genes 150 \\
    -p deg_type cef \\
    -p joint_embedding_leiden_res 0.6 \\
    -k preprocessing_bp
"""

In [None]:
# For every non-excluded (exp, donor) pair and every lab: read the matching
# final.h5ad, build a job script with one papermill run per 'Subclass' value,
# write it to <run_dir>/<exp>_<donor>_<lab>/groups/group_annot.sh and submit it.
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_gp")

# "<exp>_<donor>_<lab>" -> job id (last token of sbatch's stdout)
nb_annot_ret = {}
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        for lab in labs:
            ad_file = run_dir / f"{exp}_{donor}_{lab}/final.h5ad"
            adata = ad.read_h5ad(ad_file)
            subclasses = adata.obs['Subclass'].unique().tolist()
            run = bash_header.format(EXP=exp, DONOR=donor, LAB=lab)
            for subclass in subclasses:
                if subclass == "unknown" or pd.isna(subclass):
                    continue
                # Make the label usable as a directory name.
                subclass_filename = subclass.replace(" ", "_").replace("/", "-")
                subclass_formatted = template.format(
                    EXP=exp,
                    DONOR=donor,
                    LAB=lab,
                    REF_EXP=EXP_TO_REF_PATH[exp],
                    SUBCLASS=subclass,
                    subclass_filename=subclass_filename
                )
                run += subclass_formatted
                out_dir = run_dir / f"{exp}_{donor}_{lab}/groups" / subclass_filename
                out_dir.mkdir(parents=True, exist_ok=True)
            out_script = run_dir / f"{exp}_{donor}_{lab}/groups/group_annot.sh"
            # The per-subclass mkdir above is the only thing that creates
            # "groups"; make sure it exists even when every subclass was skipped.
            out_script.parent.mkdir(parents=True, exist_ok=True)
            with open(out_script, "w") as f:
                f.write(run)
            ret = subprocess.run(f"sbatch {out_script}", shell=True, capture_output=True)
            if ret.returncode != 0:
                # A failed submission used to be stored as an empty job id and
                # still reported as submitted.
                print(f"sbatch failed for {exp} {donor} {lab}: {ret.stderr.decode().strip()}")
                continue
            sbatch_message = ret.stdout.decode().strip()
            nb_annot_ret[f"{exp}_{donor}_{lab}"] = sbatch_message.split(" ")[-1]
            print(sbatch_message)
            print(f"Submitted job for {exp} {donor} {lab}")
    #         break
    #     break
    # break
    #         break
    #     break
    # break
        

## CPSAM

In [None]:
# SLURM header for the CPSAM region/donor/lab group job scripts. Filled with
# str.format; placeholders are {EXP}, {DONOR} and {LAB}. Logs go to the
# region_donor_lab_cpsam log directory; stdout and stderr share one file.
# NOTE(review): the header requests --nodes=4 together with --ntasks=1 on the
# "shared" partition; confirm that four nodes (rather than, e.g., four CPUs
# for the single task) is what is intended.
bash_header = """#!/bin/bash
# FILENAME = rdl_cp_group_annot.sh 

#SBATCH -A mcb130189
#SBATCH -J cp_{EXP}_{DONOR}_{LAB}_group_annot
#SBATCH -p shared
#SBATCH --time=2:00:00
#SBATCH --nodes=4
#SBATCH --ntasks=1
#SBATCH --mem=46gb
#SBATCH -o /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_lab_cpsam/{EXP}_{DONOR}_{LAB}_group.out
#SBATCH -e /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_lab_cpsam/{EXP}_{DONOR}_{LAB}_group.out
#SBATCH --export=ALL

export PATH="/home/x-aklein2/.pixi/bin:$PATH"
cd /anvil/projects/x-mcb130189/aklein/SPIDA
"""

In [None]:
# Shell snippet for one papermill run of 02_region_donor_group_annot.ipynb,
# launched through `pixi run`; one copy is appended to the job script per subclass.
# Placeholders filled with str.format:
#   {EXP}, {DONOR}, {LAB}  - select the sample directory under region_donor_lab_cpsam_gp
#   {REF_EXP}              - selects the AIT_{REF_EXP}_filtered.h5ad reference file
#   {SUBCLASS}             - value passed for both rna_subset_value and qry_subset_value
#   {subclass_filename}    - directory name used for this subclass's outputs
# Each "\\" is a single backslash in the resulting text, i.e. a shell line continuation.
template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_region_donor_group_annot.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/integrate.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/AIT_{REF_EXP}_filtered.h5ad \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam_gp/{EXP}_{DONOR}_{LAB}/final.h5ad \\
    -p save_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/final.h5ad \\
    -p integrator_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/joint.h5ad \\
    -p image_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam_gp/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/figures \\
    -p label_transfer_k 30 \\
    -p top_deg_genes 200 \\
    -p deg_type cef \\
    -p joint_embedding_leiden_res 0.65 \\
    -k preprocessing_bp
"""

In [None]:
# from_path = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_cpsam_lab")
# to_path = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam")
# for _exp_donor_lab_dir in from_path.glob("*"):
#     print(_exp_donor_lab_dir)
#     for file in _exp_donor_lab_dir.glob("*"):
#         if file.is_dir():
#             subprocess.run(f"mv {file} {to_path}/{_exp_donor_lab_dir.name}/{file.name}", shell=True)
#         else:
#             subprocess.run(f"mv {file} {to_path}/{_exp_donor_lab_dir.name}/{file.name}", shell=True)
#     # break

In [None]:
# Build and submit one SLURM script per (experiment, donor, lab) sample of the
# CPSAM run. Each script runs the group-annotation notebook once for every
# Subclass found in that sample's final.h5ad.
# Uses `experiments`, `donors`, `labs`, `exclude`, `EXP_TO_REF_PATH`,
# `bash_header` and `template`, all defined outside this cell.
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cpsam_gp")

nb_annot_ret = {}  # "<exp>_<donor>_<lab>" -> job id parsed from the sbatch output
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        for lab in labs:
            sample = f"{exp}_{donor}_{lab}"
            groups_dir = run_dir / sample / "groups"
            ad_file = run_dir / sample / "final.h5ad"
            adata = ad.read_h5ad(ad_file)
            subclasses = adata.obs['Subclass'].unique().tolist()

            commands = []
            for subclass in subclasses:
                # The "unknown" test comes first so a NaN never reaches .replace() below.
                if subclass == "unknown" or pd.isna(subclass):
                    continue
                # Spaces and slashes are replaced so the name is a single path component.
                subclass_filename = subclass.replace(" ", "_").replace("/", "-")
                commands.append(
                    template.format(
                        EXP=exp,
                        DONOR=donor,
                        LAB=lab,
                        REF_EXP=EXP_TO_REF_PATH[exp],
                        SUBCLASS=subclass,
                        subclass_filename=subclass_filename,
                    )
                )
                (groups_dir / subclass_filename).mkdir(parents=True, exist_ok=True)

            if not commands:
                # Without any subclass the groups/ directory was never created and the
                # script would hold only the header, so there is nothing worth submitting.
                print(f"No annotated subclasses for {exp} {donor} {lab}; nothing submitted")
                continue

            run = bash_header.format(EXP=exp, DONOR=donor, LAB=lab) + "".join(commands)
            out_script = groups_dir / "group_annot.sh"
            with open(out_script, "w") as f:
                f.write(run)

            # Argument list instead of a shell string: no quoting issues, same command.
            ret = subprocess.run(["sbatch", str(out_script)], capture_output=True, text=True)
            if ret.returncode != 0:
                # Do not record an empty job id or claim success when sbatch rejected the script.
                print(f"sbatch failed for {exp} {donor} {lab}: {ret.stderr.strip()}")
                continue
            nb_annot_ret[sample] = ret.stdout.strip().split(" ")[-1]
            print(ret.stdout.strip())
            print(f"Submitted job for {exp} {donor} {lab}")
        

## CPS

In [None]:
# SLURM header for the CPS group-annotation jobs.
# Placeholders filled with str.format: {EXP}, {DONOR}, {LAB}.
# Requests 1 node / 1 task / 46gb for 3 hours on the `shared` partition.
# stdout and stderr are written to the same log file (-o and -e are identical).
# After the #SBATCH lines the script puts pixi on PATH and changes into the SPIDA
# project directory.
bash_header = """#!/bin/bash
# FILENAME = rdl_cp_group_annot.sh 

#SBATCH -A mcb130189
#SBATCH -J cps_{EXP}_{DONOR}_{LAB}_group_annot
#SBATCH -p shared
#SBATCH --time=3:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem=46gb
#SBATCH -o /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}_group.out
#SBATCH -e /home/x-aklein2/projects/aklein/BICAN/BG/logs/annot/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}_group.out
#SBATCH --export=ALL

export PATH="/home/x-aklein2/.pixi/bin:$PATH"
cd /anvil/projects/x-mcb130189/aklein/SPIDA
"""

In [None]:
# Shell snippet for one papermill run of 02_region_donor_group_annot.ipynb,
# launched through `pixi run`; one copy is appended to the job script per subclass.
# Placeholders filled with str.format:
#   {EXP}, {DONOR}, {LAB}  - select the sample directory under region_donor_lab_cps2
#   {REF_EXP}              - selects the AIT_{REF_EXP}_filtered.h5ad reference file
#   {SUBCLASS}             - value passed for both rna_subset_value and qry_subset_value
#   {subclass_filename}    - directory name used for this subclass's outputs
# This variant also passes run_harmony_joint_embeddings and uses the
# `preprocessing` kernel (-k).
# Each "\\" is a single backslash in the resulting text, i.e. a shell line continuation.
template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_region_donor_group_annot.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/integrate.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/AIT_{REF_EXP}_filtered.h5ad \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2/{EXP}_{DONOR}_{LAB}/final.h5ad \\
    -p save_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/final.h5ad \\
    -p integrator_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/joint.h5ad \\
    -p image_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/figures \\
    -p label_transfer_k 30 \\
    -p top_deg_genes 200 \\
    -p deg_type cef \\
    -p joint_embedding_leiden_res 0.65 \\
    -p run_harmony_joint_embeddings True \\
    -k preprocessing
"""

In [None]:
# Build and submit one SLURM script per (experiment, donor, lab) sample of the
# CPS run. Each script runs the group-annotation notebook once for every
# Subclass found in that sample's final.h5ad.
# Uses `experiments`, `donors`, `labs`, `exclude`, `EXP_TO_REF_PATH`,
# `bash_header` and `template`, all defined outside this cell.
run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps2")

nb_annot_ret = {}  # "<exp>_<donor>_<lab>" -> job id parsed from the sbatch output
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        for lab in labs:
            sample = f"{exp}_{donor}_{lab}"
            groups_dir = run_dir / sample / "groups"
            ad_file = run_dir / sample / "final.h5ad"
            adata = ad.read_h5ad(ad_file)
            subclasses = adata.obs['Subclass'].unique().tolist()

            commands = []
            for subclass in subclasses:
                # The "unknown" test comes first so a NaN never reaches .replace() below.
                if subclass == "unknown" or pd.isna(subclass):
                    continue
                # Spaces and slashes are replaced so the name is a single path component.
                subclass_filename = subclass.replace(" ", "_").replace("/", "-")
                commands.append(
                    template.format(
                        EXP=exp,
                        DONOR=donor,
                        LAB=lab,
                        REF_EXP=EXP_TO_REF_PATH[exp],
                        SUBCLASS=subclass,
                        subclass_filename=subclass_filename,
                    )
                )
                (groups_dir / subclass_filename).mkdir(parents=True, exist_ok=True)

            if not commands:
                # Without any subclass the groups/ directory was never created and the
                # script would hold only the header, so there is nothing worth submitting.
                print(f"No annotated subclasses for {exp} {donor} {lab}; nothing submitted")
                continue

            run = bash_header.format(EXP=exp, DONOR=donor, LAB=lab) + "".join(commands)
            out_script = groups_dir / "group_annot.sh"
            with open(out_script, "w") as f:
                f.write(run)

            # Argument list instead of a shell string: no quoting issues, same command.
            ret = subprocess.run(["sbatch", str(out_script)], capture_output=True, text=True)
            if ret.returncode != 0:
                # Do not record an empty job id or claim success when sbatch rejected the script.
                print(f"sbatch failed for {exp} {donor} {lab}: {ret.stderr.strip()}")
                continue
            nb_annot_ret[sample] = ret.stdout.strip().split(" ")[-1]
            print(ret.stdout.strip())
            print(f"Submitted job for {exp} {donor} {lab}")
        

In [None]:
# V2 re-run (originally noted as "for NAC and PU"): re-annotate only the two MSN
# subclasses against the AIT_{REF_EXP}_filtered_2.h5ad reference, writing into
# the region_donor_lab_cps tree.
# NOTE(review): the loop below covers every entry of `experiments`; restrict that
# list before running if only NAC and PU are intended.
# Uses `experiments`, `donors`, `labs`, `exclude`, `EXP_TO_REF_PATH` and
# `bash_header`, all defined outside this cell.
template = """
pixi run \\
    papermill \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/template/02_region_donor_group_annot.ipynb \\
    /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/integrate.ipynb \\
    -p EXP {EXP} \\
    -p REF_EXP {REF_EXP} \\
    -p rna_subset_col "Subclass" \\
    -p rna_subset_value "{SUBCLASS}" \\
    -p qry_subset_col "Subclass" \\
    -p qry_subset_value "{SUBCLASS}" \\
    -p ref_adata_path /home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/AIT_{REF_EXP}_filtered_2.h5ad \\
    -p spatial_adata_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}/final.h5ad \\
    -p save_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/final.h5ad \\
    -p integrator_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/joint.h5ad \\
    -p image_path /home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps/{EXP}_{DONOR}_{LAB}/groups/{subclass_filename}/figures \\
    -p label_transfer_k 30 \\
    -p top_deg_genes 200 \\
    -p deg_type cef \\
    -p joint_embedding_leiden_res 0.65 \\
    -k preprocessing_bp
"""

run_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/annotation/execute/region_donor_lab_cps")

nb_annot_ret = {}  # "<exp>_<donor>_<lab>" -> job id parsed from the sbatch output
for exp in experiments:
    for donor in donors:
        if (exp, donor) in exclude:
            print(f"Skipping {exp} {donor}")
            continue
        for lab in labs:
            sample = f"{exp}_{donor}_{lab}"
            groups_dir = run_dir / sample / "groups"
            # Fixed list for this re-run; final.h5ad is deliberately not read here.
            subclasses = ["STR D1 MSN", "STR D2 MSN"]
            run = bash_header.format(EXP=exp, DONOR=donor, LAB=lab)
            for subclass in subclasses:
                # Kept so the cell still works if `subclasses` is taken from adata.obs again.
                if subclass == "unknown" or pd.isna(subclass):
                    continue
                # Spaces and slashes are replaced so the name is a single path component.
                subclass_filename = subclass.replace(" ", "_").replace("/", "-")
                run += template.format(
                    EXP=exp,
                    DONOR=donor,
                    LAB=lab,
                    REF_EXP=EXP_TO_REF_PATH[exp],
                    SUBCLASS=subclass,
                    subclass_filename=subclass_filename,
                )
                (groups_dir / subclass_filename).mkdir(parents=True, exist_ok=True)

            # Separate file name so the script from the full run is not overwritten.
            out_script = groups_dir / "group_annot_v2.sh"
            with open(out_script, "w") as f:
                f.write(run)

            # Argument list instead of a shell string: no quoting issues, same command.
            ret = subprocess.run(["sbatch", str(out_script)], capture_output=True, text=True)
            if ret.returncode != 0:
                # Do not record an empty job id or claim success when sbatch rejected the script.
                print(f"sbatch failed for {exp} {donor} {lab}: {ret.stderr.strip()}")
                continue
            nb_annot_ret[sample] = ret.stdout.strip().split(" ")[-1]
            print(ret.stdout.strip())
            print(f"Submitted job for {exp} {donor} {lab}")
        

## scancel 

In [None]:
# Cancel every job recorded in `nb_annot_ret` (sample name -> job id parsed from
# the sbatch output), i.e. the jobs of whichever submission cell ran last.
for sample, job_id in nb_annot_ret.items():
    if not job_id:
        # An empty id means no job id was captured at submission; a bare `scancel`
        # would cancel nothing, so say so instead.
        print(f"No job id recorded for {sample}; skipping")
        continue
    cancel_ret = subprocess.run(["scancel", str(job_id)], capture_output=True, text=True)
    if cancel_ret.returncode != 0:
        # Report the failure rather than discarding it with the captured output.
        print(f"scancel {job_id} ({sample}) failed: {cancel_ret.stderr.strip()}")

# Notes

Jiang Notes: 

Mitochondrial genes / ribosomal genes --> not-well-annotated genes that behave differently.
Isoform-specific genes. They look for the highest-expressing isoforms of genes.
