# Annotate KPMP data with azimuth-annotate

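:warning: This notebook does not currently work: in the captured output of the last cell, the `azimuth-annotate` step returns an empty output object (`{}`), and the follow-up `write-metadata` step then fails input validation because `kpmp-sc-rnaseq/annotations.csv` does not exist.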

In [1]:
# Install cwltool (its `cwltool.main` entry point is imported in the next cell) and cwlref-runner.
!pip install cwltool cwlref-runner



In [2]:
from cwltool.main import main as cwl_main
import tempfile
import json

def cwlrunner(workflow, job = None, output_dir = "."):
    """
    Runs a CWL (Common Workflow Language) workflow using Singularity.

    The job inputs are written as JSON to a temporary file in the current
    working directory, and cwltool is then invoked in-process with
    `--singularity`, using `output_dir` for both `--outdir` and `--log-dir`.
    The temporary job file is deleted when the run finishes.

    Parameters:
    workflow (str): The path or URL to the CWL workflow file.
    job (dict, optional): A dictionary representing the job inputs. Defaults to no inputs (an empty dictionary).
    output_dir (str, optional): Directory handed to cwltool for outputs and logs. Defaults to ".".

    Returns:
    Whatever `cwltool.main.main` returns (per the cwltool documentation, an
    integer exit status where 0 means success).

    Example:
    >>> cwlrunner('path/to/workflow.cwl', {'input1': 'value1', 'input2': 'value2'})
    """
    # Avoid a shared mutable default: build a fresh empty job per call.
    job_inputs = {} if job is None else job

    # NOTE(review): the job file is created in "." rather than the system temp
    # directory; presumably so relative File locations in the job resolve
    # against the working directory -- confirm against the CWL spec.
    with tempfile.NamedTemporaryFile(mode="w+t", dir=".", suffix=".json") as job_file:
        json.dump(job_inputs, job_file)
        # cwltool reads the file by name, so the buffered JSON must hit disk first.
        job_file.flush()
        return cwl_main([
            '--singularity',
            '--log-dir', output_dir,
            '--outdir', output_dir,
            workflow,
            job_file.name,
        ])

In [3]:
def download_azimuth_models():
    """
    Runs the Azimuth `download-data.cwl` workflow from the hra-workflows
    repository with no job inputs and the default output directory.
    """
    workflow_url = (
        "https://raw.githubusercontent.com/hubmapconsortium/hra-workflows"
        "/main/containers/azimuth/download-data.cwl"
    )
    cwlrunner(workflow_url)

def download_popv_models():
    """
    Runs the popV `download-data.cwl` workflow from the hra-workflows
    repository with no job inputs and the default output directory.
    """
    workflow_url = (
        "https://raw.githubusercontent.com/hubmapconsortium/hra-workflows"
        "/main/containers/popv/download-data.cwl"
    )
    cwlrunner(workflow_url)

def run_hra_workflows(job, output_dir):
    """
    Runs the hra-workflows `pipeline.cwl` workflow.

    Parameters:
    job (dict): The job inputs forwarded to the pipeline.
    output_dir (str): Directory forwarded to `cwlrunner` for outputs and logs.
    """
    pipeline_url = (
        "https://raw.githubusercontent.com/hubmapconsortium/hra-workflows"
        "/main/pipeline.cwl"
    )
    cwlrunner(pipeline_url, job, output_dir)

def run_azimuth_annotate(h5ad, output_dir):
    """
    Runs two steps of the hubmapconsortium/azimuth-annotate repository in
    sequence: `azimuth-annotate.cwl`, then `write-metadata.cwl`.

    The second step is given three files expected under `output_dir`
    (`secondary_analysis.h5ad`, `version_metadata.json`, `annotations.csv`).
    NOTE(review): presumably these are produced by the first step -- confirm
    against the step definitions.

    Parameters:
    h5ad (str): Location of the input h5ad file; used as both `matrix` and
        `secondary_analysis_matrix` for the first step.
    output_dir (str): Directory used for the outputs and logs of both steps.
    """
    steps_base = "https://raw.githubusercontent.com/hubmapconsortium/azimuth-annotate/refs/heads/main/steps"

    def as_file(location):
        # CWL File input object pointing at `location`.
        return {"class": "File", "location": location}

    annotate_job = {
        "matrix": as_file(h5ad),
        "reference": "LK",
        "secondary_analysis_matrix": as_file(h5ad),
    }
    cwlrunner(steps_base + "/azimuth-annotate.cwl", annotate_job, output_dir)

    # Built only after the first step has run, mirroring the call order.
    metadata_job = {
        "orig_secondary_analysis_matrix": as_file(h5ad),
        "secondary_analysis_matrix": as_file(output_dir + "/secondary_analysis.h5ad"),
        "version_metadata": as_file(output_dir + "/version_metadata.json"),
        "annotations_csv": as_file(output_dir + "/annotations.csv"),
    }
    cwlrunner(steps_base + "/write-metadata.cwl", metadata_job, output_dir)


def run_azimuth_kidney(
    h5ad,
    output_dir,
    crosswalk_table="/home/bherr/workspaces/hubmap/hra-workflows-runner/crosswalking-tables/azimuth.csv",
):
    """
    Runs the hra-workflows pipeline with the `azimuth` algorithm on an h5ad
    file, using organ id UBERON:0002113.

    Parameters:
    h5ad (str): Location of the input h5ad file. Also embedded in the
        summarize step's `cellSource` as `urn:h5ad:<h5ad>`.
    output_dir (str): Directory forwarded to `run_hra_workflows`.
    crosswalk_table (str, optional): Path to the Azimuth crosswalk CSV.
        Defaults to the previously hard-coded location, which only exists on
        the original author's machine; pass your own path elsewhere.
    """
    azimuth_options = {
        "referenceDataDir": {
            "class": "Directory",
            # Relative path into a local "models" directory.
            "path": "models/azimuth"
        },
        "queryLayersKey": "raw"
    }
    crosswalk_options = {
        "table": {
            "class": "File",
            "path": crosswalk_table
        },
        "tableLabelColumn": "Annotation_Label",
        "tableClidColumn": "CL_ID",
        "tableMatchColumn": "CL_Match"
    }
    summarize_options = {
        "annotationMethod": "azimuth",
        "cellSource": f"urn:h5ad:{h5ad}"
    }

    run_hra_workflows({
        "matrix": {
            "class": "File", "location": h5ad
        },
        "organ": "UBERON:0002113",
        "algorithms": [
            {
                "azimuth": azimuth_options,
                "crosswalk": crosswalk_options,
                "summarize": summarize_options,
                "directory": "azimuth"
            }
        ]
    }, output_dir)

def run_celltypist_kidney(
    h5ad,
    output_dir,
    crosswalk_table="/home/bherr/workspaces/hubmap/hra-workflows-runner/crosswalking-tables/celltypist.csv",
):
    """
    Runs the hra-workflows pipeline with the `celltypist` algorithm on an
    h5ad file, using organ id UBERON:0002113.

    Parameters:
    h5ad (str): Location of the input h5ad file. Also embedded in the
        summarize step's `cellSource` as `urn:h5ad:<h5ad>`.
    output_dir (str): Directory forwarded to `run_hra_workflows`.
    crosswalk_table (str, optional): Path to the CellTypist crosswalk CSV.
        Defaults to the previously hard-coded location, which only exists on
        the original author's machine; pass your own path elsewhere.
    """
    celltypist_options = {
        "queryLayersKey": "raw"
    }
    crosswalk_options = {
        "table": {
            "class": "File",
            "path": crosswalk_table
        },
        "tableLabelColumn": "Annotation_Label",
        "tableClidColumn": "CL_ID",
        "tableMatchColumn": "CL_Match"
    }
    summarize_options = {
        "annotationMethod": "celltypist",
        "cellSource": f"urn:h5ad:{h5ad}"
    }

    run_hra_workflows({
        "matrix": {
            "class": "File", "location": h5ad
        },
        "organ": "UBERON:0002113",
        "algorithms": [
            {
                "celltypist": celltypist_options,
                "crosswalk": crosswalk_options,
                "summarize": summarize_options,
                "directory": "celltypist"
            }
        ]
    }, output_dir)

def run_popv_kidney(
    h5ad,
    output_dir,
    crosswalk_table="/home/bherr/workspaces/hubmap/hra-workflows-runner/crosswalking-tables/popv.csv",
):
    """
    Runs the hra-workflows pipeline with the `popv` algorithm on an h5ad
    file, using organ id UBERON:0002113.

    Parameters:
    h5ad (str): Location of the input h5ad file. Also embedded in the
        summarize step's `cellSource` as `urn:h5ad:<h5ad>`.
    output_dir (str): Directory forwarded to `run_hra_workflows`.
    crosswalk_table (str, optional): Path to the popV crosswalk CSV.
        Defaults to the previously hard-coded location, which only exists on
        the original author's machine; pass your own path elsewhere.
    """
    popv_options = {
        "queryLayersKey": "raw",
        # Both directories are relative paths into a local "models" directory.
        "referenceDataDir": {
            "class": "Directory",
            "path": "models/popv/reference-data"
        },
        "modelsDir": {
            "class": "Directory",
            "path": "models/popv/models"
        },
    }
    crosswalk_options = {
        "table": {
            "class": "File",
            "path": crosswalk_table
        },
        "tableLabelColumn": "Annotation_Label",
        "tableClidColumn": "CL_ID",
        "tableMatchColumn": "CL_Match"
    }
    summarize_options = {
        "annotationMethod": "popv",
        "cellSource": f"urn:h5ad:{h5ad}"
    }

    run_hra_workflows({
        "matrix": {
            "class": "File", "location": h5ad
        },
        "organ": "UBERON:0002113",
        "algorithms": [
            {
                "popv": popv_options,
                "crosswalk": crosswalk_options,
                "summarize": summarize_options,
                "directory": "popv"
            }
        ]
    }, output_dir)

## Run Azimuth

In [None]:
# Run the Azimuth download-data workflow via the helper defined above.
download_azimuth_models()

In [8]:
# Create the directory that is passed as output_dir to run_azimuth_annotate below.
!mkdir -p kpmp-sc-rnaseq

In [4]:
# Annotate kpmp-sc-rnaseq.h5ad, using the kpmp-sc-rnaseq directory for outputs and logs.
run_azimuth_annotate('kpmp-sc-rnaseq.h5ad', 'kpmp-sc-rnaseq')

[1;30mINFO[0m /home/bherr/workspaces/hubmap/hra-hubmap-kpmp-integration/.venv/lib/python3.10/site-packages/ipykernel_launcher.py 3.1.20240909164951
[1;30mINFO[0m ['singularity', 'pull', '--force', '--name', 'hubmap_azimuth-annotate:1.5.sif', 'docker://hubmap/azimuth-annotate:1.5']
INFO:    Using cached SIF image
[1;30mINFO[0m [job azimuth-annotate.cwl] /tmp/ju6gact6$ singularity \
    --quiet \
    exec \
    --contain \
    --ipc \
    --cleanenv \
    --userns \
    --home \
    /tmp/ju6gact6:/sDuXXH \
    --mount=type=bind,source=/tmp/ybyfbus_,target=/tmp \
    --mount=type=bind,source=/home/bherr/workspaces/hubmap/hra-hubmap-kpmp-integration/notebooks/kpmp-sc-rnaseq.h5ad,target=/var/lib/cwl/stg1d0b9513-87f5-41d9-862b-4871781aac59/kpmp-sc-rnaseq.h5ad,readonly \
    --pwd \
    /sDuXXH \
    /home/bherr/workspaces/hubmap/hra-hubmap-kpmp-integration/notebooks/hubmap_azimuth-annotate:1.5.sif \
    Rscript \
    /azimuth_analysis.R \
    LK \
    /var/lib/cwl/stg1d0b9513-87f5-41d9

{}

[1;30mINFO[0m /home/bherr/workspaces/hubmap/hra-hubmap-kpmp-integration/.venv/lib/python3.10/site-packages/ipykernel_launcher.py 3.1.20240909164951
[1;30mERROR[0m [31mInput object failed validation:
tmpybn4vm2l.json:1:320: [Errno 2] No such file or directory:
                        '/home/bherr/workspaces/hubmap/hra-hubmap-kpmp-integration/notebooks/kpmp-sc-rnaseq/annotations.csv'[0m
