From ce9c630221e7dbc189f612f57abc3648213907ef Mon Sep 17 00:00:00 2001 From: Devon Bush Date: Tue, 19 Apr 2022 15:11:42 -0400 Subject: [PATCH 01/12] Revert "setting value for __event_name" This reverts commit 4c8baf1a8a23d6187363113c58dc5c81e3b01a12. --- ingest/ingest_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ingest/ingest_pipeline.py b/ingest/ingest_pipeline.py index ba4c5789..427f7ef2 100644 --- a/ingest/ingest_pipeline.py +++ b/ingest/ingest_pipeline.py @@ -600,7 +600,6 @@ def main() -> None: print(f'{key}: {metrics_dump[key]}') except Exception as e: - config.set_parent_event_name("ingest-pipeline:unhandled-exception:ingest") log_exception(IngestPipeline.dev_logger, IngestPipeline.user_logger, e) status = 1 From 9d2a8f32a2fe0cd0a0e49912074538bdd0c486fd Mon Sep 17 00:00:00 2001 From: Devon Bush Date: Tue, 19 Apr 2022 15:11:55 -0400 Subject: [PATCH 02/12] Revert "improved variable scoping" This reverts commit 188218d3f5ce78de332a798e19d9050ee44b3707. --- ingest/ingest_pipeline.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/ingest/ingest_pipeline.py b/ingest/ingest_pipeline.py index 427f7ef2..a3965895 100644 --- a/ingest/ingest_pipeline.py +++ b/ingest/ingest_pipeline.py @@ -576,22 +576,17 @@ def main() -> None: Returns: None """ - parsed_args = create_parser().parse_args() - validate_arguments(parsed_args) - arguments = vars(parsed_args) status = 0 - status_cell_metadata = 0 - ingest = None - - # Initialize global variables for current ingest job - config.init( - arguments["study_id"], - arguments["study_file_id"], - arguments["user_metrics_uuid"], - ) - try: - + parsed_args = create_parser().parse_args() + validate_arguments(parsed_args) + arguments = vars(parsed_args) + # Initialize global variables for current ingest job + config.init( + arguments["study_id"], + arguments["study_file_id"], + arguments["user_metrics_uuid"], + ) ingest = IngestPipeline(**arguments) status, status_cell_metadata = run_ingest(ingest, arguments, parsed_args) # Print metrics properties @@ -599,12 +594,11 @@ def main() -> None: for key in metrics_dump.keys(): print(f'{key}: {metrics_dump[key]}') + # Log Mixpanel events + MetricsService.log(config.get_parent_event_name(), config.get_metric_properties()) except Exception as e: log_exception(IngestPipeline.dev_logger, IngestPipeline.user_logger, e) status = 1 - - # Log Mixpanel events - MetricsService.log(config.get_parent_event_name(), config.get_metric_properties()) # Exit pipeline exit_pipeline(ingest, status, status_cell_metadata, arguments) From f2ab7459e055eb603b8fae8c54c83acf365efce1 Mon Sep 17 00:00:00 2001 From: Devon Bush Date: Tue, 19 Apr 2022 15:12:07 -0400 Subject: [PATCH 03/12] Revert "minimizing change scope" This reverts commit 7b9f2aedd6a4bab791bc141d1a6a1a823fd5fa9a. --- ingest/ingest_pipeline.py | 41 +++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/ingest/ingest_pipeline.py b/ingest/ingest_pipeline.py index a3965895..7326291b 100644 --- a/ingest/ingest_pipeline.py +++ b/ingest/ingest_pipeline.py @@ -576,29 +576,24 @@ def main() -> None: Returns: None """ - status = 0 - try: - parsed_args = create_parser().parse_args() - validate_arguments(parsed_args) - arguments = vars(parsed_args) - # Initialize global variables for current ingest job - config.init( - arguments["study_id"], - arguments["study_file_id"], - arguments["user_metrics_uuid"], - ) - ingest = IngestPipeline(**arguments) - status, status_cell_metadata = run_ingest(ingest, arguments, parsed_args) - # Print metrics properties - metrics_dump = config.get_metric_properties().get_properties() - for key in metrics_dump.keys(): - print(f'{key}: {metrics_dump[key]}') - - # Log Mixpanel events - MetricsService.log(config.get_parent_event_name(), config.get_metric_properties()) - except Exception as e: - log_exception(IngestPipeline.dev_logger, IngestPipeline.user_logger, e) - status = 1 + parsed_args = create_parser().parse_args() + validate_arguments(parsed_args) + arguments = vars(parsed_args) + # Initialize global variables for current ingest job + config.init( + arguments["study_id"], + arguments["study_file_id"], + arguments["user_metrics_uuid"], + ) + ingest = IngestPipeline(**arguments) + status, status_cell_metadata = run_ingest(ingest, arguments, parsed_args) + # Print metrics properties + metrics_dump = config.get_metric_properties().get_properties() + for key in metrics_dump.keys(): + print(f'{key}: {metrics_dump[key]}') + + # Log Mixpanel events + MetricsService.log(config.get_parent_event_name(), config.get_metric_properties()) # Exit pipeline exit_pipeline(ingest, status, status_cell_metadata, arguments) From 673dc4363b0f2318aadc84e8f818210cab84e738 Mon Sep 17 00:00:00 2001 From: jlchang Date: Wed, 20 Apr 2022 07:29:02 -0400 Subject: [PATCH 04/12] report dups instead of rejecting or falling back to gene_id --- ingest/de.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ingest/de.py b/ingest/de.py index 1a896b51..670da251 100644 --- a/ingest/de.py +++ b/ingest/de.py @@ -175,6 +175,7 @@ def subset_adata(adata, de_cells): return adata def execute_de(self): + print(f'dev_info: Starting DE for {self.accession}') try: if self.matrix_file_type == "mtx": DifferentialExpression.de_logger.info("preparing DE on sparse matrix") @@ -222,18 +223,17 @@ def get_genes(genes_path): """ genes_df = pd.read_csv(genes_path, sep="\t", header=None) if len(genes_df.columns) > 1: + # unclear if falling back to gene_id is useful (SCP-4283) + # print so we're aware of dups during dev testing if genes_df[1].count() == genes_df[1].nunique(): - return genes_df[1].tolist() - elif genes_df[0].count() == genes_df[0].nunique(): - return genes_df[0].tolist() + msg = "dev_info: Features file contains duplicate identifiers (col 2)" + print(msg) + return genes_df[1].tolist() else: - msg = "Features file contains duplicate identifiers" - print(msg) - log_exception( - DifferentialExpression.dev_logger, DifferentialExpression.de_logger, msg - ) - raise ValueError(msg) - return genes + if genes_df[0].count() == genes_df[0].nunique(): + msg = "dev_info: Features file contains duplicate identifiers (col 1)" + print(msg) + return genes_df[0].tolist() @staticmethod def get_barcodes(barcodes_path): From a00b18588eca56e7c92c239600880f2619cb0997 Mon Sep 17 00:00:00 2001 From: jlchang Date: Wed, 20 Apr 2022 07:33:39 -0400 Subject: [PATCH 05/12] clarify outfile name structure --- ingest/de.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ingest/de.py b/ingest/de.py index 670da251..2ebaf5b1 100644 --- a/ingest/de.py +++ b/ingest/de.py @@ -317,13 +317,15 @@ def run_scanpy_de( raise KeyError(msg) DifferentialExpression.de_logger.info("Gathering DE annotation labels") - groups = np.unique(adata.obs[annotation]).tolist() - for group in groups: - group_filename = re.sub(r'\W+', '_', group) - DifferentialExpression.de_logger.info(f"Writing DE output for {group}") - rank = sc.get.rank_genes_groups_df(adata, key=rank_key, group=group) + annots = np.unique(adata.obs[annotation]).tolist() + for annot in annots: + annot_label = re.sub(r'\W+', '_', annot) + DifferentialExpression.de_logger.info( + f"Writing DE output for {annot_label}" + ) + rank = sc.get.rank_genes_groups_df(adata, key=rank_key, group=annot) - out_file = f'{cluster_name}--{annotation}--{group_filename}--{method}.tsv' + out_file = f'{cluster_name}--{annotation}--{annot_label}--{method}.tsv' # Round numbers to 4 significant digits while respecting fixed point # and scientific notation (note: trailing zeros are removed) rank.to_csv(out_file, sep='\t', float_format='%.4g') From 092811be7f705e1c05a7482769af1357ae51e5ee Mon Sep 17 00:00:00 2001 From: jlchang Date: Wed, 20 Apr 2022 08:34:54 -0400 Subject: [PATCH 06/12] include additional cli params for annots --- ingest/cli_parser.py | 18 ++++++++++++++---- ingest/de.py | 4 ++-- ingest/ingest_pipeline.py | 2 ++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/ingest/cli_parser.py b/ingest/cli_parser.py index 3312dc91..8279a31c 100644 --- a/ingest/cli_parser.py +++ b/ingest/cli_parser.py @@ -268,7 +268,17 @@ def create_parser(): ) parser_differential_expression.add_argument( - "--annotation", required=True, help="Name of annotation for DE analysis" + "--annotation-name", required=True, help="Name of annotation for DE analysis" + ) + + parser_differential_expression.add_argument( + "--annotation-type", required=True, help="Type of annotation for DE analysis" + ) + + parser_differential_expression.add_argument( + "--annotation-scope", + required=True, + help="Scope of annotation file for DE analysis", ) parser_differential_expression.add_argument( @@ -276,7 +286,7 @@ def create_parser(): ) parser_differential_expression.add_argument( - "--name", required=True, help="study owner-specified cluster anem" + "--name", required=True, help="study owner-specified cluster name" ) parser_differential_expression.add_argument( @@ -286,9 +296,9 @@ def create_parser(): ) parser_differential_expression.add_argument( - "--cell-metadata-file", + "--annot-metadata-file", required=True, - help="Absolute or relative path to cell metadata file.", + help="Absolute or relative path to cell metadata or cluster file of annotations.", ) parser_differential_expression.add_argument( diff --git a/ingest/de.py b/ingest/de.py index 2ebaf5b1..f151e585 100644 --- a/ingest/de.py +++ b/ingest/de.py @@ -36,7 +36,7 @@ def __init__( cell_metadata, matrix_file_path, matrix_file_type, - annotation, + annotation_name, **kwargs, ): DifferentialExpression.de_logger.info( @@ -44,7 +44,7 @@ def __init__( ) self.cluster = cluster self.metadata = cell_metadata - self.annotation = annotation + self.annotation = annotation_name self.matrix_file_path = matrix_file_path self.matrix_file_type = matrix_file_type self.kwargs = kwargs diff --git a/ingest/ingest_pipeline.py b/ingest/ingest_pipeline.py index 7326291b..47c2e635 100644 --- a/ingest/ingest_pipeline.py +++ b/ingest/ingest_pipeline.py @@ -579,6 +579,8 @@ def main() -> None: parsed_args = create_parser().parse_args() validate_arguments(parsed_args) arguments = vars(parsed_args) + if "differential_expression" in arguments: + arguments["cell_metadata_file"] = arguments["annot_metadata_file"] # Initialize global variables for current ingest job config.init( arguments["study_id"], From c135126adf2c55b8123acfa8c93fc8f8ea9e7601 Mon Sep 17 00:00:00 2001 From: jlchang Date: Wed, 20 Apr 2022 08:49:01 -0400 Subject: [PATCH 07/12] rename DE 'name' to 'cluster-name' for clarity --- ingest/cli_parser.py | 2 +- ingest/ingest_pipeline.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ingest/cli_parser.py b/ingest/cli_parser.py index 8279a31c..3dcf4fd5 100644 --- a/ingest/cli_parser.py +++ b/ingest/cli_parser.py @@ -286,7 +286,7 @@ def create_parser(): ) parser_differential_expression.add_argument( - "--name", required=True, help="study owner-specified cluster name" + "--cluster-name", required=True, help="study owner-specified cluster name" ) parser_differential_expression.add_argument( diff --git a/ingest/ingest_pipeline.py b/ingest/ingest_pipeline.py index 47c2e635..d4cdc165 100644 --- a/ingest/ingest_pipeline.py +++ b/ingest/ingest_pipeline.py @@ -580,7 +580,11 @@ def main() -> None: validate_arguments(parsed_args) arguments = vars(parsed_args) if "differential_expression" in arguments: + # DE may use metadata or cluster file for annots BUT + # IngestPipeline initialization will need a "cell_metadata_file" arguments["cell_metadata_file"] = arguments["annot_metadata_file"] + # IngestPipeline initialiation expects "name" and not "cluster_name" + arguments["name"] = arguments["cluster_name"] # Initialize global variables for current ingest job config.init( arguments["study_id"], From e6783d79f41c4bed5c6a29a963a503ae48d2b4de Mon Sep 17 00:00:00 2001 From: jlchang Date: Wed, 20 Apr 2022 09:01:16 -0400 Subject: [PATCH 08/12] trivial check that input annot is type group --- ingest/cli_parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ingest/cli_parser.py b/ingest/cli_parser.py index 3dcf4fd5..d6186201 100644 --- a/ingest/cli_parser.py +++ b/ingest/cli_parser.py @@ -74,6 +74,14 @@ def validate_arguments(parsed_args): raise ValueError( f" Invalid argument: unable to connect to a BigQuery table called {parsed_args.bq_table}." ) + if ( + "differential_expression" in parsed_args + and parsed_args.annotation_type != "group" + ): + raise ValueError( + f"Differential expression analysis restricted to group-type annotaions," + " cannot run on data of type {parsed_args.annotation_type}." + ) def create_parser(): From ec9eb6850393db34b83896080367eb47e574a16a Mon Sep 17 00:00:00 2001 From: jlchang Date: Wed, 20 Apr 2022 09:27:14 -0400 Subject: [PATCH 09/12] fix error msg typos --- ingest/cli_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ingest/cli_parser.py b/ingest/cli_parser.py index d6186201..cb50f03d 100644 --- a/ingest/cli_parser.py +++ b/ingest/cli_parser.py @@ -79,8 +79,8 @@ def validate_arguments(parsed_args): and parsed_args.annotation_type != "group" ): raise ValueError( - f"Differential expression analysis restricted to group-type annotaions," - " cannot run on data of type {parsed_args.annotation_type}." + "Differential expression analysis restricted to group-type annotations," + f" cannot run on data of type \"{parsed_args.annotation_type}\"." ) From 68c9a607c832f693132cd530d7da248ab454bc9b Mon Sep 17 00:00:00 2001 From: jlchang Date: Thu, 21 Apr 2022 09:35:25 -0400 Subject: [PATCH 10/12] add scope to outfile name; update tests --- ingest/de.py | 7 +++- ...--cholinergic_neuron--study--wilcoxon.tsv} | 0 ...el--endothelial_cell--study--wilcoxon.tsv} | 0 tests/test_de.py | 40 +++++++++++++++---- 4 files changed, 38 insertions(+), 9 deletions(-) rename tests/data/differential_expression/reference/{de_integration--cell_type__ontology_label--cholinergic_neuron--wilcoxon.tsv => de_integration--cell_type__ontology_label--cholinergic_neuron--study--wilcoxon.tsv} (100%) rename tests/data/differential_expression/sparse/reference/{de_sparse_integration--cell_type__ontology_label--endothelial_cell--wilcoxon.tsv => de_sparse_integration--cell_type__ontology_label--endothelial_cell--study--wilcoxon.tsv} (100%) diff --git a/ingest/de.py b/ingest/de.py index f151e585..498b1f4a 100644 --- a/ingest/de.py +++ b/ingest/de.py @@ -49,6 +49,7 @@ def __init__( self.matrix_file_type = matrix_file_type self.kwargs = kwargs self.accession = self.kwargs["study_accession"] + self.annot_scope = self.kwargs["annotation_scope"] # only used in output filename, replacing non-alphanumeric with underscores self.cluster_name = re.sub(r'\W+', '_', self.kwargs["name"]) self.method = self.kwargs["method"] @@ -185,6 +186,7 @@ def execute_de(self): self.matrix_file_path, self.matrix_file_type, self.annotation, + self.annot_scope, self.accession, self.cluster_name, self.method, @@ -199,6 +201,7 @@ def execute_de(self): self.matrix_file_path, self.matrix_file_type, self.annotation, + self.annot_scope, self.accession, self.cluster_name, self.method, @@ -264,6 +267,7 @@ def run_scanpy_de( matrix_file_path, matrix_file_type, annotation, + annot_scope, study_accession, cluster_name, method, @@ -320,12 +324,13 @@ def run_scanpy_de( annots = np.unique(adata.obs[annotation]).tolist() for annot in annots: annot_label = re.sub(r'\W+', '_', annot) + clean_annotation = re.sub(r'\W+', '_', annotation) DifferentialExpression.de_logger.info( f"Writing DE output for {annot_label}" ) rank = sc.get.rank_genes_groups_df(adata, key=rank_key, group=annot) - out_file = f'{cluster_name}--{annotation}--{annot_label}--{method}.tsv' + out_file = f'{cluster_name}--{clean_annotation}--{annot_label}--{annot_scope}--{method}.tsv' # Round numbers to 4 significant digits while respecting fixed point # and scientific notation (note: trailing zeros are removed) rank.to_csv(out_file, sep='\t', float_format='%.4g') diff --git a/tests/data/differential_expression/reference/de_integration--cell_type__ontology_label--cholinergic_neuron--wilcoxon.tsv b/tests/data/differential_expression/reference/de_integration--cell_type__ontology_label--cholinergic_neuron--study--wilcoxon.tsv similarity index 100% rename from tests/data/differential_expression/reference/de_integration--cell_type__ontology_label--cholinergic_neuron--wilcoxon.tsv rename to tests/data/differential_expression/reference/de_integration--cell_type__ontology_label--cholinergic_neuron--study--wilcoxon.tsv diff --git a/tests/data/differential_expression/sparse/reference/de_sparse_integration--cell_type__ontology_label--endothelial_cell--wilcoxon.tsv b/tests/data/differential_expression/sparse/reference/de_sparse_integration--cell_type__ontology_label--endothelial_cell--study--wilcoxon.tsv similarity index 100% rename from tests/data/differential_expression/sparse/reference/de_sparse_integration--cell_type__ontology_label--endothelial_cell--wilcoxon.tsv rename to tests/data/differential_expression/sparse/reference/de_sparse_integration--cell_type__ontology_label--endothelial_cell--study--wilcoxon.tsv diff --git a/tests/test_de.py b/tests/test_de.py index 638832dd..8a8bd186 100644 --- a/tests/test_de.py +++ b/tests/test_de.py @@ -6,6 +6,7 @@ import sys import hashlib import os +import glob import pandas as pd sys.path.append("../ingest") @@ -26,15 +27,13 @@ def get_annotation_labels(metadata, annotation, de_cells): return unique_labels.tolist() -def find_expected_files(labels, cluster_name, annotation, method): +def find_expected_files(labels, cluster_name, annotation, scope, method): """ Check that files were created for all expected annotation labels """ found = 0 for label in labels: sanitized_label = label.replace(" ", "_") - expected_file = ( - f"{cluster_name}--{annotation}--{str(sanitized_label)}--{method}.tsv" - ) + expected_file = f"{cluster_name}--{annotation}--{str(sanitized_label)}--{scope}--{method}.tsv" assert os.path.exists(expected_file) found += 1 return found @@ -90,6 +89,7 @@ def test_de_process_dense(self): confirm expected output """ test_annotation = "cell_type__ontology_label" + test_scope = "study" test_method = "wilcoxon" cm = CellMetadata( "../tests/data/differential_expression/de_integration_unordered_metadata.tsv", @@ -109,6 +109,7 @@ def test_de_process_dense(self): de_kwargs = { "study_accession": cm.study_accession, "name": cluster.name, + "annotation_scope": test_scope, "method": test_method, } @@ -124,7 +125,7 @@ def test_de_process_dense(self): de_cells = DifferentialExpression.get_cluster_cells(cluster.file['NAME'].values) labels = get_annotation_labels(cm, test_annotation, de_cells) found_label_count = find_expected_files( - labels, cluster.name, test_annotation, test_method + labels, cluster.name, test_annotation, test_scope, test_method ) self.assertEqual( @@ -135,7 +136,7 @@ def test_de_process_dense(self): expected_file_path = ( "../tests/de_integration--cell_type__ontology_label" - "--cholinergic_neuron--wilcoxon.tsv" + "--cholinergic_neuron--study--wilcoxon.tsv" ) content = pd.read_csv(expected_file_path, sep="\t", index_col=0) @@ -165,11 +166,22 @@ def test_de_process_dense(self): "generated output file should match expected checksum", ) + # clean up DE outputs + output_wildcard_match = f"../tests/de_integration--{test_annotation}*.tsv" + files = glob.glob(output_wildcard_match) + + for file in files: + try: + os.remove(file) + except: + print(f"Error while deleting file : {file}") + def test_de_process_sparse(self): """ Run DE on small test case with sparse matrix inputs confirm expected output """ test_annotation = "cell_type__ontology_label" + test_scope = "study" test_method = "wilcoxon" cm = CellMetadata( "../tests/data/differential_expression/sparse/sparsemini_metadata.txt", @@ -189,6 +201,7 @@ def test_de_process_sparse(self): de_kwargs = { "study_accession": cm.study_accession, "name": cluster.name, + "annotation_scope": test_scope, "method": test_method, "gene_file": "../tests/data/differential_expression/sparse/sparsemini_features.tsv", "barcode_file": "../tests/data/differential_expression/sparse/sparsemini_barcodes.tsv", @@ -208,7 +221,7 @@ def test_de_process_sparse(self): # In find_expected_files, checks all files with expected names were created # yields the number of files expected for an external check for file count found_label_count = find_expected_files( - labels, cluster.name, test_annotation, test_method + labels, cluster.name, test_annotation, test_scope, test_method ) self.assertEqual( @@ -219,7 +232,7 @@ def test_de_process_sparse(self): expected_file_path = ( "../tests/de_sparse_integration--cell_type__ontology_label" - "--endothelial_cell--wilcoxon.tsv" + "--endothelial_cell--study--wilcoxon.tsv" ) content = pd.read_csv(expected_file_path, sep="\t", index_col=0) @@ -249,3 +262,14 @@ def test_de_process_sparse(self): "generated output file should match expected checksum", ) + # clean up DE outputs + output_wildcard_match = ( + f"../tests/de_sparse_integration--{test_annotation}*.tsv" + ) + files = glob.glob(output_wildcard_match) + + for file in files: + try: + os.remove(file) + except: + print(f"Error while deleting file : {file}") From a6f1c8a3140fab9d1cbc754762d0c98a1cd48a03 Mon Sep 17 00:00:00 2001 From: jlchang Date: Thu, 21 Apr 2022 09:40:09 -0400 Subject: [PATCH 11/12] handle exception for failed DE: annot_label w/ single sample --- ingest/de.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ingest/de.py b/ingest/de.py index 498b1f4a..7f7a15a6 100644 --- a/ingest/de.py +++ b/ingest/de.py @@ -319,6 +319,13 @@ def run_scanpy_de( DifferentialExpression.dev_logger, DifferentialExpression.de_logger, msg ) raise KeyError(msg) + # ToDo - detection and handling of annotations with only one sample (SCP-4282) + except ValueError as e: + print(e) + log_exception( + DifferentialExpression.dev_logger, DifferentialExpression.de_logger, e + ) + raise KeyError(e) DifferentialExpression.de_logger.info("Gathering DE annotation labels") annots = np.unique(adata.obs[annotation]).tolist() From e31497810cc1359fac8d1bfbdf7996fe73d1aac6 Mon Sep 17 00:00:00 2001 From: jlchang Date: Thu, 21 Apr 2022 13:56:21 -0400 Subject: [PATCH 12/12] address PR blockers/suggestions --- ingest/cli_parser.py | 6 +- ingest/de.py | 15 +- ingest/ingest_pipeline.py | 6 +- .../de_integration_annotated_cluster.txt | 240 ++++++++++++++++++ tests/test_de.py | 4 +- 5 files changed, 255 insertions(+), 16 deletions(-) create mode 100644 tests/data/differential_expression/de_integration_annotated_cluster.txt diff --git a/ingest/cli_parser.py b/ingest/cli_parser.py index cb50f03d..77dd697d 100644 --- a/ingest/cli_parser.py +++ b/ingest/cli_parser.py @@ -284,9 +284,7 @@ def create_parser(): ) parser_differential_expression.add_argument( - "--annotation-scope", - required=True, - help="Scope of annotation file for DE analysis", + "--annotation-scope", required=True, help="Scope of annotation for DE analysis" ) parser_differential_expression.add_argument( @@ -304,7 +302,7 @@ def create_parser(): ) parser_differential_expression.add_argument( - "--annot-metadata-file", + "--annotation-file", required=True, help="Absolute or relative path to cell metadata or cluster file of annotations.", ) diff --git a/ingest/de.py b/ingest/de.py index 7f7a15a6..642d1c1b 100644 --- a/ingest/de.py +++ b/ingest/de.py @@ -1,3 +1,4 @@ +from email.headerregistry import Group import logging import numpy as np import pandas as pd @@ -328,16 +329,14 @@ def run_scanpy_de( raise KeyError(e) DifferentialExpression.de_logger.info("Gathering DE annotation labels") - annots = np.unique(adata.obs[annotation]).tolist() - for annot in annots: - annot_label = re.sub(r'\W+', '_', annot) + groups = np.unique(adata.obs[annotation]).tolist() + for group in groups: + clean_group = re.sub(r'\W+', '_', group) clean_annotation = re.sub(r'\W+', '_', annotation) - DifferentialExpression.de_logger.info( - f"Writing DE output for {annot_label}" - ) - rank = sc.get.rank_genes_groups_df(adata, key=rank_key, group=annot) + DifferentialExpression.de_logger.info(f"Writing DE output for {group}") + rank = sc.get.rank_genes_groups_df(adata, key=rank_key, group=group) - out_file = f'{cluster_name}--{clean_annotation}--{annot_label}--{annot_scope}--{method}.tsv' + out_file = f'{cluster_name}--{clean_annotation}--{clean_group}--{annot_scope}--{method}.tsv' # Round numbers to 4 significant digits while respecting fixed point # and scientific notation (note: trailing zeros are removed) rank.to_csv(out_file, sep='\t', float_format='%.4g') diff --git a/ingest/ingest_pipeline.py b/ingest/ingest_pipeline.py index d4cdc165..ca7e8fe9 100644 --- a/ingest/ingest_pipeline.py +++ b/ingest/ingest_pipeline.py @@ -581,9 +581,9 @@ def main() -> None: arguments = vars(parsed_args) if "differential_expression" in arguments: # DE may use metadata or cluster file for annots BUT - # IngestPipeline initialization will need a "cell_metadata_file" - arguments["cell_metadata_file"] = arguments["annot_metadata_file"] - # IngestPipeline initialiation expects "name" and not "cluster_name" + # IngestPipeline initialization assumes a "cell_metadata_file" + arguments["cell_metadata_file"] = arguments["annotation_file"] + # IngestPipeline initialization expects "name" and not "cluster_name" arguments["name"] = arguments["cluster_name"] # Initialize global variables for current ingest job config.init( diff --git a/tests/data/differential_expression/de_integration_annotated_cluster.txt b/tests/data/differential_expression/de_integration_annotated_cluster.txt new file mode 100644 index 00000000..7a5c6d0f --- /dev/null +++ b/tests/data/differential_expression/de_integration_annotated_cluster.txt @@ -0,0 +1,240 @@ +NAME X Y cluster_annot cell_type cell_type__ontology_label +TYPE numeric numeric group group group +Mm_AMB_N098 3.140087487 -1.273400095 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N099 4.725843789 -1.653986003 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N100 7.415670755 1.478285048 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N101 18.00091493 2.853666518 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N102 4.674414994 -1.703558233 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N103 -5.130973218 -6.094096925 2 CL_0000598 pyramidal neuron +Mm_AMB_N105 7.280724885 1.746386263 3 CL_0005024 somatomotor neuron +Mm_AMB_N106 -5.738340972 -5.535514739 2 CL_0000598 pyramidal neuron +Mm_AMB_N107 -8.597541926 3.276709769 0 CL_0000108 cholinergic neuron +Mm_AMB_N108 4.864700677 -0.389440563 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N110 -10.26152861 3.415581915 0 CL_0000108 cholinergic neuron +Mm_AMB_N111 5.433240296 -0.161243942 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N112 -5.974845288 -5.26880553 2 CL_0000598 pyramidal neuron +Mm_AMB_N113 7.894586923 2.099830363 3 CL_0005024 somatomotor neuron +Mm_AMB_N114 -6.253827689 -6.865749147 2 CL_0000598 pyramidal neuron +Mm_AMB_N115 7.595676782 3.970272276 3 CL_0005024 somatomotor neuron +Mm_AMB_N117 -8.982423423 2.625506136 0 CL_0000108 cholinergic neuron +Mm_AMB_N118 8.132836701 2.255005095 3 CL_0005024 somatomotor neuron +Mm_AMB_N119 8.493417146 1.565001223 3 CL_0005024 somatomotor neuron +Mm_AMB_N120 7.464867951 2.835095618 3 CL_0005024 somatomotor neuron +Mm_AMB_N121 18.95712984 1.733775828 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N124 -10.08203327 3.202082846 0 CL_0000108 cholinergic neuron +Mm_AMB_N125 4.787664773 -1.970788386 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N126 4.169263723 -1.479499605 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N127 -12.17685568 2.250826094 0 CL_0000108 cholinergic neuron +Mm_AMB_N128 -10.26949227 3.299045775 0 CL_0000108 cholinergic neuron +Mm_AMB_N129 7.291922929 3.078298781 3 CL_0005024 somatomotor neuron +Mm_AMB_N130 3.931503179 -0.584689167 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N131 -12.87084734 2.755191538 0 CL_0000108 cholinergic neuron +Mm_AMB_N132 8.192381265 3.748367522 3 CL_0005024 somatomotor neuron +Mm_AMB_N133 8.109444024 3.173025343 3 CL_0005024 somatomotor neuron +Mm_AMB_N134 4.853798272 -0.210352686 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N135 -6.091469405 -6.286829259 2 CL_0000598 pyramidal neuron +Mm_AMB_N136 -9.724432109 2.310277197 0 CL_0000108 cholinergic neuron +Mm_AMB_N138 4.571406724 -2.265397933 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N139 4.638490083 -0.897866752 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N140 18.24030245 2.250039313 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N142 19.2226919 2.102125857 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N143 8.59917772 2.018590662 3 CL_0005024 somatomotor neuron +Mm_AMB_N144 3.966480138 -1.175693061 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N145 -9.292243121 3.523585532 0 CL_0000108 cholinergic neuron +Mm_AMB_N146 -6.55647671 -6.882932212 0 CL_0000108 cholinergic neuron +Mm_AMB_N147 7.774518373 2.710429881 3 CL_0005024 somatomotor neuron +Mm_AMB_N148 -9.84153044 4.144498084 0 CL_0000108 cholinergic neuron +Mm_AMB_N150 18.53747499 2.320198748 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N151 -10.067649 1.483394835 0 CL_0000108 cholinergic neuron +Mm_AMB_N152 4.614567163 -0.470181491 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N154 -7.005810378 -4.795861867 2 CL_0000598 pyramidal neuron +Mm_AMB_N155 -10.66187441 2.508826945 0 CL_0000108 cholinergic neuron +Mm_AMB_N156 3.518062951 -1.833867099 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N157 -9.064921973 2.751543734 0 CL_0000108 cholinergic neuron +Mm_AMB_N158 -6.764120696 -5.032259848 2 CL_0000598 pyramidal neuron +Mm_AMB_N159 -10.32736218 1.869718764 0 CL_0000108 cholinergic neuron +Mm_AMB_N160 -5.515234826 -6.577742364 2 CL_0000598 pyramidal neuron +Mm_AMB_N161 2.341690423 -1.508996513 2 CL_0000598 pyramidal neuron +Mm_AMB_N165 -5.354843018 -5.586662676 2 CL_0000598 pyramidal neuron +Mm_AMB_N166 2.443514707 -1.107008007 0 CL_0000108 cholinergic neuron +Mm_AMB_N167 7.79236448 3.153294775 3 CL_0005024 somatomotor neuron +Mm_AMB_N168 4.398187043 -0.621180084 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N169 8.133822801 3.66456959 3 CL_0005024 somatomotor neuron +Mm_AMB_N170 -8.732355712 3.408966753 0 CL_0000108 cholinergic neuron +Mm_AMB_N171 4.50660074 -2.13358393 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N172 3.772507073 -1.759694126 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N173 8.144499185 2.500747893 3 CL_0005024 somatomotor neuron +Mm_AMB_N174 19.07072008 1.539551947 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N175 18.86313951 1.964607928 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N178 19.32244623 1.928024504 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N179 -6.689724801 -4.449012783 2 CL_0000598 pyramidal neuron +Mm_AMB_N181 4.054095628 -0.918124464 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N182 -6.449617503 -4.895254638 2 CL_0000598 pyramidal neuron +Mm_AMB_N183 -6.572851298 -6.65461519 2 CL_0000598 pyramidal neuron +Mm_AMB_N184 18.48137796 2.605997775 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N186 4.864717843 -1.563757207 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N187 8.264972093 3.757057402 3 CL_0005024 somatomotor neuron +Mm_AMB_N188 3.585117223 -1.44048312 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N189 -9.594976066 2.443775866 0 CL_0000108 cholinergic neuron +Mm_AMB_N190 -6.578274129 -5.029420521 2 CL_0000598 pyramidal neuron +Mm_AMB_N191 -5.414247392 -6.468319681 2 CL_0000598 pyramidal neuron +Mm_AMB_N194 18.59783113 2.534378264 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N195 -12.05063498 2.719312403 0 CL_0000108 cholinergic neuron +Mm_AMB_N196 4.188382032 -2.127558496 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N197 -10.43037617 2.783822272 0 CL_0000108 cholinergic neuron +Mm_AMB_N198 -10.41784203 2.047070239 0 CL_0000108 cholinergic neuron +Mm_AMB_N199 -10.01769125 2.546656821 0 CL_0000108 cholinergic neuron +Mm_AMB_N200 5.074365975 0.10075495 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N201 -10.18318951 3.931473467 0 CL_0000108 cholinergic neuron +Mm_AMB_N202 8.027311685 2.417285654 3 CL_0005024 somatomotor neuron +Mm_AMB_N203 -6.702827332 -4.562156107 2 CL_0000598 pyramidal neuron +Mm_AMB_N204 7.404225709 3.39886019 3 CL_0005024 somatomotor neuron +Mm_AMB_N205 -6.504680035 -6.057409074 2 CL_0000598 pyramidal neuron +Mm_AMB_N206 -12.6842705 2.519956324 0 CL_0000108 cholinergic neuron +Mm_AMB_N207 -8.957153914 3.238717291 0 CL_0000108 cholinergic neuron +Mm_AMB_N208 -12.00916159 2.210995409 0 CL_0000108 cholinergic neuron +Mm_AMB_N209 5.118758561 -1.878546383 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N210 8.34566343 3.023439143 3 CL_0005024 somatomotor neuron +Mm_AMB_N211 4.266895177 -1.541616466 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N212 4.563371064 -0.331653144 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N213 -6.532243846 -5.586548951 2 CL_0000598 pyramidal neuron +Mm_AMB_N214 4.813324334 -2.35395148 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N215 -9.66545069 2.694981787 0 CL_0000108 cholinergic neuron +Mm_AMB_N216 -9.472166179 2.525134776 0 CL_0000108 cholinergic neuron +Mm_AMB_N217 5.182531716 -0.530996826 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N219 8.353728654 2.527538989 3 CL_0005024 somatomotor neuron +Mm_AMB_N220 18.60411966 1.850130293 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N221 18.54845941 2.412269804 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N222 -5.657072423 -6.178900983 2 CL_0000598 pyramidal neuron +Mm_AMB_N223 7.340929391 3.11344454 3 CL_0005024 somatomotor neuron +Mm_AMB_N224 -12.78482973 2.593527052 0 CL_0000108 cholinergic neuron +Mm_AMB_N225 -6.03749883 -5.599259045 2 CL_0000598 pyramidal neuron +Mm_AMB_N226 7.932530763 1.991745684 3 CL_0005024 somatomotor neuron +Mm_AMB_N227 -10.16876662 3.038719866 0 CL_0000108 cholinergic neuron +Mm_AMB_N228 -12.08532107 2.673424933 0 CL_0000108 cholinergic neuron +Mm_AMB_N229 -9.039196608 3.519463274 0 CL_0000108 cholinergic neuron +Mm_AMB_N230 -10.03032744 4.084387038 0 CL_0000108 cholinergic neuron +Mm_AMB_N231 -9.171292422 2.841758463 0 CL_0000108 cholinergic neuron +Mm_AMB_N232 4.890991571 -2.213337567 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N233 7.595521333 4.037879679 3 CL_0005024 somatomotor neuron +Mm_AMB_N234 -9.864079593 2.306307528 0 CL_0000108 cholinergic neuron +Mm_AMB_N235 -6.028452037 -6.785489585 2 CL_0000598 pyramidal neuron +Mm_AMB_N237 -6.895101903 -5.631796863 2 CL_0000598 pyramidal neuron +Mm_AMB_N238 -10.2035743 2.412242148 0 CL_0000108 cholinergic neuron +Mm_AMB_N239 -9.59493172 3.203228686 0 CL_0000108 cholinergic neuron +Mm_AMB_N240 -5.116693614 -5.698513177 2 CL_0000598 pyramidal neuron +Mm_AMB_N241 -5.64397466 -6.056944873 2 CL_0000598 pyramidal neuron +Mm_AMB_N242 -5.335160373 -6.207723882 2 CL_0000598 pyramidal neuron +Mm_AMB_N243 -5.679906485 -6.51370218 2 CL_0000598 pyramidal neuron +Mm_AMB_N244 7.451953294 3.823737357 3 CL_0005024 somatomotor neuron +Mm_AMB_N245 8.469065072 2.193786356 3 CL_0005024 somatomotor neuron +Mm_AMB_N246 -12.77197706 2.904312346 0 CL_0000108 cholinergic neuron +Mm_AMB_N247 5.161008241 -1.80296972 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N248 7.694859864 3.22638962 3 CL_0005024 somatomotor neuron +Mm_AMB_N249 -5.822579501 -5.948100832 2 CL_0000598 pyramidal neuron +Mm_AMB_N251 7.953496339 3.588763926 3 CL_0005024 somatomotor neuron +Mm_AMB_N252 -9.964606402 3.257647726 0 CL_0000108 cholinergic neuron +Mm_AMB_N253 18.94501054 1.570863936 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N254 3.443722608 -1.669131067 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N255 7.799243333 3.93373749 3 CL_0005024 somatomotor neuron +Mm_AMB_N256 3.041388871 -0.975432184 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N257 3.814522626 -0.627959039 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N258 -6.796474812 -5.763392952 2 CL_0000598 pyramidal neuron +Mm_AMB_N259 -12.65660918 3.07275841 0 CL_0000108 cholinergic neuron +Mm_AMB_N260 3.703167798 -1.517047432 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N261 -10.19309962 1.365347121 0 CL_0000108 cholinergic neuron +Mm_AMB_N263 -6.681285499 -4.715871241 2 CL_0000598 pyramidal neuron +Mm_AMB_N264 -6.036138652 -6.150394228 2 CL_0000598 pyramidal neuron +Mm_AMB_N265 -7.157994626 -5.442302253 2 CL_0000598 pyramidal neuron +Mm_AMB_N266 4.950543763 -2.201776412 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N267 -12.42995703 2.402638171 0 CL_0000108 cholinergic neuron +Mm_AMB_N268 4.740373017 -0.464855459 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N269 8.136021974 1.535433028 3 CL_0005024 somatomotor neuron +Mm_AMB_N270 -10.09037459 1.558325026 0 CL_0000108 cholinergic neuron +Mm_AMB_N271 -12.27817786 2.949826453 0 CL_0000108 cholinergic neuron +Mm_AMB_N273 -6.136590836 -6.660199668 2 CL_0000598 pyramidal neuron +Mm_AMB_N276 7.824115159 3.721842501 3 CL_0005024 somatomotor neuron +Mm_AMB_N278 -12.91126311 2.414263461 0 CL_0000108 cholinergic neuron +Mm_AMB_N279 -6.656438229 -5.848840501 2 CL_0000598 pyramidal neuron +Mm_AMB_N280 8.525113465 2.299923632 3 CL_0005024 somatomotor neuron +Mm_AMB_N282 -6.820338605 -4.650688853 2 CL_0000598 pyramidal neuron +Mm_AMB_N283 -5.484909652 -5.115516212 2 CL_0000598 pyramidal neuron +Mm_AMB_N285 5.20283735 -1.757914093 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N286 -10.19620764 2.034649584 0 CL_0000108 cholinergic neuron +Mm_AMB_N287 18.90230501 2.504481051 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N290 -12.2473551 2.616947863 0 CL_0000108 cholinergic neuron +Mm_AMB_N291 19.1353792 1.91158745 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N292 7.20615709 1.430902693 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N293 -6.977985261 -5.640513089 2 CL_0000598 pyramidal neuron +Mm_AMB_N294 -5.678647159 -5.338758614 2 CL_0000598 pyramidal neuron +Mm_AMB_N298 2.304056527 -0.955396679 0 CL_0000108 cholinergic neuron +Mm_AMB_N299 4.243685128 -0.897657182 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N300 -11.98372233 2.03666899 0 CL_0000108 cholinergic neuron +Mm_AMB_N301 -12.45843184 2.31190798 0 CL_0000108 cholinergic neuron +Mm_AMB_N302 -9.507600902 2.306728098 0 CL_0000108 cholinergic neuron +Mm_AMB_N303 18.35538042 2.601289008 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N305 5.330193879 -0.563907411 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N306 7.247819306 2.973840449 3 CL_0005024 somatomotor neuron +Mm_AMB_N307 5.2116102 -0.339653757 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N309 -12.70538771 2.298842165 0 CL_0000108 cholinergic neuron +Mm_AMB_N310 7.24289644 2.161418173 3 CL_0005024 somatomotor neuron +Mm_AMB_N312 7.040603044 1.698847506 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N313 -8.692967532 3.037675116 0 CL_0000108 cholinergic neuron +Mm_AMB_N314 5.001196267 -1.096974399 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N315 4.198024633 -1.106303003 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N316 -7.064011691 -5.596567776 2 CL_0000598 pyramidal neuron +Mm_AMB_N317 -10.2009002 3.781424735 0 CL_0000108 cholinergic neuron +Mm_AMB_N318 -11.89463675 2.846759531 0 CL_0000108 cholinergic neuron +Mm_AMB_N319 -7.239272473 -5.364945557 2 CL_0000598 pyramidal neuron +Mm_AMB_N320 -5.444244383 -4.973301199 2 CL_0000598 pyramidal neuron +Mm_AMB_N322 -9.249926684 3.119786951 0 CL_0000108 cholinergic neuron +Mm_AMB_N323 3.350311639 -1.685912397 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N324 19.61211336 1.891798708 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N325 8.440383317 1.812841151 3 CL_0005024 somatomotor neuron +Mm_AMB_N326 -8.568745253 3.146341536 0 CL_0000108 cholinergic neuron +Mm_AMB_N327 -9.551443217 3.811699126 0 CL_0000108 cholinergic neuron +Mm_AMB_N328 19.46605051 1.851924632 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N329 -10.02038348 2.31873152 0 CL_0000108 cholinergic neuron +Mm_AMB_N330 -10.45902645 1.960747454 0 CL_0000108 cholinergic neuron +Mm_AMB_N331 -12.71356928 2.764385912 0 CL_0000108 cholinergic neuron +Mm_AMB_N335 -5.281249998 -5.843355444 2 CL_0000598 pyramidal neuron +Mm_AMB_N336 -7.079306481 -5.2955884 2 CL_0000598 pyramidal neuron +Mm_AMB_N337 -12.39381468 2.913970683 0 CL_0000108 cholinergic neuron +Mm_AMB_N338 -6.249489425 -5.464890745 2 CL_0000598 pyramidal neuron +Mm_AMB_N339 18.27492464 2.675321314 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N341 -5.774287341 -6.050954607 2 CL_0000598 pyramidal neuron +Mm_AMB_N342 19.3390249 1.748794291 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N343 7.66249788 3.940833304 3 CL_0005024 somatomotor neuron +Mm_AMB_N344 -9.966899989 3.935294363 0 CL_0000108 cholinergic neuron +Mm_AMB_N347 2.782020928 -1.691774633 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N348 5.214014413 -1.368584182 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N350 -5.711321471 -5.841650989 2 CL_0000598 pyramidal neuron +Mm_AMB_N351 19.28724802 1.612103674 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N352 -10.0263921 2.830829356 0 CL_0000108 cholinergic neuron +Mm_AMB_N353 -5.562049506 -6.39371064 2 CL_0000598 pyramidal neuron +Mm_AMB_N354 -8.690549491 2.866442892 0 CL_0000108 cholinergic neuron +Mm_AMB_N355 -6.847852347 -4.711193707 2 CL_0000598 pyramidal neuron +Mm_AMB_N356 -8.989562629 2.631283495 0 CL_0000108 cholinergic neuron +Mm_AMB_N357 -9.333207724 3.097544405 0 CL_0000108 cholinergic neuron +Mm_AMB_N358 -6.342018006 -6.199534204 2 CL_0000598 pyramidal neuron +Mm_AMB_N360 4.368296029 -2.086575296 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N361 19.06308305 2.313462946 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N362 -12.43498957 2.685776446 0 CL_0000108 cholinergic neuron +Mm_AMB_N365 -9.514747737 3.080645773 0 CL_0000108 cholinergic neuron +Mm_AMB_N366 7.515557649 3.713855002 3 CL_0005024 somatomotor neuron +Mm_AMB_N367 18.98013628 2.160935614 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N368 -5.670807002 -5.63641968 2 CL_0000598 pyramidal neuron +Mm_AMB_N369 2.958816411 -1.036733415 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N370 4.663360002 -0.127841261 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N371 4.308141591 -0.650887754 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N373 18.92074335 1.93529484 4 CL_3000003 sympathetic cholinergic neuron +Mm_AMB_N374 8.272935273 2.666578982 3 CL_0005024 somatomotor neuron +Mm_AMB_N375 7.988397958 3.29492924 3 CL_0005024 somatomotor neuron +Mm_AMB_N376 2.477417352 -0.903517034 0 CL_0000108 cholinergic neuron +Mm_AMB_N377 -6.089020846 -5.030414488 2 CL_0000598 pyramidal neuron +Mm_AMB_N378 7.682403924 2.369721625 3 CL_0005024 somatomotor neuron +Mm_AMB_N379 4.774490716 -1.706192281 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N380 4.733815553 -0.782675531 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N381 -10.14598191 2.805597517 0 CL_0000108 cholinergic neuron +Mm_AMB_N383 3.842284085 -0.850447681 1 CL_0008010 cranial somatomotor neuron +Mm_AMB_N384 5.213223817 -0.004524496 1 CL_0008010 cranial somatomotor neuron diff --git a/tests/test_de.py b/tests/test_de.py index 8a8bd186..23666784 100644 --- a/tests/test_de.py +++ b/tests/test_de.py @@ -33,7 +33,9 @@ def find_expected_files(labels, cluster_name, annotation, scope, method): found = 0 for label in labels: sanitized_label = label.replace(" ", "_") - expected_file = f"{cluster_name}--{annotation}--{str(sanitized_label)}--{scope}--{method}.tsv" + expected_file = ( + f"{cluster_name}--{annotation}--{sanitized_label}--{scope}--{method}.tsv" + ) assert os.path.exists(expected_file) found += 1 return found