Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions ingest/anndata_.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def generate_cluster_body(adata, clustering_name):
[cluster_cells, pd.DataFrame(adata.obsm[clustering_name])], axis=1
)
pd.DataFrame(cluster_body).to_csv(
f"{clustering_name}.cluster.anndata_segment.tsv",
AnnDataIngestor.set_output_filename(clustering_name),
sep="\t",
mode="a",
header=None,
Expand All @@ -94,9 +94,13 @@ def generate_cluster_body(adata, clustering_name):
@staticmethod
def files_to_delocalize(arguments):
    """List the cluster file names generated for the requested obsm keys.

    :param arguments: mapping holding an "obsm_keys" entry, an iterable of
        clustering names
    :return: list with one output file name per obsm key, built with
        AnnDataIngestor.set_output_filename
    """
    # ToDo - check if names using obsm_keys need sanitization
    return [
        AnnDataIngestor.set_output_filename(name)
        for name in arguments["obsm_keys"]
    ]

@staticmethod
def set_output_filename(name):
return f"{name}.cluster.anndata_segment.tsv"

@staticmethod
def delocalize_cluster_files(file_path, study_file_id, files_to_delocalize):
""" Copy cluster files to study bucket
Expand Down
Binary file added tests/data/anndata/trimmed_compliant_pbmc3K.h5ad
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
NAME disease__time_since_onset disease__time_since_onset__unit organ_region organ_region__ontology_label donor disease__treated species species__ontology_label geographical_region geographical_region__ontology_label library_preparation_protocol library_preparation_protocol__ontology_label organ organ__ontology_label sex is_living organism_age__unit organism_age__unit_label ethnicity__ontology_label ethnicity organism_age disease disease__ontology_label cell_type cell_type__ontology_label donor_id biosample_id biosample_type preservation_method cell_type__custom
TYPE numeric group group group group group group group group group group group group group group group group group group group numeric group group group group group group group group group
BM01_16dpp_AAGCAGTGGTAT 12|2 UO_0000035 MBA:000000944 Folium-tuber vermis (VII) BM01 False|False NCBITaxon_9606 human GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European HANCESTRO_0005 31 MONDO_0005015|MONDO_0006849 diabetes|mastitis CL_0000066 epithelial cell BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh epithelial
BM01_16dpp_TAAGCAGTGGTA 1 UO_0000035 MBA:000000302|MBA:000000294|MBA:000000795 "Superior colliculus, sensory related|Superior colliculus, motor related|Periaqueductal gray" BM01 FALSE NCBITaxon_9606 Homo Sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year white HANCESTRO_0005 31 MONDO_0005709 common cold CL_0000066 epithelial cell BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh epithelial
BM01_16dpp_CTAAGCAGTGGT 24|2 UO_0000035 MBA:000000714|MBA:000000972 BM01 True|False NCBITaxon_9606 Homo sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year British HANCESTRO_0462 31 MONDO_0005015|MONDO_0005709 diabetes mellitus|common cold CL_0000066 epithelial cell BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh epithelial
BM01_16dpp_CGGTAAACCATT 36|3|1 UO_0000035 MBA:000001041 Paraflocculus BM01 True|False|False NCBITaxon_9606 Homo sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year HANCESTRO_0462 31 MONDO_0005015|MONDO_0006849|MONDO_0005709 diabetes|breast infection|common cold BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh sub-epithelial
BM01_16dpp_CCGAATTCACCG 0 UO_0000035 MBA:000000909|MBA:000000502 Entorhinal area|Subiculum BM01 FALSE NCBITaxon_9606 Homo sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year Caucasian HANCESTRO_0005 31 MONDO_0000001 disease or disorder BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh sub-epithelial
BM01_16dpp_AAGCAGTGGTAT 12|2 UO_0000035 MBA:000000944 Folium-tuber vermis (VII) BM01 False|False NCBITaxon_9606 human GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European HANCESTRO_0005 31 MONDO_0005015|MONDO_0006849 diabetes|mastitis CL_0000066 epithelial cell BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh epithelial
BM01_16dpp_TAAGCAGTGGTA 1 UO_0000035 MBA:000000302|MBA:000000294|MBA:000000795 "Superior colliculus, sensory related|Superior colliculus, motor related|Periaqueductal gray" BM01 FALSE NCBITaxon_9606 Homo Sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year white HANCESTRO_0005 31 MONDO_0005709 common cold CL_0000066 epithelial cell BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh epithelial
BM01_16dpp_CTAAGCAGTGGT 24|2 UO_0000035 MBA:000000714|MBA:000000972 BM01 True|False NCBITaxon_9606 Homo sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year British HANCESTRO_0462 31 MONDO_0005015|MONDO_0005709 diabetes mellitus|common cold CL_0000066 epithelial cell BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh epithelial
BM01_16dpp_CGGTAAACCATT 36|3|1 UO_0000035 MBA:000001041 Paraflocculus BM01 True|False|False NCBITaxon_9606 Homo sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year HANCESTRO_0462 31 MONDO_0005015|MONDO_0006849|MONDO_0005709 diabetes|breast infection|common cold BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh sub-epithelial
BM01_16dpp_CCGAATTCACCG 0 UO_0000035 MBA:000000909|MBA:000000502 Entorhinal area|Subiculum BM01 FALSE NCBITaxon_9606 Homo sapiens GAZ_00003181 Boston EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year Caucasian HANCESTRO_0005 31 MONDO_0000001 disease or disorder BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh sub-epithelial
84 changes: 68 additions & 16 deletions tests/test_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,37 @@

import unittest
import sys
import os
from unittest.mock import patch

sys.path.append("../ingest")
from anndata_ import AnnDataIngestor

from ingest_files import IngestFiles

class TestAnnDataIngestor(unittest.TestCase):

@classmethod
def setup_class(cls):
    """Class-level setup: store shared inputs and an ingestor on the class.

    Declared as a classmethod because the single argument is the test class
    itself; everything assigned here is a class attribute that the test
    methods read through ``self``.
    """
    filepath_valid = "../tests/data/anndata/trimmed_compliant_pbmc3K.h5ad"
    filepath_invalid = "../tests/data/anndata/bad.h5"
    cls.study_id = "addedfeed000000000000000"
    cls.study_file_id = "dec0dedfeed0000000000000"
    cls.valid_args = [filepath_valid, cls.study_id, cls.study_file_id]
    cls.invalid_args = [filepath_invalid, cls.study_id, cls.study_file_id]
    cls.cluster_name = 'X_tsne'
    cls.valid_kwargs = {'obsm_keys': [cls.cluster_name]}
    cls.anndata_ingest = AnnDataIngestor(*cls.valid_args, **cls.valid_kwargs)
    # Expected name of the cluster file the extraction tests write and read.
    cls.output_filename = f"{cls.cluster_name}.cluster.anndata_segment.tsv"

def teardown_method(self, _):
    """Remove the cluster output file, if one exists, after each test."""
    leftover = self.output_filename
    if not os.path.isfile(leftover):
        return
    os.remove(leftover)

def test_minimal_valid_anndata(self):
    """Known-good AnnData inputs should pass validate()."""
    failure_message = "expect known good file to open with scanpy"
    good_input = AnnDataIngestor(
        "../tests/data/anndata/test.h5ad",
        "addedfeed000000000000000",
        "dec0dedfeed0000000000000",
    )
    self.assertTrue(good_input.validate(), failure_message)
    # Same check against the shared ingestor stored on the class.
    self.assertTrue(self.anndata_ingest.validate(), failure_message)

def test_truncated_anndata(self):
truncated_input = AnnDataIngestor(
"../tests/data/anndata/bad.h5",
"addedfeed000000000000000",
"dec0dedfeed0000000000000",
)
truncated_input = AnnDataIngestor(*self.invalid_args)
# passing obtain_data function to assertRaises using lambda
# otherwise truncated_input.obtain_data() is evaluated and triggers
# an exception before assertRaises gets called
Expand All @@ -39,8 +48,8 @@ def test_truncated_anndata(self):
def test_input_bad_suffix(self):
bad_input = AnnDataIngestor(
"../tests/data/anndata/bad.foo",
"addedfeed000000000000000",
"dec0dedfeed0000000000000",
self.study_id,
self.study_file_id,
)
# passing obtain_data function to assertRaises using lambda
# otherwise bad_input.obtain_data() is evaluated and triggers
Expand All @@ -52,3 +61,46 @@ def test_input_bad_suffix(self):
)
self.assertFalse(bad_input.validate())

def test_set_output_filename(self):
    """set_output_filename appends the cluster segment suffix to the name."""
    expected = "X_Umap.cluster.anndata_segment.tsv"
    actual = AnnDataIngestor.set_output_filename("X_Umap")
    self.assertEqual(actual, expected)

def test_generate_cluster_header(self):
    """The first line of the generated file should be the NAME/X/Y header."""
    adata = self.anndata_ingest.obtain_adata()
    self.anndata_ingest.generate_cluster_header(adata, self.cluster_name)
    with open(self.output_filename) as header_file:
        first_line = header_file.readline()
    columns = first_line.split("\t")
    self.assertEqual(['NAME', 'X', "Y\n"], columns, "did not find expected headers")

def test_generate_cluster_type_declaration(self):
    """The first line of the generated file should be the TYPE declaration."""
    self.anndata_ingest.generate_cluster_type_declaration(
        self.anndata_ingest.obtain_adata(), self.cluster_name
    )
    with open(self.output_filename) as type_file:
        type_declaration = type_file.readline().split("\t")
    # The failure message names the TYPE line; the earlier wording was copied
    # from the header test and misdescribed what this test checks.
    self.assertEqual(
        ['TYPE', 'numeric', "numeric\n"],
        type_declaration,
        "did not find expected type declaration",
    )

def test_generate_cluster_body(self):
    """The first body line should hold the expected cell name and coordinates."""
    adata = self.anndata_ingest.obtain_adata()
    self.anndata_ingest.generate_cluster_body(adata, self.cluster_name)
    with open(self.output_filename) as cluster_body:
        first_row = cluster_body.readline()
    line = first_row.split("\t")
    expected_line = ['AAACATACAACCAC-1', '16.009954', "-21.073845\n"]
    self.assertEqual(expected_line, line, 'did not get expected coordinates from cluster body')

def test_get_files_to_delocalize(self):
    """files_to_delocalize should return one output file name per obsm key."""
    self.assertEqual(
        [self.output_filename],
        AnnDataIngestor.files_to_delocalize(self.valid_kwargs),
    )

def test_delocalize_files(self):
    """Delocalizing the extracted cluster files should upload each file once."""
    # just create header, no reason to run full extract
    self.anndata_ingest.generate_cluster_header(
        self.anndata_ingest.obtain_adata(), self.cluster_name
    )
    with patch('ingest_files.IngestFiles.delocalize_file') as mock_delocalize:
        # Exercise the AnnData helper instead of calling the patched attribute
        # directly; calling the mock itself makes the count check vacuous.
        # NOTE(review): assumes delocalize_cluster_files uploads through
        # IngestFiles.delocalize_file - confirm against its body.
        # The second argument follows the helper's signature (study_file_id).
        AnnDataIngestor.delocalize_cluster_files(
            "gs://fake_bucket",
            self.study_file_id,
            AnnDataIngestor.files_to_delocalize(self.valid_kwargs),
        )
    # Read the count from the patch handle so the check does not depend on
    # the patch still being active.
    self.assertEqual(
        mock_delocalize.call_count,
        1,
        "expected 1 call to delocalize output files",
    )
24 changes: 22 additions & 2 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,10 @@
validate_arguments,
IngestPipeline,
exit_pipeline,
run_ingest,
run_ingest
)
from expression_files.expression_files import GeneExpression


def mock_load(self, *args, **kwargs):
"""Enables overwriting normal function with this placeholder.
Returning the arguments enables tests to verify that the code invokes
Expand Down Expand Up @@ -675,6 +674,27 @@ def test_subsample_no_cell_intersection(self, mock_load_subsample):
exit_pipeline(ingest, status, status_cell_metadata, arguments)
self.assertEqual(cm.exception.code, 1)

def test_extract_cluster_file_from_anndata(self):
    """Running ingest_anndata with --extract-cluster should write the cluster file."""
    filename = 'X_tsne.cluster.anndata_segment.tsv'
    args = [
        "--study-id",
        "5d276a50421aa9117c982845",
        "--study-file-id",
        "5dd5ae25421aa910a723a337",
        "ingest_anndata",
        "--ingest-anndata",
        "--extract-cluster",
        "--anndata-file",
        "../tests/data/anndata/trimmed_compliant_pbmc3K.h5ad",
        "--obsm-keys",
        "['X_tsne']",
    ]
    try:
        ingest, arguments, status, status_cell_metadata = self.execute_ingest(args)
        self.assertEqual(len(status), 1)
        self.assertEqual(status[0], 0)
        self.assertTrue(os.path.isfile(filename))
    finally:
        # The extracted file lands in the working directory; remove it so a
        # leftover from this run cannot satisfy the isfile check on a rerun
        # or interfere with other tests that use the same file name.
        if os.path.isfile(filename):
            os.remove(filename)


if __name__ == "__main__":
unittest.main()