From 0bae9e34e80ae16265860ba36f1ae1ae59681faa Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Sat, 31 Aug 2019 23:40:37 +1000 Subject: [PATCH 01/26] restructuring test data directory --- .../tests/data/{ => empiric}/empiric.xlsx | Bin .../data/{ => empiric}/empiric_expected.csv | 0 .../data/{enrich1.tsv => enrich/enrich.tsv} | 0 .../data/{enrich1.xlsx => enrich/enrich.xlsx} | Bin .../enrich_1based.tsv} | 0 .../enrich_expected.csv} | 0 .../enrich_expected_offset.csv} | 0 .../enrich_no_seqid.tsv} | 0 .../tests/data/{ => enrich2}/dummy.h5 | 0 .../tests/data/{ => enrich2}/enrich2.tsv | 0 .../tests/data/{ => fasta}/bad_format.fasta | 0 .../data/{ => fasta}/invalid_chars.fasta | 0 mavedbconvert/tests/data/{ => fasta}/lower.fa | 0 .../tests/data/{ => fasta}/spaces.fasta | 0 .../tests/data/{ => fasta}/two.fasta | 0 mavedbconvert/tests/data/{ => fasta}/wt.fasta | 0 .../tests/data/{ => fasta}/wt.fasta.bz2 | Bin .../tests/data/{ => fasta}/wt.fasta.gz | Bin mavedbconvert/tests/test_base.py | 19 +++++----- mavedbconvert/tests/test_enrich.py | 28 +++++++-------- mavedbconvert/tests/test_fasta.py | 2 +- mavedbconvert/tests/test_parsers.py | 33 +++++++++--------- 22 files changed, 40 insertions(+), 42 deletions(-) rename mavedbconvert/tests/data/{ => empiric}/empiric.xlsx (100%) rename mavedbconvert/tests/data/{ => empiric}/empiric_expected.csv (100%) rename mavedbconvert/tests/data/{enrich1.tsv => enrich/enrich.tsv} (100%) rename mavedbconvert/tests/data/{enrich1.xlsx => enrich/enrich.xlsx} (100%) rename mavedbconvert/tests/data/{enrich1_1based.tsv => enrich/enrich_1based.tsv} (100%) rename mavedbconvert/tests/data/{enrich1_expected.csv => enrich/enrich_expected.csv} (100%) rename mavedbconvert/tests/data/{enrich1_expected_offset.csv => enrich/enrich_expected_offset.csv} (100%) rename mavedbconvert/tests/data/{enrich1_no_seqid.tsv => enrich/enrich_no_seqid.tsv} (100%) rename mavedbconvert/tests/data/{ => enrich2}/dummy.h5 (100%) rename mavedbconvert/tests/data/{ => enrich2}/enrich2.tsv (100%) rename mavedbconvert/tests/data/{ => fasta}/bad_format.fasta (100%) rename mavedbconvert/tests/data/{ => fasta}/invalid_chars.fasta (100%) rename mavedbconvert/tests/data/{ => fasta}/lower.fa (100%) rename mavedbconvert/tests/data/{ => fasta}/spaces.fasta (100%) rename mavedbconvert/tests/data/{ => fasta}/two.fasta (100%) rename mavedbconvert/tests/data/{ => fasta}/wt.fasta (100%) rename mavedbconvert/tests/data/{ => fasta}/wt.fasta.bz2 (100%) rename mavedbconvert/tests/data/{ => fasta}/wt.fasta.gz (100%) diff --git a/mavedbconvert/tests/data/empiric.xlsx b/mavedbconvert/tests/data/empiric/empiric.xlsx similarity index 100% rename from mavedbconvert/tests/data/empiric.xlsx rename to mavedbconvert/tests/data/empiric/empiric.xlsx diff --git a/mavedbconvert/tests/data/empiric_expected.csv b/mavedbconvert/tests/data/empiric/empiric_expected.csv similarity index 100% rename from mavedbconvert/tests/data/empiric_expected.csv rename to mavedbconvert/tests/data/empiric/empiric_expected.csv diff --git a/mavedbconvert/tests/data/enrich1.tsv b/mavedbconvert/tests/data/enrich/enrich.tsv similarity index 100% rename from mavedbconvert/tests/data/enrich1.tsv rename to mavedbconvert/tests/data/enrich/enrich.tsv diff --git a/mavedbconvert/tests/data/enrich1.xlsx b/mavedbconvert/tests/data/enrich/enrich.xlsx similarity index 100% rename from mavedbconvert/tests/data/enrich1.xlsx rename to mavedbconvert/tests/data/enrich/enrich.xlsx diff --git a/mavedbconvert/tests/data/enrich1_1based.tsv b/mavedbconvert/tests/data/enrich/enrich_1based.tsv similarity index 100% rename from mavedbconvert/tests/data/enrich1_1based.tsv rename to mavedbconvert/tests/data/enrich/enrich_1based.tsv diff --git a/mavedbconvert/tests/data/enrich1_expected.csv b/mavedbconvert/tests/data/enrich/enrich_expected.csv similarity index 100% rename from mavedbconvert/tests/data/enrich1_expected.csv rename to mavedbconvert/tests/data/enrich/enrich_expected.csv diff --git a/mavedbconvert/tests/data/enrich1_expected_offset.csv b/mavedbconvert/tests/data/enrich/enrich_expected_offset.csv similarity index 100% rename from mavedbconvert/tests/data/enrich1_expected_offset.csv rename to mavedbconvert/tests/data/enrich/enrich_expected_offset.csv diff --git a/mavedbconvert/tests/data/enrich1_no_seqid.tsv b/mavedbconvert/tests/data/enrich/enrich_no_seqid.tsv similarity index 100% rename from mavedbconvert/tests/data/enrich1_no_seqid.tsv rename to mavedbconvert/tests/data/enrich/enrich_no_seqid.tsv diff --git a/mavedbconvert/tests/data/dummy.h5 b/mavedbconvert/tests/data/enrich2/dummy.h5 similarity index 100% rename from mavedbconvert/tests/data/dummy.h5 rename to mavedbconvert/tests/data/enrich2/dummy.h5 diff --git a/mavedbconvert/tests/data/enrich2.tsv b/mavedbconvert/tests/data/enrich2/enrich2.tsv similarity index 100% rename from mavedbconvert/tests/data/enrich2.tsv rename to mavedbconvert/tests/data/enrich2/enrich2.tsv diff --git a/mavedbconvert/tests/data/bad_format.fasta b/mavedbconvert/tests/data/fasta/bad_format.fasta similarity index 100% rename from mavedbconvert/tests/data/bad_format.fasta rename to mavedbconvert/tests/data/fasta/bad_format.fasta diff --git a/mavedbconvert/tests/data/invalid_chars.fasta b/mavedbconvert/tests/data/fasta/invalid_chars.fasta similarity index 100% rename from mavedbconvert/tests/data/invalid_chars.fasta rename to mavedbconvert/tests/data/fasta/invalid_chars.fasta diff --git a/mavedbconvert/tests/data/lower.fa b/mavedbconvert/tests/data/fasta/lower.fa similarity index 100% rename from mavedbconvert/tests/data/lower.fa rename to mavedbconvert/tests/data/fasta/lower.fa diff --git a/mavedbconvert/tests/data/spaces.fasta b/mavedbconvert/tests/data/fasta/spaces.fasta similarity index 100% rename from mavedbconvert/tests/data/spaces.fasta rename to mavedbconvert/tests/data/fasta/spaces.fasta diff --git a/mavedbconvert/tests/data/two.fasta b/mavedbconvert/tests/data/fasta/two.fasta similarity index 100% rename from mavedbconvert/tests/data/two.fasta rename to mavedbconvert/tests/data/fasta/two.fasta diff --git a/mavedbconvert/tests/data/wt.fasta b/mavedbconvert/tests/data/fasta/wt.fasta similarity index 100% rename from mavedbconvert/tests/data/wt.fasta rename to mavedbconvert/tests/data/fasta/wt.fasta diff --git a/mavedbconvert/tests/data/wt.fasta.bz2 b/mavedbconvert/tests/data/fasta/wt.fasta.bz2 similarity index 100% rename from mavedbconvert/tests/data/wt.fasta.bz2 rename to mavedbconvert/tests/data/fasta/wt.fasta.bz2 diff --git a/mavedbconvert/tests/data/wt.fasta.gz b/mavedbconvert/tests/data/fasta/wt.fasta.gz similarity index 100% rename from mavedbconvert/tests/data/wt.fasta.gz rename to mavedbconvert/tests/data/fasta/wt.fasta.gz diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index 352482b..3f06812 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -6,8 +6,7 @@ from . import ProgramTestCase -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.normpath(BASE_DIR + "/data/") +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) class TestBaseProgram(ProgramTestCase): @@ -18,9 +17,9 @@ class TestBaseProgram(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(DATA_DIR, "enrich1.tsv") - self.src_with_spaces = os.path.join(DATA_DIR, "enrich 1.tsv") - self.h5_src = os.path.join(DATA_DIR, "dummy.h5") + self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv") + self.src_with_spaces = os.path.join(TESTS_DIR, "data", "enrich", "e nrich.tsv") + self.h5_src = os.path.join(TESTS_DIR, "data", "enrich2", "dummy.h5") def tearDown(self): for path in self.bin: @@ -64,7 +63,7 @@ def test_checks_write_permission(self, patch): def test_splits_src_into_filename_and_ext(self): p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA") - self.assertEqual(p.src_filename, "enrich1") + self.assertEqual(p.src_filename, "enrich") self.assertEqual(p.ext, ".tsv") def test_lower_cases_ext(self): @@ -77,11 +76,11 @@ def test_value_error_coding_offset_not_multiple_of_three(self): def test_dst_filename_replaces_whitespace_with_underscores(self): p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA") - self.assertEqual(p.dst_filename, "mavedb_enrich_1.csv") + self.assertEqual(p.dst_filename, "mavedb_e_nrich.csv") def test_output_file_joins_dst_and_dst_filename(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") - self.assertEqual(p.output_file, os.path.join(DATA_DIR, "mavedb_enrich1.csv")) + self.assertEqual(p.output_file, os.path.join(TESTS_DIR, "data", "enrich", "mavedb_enrich.csv")) def test_output_directory_expands_user_and_norms_path(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") @@ -126,7 +125,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self): class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(DATA_DIR, "enrich1.tsv") + self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv") self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True) def test_error_not_a_dna_sub(self): @@ -177,7 +176,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self): class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(DATA_DIR, "enrich1.tsv") + self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv") self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True) def test_error_not_a_protein_sub(self): diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py index b30e3b0..92800f3 100644 --- a/mavedbconvert/tests/test_enrich.py +++ b/mavedbconvert/tests/test_enrich.py @@ -35,7 +35,7 @@ def test_ok_is_mult_of_three(self): class TestEnrichParseRow(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich1.tsv") + self.path = os.path.join(DATA_DIR, "enrich.tsv") self.enrich = enrich.Enrich( src=self.path, wt_sequence=WT, @@ -128,7 +128,7 @@ def test_applies_offset_divided_by_3(self): class TestEnrichParseInput(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich1.tsv") + self.path = os.path.join(DATA_DIR, "enrich.tsv") self.enrich = enrich.Enrich( src=self.path, wt_sequence=WT, @@ -186,13 +186,13 @@ def test_removes_non_numeric(self): class TestEnrichLoadInput(ProgramTestCase): def setUp(self): - self.path = os.path.join(DATA_DIR, "enrich1.tsv") - self.path_1based = os.path.join(DATA_DIR, "enrich1_1based.tsv") + self.path = os.path.join(DATA_DIR, "enrich.tsv") + self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv") self.path_csv = os.path.join(DATA_DIR, "enrich1.csv") - self.expected = os.path.join(DATA_DIR, "enrich1_expected.csv") - self.expected_offset = os.path.join(DATA_DIR, "enrich1_expected_offset.csv") - self.excel_path = os.path.join(DATA_DIR, "enrich1.xlsx") - self.no_seq_id = os.path.join(DATA_DIR, "enrich1_no_seqid.tsv") + self.expected = os.path.join(DATA_DIR, "enrich_expected.csv") + self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv") + self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx") + self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv") self.tmp_path = os.path.join(DATA_DIR, "tmp.xlsx") self.bin = [ @@ -279,13 +279,13 @@ def test_table_and_excel_load_same_dataframe(self): class TestEnrichIntegration(ProgramTestCase): def setUp(self): - self.path = os.path.join(DATA_DIR, "enrich1.tsv") - self.path_1based = os.path.join(DATA_DIR, "enrich1_1based.tsv") - self.excel_path = os.path.join(DATA_DIR, "enrich1.xlsx") - self.no_seq_id = os.path.join(DATA_DIR, "enrich1_no_seqid.tsv") + self.path = os.path.join(DATA_DIR, "enrich.tsv") + self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv") + self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx") + self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv") - self.expected = os.path.join(DATA_DIR, "enrich1_expected.csv") - self.expected_offset = os.path.join(DATA_DIR, "enrich1_expected_offset.csv") + self.expected = os.path.join(DATA_DIR, "enrich_expected.csv") + self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv") self.bin = [ os.path.join(DATA_DIR, "mavedb_enrich1.csv"), diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py index 923e6ad..4aaba22 100644 --- a/mavedbconvert/tests/test_fasta.py +++ b/mavedbconvert/tests/test_fasta.py @@ -37,7 +37,7 @@ def test_infers_none(self): def test_ioerror_invalid_ext(self): with self.assertRaises(IOError): - split_fasta_path(os.path.join(DATA_DIR, "enrich1.tsv")) + split_fasta_path(os.path.join(DATA_DIR, "enrich.tsv")) class TestFastaReader(TestCase): diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py index db62ea9..3a97891 100644 --- a/mavedbconvert/tests/test_parsers.py +++ b/mavedbconvert/tests/test_parsers.py @@ -7,8 +7,7 @@ from . import ProgramTestCase -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.normpath(BASE_DIR + "/data/") +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) class TestParseBoolean(TestCase): @@ -49,14 +48,14 @@ def test_returns_string_stripped_of_ws(self): class TestParseSrc(TestCase): @mock.patch( "mavedbconvert.parsers.parse_string", - return_value=os.path.join(DATA_DIR, "enrich2.tsv"), + return_value=os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv"), ) def test_calls_parse_string(self, patch): - parsers.parse_src(os.path.join(DATA_DIR, "enrich2.tsv")) + parsers.parse_src(os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv")) patch.assert_called() def test_ok_file_exists(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv") self.assertEqual(path, parsers.parse_src(path)) def test_error_no_value(self): @@ -65,23 +64,23 @@ def test_error_no_value(self): parsers.parse_src(v) def test_error_file_not_found(self): - path = os.path.join(DATA_DIR, "missing_file.tsv") + path = os.path.join(TESTS_DIR, "data", "enrich2", "missing_file.tsv") with self.assertRaises(FileNotFoundError): parsers.parse_src(path) def test_error_file_is_a_dir(self): with self.assertRaises(IsADirectoryError): - parsers.parse_src(DATA_DIR) + parsers.parse_src(os.path.join(TESTS_DIR, "data")) class TestParseDst(ProgramTestCase): - @mock.patch("mavedbconvert.parsers.parse_string", return_value=DATA_DIR) + @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TESTS_DIR, "data")) def test_calls_parse_string(self, patch): - parsers.parse_dst(DATA_DIR) + parsers.parse_dst(os.path.join(TESTS_DIR, "data")) patch.assert_called() def test_ok_dst_exists(self): - path = os.path.join(DATA_DIR) + path = os.path.join(os.path.join(TESTS_DIR, "data")) self.assertEqual(path, parsers.parse_dst(path)) def test_returns_none_no_value(self): @@ -89,11 +88,11 @@ def test_returns_none_no_value(self): self.assertIsNone(parsers.parse_dst(v)) def test_dst_path_is_normalised(self): - path = BASE_DIR + "//data" - self.assertEqual(parsers.parse_dst(path), DATA_DIR) + path = TESTS_DIR + "//data" + self.assertEqual(parsers.parse_dst(path), os.path.join(TESTS_DIR, "data")) def test_makes_dst_directory_tree(self): - path = os.path.join(DATA_DIR, "subdir") + path = os.path.join(TESTS_DIR, "data", "subdir") parsers.parse_dst(path) self.assertTrue(os.path.isdir(path)) self.bin.append(path) @@ -125,7 +124,7 @@ def test_sets_correct_program_from_dict(self): class TestParseWildTypeSequence(TestCase): def test_can_read_from_fasta(self): - path = os.path.join(DATA_DIR, "lower.fa") + path = os.path.join(TESTS_DIR, "fasta", "lower.fa") wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" @@ -258,13 +257,13 @@ def mock_args( if program is None: program = "enrich2" if src is None: - src = os.path.join(DATA_DIR, "enrich2.tsv") + src = os.path.join(TESTS_DIR, "enrich2", "enrich2.tsv") return { "enrich": True if program == "enrich" else False, "enrich2": True if program == "enrich2" else False, "empiric": True if program == "empiric" else False, - "": os.path.join(DATA_DIR, src), - "--dst": os.path.join(DATA_DIR, dst) if dst else dst, + "": os.path.join(TESTS_DIR, "data", program, src), + "--dst": os.path.join(TESTS_DIR, "data", program, dst) if dst else dst, "--score-column": score_column, "--hgvs-column": hgvs_column, "--skip-header": skip_header, From b933cf8a1af5eb7f8f996103f2ff6bb58372fea9 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Sun, 1 Sep 2019 19:29:57 +1000 Subject: [PATCH 02/26] removed the nonfunctional install_requires --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 98aca6d..d4a98cb 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ "A command line tool for converting alternate " "file formats into a MaveDB compliant format." ), - install_requires=open("requirements/install.txt", "rt").read().split("\n"), + # TODO: this fails to recognize hgvs-patterns from github + # install_requires=open("requirements/install.txt", "rt").read().split("\n"), entry_points={"console_scripts": ["mavedb-convert=mavedbconvert.main:main"]}, ) From 84d2cabdff3cbba33c05b5864f79c0fab4b43ab9 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Sun, 1 Sep 2019 20:31:51 +1000 Subject: [PATCH 03/26] updated all file paths to new structure --- mavedbconvert/tests/test_base.py | 24 +++++----- mavedbconvert/tests/test_empiric.py | 23 +++++----- mavedbconvert/tests/test_enrich.py | 47 ++++++++++---------- mavedbconvert/tests/test_enrich2.py | 69 ++++++++++++++--------------- mavedbconvert/tests/test_fasta.py | 34 +++++++------- mavedbconvert/tests/test_parsers.py | 32 ++++++------- 6 files changed, 113 insertions(+), 116 deletions(-) diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index 3f06812..60b592d 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -6,7 +6,7 @@ from . import ProgramTestCase -TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") class TestBaseProgram(ProgramTestCase): @@ -17,9 +17,9 @@ class TestBaseProgram(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv") - self.src_with_spaces = os.path.join(TESTS_DIR, "data", "enrich", "e nrich.tsv") - self.h5_src = os.path.join(TESTS_DIR, "data", "enrich2", "dummy.h5") + self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich .tsv") + self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") def tearDown(self): for path in self.bin: @@ -30,7 +30,7 @@ def tearDown(self): def test_sets_directory_as_input_directory_if_dst_is_none(self): p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA") - self.assertEqual(p.dst, DATA_DIR) + self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich")) def test_error_file_not_readable(self): with self.assertRaises(IOError): @@ -42,11 +42,11 @@ def test_expands_user_and_norms_dst(self): def test_dir_with_input_fname_appended_when_h5_and_dst_is_none(self): p = base.BaseProgram(src=self.h5_src, dst=None, wt_sequence="AAA") - self.assertEqual(p.dst, os.path.join(DATA_DIR, "dummy")) - self.bin.append(os.path.join(DATA_DIR, "dummy")) + self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich2", "dummy")) + self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "dummy")) def test_creates_directory_tree_if_it_doesnt_exist(self): - output = os.path.join(DATA_DIR, "outer_dir/inner_dir/") + output = os.path.join(TEST_DATA_DIR, "enrich2", "outer_dir", "inner_dir") base.BaseProgram(src=self.h5_src, dst=output, wt_sequence="AAA") self.assertTrue(os.path.isdir(output)) self.bin.append(output) @@ -76,11 +76,11 @@ def test_value_error_coding_offset_not_multiple_of_three(self): def test_dst_filename_replaces_whitespace_with_underscores(self): p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA") - self.assertEqual(p.dst_filename, "mavedb_e_nrich.csv") + self.assertEqual(p.dst_filename, "mavedb_enrich_.csv") def test_output_file_joins_dst_and_dst_filename(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") - self.assertEqual(p.output_file, os.path.join(TESTS_DIR, "data", "enrich", "mavedb_enrich.csv")) + self.assertEqual(p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv")) def test_output_directory_expands_user_and_norms_path(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") @@ -125,7 +125,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self): class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv") + self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True) def test_error_not_a_dna_sub(self): @@ -176,7 +176,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self): class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv") + self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True) def test_error_not_a_protein_sub(self): diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index ec8b9b7..2e95225 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -10,14 +10,13 @@ from . import ProgramTestCase -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.normpath(BASE_DIR + "/data/") +TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") class TestEmpiricInit(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") def test_error_offset_not_mult_of_three(self): with self.assertRaises(ValueError): @@ -70,7 +69,7 @@ def test_adds_codon_pos_multiplied_by_3_to_position(self): class TestEmpiric(ProgramTestCase): def setUp(self): super().setUp() - self.input = os.path.join(DATA_DIR, "empiric.xlsx") + self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") self.empiric = empiric.Empiric( src=self.input, wt_sequence="AAA", one_based=False ) @@ -180,7 +179,7 @@ def test_correctly_infers_hgvs_nt_positions_when_one_based(self): class TestEmpiricValidateColumns(TestCase): def setUp(self): super().setUp() - self.input = os.path.join(DATA_DIR, "empiric.xlsx") + self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") self.empiric = empiric.Empiric( src=self.input, wt_sequence="AAA", one_based=False ) @@ -219,7 +218,7 @@ def test_sets_aa_column(self): class TestEmpiricParseInput(ProgramTestCase): def setUp(self): super().setUp() - self.input = os.path.join(DATA_DIR, "empiric.xlsx") + self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") self.empiric = empiric.Empiric( src=self.input, wt_sequence="AAA", @@ -356,10 +355,10 @@ def test_keeps_int_type_as_int(self): class TestEmpiricLoadInput(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "empiric.xlsx") - self.tmp_path = os.path.join(DATA_DIR, "tmp.csv") - self.tmp_path_tsv = os.path.join(DATA_DIR, "tmp.tsv") - self.tmp_excel_path = os.path.join(DATA_DIR, "tmp.xlsx") + self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") + self.tmp_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.csv") + self.tmp_path_tsv = os.path.join(TEST_DATA_DIR, "empiric", "tmp.tsv") + self.tmp_excel_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.xlsx") self.bin.append(self.tmp_path) self.bin.append(self.tmp_path_tsv) @@ -475,8 +474,8 @@ def test_applies_offset_to_position_column(self): class TestEmpiricConvert(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "empiric.xlsx") - self.expected = os.path.join(DATA_DIR, "empiric_expected.csv") + self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") + self.expected = os.path.join(TEST_DATA_DIR, "empiric", "empiric_expected.csv") self.empiric = empiric.Empiric( src=self.path, wt_sequence="TTTTCTTATTGT", diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py index 92800f3..a3f9867 100644 --- a/mavedbconvert/tests/test_enrich.py +++ b/mavedbconvert/tests/test_enrich.py @@ -9,8 +9,7 @@ from . import ProgramTestCase -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.normpath(BASE_DIR + "/data/") +TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") WT = ( @@ -22,7 +21,7 @@ class TestEnrichInit(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich2.tsv") def test_error_offset_not_mult_of_three(self): with self.assertRaises(ValueError): @@ -35,7 +34,7 @@ def test_ok_is_mult_of_three(self): class TestEnrichParseRow(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") self.enrich = enrich.Enrich( src=self.path, wt_sequence=WT, @@ -128,7 +127,7 @@ def test_applies_offset_divided_by_3(self): class TestEnrichParseInput(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") self.enrich = enrich.Enrich( src=self.path, wt_sequence=WT, @@ -186,19 +185,19 @@ def test_removes_non_numeric(self): class TestEnrichLoadInput(ProgramTestCase): def setUp(self): - self.path = os.path.join(DATA_DIR, "enrich.tsv") - self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv") - self.path_csv = os.path.join(DATA_DIR, "enrich1.csv") - self.expected = os.path.join(DATA_DIR, "enrich_expected.csv") - self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv") - self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx") - self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv") - self.tmp_path = os.path.join(DATA_DIR, "tmp.xlsx") + self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv") + self.path_csv = os.path.join(TEST_DATA_DIR, "enrich", "enrich1.csv") + self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv") + self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv") + self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx") + self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv") + self.tmp_path = os.path.join(TEST_DATA_DIR, "enrich", "tmp.xlsx") self.bin = [ - os.path.join(DATA_DIR, "mavedb_enrich1.csv"), - os.path.join(DATA_DIR, "mavedb_enrich1_1based.csv"), - os.path.join(DATA_DIR, self.path_csv), + os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1.csv"), + os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1_1based.csv"), + os.path.join(TEST_DATA_DIR, "enrich", self.path_csv), ] def test_error_seq_id_not_in_columns(self): @@ -279,17 +278,17 @@ def test_table_and_excel_load_same_dataframe(self): class TestEnrichIntegration(ProgramTestCase): def setUp(self): - self.path = os.path.join(DATA_DIR, "enrich.tsv") - self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv") - self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx") - self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv") + self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx") + self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv") - self.expected = os.path.join(DATA_DIR, "enrich_expected.csv") - self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv") + self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv") + self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv") self.bin = [ - os.path.join(DATA_DIR, "mavedb_enrich1.csv"), - os.path.join(DATA_DIR, "mavedb_enrich1_1based.csv"), + os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"), + os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich_1based.csv"), ] def test_saves_to_input_dst_by_default(self): diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index 882f07d..7aa59ce 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -15,8 +15,7 @@ from . import ProgramTestCase -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.normpath(BASE_DIR + "/data/") +TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") # Utility tests @@ -28,7 +27,7 @@ class TestGetCountDataFrames(TestCase): """ def setUp(self): - self.path = os.path.join(DATA_DIR, "test_store.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") index = pd.MultiIndex.from_product( [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]], @@ -104,7 +103,7 @@ class TestReplicateScoreDataFrames(TestCase): """ def setUp(self): - self.path = os.path.join(DATA_DIR, "test_store.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") shared_index = pd.MultiIndex.from_product( @@ -277,7 +276,7 @@ def test_scores_and_counts_columns_separated_after_join(self): class TestEnrich2ConvertH5Filepath(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -294,12 +293,12 @@ def test_concats_basename_elem_type_then_cnd_and_csv_ext(self): class TestEnrich2ConvertH5Df(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - self.bin.append(os.path.join(DATA_DIR, "enrich2")) + self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2")) def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self): - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" @@ -332,7 +331,7 @@ def test_sets_index_as_input_index(self): assert_index_equal(result.index, df.index) def test_opens_invalid_rows_file_for_invalid_rows(self): - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1T>G (p.Lys1Val)"]) with self.assertRaises(ValueError): @@ -345,7 +344,7 @@ def test_opens_invalid_rows_file_for_invalid_rows(self): self.bin.append(fpath) def test_invalid_rows_file_contains_error_description(self): - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" @@ -368,7 +367,7 @@ class TestEnrich2ParseInput(ProgramTestCase): def setUp(self): super().setUp() self.wt = "GCTGAT" - self.path = os.path.join(DATA_DIR, "test_store.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") self.enrich2 = enrich2.Enrich2( self.path, wt_sequence=self.wt, offset=0, one_based=True @@ -387,42 +386,42 @@ def setUp(self): self.files = [ os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_counts_c1.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c1.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_counts_c2.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c2.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_scores_c1.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c1.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_scores_c2.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c2.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_counts_c1.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c1.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_counts_c2.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c2.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_scores_c1.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c1.csv" ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_scores_c2.csv" + TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c2.csv" ) ), ] @@ -531,7 +530,7 @@ def parse_rows(self, variants, element=None): @mock.patch.object(pd.DataFrame, "to_csv", return_value=None) def test_saves_to_output_directory(self, patch): - output = os.path.join(DATA_DIR, "new") + output = os.path.join(TEST_DATA_DIR, "enrich2", "new") p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0) p.parse_input(p.load_input_file()) for call_args in patch.call_args_list: @@ -542,7 +541,7 @@ def test_saves_to_output_directory(self, patch): def test_saves_to_file_location_if_no_dst_supplied(self, patch): p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0) p.parse_input(self.enrich2.load_input_file()) - expected_base_path = os.path.normpath(os.path.join(DATA_DIR, "test_store")) + expected_base_path = os.path.normpath(os.path.join(TEST_DATA_DIR, "enrich2", "test_store")) for call_args in patch.call_args_list: self.assertIn(expected_base_path, call_args[0][0]) @@ -832,13 +831,13 @@ def test_drops_null_rows(self): class TestEnrich2LoadInput(TestCase): def test_error_file_not_h5_or_tsv(self): - path = os.path.join(DATA_DIR, "empiric.xlsx") + path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") p = enrich2.Enrich2(path, wt_sequence="AAA") with self.assertRaises(TypeError): p.load_input_file() def test_scores_tsv_missing_score_column(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( path, wt_sequence="AAA", @@ -850,7 +849,7 @@ def test_scores_tsv_missing_score_column(self): p.load_input_file() def test_input_type_counts_doesnt_raise_keyerror(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( path, wt_sequence="AAA", @@ -860,7 +859,7 @@ def test_input_type_counts_doesnt_raise_keyerror(self): p.load_input_file() def test_scores_tsv_missing_hgvs_column(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") p = enrich2.Enrich2(path, wt_sequence="AAA", hgvs_column="hgvs") with self.assertRaises(KeyError): p.load_input_file() @@ -869,7 +868,7 @@ def test_scores_tsv_missing_hgvs_column(self): class TestEnrich2ParseRow(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="ACT") self.bin.append(self.path.replace(".h5", "")) @@ -941,7 +940,7 @@ def test_uses_three_qmarks(self): class TestProteinHGVSParsing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -1001,7 +1000,7 @@ def test_maintains_ordering(self): class TestNucleotideHGVSParing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -1055,7 +1054,7 @@ def test_strips_ws(self): class TestEnrich2MixedHGVSParsing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") self.wt = "ACT" self.wt_aa = constants.AA_CODES[constants.CODON_TABLE[self.wt]] self.enrich2 = enrich2.Enrich2(self.path, wt_sequence=self.wt) @@ -1149,7 +1148,7 @@ def test_protein_set_as_nt_when_table_is_not_syn_and_variant_is_special(self): class TestInferSilentAASub(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA", offset=0) self.bin.append(self.path.replace(".h5", "")) @@ -1228,14 +1227,14 @@ def test_applies_offset_to_protein_variant_modulo_3(self): @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence") def test_validates_against_wt_sequence(self, patch): variant = "c.-9C>T" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACT") enrich2.apply_offset(variant, offset=-10, enrich2=p) # pass patch.assert_called_with(*("c.1C>T",)) def test_value_error_base_mismatch_after_offset_applied(self): variant = "c.-9G>T" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACT") with self.assertRaises(ValueError): enrich2.apply_offset(variant, offset=-10, enrich2=p) @@ -1243,14 +1242,14 @@ def test_value_error_base_mismatch_after_offset_applied(self): @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence") def test_validates_against_pro_sequence(self, patch): variant = "p.Gly3Leu" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACG") enrich2.apply_offset(variant, offset=6, enrich2=p) # pass patch.assert_called_with(*("p.Gly1Leu",)) def test_value_error_pro_mismatch_after_offset_applied(self): variant = "p.Gly3Leu" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACG") with self.assertRaises(ValueError): enrich2.apply_offset(variant, offset=6, enrich2=p) @@ -1259,7 +1258,7 @@ def test_value_error_pro_mismatch_after_offset_applied(self): class TestEnrich2Init(TestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") def test_error_is_coding_and_offset_not_mult_of_three(self): with self.assertRaises(ValueError): diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py index 4aaba22..970120a 100644 --- a/mavedbconvert/tests/test_fasta.py +++ b/mavedbconvert/tests/test_fasta.py @@ -3,46 +3,46 @@ from ..fasta import parse_fasta, split_fasta_path -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -DATA_DIR = os.path.normpath(BASE_DIR + "/data/") +TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") -class TestSplitFasta(TestCase): + +class TestFastaPath(TestCase): def test_infers_bzip(self): head, base, ext, compression = split_fasta_path( - os.path.join(DATA_DIR, "wt.fasta.bz2") + os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2") ) self.assertEqual(ext, ".fasta") self.assertEqual(compression, "bz2") def test_infers_gzip(self): head, base, ext, compression = split_fasta_path( - os.path.join(DATA_DIR, "wt.fasta.gz") + os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz") ) self.assertEqual(ext, ".fasta") self.assertEqual(compression, "gz") - def test_infers_none(self): + def test_infers_uncompressed(self): head, base, ext, compression = split_fasta_path( - os.path.join(DATA_DIR, "wt.fasta") + os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta") ) self.assertEqual(ext, ".fasta") self.assertEqual(compression, None) head, base, ext, compression = split_fasta_path( - os.path.join(DATA_DIR, "lower.fa") + os.path.join(TEST_DATA_DIR, "fasta", "lower.fa") ) self.assertEqual(ext, ".fa") self.assertEqual(compression, None) def test_ioerror_invalid_ext(self): with self.assertRaises(IOError): - split_fasta_path(os.path.join(DATA_DIR, "enrich.tsv")) + split_fasta_path(os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")) class TestFastaReader(TestCase): def test_can_read_first_sequence(self): - sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta")) + sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" @@ -52,7 +52,7 @@ def test_can_read_first_sequence(self): self.assertEqual(sequence, expected) def test_converts_to_uppercase(self): - sequence = parse_fasta(os.path.join(DATA_DIR, "lower.fa")) + sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" @@ -63,11 +63,11 @@ def test_converts_to_uppercase(self): def test_error_more_than_one_sequence(self): with self.assertRaises(ValueError): - parse_fasta(os.path.join(DATA_DIR, "two.fasta")) + parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "two.fasta")) def test_error_invalid_chars_in_sequence(self): with self.assertRaises(ValueError): - parse_fasta(os.path.join(DATA_DIR, "invalid_chars.fasta")) + parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "invalid_chars.fasta")) def test_ignores_blank_lines(self): expected = ( @@ -76,15 +76,15 @@ def test_ignores_blank_lines(self): "GAGGCTACGATCGTGCCTTGTGGCAAGTCTTCGCTCGCAC" "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA" ) - seq = parse_fasta(os.path.join(DATA_DIR, "spaces.fasta")) + seq = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "spaces.fasta")) self.assertEqual(seq, expected) def test_error_missing_gt_on_first_line(self): with self.assertRaises(IOError): - parse_fasta(os.path.join(DATA_DIR, "bad_format.fasta")) + parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "bad_format.fasta")) def test_can_open_with_gzip(self): - sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta.gz")) + sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" @@ -94,7 +94,7 @@ def test_can_open_with_gzip(self): self.assertEqual(sequence, expected) def test_can_open_with_bzip(self): - sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta.bz2")) + sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py index 3a97891..4a8b5c1 100644 --- a/mavedbconvert/tests/test_parsers.py +++ b/mavedbconvert/tests/test_parsers.py @@ -7,7 +7,7 @@ from . import ProgramTestCase -TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") class TestParseBoolean(TestCase): @@ -48,14 +48,14 @@ def test_returns_string_stripped_of_ws(self): class TestParseSrc(TestCase): @mock.patch( "mavedbconvert.parsers.parse_string", - return_value=os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv"), + return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"), ) def test_calls_parse_string(self, patch): - parsers.parse_src(os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv")) + parsers.parse_src(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")) patch.assert_called() def test_ok_file_exists(self): - path = os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv") + path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") self.assertEqual(path, parsers.parse_src(path)) def test_error_no_value(self): @@ -64,23 +64,23 @@ def test_error_no_value(self): parsers.parse_src(v) def test_error_file_not_found(self): - path = os.path.join(TESTS_DIR, "data", "enrich2", "missing_file.tsv") + path = os.path.join(TEST_DATA_DIR, "enrich2", "missing_file.tsv") with self.assertRaises(FileNotFoundError): parsers.parse_src(path) def test_error_file_is_a_dir(self): with self.assertRaises(IsADirectoryError): - parsers.parse_src(os.path.join(TESTS_DIR, "data")) + parsers.parse_src(os.path.join(TEST_DATA_DIR)) class TestParseDst(ProgramTestCase): - @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TESTS_DIR, "data")) + @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR)) def test_calls_parse_string(self, patch): - parsers.parse_dst(os.path.join(TESTS_DIR, "data")) + parsers.parse_dst(os.path.join(TEST_DATA_DIR)) patch.assert_called() def test_ok_dst_exists(self): - path = os.path.join(os.path.join(TESTS_DIR, "data")) + path = os.path.join(os.path.join(TEST_DATA_DIR)) self.assertEqual(path, parsers.parse_dst(path)) def test_returns_none_no_value(self): @@ -88,11 +88,11 @@ def test_returns_none_no_value(self): self.assertIsNone(parsers.parse_dst(v)) def test_dst_path_is_normalised(self): - path = TESTS_DIR + "//data" - self.assertEqual(parsers.parse_dst(path), os.path.join(TESTS_DIR, "data")) + path = TEST_DATA_DIR + "//fasta" + self.assertEqual(parsers.parse_dst(path), os.path.join(TEST_DATA_DIR, "fasta")) def test_makes_dst_directory_tree(self): - path = os.path.join(TESTS_DIR, "data", "subdir") + path = os.path.join(TEST_DATA_DIR, "subdir") parsers.parse_dst(path) self.assertTrue(os.path.isdir(path)) self.bin.append(path) @@ -124,7 +124,7 @@ def test_sets_correct_program_from_dict(self): class TestParseWildTypeSequence(TestCase): def test_can_read_from_fasta(self): - path = os.path.join(TESTS_DIR, "fasta", "lower.fa") + path = os.path.join(TEST_DATA_DIR, "fasta", "lower.fa") wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" @@ -257,13 +257,13 @@ def mock_args( if program is None: program = "enrich2" if src is None: - src = os.path.join(TESTS_DIR, "enrich2", "enrich2.tsv") + src = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") return { "enrich": True if program == "enrich" else False, "enrich2": True if program == "enrich2" else False, "empiric": True if program == "empiric" else False, - "": os.path.join(TESTS_DIR, "data", program, src), - "--dst": os.path.join(TESTS_DIR, "data", program, dst) if dst else dst, + "": os.path.join(TEST_DATA_DIR, program, src), + "--dst": os.path.join(TEST_DATA_DIR, program, dst) if dst else dst, "--score-column": score_column, "--hgvs-column": hgvs_column, "--skip-header": skip_header, From 86145e7342c755e8057d65de67e161a7066cb8da Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Tue, 3 Sep 2019 10:39:22 +1000 Subject: [PATCH 04/26] split file paths and wt seq into different test cases --- mavedbconvert/tests/test_base.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index 60b592d..bf1ca5e 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -9,10 +9,10 @@ TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") -class TestBaseProgram(ProgramTestCase): +class TestPaths(ProgramTestCase): """ Test __init__ correctly sets up read and write directories, - sequence information etc. + etc. """ def setUp(self): @@ -70,10 +70,6 @@ def test_lower_cases_ext(self): p = base.BaseProgram(src=self.src.replace("tsv", "TSV"), wt_sequence="AAA") self.assertEqual(p.ext, ".tsv") - def test_value_error_coding_offset_not_multiple_of_three(self): - with self.assertRaises(ValueError): - base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1) - def test_dst_filename_replaces_whitespace_with_underscores(self): p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA") self.assertEqual(p.dst_filename, "mavedb_enrich_.csv") @@ -89,6 +85,29 @@ def test_output_directory_expands_user_and_norms_path(self): p.output_directory, os.path.join(os.path.expanduser("~"), "user") ) + +class TestWtSequence(ProgramTestCase): + """ + Test __init__ correctly sets up sequence information etc. + """ + + def setUp(self): + super().setUp() + self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich .tsv") + self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + + def tearDown(self): + for path in self.bin: + if os.path.exists(path) and os.path.isfile(path): + os.remove(path) + elif os.path.exists(path) and os.path.isdir(path): + os.removedirs(path) + + def test_value_error_coding_offset_not_multiple_of_three(self): + with self.assertRaises(ValueError): + base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1) + # --- Test property setters --- # def test_wt_setter_upper_cases_wt_sequence(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") From bd788fda71249cdf66d2bd6308b6690d68b5023d Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Tue, 3 Sep 2019 14:23:55 +1000 Subject: [PATCH 05/26] black formatting --- mavedbconvert/tests/test_base.py | 4 ++- mavedbconvert/tests/test_enrich.py | 8 ++++-- mavedbconvert/tests/test_enrich2.py | 44 +++++++++++++++++++++++------ mavedbconvert/tests/test_parsers.py | 4 ++- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index bf1ca5e..47818a8 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -76,7 +76,9 @@ def test_dst_filename_replaces_whitespace_with_underscores(self): def test_output_file_joins_dst_and_dst_filename(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") - self.assertEqual(p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv")) + self.assertEqual( + p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv") + ) def test_output_directory_expands_user_and_norms_path(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py index a3f9867..c86ebe7 100644 --- a/mavedbconvert/tests/test_enrich.py +++ b/mavedbconvert/tests/test_enrich.py @@ -189,7 +189,9 @@ def setUp(self): self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv") self.path_csv = os.path.join(TEST_DATA_DIR, "enrich", "enrich1.csv") self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv") - self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv") + self.expected_offset = os.path.join( + TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv" + ) self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx") self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv") self.tmp_path = os.path.join(TEST_DATA_DIR, "enrich", "tmp.xlsx") @@ -284,7 +286,9 @@ def setUp(self): self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv") self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv") - self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv") + self.expected_offset = os.path.join( + TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv" + ) self.bin = [ os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"), diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index 7aa59ce..d7f254f 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -386,42 +386,66 @@ def setUp(self): self.files = [ os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c1.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_counts_c1.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c2.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_counts_c2.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c1.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_scores_c1.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c2.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_scores_c2.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c1.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_variants_counts_c1.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c2.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_variants_counts_c2.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c1.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_variants_scores_c1.csv", ) ), os.path.normpath( os.path.join( - TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c2.csv" + TEST_DATA_DIR, + "enrich2", + "test_store", + "mavedb_test_store_variants_scores_c2.csv", ) ), ] @@ -541,7 +565,9 @@ def test_saves_to_output_directory(self, patch): def test_saves_to_file_location_if_no_dst_supplied(self, patch): p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0) p.parse_input(self.enrich2.load_input_file()) - expected_base_path = os.path.normpath(os.path.join(TEST_DATA_DIR, "enrich2", "test_store")) + expected_base_path = os.path.normpath( + os.path.join(TEST_DATA_DIR, "enrich2", "test_store") + ) for call_args in patch.call_args_list: self.assertIn(expected_base_path, call_args[0][0]) diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py index 4a8b5c1..28560df 100644 --- a/mavedbconvert/tests/test_parsers.py +++ b/mavedbconvert/tests/test_parsers.py @@ -74,7 +74,9 @@ def test_error_file_is_a_dir(self): class TestParseDst(ProgramTestCase): - @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR)) + @mock.patch( + "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR) + ) def test_calls_parse_string(self, patch): parsers.parse_dst(os.path.join(TEST_DATA_DIR)) patch.assert_called() From 0ae8315a54fb68b2da0b5dad368eb5fb3ab0e320 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Tue, 3 Sep 2019 14:52:23 +1000 Subject: [PATCH 06/26] copy the test data into a temp directory and run there instead --- mavedbconvert/tests/__init__.py | 9 ++++ mavedbconvert/tests/test_base.py | 29 +++++----- mavedbconvert/tests/test_empiric.py | 25 ++++----- mavedbconvert/tests/test_enrich.py | 49 +++++++++-------- mavedbconvert/tests/test_enrich2.py | 84 +++++++++++++++-------------- mavedbconvert/tests/test_fasta.py | 35 ++++++------ 6 files changed, 117 insertions(+), 114 deletions(-) diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py index 37466b8..07d0191 100644 --- a/mavedbconvert/tests/__init__.py +++ b/mavedbconvert/tests/__init__.py @@ -1,5 +1,7 @@ import os +import shutil from unittest import TestCase +from tempfile import TemporaryDirectory import pandas as pd @@ -19,6 +21,12 @@ class ProgramTestCase(TestCase): def setUp(self): + self._data_dir = TemporaryDirectory() # store the object + self.data_dir = os.path.join(self._data_dir.name, "data") # store the directory path + shutil.copytree( + src=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"), + dst=self.data_dir, + ) self.bin = [] def mock_multi_sheet_excel_file(self, path, data): @@ -30,6 +38,7 @@ def mock_multi_sheet_excel_file(self, path, data): self.bin.append(path) def tearDown(self): + self._data_dir.cleanup() for path in self.bin: if os.path.exists(path) and os.path.isfile(path): os.remove(path) diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index 47818a8..722981f 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -6,9 +6,6 @@ from . import ProgramTestCase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - class TestPaths(ProgramTestCase): """ Test __init__ correctly sets up read and write directories, @@ -17,9 +14,9 @@ class TestPaths(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") - self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich .tsv") - self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv") + self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich .tsv") + self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5") def tearDown(self): for path in self.bin: @@ -30,7 +27,7 @@ def tearDown(self): def test_sets_directory_as_input_directory_if_dst_is_none(self): p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA") - self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich")) + self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich")) def test_error_file_not_readable(self): with self.assertRaises(IOError): @@ -42,11 +39,11 @@ def test_expands_user_and_norms_dst(self): def test_dir_with_input_fname_appended_when_h5_and_dst_is_none(self): p = base.BaseProgram(src=self.h5_src, dst=None, wt_sequence="AAA") - self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich2", "dummy")) - self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "dummy")) + self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich2", "dummy")) + self.bin.append(os.path.join(self.data_dir, "enrich2", "dummy")) def test_creates_directory_tree_if_it_doesnt_exist(self): - output = os.path.join(TEST_DATA_DIR, "enrich2", "outer_dir", "inner_dir") + output = os.path.join(self.data_dir, "enrich2", "outer_dir", "inner_dir") base.BaseProgram(src=self.h5_src, dst=output, wt_sequence="AAA") self.assertTrue(os.path.isdir(output)) self.bin.append(output) @@ -77,7 +74,7 @@ def test_dst_filename_replaces_whitespace_with_underscores(self): def test_output_file_joins_dst_and_dst_filename(self): p = base.BaseProgram(src=self.src, wt_sequence="AAA") self.assertEqual( - p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv") + p.output_file, os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv") ) def test_output_directory_expands_user_and_norms_path(self): @@ -95,9 +92,9 @@ class TestWtSequence(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") - self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich .tsv") - self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv") + self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich .tsv") + self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5") def tearDown(self): for path in self.bin: @@ -146,7 +143,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self): class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv") self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True) def test_error_not_a_dna_sub(self): @@ -197,7 +194,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self): class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase): def setUp(self): super().setUp() - self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv") self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True) def test_error_not_a_protein_sub(self): diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index 2e95225..937a42b 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -10,13 +10,10 @@ from . import ProgramTestCase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - class TestEmpiricInit(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") def test_error_offset_not_mult_of_three(self): with self.assertRaises(ValueError): @@ -69,7 +66,7 @@ def test_adds_codon_pos_multiplied_by_3_to_position(self): class TestEmpiric(ProgramTestCase): def setUp(self): super().setUp() - self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") + self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx") self.empiric = empiric.Empiric( src=self.input, wt_sequence="AAA", one_based=False ) @@ -176,10 +173,10 @@ def test_correctly_infers_hgvs_nt_positions_when_one_based(self): self.assertEqual(hgvs_nt, "c.[1G>A;2T>A;3A>T]") -class TestEmpiricValidateColumns(TestCase): +class TestEmpiricValidateColumns(ProgramTestCase): def setUp(self): super().setUp() - self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") + self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx") self.empiric = empiric.Empiric( src=self.input, wt_sequence="AAA", one_based=False ) @@ -218,7 +215,7 @@ def test_sets_aa_column(self): class TestEmpiricParseInput(ProgramTestCase): def setUp(self): super().setUp() - self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") + self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx") self.empiric = empiric.Empiric( src=self.input, wt_sequence="AAA", @@ -355,10 +352,10 @@ def test_keeps_int_type_as_int(self): class TestEmpiricLoadInput(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") - self.tmp_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.csv") - self.tmp_path_tsv = os.path.join(TEST_DATA_DIR, "empiric", "tmp.tsv") - self.tmp_excel_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.xlsx") + self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") + self.tmp_path = os.path.join(self.data_dir, "empiric", "tmp.csv") + self.tmp_path_tsv = os.path.join(self.data_dir, "empiric", "tmp.tsv") + self.tmp_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx") self.bin.append(self.tmp_path) self.bin.append(self.tmp_path_tsv) @@ -474,8 +471,8 @@ def test_applies_offset_to_position_column(self): class TestEmpiricConvert(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") - self.expected = os.path.join(TEST_DATA_DIR, "empiric", "empiric_expected.csv") + self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") + self.expected = os.path.join(self.data_dir, "empiric", "empiric_expected.csv") self.empiric = empiric.Empiric( src=self.path, wt_sequence="TTTTCTTATTGT", diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py index c86ebe7..f18d1e4 100644 --- a/mavedbconvert/tests/test_enrich.py +++ b/mavedbconvert/tests/test_enrich.py @@ -9,9 +9,6 @@ from . import ProgramTestCase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - WT = ( "GACGTTCCACTGCCGGCTGGTTGGGAAATGGCTAAAACTAGTTCTGGTCAGCGTTACTTC" "CTGAACCACATCGACCAGACCACCACGTGGCAGGACCCGCGT" @@ -21,7 +18,7 @@ class TestEnrichInit(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich", "enrich2.tsv") def test_error_offset_not_mult_of_three(self): with self.assertRaises(ValueError): @@ -34,7 +31,7 @@ def test_ok_is_mult_of_three(self): class TestEnrichParseRow(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv") self.enrich = enrich.Enrich( src=self.path, wt_sequence=WT, @@ -127,7 +124,7 @@ def test_applies_offset_divided_by_3(self): class TestEnrichParseInput(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") + self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv") self.enrich = enrich.Enrich( src=self.path, wt_sequence=WT, @@ -185,21 +182,22 @@ def test_removes_non_numeric(self): class TestEnrichLoadInput(ProgramTestCase): def setUp(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") - self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv") - self.path_csv = os.path.join(TEST_DATA_DIR, "enrich", "enrich1.csv") - self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv") + super().setUp() + self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv") + self.path_1based = os.path.join(self.data_dir, "enrich", "enrich_1based.tsv") + self.path_csv = os.path.join(self.data_dir, "enrich", "enrich1.csv") + self.expected = os.path.join(self.data_dir, "enrich", "enrich_expected.csv") self.expected_offset = os.path.join( - TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv" + self.data_dir, "enrich", "enrich_expected_offset.csv" ) - self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx") - self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv") - self.tmp_path = os.path.join(TEST_DATA_DIR, "enrich", "tmp.xlsx") + self.excel_path = os.path.join(self.data_dir, "enrich", "enrich.xlsx") + self.no_seq_id = os.path.join(self.data_dir, "enrich", "enrich_no_seqid.tsv") + self.tmp_path = os.path.join(self.data_dir, "enrich", "tmp.xlsx") self.bin = [ - os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1.csv"), - os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1_1based.csv"), - os.path.join(TEST_DATA_DIR, "enrich", self.path_csv), + os.path.join(self.data_dir, "enrich", "mavedb_enrich1.csv"), + os.path.join(self.data_dir, "enrich", "mavedb_enrich1_1based.csv"), + os.path.join(self.data_dir, "enrich", self.path_csv), ] def test_error_seq_id_not_in_columns(self): @@ -280,19 +278,20 @@ def test_table_and_excel_load_same_dataframe(self): class TestEnrichIntegration(ProgramTestCase): def setUp(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv") - self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv") - self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx") - self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv") + super().setUp() + self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv") + self.path_1based = os.path.join(self.data_dir, "enrich", "enrich_1based.tsv") + self.excel_path = os.path.join(self.data_dir, "enrich", "enrich.xlsx") + self.no_seq_id = os.path.join(self.data_dir, "enrich", "enrich_no_seqid.tsv") - self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv") + self.expected = os.path.join(self.data_dir, "enrich", "enrich_expected.csv") self.expected_offset = os.path.join( - TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv" + self.data_dir, "enrich", "enrich_expected_offset.csv" ) self.bin = [ - os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"), - os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich_1based.csv"), + os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv"), + os.path.join(self.data_dir, "enrich", "mavedb_enrich_1based.csv"), ] def test_saves_to_input_dst_by_default(self): diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index d7f254f..1b9503b 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -15,19 +15,17 @@ from . import ProgramTestCase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - # Utility tests # --------------------------------------------------------------------------- # -class TestGetCountDataFrames(TestCase): +class TestGetCountDataFrames(ProgramTestCase): """ Test method get_count_dataframes checking if conditions are correctly parsed. """ def setUp(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5") + super().setUp() + self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") index = pd.MultiIndex.from_product( [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]], @@ -96,14 +94,15 @@ def test_column_names_combine_columns_using_ordering(self): self.assertListEqual(cnames, ["t0_rep1", "t1_rep1", "t0_rep2", "t1_rep2"]) -class TestReplicateScoreDataFrames(TestCase): +class TestReplicateScoreDataFrames(ProgramTestCase): """ Test method get_replicate_score_dataframes checking if conditions are correctly parsed. """ def setUp(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5") + super().setUp() + self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") shared_index = pd.MultiIndex.from_product( @@ -125,6 +124,7 @@ def setUp(self): ) def tearDown(self): + super().tearDown() self.store.close() if os.path.isfile(self.path): os.unlink(self.path) @@ -276,7 +276,7 @@ def test_scores_and_counts_columns_separated_after_join(self): class TestEnrich2ConvertH5Filepath(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -293,12 +293,12 @@ def test_concats_basename_elem_type_then_cnd_and_csv_ext(self): class TestEnrich2ConvertH5Df(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2")) + self.bin.append(os.path.join(self.data_dir, "enrich2", "enrich2")) def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" @@ -331,7 +331,7 @@ def test_sets_index_as_input_index(self): assert_index_equal(result.index, df.index) def test_opens_invalid_rows_file_for_invalid_rows(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1T>G (p.Lys1Val)"]) with self.assertRaises(ValueError): @@ -340,11 +340,13 @@ def test_opens_invalid_rows_file_for_invalid_rows(self): ) fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" + print(fpath) + print(self.data_dir) self.assertTrue(os.path.isfile(fpath)) self.bin.append(fpath) def test_invalid_rows_file_contains_error_description(self): - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" @@ -367,7 +369,7 @@ class TestEnrich2ParseInput(ProgramTestCase): def setUp(self): super().setUp() self.wt = "GCTGAT" - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5") + self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") self.enrich2 = enrich2.Enrich2( self.path, wt_sequence=self.wt, offset=0, one_based=True @@ -386,7 +388,7 @@ def setUp(self): self.files = [ os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c1.csv", @@ -394,7 +396,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c2.csv", @@ -402,7 +404,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c1.csv", @@ -410,7 +412,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c2.csv", @@ -418,7 +420,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_variants_counts_c1.csv", @@ -426,7 +428,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_variants_counts_c2.csv", @@ -434,7 +436,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_variants_scores_c1.csv", @@ -442,7 +444,7 @@ def setUp(self): ), os.path.normpath( os.path.join( - TEST_DATA_DIR, + self.data_dir, "enrich2", "test_store", "mavedb_test_store_variants_scores_c2.csv", @@ -554,7 +556,7 @@ def parse_rows(self, variants, element=None): @mock.patch.object(pd.DataFrame, "to_csv", return_value=None) def test_saves_to_output_directory(self, patch): - output = os.path.join(TEST_DATA_DIR, "enrich2", "new") + output = os.path.join(self.data_dir, "enrich2", "new") p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0) p.parse_input(p.load_input_file()) for call_args in patch.call_args_list: @@ -566,7 +568,7 @@ def test_saves_to_file_location_if_no_dst_supplied(self, patch): p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0) p.parse_input(self.enrich2.load_input_file()) expected_base_path = os.path.normpath( - os.path.join(TEST_DATA_DIR, "enrich2", "test_store") + os.path.join(self.data_dir, "enrich2", "test_store") ) for call_args in patch.call_args_list: self.assertIn(expected_base_path, call_args[0][0]) @@ -855,15 +857,15 @@ def test_drops_null_rows(self): self.assertNotIn("p.Ala1=", df_scores[constants.pro_variant_col]) -class TestEnrich2LoadInput(TestCase): +class TestEnrich2LoadInput(ProgramTestCase): def test_error_file_not_h5_or_tsv(self): - path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx") + path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") p = enrich2.Enrich2(path, wt_sequence="AAA") with self.assertRaises(TypeError): p.load_input_file() def test_scores_tsv_missing_score_column(self): - path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( path, wt_sequence="AAA", @@ -875,7 +877,7 @@ def test_scores_tsv_missing_score_column(self): p.load_input_file() def test_input_type_counts_doesnt_raise_keyerror(self): - path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( path, wt_sequence="AAA", @@ -885,7 +887,7 @@ def test_input_type_counts_doesnt_raise_keyerror(self): p.load_input_file() def test_scores_tsv_missing_hgvs_column(self): - path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2(path, wt_sequence="AAA", hgvs_column="hgvs") with self.assertRaises(KeyError): p.load_input_file() @@ -894,7 +896,7 @@ def test_scores_tsv_missing_hgvs_column(self): class TestEnrich2ParseRow(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="ACT") self.bin.append(self.path.replace(".h5", "")) @@ -966,7 +968,7 @@ def test_uses_three_qmarks(self): class TestProteinHGVSParsing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -1026,7 +1028,7 @@ def test_maintains_ordering(self): class TestNucleotideHGVSParing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -1080,7 +1082,7 @@ def test_strips_ws(self): class TestEnrich2MixedHGVSParsing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.wt = "ACT" self.wt_aa = constants.AA_CODES[constants.CODON_TABLE[self.wt]] self.enrich2 = enrich2.Enrich2(self.path, wt_sequence=self.wt) @@ -1174,7 +1176,7 @@ def test_protein_set_as_nt_when_table_is_not_syn_and_variant_is_special(self): class TestInferSilentAASub(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA", offset=0) self.bin.append(self.path.replace(".h5", "")) @@ -1219,7 +1221,7 @@ def test_correctly_infers_aa_from_silent_variants(self): self.assertEqual("p.Leu1=", self.enrich2.infer_silent_aa_substitution(group)) -class TestApplyOffset(TestCase): +class TestApplyOffset(ProgramTestCase): def test_mixed_variant_uses_nt_position_to_compute_codon_pos(self): variant = "c.-9A>T (p.Thr2Pro), c.-6C>A (p.Gln3Lys)" offset = -10 @@ -1253,14 +1255,14 @@ def test_applies_offset_to_protein_variant_modulo_3(self): @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence") def test_validates_against_wt_sequence(self, patch): variant = "c.-9C>T" - path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACT") enrich2.apply_offset(variant, offset=-10, enrich2=p) # pass patch.assert_called_with(*("c.1C>T",)) def test_value_error_base_mismatch_after_offset_applied(self): variant = "c.-9G>T" - path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACT") with self.assertRaises(ValueError): enrich2.apply_offset(variant, offset=-10, enrich2=p) @@ -1268,23 +1270,23 @@ def test_value_error_base_mismatch_after_offset_applied(self): @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence") def test_validates_against_pro_sequence(self, patch): variant = "p.Gly3Leu" - path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACG") enrich2.apply_offset(variant, offset=6, enrich2=p) # pass patch.assert_called_with(*("p.Gly1Leu",)) def test_value_error_pro_mismatch_after_offset_applied(self): variant = "p.Gly3Leu" - path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACG") with self.assertRaises(ValueError): enrich2.apply_offset(variant, offset=6, enrich2=p) -class TestEnrich2Init(TestCase): +class TestEnrich2Init(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") def test_error_is_coding_and_offset_not_mult_of_three(self): with self.assertRaises(ValueError): diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py index 970120a..c39fca9 100644 --- a/mavedbconvert/tests/test_fasta.py +++ b/mavedbconvert/tests/test_fasta.py @@ -1,48 +1,47 @@ import os from unittest import TestCase -from ..fasta import parse_fasta, split_fasta_path - +from . import ProgramTestCase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") +from ..fasta import parse_fasta, split_fasta_path -class TestFastaPath(TestCase): +class TestFastaPath(ProgramTestCase): def test_infers_bzip(self): head, base, ext, compression = split_fasta_path( - os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2") + os.path.join(self.data_dir, "fasta", "wt.fasta.bz2") ) self.assertEqual(ext, ".fasta") self.assertEqual(compression, "bz2") def test_infers_gzip(self): head, base, ext, compression = split_fasta_path( - os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz") + os.path.join(self.data_dir, "fasta", "wt.fasta.gz") ) self.assertEqual(ext, ".fasta") self.assertEqual(compression, "gz") def test_infers_uncompressed(self): head, base, ext, compression = split_fasta_path( - os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta") + os.path.join(self.data_dir, "fasta", "wt.fasta") ) self.assertEqual(ext, ".fasta") self.assertEqual(compression, None) head, base, ext, compression = split_fasta_path( - os.path.join(TEST_DATA_DIR, "fasta", "lower.fa") + os.path.join(self.data_dir, "fasta", "lower.fa") ) self.assertEqual(ext, ".fa") self.assertEqual(compression, None) def test_ioerror_invalid_ext(self): with self.assertRaises(IOError): - split_fasta_path(os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")) + split_fasta_path(os.path.join(self.data_dir, "enrich", "enrich.tsv")) -class TestFastaReader(TestCase): +class TestFastaReader(ProgramTestCase): def test_can_read_first_sequence(self): - sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta")) + sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" @@ -52,7 +51,7 @@ def test_can_read_first_sequence(self): self.assertEqual(sequence, expected) def test_converts_to_uppercase(self): - sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")) + sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "lower.fa")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" @@ -63,11 +62,11 @@ def test_converts_to_uppercase(self): def test_error_more_than_one_sequence(self): with self.assertRaises(ValueError): - parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "two.fasta")) + parse_fasta(os.path.join(self.data_dir, "fasta", "two.fasta")) def test_error_invalid_chars_in_sequence(self): with self.assertRaises(ValueError): - parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "invalid_chars.fasta")) + parse_fasta(os.path.join(self.data_dir, "fasta", "invalid_chars.fasta")) def test_ignores_blank_lines(self): expected = ( @@ -76,15 +75,15 @@ def test_ignores_blank_lines(self): "GAGGCTACGATCGTGCCTTGTGGCAAGTCTTCGCTCGCAC" "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA" ) - seq = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "spaces.fasta")) + seq = parse_fasta(os.path.join(self.data_dir, "fasta", "spaces.fasta")) self.assertEqual(seq, expected) def test_error_missing_gt_on_first_line(self): with self.assertRaises(IOError): - parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "bad_format.fasta")) + parse_fasta(os.path.join(self.data_dir, "fasta", "bad_format.fasta")) def test_can_open_with_gzip(self): - sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz")) + sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta.gz")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" @@ -94,7 +93,7 @@ def test_can_open_with_gzip(self): self.assertEqual(sequence, expected) def test_can_open_with_bzip(self): - sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2")) + sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta.bz2")) expected = ( "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT" "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA" From c68e67d2a4dc9265e6f9dfcaa9db40c38c9456ee Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Tue, 3 Sep 2019 14:52:51 +1000 Subject: [PATCH 07/26] black formatting --- mavedbconvert/tests/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py index 07d0191..d89a100 100644 --- a/mavedbconvert/tests/__init__.py +++ b/mavedbconvert/tests/__init__.py @@ -22,7 +22,9 @@ class ProgramTestCase(TestCase): def setUp(self): self._data_dir = TemporaryDirectory() # store the object - self.data_dir = os.path.join(self._data_dir.name, "data") # store the directory path + self.data_dir = os.path.join( + self._data_dir.name, "data" + ) # store the directory path shutil.copytree( src=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"), dst=self.data_dir, From 4360a1e5339ec570a9920de3675fb70364482025 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Wed, 4 Sep 2019 19:41:42 +1000 Subject: [PATCH 08/26] fixed bug in generating file path for test output --- mavedbconvert/tests/test_enrich2.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index 1b9503b..d3547f7 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -300,13 +300,15 @@ def setUp(self): def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self): self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" + invalid_rows_path = os.path.join( + os.path.dirname(self.path), "enrich2_invalid_rows.csv" + ) df = pd.DataFrame(data={"score": [1]}, index=["c.1A>G (p.Lys1Val)"]) self.enrich2.convert_h5_df( df=df, element=constants.variants_table, df_type=constants.score_type ) - self.assertFalse(os.path.isfile(fpath)) + self.assertFalse(os.path.isfile(invalid_rows_path)) def test_drops_non_numeric_columns(self): df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1A>G (p.Lys1Val)"]) @@ -339,16 +341,19 @@ def test_opens_invalid_rows_file_for_invalid_rows(self): df=df, element=constants.variants_table, df_type=constants.score_type ) - fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" - print(fpath) - print(self.data_dir) - self.assertTrue(os.path.isfile(fpath)) - self.bin.append(fpath) + invalid_rows_path = os.path.join( + os.path.dirname(self.path), "enrich2_invalid_rows.csv" + ) + + self.assertTrue(os.path.isfile(invalid_rows_path)) + self.bin.append(invalid_rows_path) def test_invalid_rows_file_contains_error_description(self): self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" + invalid_rows_path = os.path.join( + os.path.dirname(self.path), "enrich2_invalid_rows.csv" + ) df = pd.DataFrame( data={"score": [1.1, 1.2]}, @@ -356,13 +361,13 @@ def test_invalid_rows_file_contains_error_description(self): ) self.enrich2.convert_h5_df(df=df, df_type=constants.score_type, element=None) - self.assertTrue(os.path.isfile(fpath)) + self.assertTrue(os.path.isfile(invalid_rows_path)) - invalid = pd.read_csv(fpath, sep=",", index_col=0) + invalid = pd.read_csv(invalid_rows_path, sep=",", index_col=0) self.assertEqual(len(invalid), 1) self.assertEqual(invalid.index[0], "c.1T>G (p.Lys1Val)") self.assertIn("error_description", invalid.columns) - self.bin.append(fpath) + self.bin.append(invalid_rows_path) class TestEnrich2ParseInput(ProgramTestCase): From 547f11a02958efa7719c60f604a2470a703bdcc0 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Wed, 4 Sep 2019 20:23:35 +1000 Subject: [PATCH 09/26] added some todo notes --- mavedbconvert/tests/__init__.py | 2 ++ mavedbconvert/tests/test_parsers.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py index d89a100..24aa494 100644 --- a/mavedbconvert/tests/__init__.py +++ b/mavedbconvert/tests/__init__.py @@ -19,6 +19,8 @@ ] +# TODO: think up a better name for this class +# TODO: remove the old self.bin stuff class ProgramTestCase(TestCase): def setUp(self): self._data_dir = TemporaryDirectory() # store the object diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py index 28560df..b8afeec 100644 --- a/mavedbconvert/tests/test_parsers.py +++ b/mavedbconvert/tests/test_parsers.py @@ -6,7 +6,7 @@ from . import ProgramTestCase - +# TODO: convert these tests to use temp directories TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") From a8dcb856ffa0e0b26cbdb65352c2d78e8be4faab Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Wed, 4 Sep 2019 20:25:34 +1000 Subject: [PATCH 10/26] removed relative imports in tests and added main functions --- mavedbconvert/tests/test_base.py | 9 +++++-- mavedbconvert/tests/test_empiric.py | 14 ++++++---- mavedbconvert/tests/test_enrich.py | 9 +++++-- mavedbconvert/tests/test_enrich2.py | 14 ++++++---- mavedbconvert/tests/test_fasta.py | 10 ++++--- mavedbconvert/tests/test_filters.py | 12 ++++++--- mavedbconvert/tests/test_parsers.py | 30 +++++++++++---------- mavedbconvert/tests/test_utilities.py | 36 ++++++++++++++------------ mavedbconvert/tests/test_validators.py | 22 +++++++++------- 9 files changed, 97 insertions(+), 59 deletions(-) diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index 722981f..a678e31 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -1,9 +1,10 @@ import os import mock +import unittest -from .. import base, exceptions +from mavedbconvert import base, exceptions -from . import ProgramTestCase +from mavedbconvert.tests import ProgramTestCase class TestPaths(ProgramTestCase): @@ -241,3 +242,7 @@ def test_index_error_index_extends_beyond_indexable_pro_seq(self): with self.assertRaises(IndexError): self.base.one_based = False self.base.validate_against_protein_sequence("p.Met2Lys") + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index 937a42b..235a09a 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -1,13 +1,13 @@ import os -from unittest import TestCase +import unittest import pandas as pd import numpy as np from pandas.testing import assert_frame_equal, assert_series_equal -from .. import empiric, constants +from mavedbconvert import empiric, constants -from . import ProgramTestCase +from mavedbconvert.tests import ProgramTestCase class TestEmpiricInit(ProgramTestCase): @@ -23,7 +23,7 @@ def test_ok_is_mult_of_three(self): empiric.Empiric(src=self.path, wt_sequence="ATC", offset=3) -class TestInferProEvent(TestCase): +class TestInferProEvent(unittest.TestCase): def test_infers_equal_event(self): self.assertEqual( empiric.infer_pro_substitution(mut_aa="V", wt_aa="v", codon_pos=0), @@ -43,7 +43,7 @@ def test_converts_triple_q_to_Xaa(self): ) -class TestInferNTEvent(TestCase): +class TestInferNTEvent(unittest.TestCase): def test_infers_equal_event(self): self.assertEqual( empiric.infer_nt_substitution(wt_codon="aaa", mut_codon="AAA", codon_pos=0), @@ -499,3 +499,7 @@ def test_integration(self): pd.read_csv(self.empiric.output_file, delimiter=","), pd.read_csv(self.expected, delimiter=","), ) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py index f18d1e4..dc40baa 100644 --- a/mavedbconvert/tests/test_enrich.py +++ b/mavedbconvert/tests/test_enrich.py @@ -1,12 +1,13 @@ import os +import unittest import pandas as pd import numpy as np from pandas.testing import assert_frame_equal -from .. import enrich, constants, utilities +from mavedbconvert import enrich, constants, utilities -from . import ProgramTestCase +from mavedbconvert.tests import ProgramTestCase WT = ( @@ -331,3 +332,7 @@ def test_output_from_one_based_input(self): result = pd.read_csv(self.bin[1]) expected = pd.read_csv(self.expected) assert_frame_equal(expected, result) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index d3547f7..f2d9c20 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -1,6 +1,6 @@ import os import mock -from unittest import TestCase +import unittest from itertools import product import hgvsp @@ -10,9 +10,9 @@ import pandas as pd from pandas.testing import assert_index_equal, assert_frame_equal -from .. import validators, enrich2, constants, exceptions +from mavedbconvert import validators, enrich2, constants, exceptions -from . import ProgramTestCase +from mavedbconvert.tests import ProgramTestCase # Utility tests @@ -74,7 +74,7 @@ def test_returns_empty_when_missing_counts_key(self): self.assertIsNone(cnd_df) -class TestFlattenColumnNames(TestCase): +class TestFlattenColumnNames(unittest.TestCase): def setUp(self): index = pd.MultiIndex.from_product( [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]], @@ -166,7 +166,7 @@ def test_assertion_error_scores_shared_scores_different_index(self): enrich2.get_replicate_score_dataframes(self.store) -class TestDropNull(TestCase): +class TestDropNull(unittest.TestCase): def test_calls_drop_na_rows_from_scores_inplace(self): df = pd.DataFrame({"A": [None, 1]}) enrich2.drop_null(df) @@ -1302,3 +1302,7 @@ def test_ok_is_coding_false_and_offset_not_mult_of_three(self): def test_ok_is_coding_and_offset_mult_of_three(self): enrich2.Enrich2(src=self.path, wt_sequence="ATC", is_coding=True, offset=-3) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py index c39fca9..6c6fafb 100644 --- a/mavedbconvert/tests/test_fasta.py +++ b/mavedbconvert/tests/test_fasta.py @@ -1,9 +1,9 @@ import os -from unittest import TestCase +import unittest -from . import ProgramTestCase +from mavedbconvert.tests import ProgramTestCase -from ..fasta import parse_fasta, split_fasta_path +from mavedbconvert.fasta import parse_fasta, split_fasta_path class TestFastaPath(ProgramTestCase): @@ -101,3 +101,7 @@ def test_can_open_with_bzip(self): "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA" ) self.assertEqual(sequence, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_filters.py b/mavedbconvert/tests/test_filters.py index 2b2cbb2..d7b037d 100644 --- a/mavedbconvert/tests/test_filters.py +++ b/mavedbconvert/tests/test_filters.py @@ -1,13 +1,13 @@ -from unittest import TestCase +import unittest import pandas as pd import numpy as np -from .. import filters, constants +from mavedbconvert import filters, constants -class TestDropNaColumns(TestCase): +class TestDropNaColumns(unittest.TestCase): def test_drops_null_nt_column(self): df = pd.DataFrame( { @@ -59,7 +59,7 @@ def test_does_not_drop_column_containing_non_null_values(self): self.assertIn("A", df) -class TestDropNaRows(TestCase): +class TestDropNaRows(unittest.TestCase): def test_drops_null_row(self): df = pd.DataFrame({"A": [None], "B": [np.NaN]}) filters.drop_na_rows(df, inplace=True) @@ -69,3 +69,7 @@ def test_does_not_drop_row_containing_non_null_values(self): df = pd.DataFrame({"A": [None], "B": [0.0]}) filters.drop_na_rows(df, inplace=True) self.assertEqual(len(df), 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py index b8afeec..fabc836 100644 --- a/mavedbconvert/tests/test_parsers.py +++ b/mavedbconvert/tests/test_parsers.py @@ -1,16 +1,16 @@ import os import mock -from unittest import TestCase +import unittest -from .. import parsers, exceptions, constants +from mavedbconvert import parsers, exceptions, constants -from . import ProgramTestCase +from mavedbconvert.tests import ProgramTestCase # TODO: convert these tests to use temp directories TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") -class TestParseBoolean(TestCase): +class TestParseBoolean(unittest.TestCase): def test_true_if_str_of_true(self): self.assertTrue(parsers.parse_boolean(True)) self.assertTrue(parsers.parse_boolean("True")) @@ -22,7 +22,7 @@ def test_false_if_not_repr_of_true(self): self.assertFalse(parsers.parse_boolean(False)) -class TestParseNumeric(TestCase): +class TestParseNumeric(unittest.TestCase): def test_converts_to_dtype(self): self.assertIsInstance( parsers.parse_numeric("1", name="int", dtype=float), float @@ -35,7 +35,7 @@ def test_value_error_cannot_cast_to_dtype(self): parsers.parse_numeric("a", name="value", dtype=int) -class TestParseString(TestCase): +class TestParseString(unittest.TestCase): def test_returns_none_if_falsey(self): self.assertIsNone(parsers.parse_string(None)) self.assertIsNone(parsers.parse_string(" ")) @@ -45,7 +45,7 @@ def test_returns_string_stripped_of_ws(self): self.assertEqual(parsers.parse_string(" aaa "), "aaa") -class TestParseSrc(TestCase): +class TestParseSrc(unittest.TestCase): @mock.patch( "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"), @@ -100,7 +100,7 @@ def test_makes_dst_directory_tree(self): self.bin.append(path) -class TestParseProgram(TestCase): +class TestParseProgram(unittest.TestCase): def test_ok_supported_program(self): for p in ("enrich2", "enrich", "empiric"): parsers.parse_program(p) @@ -124,7 +124,7 @@ def test_sets_correct_program_from_dict(self): parsers.parse_program(program) -class TestParseWildTypeSequence(TestCase): +class TestParseWildTypeSequence(unittest.TestCase): def test_can_read_from_fasta(self): path = os.path.join(TEST_DATA_DIR, "fasta", "lower.fa") wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True) @@ -161,7 +161,7 @@ def test_ok_divisible_by_three_enrich_empiric(self): parsers.parse_wt_sequence("ATGATC", program="empiric") -class TestParseInputType(TestCase): +class TestParseInputType(unittest.TestCase): @mock.patch("mavedbconvert.parsers.parse_string", return_value="counts") def test_calls_parse_string(self, patch): parsers.parse_input_type(constants.count_type) @@ -176,7 +176,7 @@ def test_ok_recognised_input_type(self): parsers.parse_input_type(v) -class TestParseScoreColumn(TestCase): +class TestParseScoreColumn(unittest.TestCase): @mock.patch("mavedbconvert.parsers.parse_string", return_value="score") def test_calls_parse_string(self, patch): parsers.parse_score_column("score", constants.score_type, program="enrich") @@ -210,7 +210,7 @@ def test_ok_enrich2_and_column_not_defined(self): ) -class TestParseOffset(TestCase): +class TestParseOffset(unittest.TestCase): @mock.patch("mavedbconvert.parsers.parse_numeric", return_value=0) def test_calls_parse_numeric(self, patch): parsers.parse_offset(0, program="enrich") @@ -238,7 +238,7 @@ def test_ok_enrich_empiric_offset_mult_of_three(self): self.assertEqual(-6, parsers.parse_offset("-6", "empiric")) -class TestParseDocopt(TestCase): +class TestParseDocopt(unittest.TestCase): @staticmethod def mock_args( program=None, @@ -315,3 +315,7 @@ def test_contains_skip_header_rows_key(self): args = self.mock_args() _, kwargs = parsers.parse_docopt(args) self.assertIn("skip_header_rows", kwargs) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_utilities.py b/mavedbconvert/tests/test_utilities.py index 25b530f..7e1c244 100644 --- a/mavedbconvert/tests/test_utilities.py +++ b/mavedbconvert/tests/test_utilities.py @@ -1,11 +1,11 @@ -from unittest import TestCase +import unittest import numpy as np -from .. import utilities, constants, exceptions +from mavedbconvert import utilities, constants, exceptions -class TestSlicer(TestCase): +class TestSlicer(unittest.TestCase): def test_slicer_returns_chunks_of_size_n(self): self.assertEqual(list(utilities.slicer("aaabbbccc", 3)), ["aaa", "bbb", "ccc"]) @@ -15,7 +15,7 @@ def test_slicer_returns_clips_if_cannot_chunk(self): ) -class TestTranslateWTSequence(TestCase): +class TestTranslateWTSequence(unittest.TestCase): def test_translate_wt_seq_no_offset(self): self.assertEqual(utilities.translate_dna("GTGGCGGAG", offset=0), "VAE") @@ -31,7 +31,7 @@ def test_error_offset_negative(self): utilities.translate_dna("GTGGCGGAG", offset=-3) -class TestIsNull(TestCase): +class TestIsNull(unittest.TestCase): def test_is_null_true_for_none_nan_and_na(self): for v in constants.extra_na: self.assertTrue(utilities.is_null(v)) @@ -46,7 +46,7 @@ def test_is_null_false(self): self.assertFalse(utilities.is_null("1.2")) -class TestFormatColumn(TestCase): +class TestFormatColumn(unittest.TestCase): def test_replaces_null_with_nan(self): self.assertIs(utilities.format_column([" "])[0], np.NaN) self.assertIs(utilities.format_column(["none"])[0], np.NaN) @@ -67,7 +67,7 @@ def test_replaces_null_with_none_if_astype_is_not_int_or_float(self): self.assertIs(utilities.format_column(["none"], astype=str)[0], None) -class TestIsNumeric(TestCase): +class TestIsNumeric(unittest.TestCase): def test_true_for_float(self): self.assertTrue(utilities.is_numeric(float)) @@ -90,7 +90,7 @@ def test_false_for_np_object(self): self.assertFalse(utilities.is_numeric(np.object)) -class TestNucleotideSubstitutionEvent(TestCase): +class TestNucleotideSubstitutionEvent(unittest.TestCase): def test_parses_negative_positions(self): nt = utilities.NucleotideSubstitutionEvent("n.-100A>T") self.assertEqual(nt.position, -100) @@ -166,7 +166,7 @@ def test_infers_within_frame_position(self): ) -class TestProteinSubstitutionEvent(TestCase): +class TestProteinSubstitutionEvent(unittest.TestCase): def test_error_set_position_less_than_1(self): pro = utilities.ProteinSubstitutionEvent("p.Gly4Leu") with self.assertRaises(ValueError): @@ -208,7 +208,7 @@ def test_formats_event_string_correctly(self): ) -class TestSplitVariant(TestCase): +class TestSplitVariant(unittest.TestCase): def test_split_hgvs_singular_list_non_multi_variant(self): self.assertListEqual(["c.100A>G"], utilities.split_variant("c.100A>G")) @@ -218,7 +218,7 @@ def test_split_hgvs_returns_list_of_single_variants(self): ) -class TestNormalizeVariant(TestCase): +class TestNormalizeVariant(unittest.TestCase): def test_stripts_white_space(self): self.assertEqual(utilities.normalize_variant(" c.1A>G "), "c.1A>G") @@ -256,7 +256,7 @@ def test_replaces_X_with_N_in_rna_variant(self): ) -class TestFormatVariant(TestCase): +class TestFormatVariant(unittest.TestCase): def test_stripts_white_space(self): self.assertEqual(utilities.format_variant(" c.1A>G "), "c.1A>G") @@ -264,7 +264,7 @@ def test_passes_on_none(self): self.assertIsNone(utilities.format_variant(None)) -class TestHGVSProFromEventList(TestCase): +class TestHGVSProFromEventList(unittest.TestCase): def test_returns_single_event(self): result = utilities.hgvs_pro_from_event_list(["L4V"]) self.assertEqual(result, "p.L4V") @@ -292,7 +292,7 @@ def test_error_invalid_hgvs(self): utilities.hgvs_pro_from_event_list(["aaaa"]) -class TestHGVSNTFromEventList(TestCase): +class TestHGVSNTFromEventList(unittest.TestCase): def test_returns_single_event(self): result = utilities.hgvs_nt_from_event_list(["45A>G"], prefix="c") self.assertEqual(result, "c.45A>G") @@ -316,7 +316,7 @@ def test_error_invalid_hgvs(self): utilities.hgvs_nt_from_event_list(["aaaa"], prefix="c") -class TestNonHgvsColumns(TestCase): +class TestNonHgvsColumns(unittest.TestCase): def test_returns_non_hgvs_columns(self): self.assertListEqual( ["score"], @@ -328,7 +328,7 @@ def test_returns_non_hgvs_columns(self): ) -class TestHgvsColumns(TestCase): +class TestHgvsColumns(unittest.TestCase): def test_returns_only_hgvs_columns(self): self.assertListEqual( [constants.nt_variant_col, constants.pro_variant_col], @@ -338,3 +338,7 @@ def test_returns_only_hgvs_columns(self): ) ), ) + + +if __name__ == "__main__": + unittest.main() diff --git a/mavedbconvert/tests/test_validators.py b/mavedbconvert/tests/test_validators.py index bad4735..301d4a6 100644 --- a/mavedbconvert/tests/test_validators.py +++ b/mavedbconvert/tests/test_validators.py @@ -1,13 +1,13 @@ -from unittest import TestCase +import unittest import pandas as pd from hgvs.sequencevariant import SequenceVariant -from .. import validators, constants, exceptions +from mavedbconvert import validators, constants, exceptions -class TestHGVSPatternsBackend(TestCase): +class TestHGVSPatternsBackend(unittest.TestCase): def setUp(self): self.backend = validators.HGVSPatternsBackend() @@ -25,7 +25,7 @@ def test_returns_str_variant(self): self.assertIsInstance(self.backend.validate("c.1A>G"), str) -class TestHGVSBiocommonsBackend(TestCase): +class TestHGVSBiocommonsBackend(unittest.TestCase): def setUp(self): self.backend = validators.HGVSBiocommonsBackend("NM_000000001.1") @@ -68,7 +68,7 @@ def test_validate_hgvs_uses_dummy_ref_if_transcript_not_passed(self): ) -class TestValidateHGVS(TestCase): +class TestValidateHGVS(unittest.TestCase): def test_uses_biocommons_backend_if_transcript_provided(self): result = validators.validate_variants( ["c.[1A>G;2A>G]"], n_jobs=2, verbose=0, transcript=constants.dummy_ref @@ -80,7 +80,7 @@ def test_uses_patterns_backend_as_default(self): self.assertIsInstance(result[0], str) -class TestDfValidators(TestCase): +class TestDfValidators(unittest.TestCase): def test_validate_column_raise_keyerror_column_not_exist(self): df = pd.DataFrame({"a": [1]}) with self.assertRaises(KeyError): @@ -100,7 +100,7 @@ def test_pass_all_numeric(self): validators.validate_columns_are_numeric(df) -class TestHGVSValidators(TestCase): +class TestHGVSValidators(unittest.TestCase): def test_validate_hgvs_nt_not_redef_raise_error_if_redefined(self): df = pd.DataFrame({constants.nt_variant_col: ["a", "b"]}) validators.validate_hgvs_nt_uniqueness(df) # Should pass @@ -124,7 +124,7 @@ def test_validate_hgvs_pro_not_redef_ignores_none(self): validators.validate_hgvs_pro_uniqueness(df) # Should pass -class TestMaveDBCompliance(TestCase): +class TestMaveDBCompliance(unittest.TestCase): def test_error_primary_column_contains_null(self): df = pd.DataFrame( { @@ -207,7 +207,7 @@ def test_keyerror_missing_score_column_df_type_is_scores(self): validators.validate_mavedb_compliance(df, df_type=constants.score_type) -class TestValidateSameVariants(TestCase): +class TestValidateSameVariants(unittest.TestCase): def test_ve_counts_defines_different_nt_variants(self): scores = pd.DataFrame( { @@ -260,3 +260,7 @@ def test_error_dfs_define_different_hgvs_columns(self): counts = pd.DataFrame({constants.pro_variant_col: ["p.Leu75Glu"]}) with self.assertRaises(AssertionError): validators.validate_datasets_define_same_variants(scores, counts) + + +if __name__ == "__main__": + unittest.main() From 03b48a2bddeeff6303c450c20169510091e24fad Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 14 Oct 2019 14:47:07 +1100 Subject: [PATCH 11/26] refactored file path names --- mavedbconvert/tests/test_empiric.py | 46 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index 235a09a..44310a9 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -352,20 +352,20 @@ def test_keeps_int_type_as_int(self): class TestEmpiricLoadInput(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") - self.tmp_path = os.path.join(self.data_dir, "empiric", "tmp.csv") - self.tmp_path_tsv = os.path.join(self.data_dir, "empiric", "tmp.tsv") - self.tmp_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx") + self.excel_path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") + self.csv_path = os.path.join(self.data_dir, "empiric", "tmp.csv") + self.tsv_path = os.path.join(self.data_dir, "empiric", "tmp.tsv") + self.multisheet_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx") self.bin.append(self.tmp_path) self.bin.append(self.tmp_path_tsv) def test_extra_na_load_as_nan(self): for value in constants.extra_na: - df = pd.read_excel(self.path) + df = pd.read_excel(self.excel_path) df["A"] = [value] * len(df) - df.to_csv(self.tmp_path, index=False) + df.to_csv(self.csv_path, index=False) e = empiric.Empiric( - src=self.tmp_path, + src=self.csv_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, @@ -380,9 +380,9 @@ def test_loads_first_sheet_by_default(self): {"Position": [0], "Amino Acid": ["K"], "score": [1.2]}, {"Position": [1], "Amino Acid": ["G"], "score": [1.4]}, ] - self.mock_multi_sheet_excel_file(self.tmp_excel_path, data) + self.mock_multi_sheet_excel_file(self.multisheet_excel_path, data) p = empiric.Empiric( - src=self.tmp_excel_path, + src=self.multisheet_excel_path, wt_sequence="TTTTCTTATTGT", score_column="score", input_type=constants.score_type, @@ -392,10 +392,10 @@ def test_loads_first_sheet_by_default(self): assert_frame_equal(df, expected) def test_handles_csv(self): - df = pd.read_excel(self.path) - df.to_csv(self.tmp_path, index=False, sep=",") + df = pd.read_excel(self.excel_path) + df.to_csv(self.csv_path, index=False, sep=",") e = empiric.Empiric( - src=self.tmp_path, + src=self.csv_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, @@ -405,10 +405,10 @@ def test_handles_csv(self): assert_frame_equal(result, df) def test_handles_tsv(self): - df = pd.read_excel(self.path) - df.to_csv(self.tmp_path_tsv, index=False, sep="\t") + df = pd.read_excel(self.excel_path) + df.to_csv(self.tsv_path, index=False, sep="\t") e = empiric.Empiric( - src=self.tmp_path_tsv, + src=self.tsv_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, @@ -418,12 +418,12 @@ def test_handles_tsv(self): assert_frame_equal(result, df) def test_error_position_not_in_columns(self): - df = pd.read_excel(self.path) + df = pd.read_excel(self.excel_path) df = df.drop(columns=["Position"]) - df.to_csv(self.tmp_path, index=False, sep="\t") + df.to_csv(self.csv_path, index=False, sep="\t") with self.assertRaises(ValueError): e = empiric.Empiric( - src=self.tmp_path, + src=self.csv_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, @@ -432,12 +432,12 @@ def test_error_position_not_in_columns(self): e.load_input_file() def test_error_amino_acid_not_in_columns(self): - df = pd.read_excel(self.path) + df = pd.read_excel(self.excel_path) df = df.drop(columns=["Amino Acid"]) - df.to_csv(self.tmp_path, index=False, sep="\t") + df.to_csv(self.csv_path, index=False, sep="\t") with self.assertRaises(ValueError): e = empiric.Empiric( - src=self.tmp_path, + src=self.csv_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, @@ -448,7 +448,7 @@ def test_error_amino_acid_not_in_columns(self): def test_not_scores_column_but_input_type_is_scores(self): with self.assertRaises(ValueError): empiric.Empiric( - src=self.tmp_path, + src=self.csv_path, wt_sequence="TTTTCTTATTGT", score_column=None, input_type=constants.score_type, @@ -457,7 +457,7 @@ def test_not_scores_column_but_input_type_is_scores(self): def test_applies_offset_to_position_column(self): e = empiric.Empiric( - src=self.path, + src=self.excel_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, From 1efefb11842b59e94a1f082218f9f0b63aabc4b6 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 14 Oct 2019 14:47:31 +1100 Subject: [PATCH 12/26] removed unnecessary temp file tracking --- mavedbconvert/tests/test_empiric.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index 44310a9..7c5080c 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -356,8 +356,6 @@ def setUp(self): self.csv_path = os.path.join(self.data_dir, "empiric", "tmp.csv") self.tsv_path = os.path.join(self.data_dir, "empiric", "tmp.tsv") self.multisheet_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx") - self.bin.append(self.tmp_path) - self.bin.append(self.tmp_path_tsv) def test_extra_na_load_as_nan(self): for value in constants.extra_na: From ea5f62731aaa0546dbb62c40333fe3ea63562e1a Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 14 Oct 2019 14:48:58 +1100 Subject: [PATCH 13/26] refactored file path names --- mavedbconvert/tests/test_empiric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index 7c5080c..ceeaac2 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -469,10 +469,10 @@ def test_applies_offset_to_position_column(self): class TestEmpiricConvert(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") + self.excel_path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") self.expected = os.path.join(self.data_dir, "empiric", "empiric_expected.csv") self.empiric = empiric.Empiric( - src=self.path, + src=self.excel_path, wt_sequence="TTTTCTTATTGT", score_column="col_A", input_type=constants.score_type, @@ -486,7 +486,7 @@ def test_saves_to_dst(self): def test_integration(self): self.empiric = empiric.Empiric( - src=self.path, + src=self.excel_path, wt_sequence="TCTTATTGT", score_column="col_A", input_type=constants.score_type, From 3812f660fc89f8c3ca730a20e7c2ec28c411c4f5 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 14 Oct 2019 14:49:11 +1100 Subject: [PATCH 14/26] removed unnecessary temp file tracking --- mavedbconvert/tests/test_empiric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py index ceeaac2..f68feed 100644 --- a/mavedbconvert/tests/test_empiric.py +++ b/mavedbconvert/tests/test_empiric.py @@ -478,7 +478,6 @@ def setUp(self): input_type=constants.score_type, one_based=False, ) - self.bin.append(self.empiric.output_file) def test_saves_to_dst(self): self.empiric.convert() From 82cce0b0c1f16de714860408b98b34f44f9d540a Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 14 Oct 2019 15:35:52 +1100 Subject: [PATCH 15/26] don't print the pandas row numbers when generating excel test files --- mavedbconvert/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py index 24aa494..4f9493c 100644 --- a/mavedbconvert/tests/__init__.py +++ b/mavedbconvert/tests/__init__.py @@ -37,7 +37,7 @@ def mock_multi_sheet_excel_file(self, path, data): writer = pd.ExcelWriter(path, engine="xlsxwriter") for i, di in enumerate(data): df = pd.DataFrame(di) - df.to_excel(writer, sheet_name="Sheet{}".format(i)) + df.to_excel(writer, sheet_name="Sheet{}".format(i), index=False) writer.save() self.bin.append(path) From a08fc7b320da7df092d28adf433d418d4d539045 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 16:11:53 +1100 Subject: [PATCH 16/26] added travis config --- .travis.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cb1cab5 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,29 @@ +language: python +matrix: + include: + - python: "3.6" + - python: "3.7" + - python: "3.8" + - python: "3.8-dev" + - python: "3.9-dev" + - python: "pypy3" + env: NO_MYPY=true + allow_failures: + - python: "3.8-dev" + - python: "3.9-dev" + - python: "pypy3" + env: NO_MYPY=true +install: + - pip3 install . +before_script: + - pip3 install coverage + - pip3 install coveralls + - if ! $NO_MYPY; then pip3 install mypy; fi + - pip3 install sphinx + - pip3 install sphinx-rtd-theme +script: + - coverage run --source fqfa -m unittest + - if ! $NO_MYPY; then mypy fqfa tests; fi + - cd docs && make doctest +after_success: + - coveralls From 7e26a39dc526f6a8bcc5950e38df33e598375ea7 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 16:26:00 +1100 Subject: [PATCH 17/26] updated setup.py details --- setup.py | 50 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index e236978..ab4722f 100644 --- a/setup.py +++ b/setup.py @@ -1,17 +1,43 @@ -from setuptools import setup +import setuptools -setup( +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( name="mavedbconvert", - version="0.6.0-alpha", - packages=["mavedbconvert", "mavedbconvert.tests"], - url="https://github.com/FowlerLab/mavedb-convert", - license="AGPLv3", - author="Daniel Esposito", - author_email="esposito.d@wehi.edu.au", + version="0.1.0-beta", + author="Alan F Rubin, Daniel Esposito", + author_email="alan.rubin@wehi.edu.au", description=( - "A command line tool for converting alternate " - "file formats into a MaveDB compliant format." + "A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format." ), - # install_requires=open("requirements/install.txt", "rt").read().split("\n"), - entry_points={"console_scripts": ["mavedb-convert=mavedbconvert.main:main"]}, + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/VariantEffect/mavedbconvert", + packages=setuptools.find_packages(), + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + ], + python_requires=">=3.6", + install_requires=[ + "tables>=3.2.0", + "pandas>=0.18.0", + "xlrd >= 0.9.0", + "tqdm", + "docopt", + "hgvsp @ git+https://github.com/FowlerLab/hgvs-patterns", + "hgvs", + "requests", + "numpy", + "scipy", + "joblib", + "xlsxwriter", + ], + entry_points={"console_scripts": ["mavedbconvert=mavedbconvert.main:main"]}, + test_suite="tests", ) From f23dd740319a7bfec1c1363d90047f169b2a1ab5 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:06:02 +1100 Subject: [PATCH 18/26] renamed entrypoint --- mavedbconvert/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mavedbconvert/main.py b/mavedbconvert/main.py index 2cb84c8..5df79d3 100644 --- a/mavedbconvert/main.py +++ b/mavedbconvert/main.py @@ -12,11 +12,11 @@ All outputs are in 1-based coordinates. Usage: - mavedb-convert enrich2 [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding] - mavedb-convert enrich [--dst=D] [--wtseq=W] [--offset=O] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H] - mavedb-convert empiric [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H] - mavedb-convert -h | --help - mavedb-convert --version + mavedbconvert enrich2 [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding] + mavedbconvert enrich [--dst=D] [--wtseq=W] [--offset=O] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H] + mavedbconvert empiric [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H] + mavedbconvert -h | --help + mavedbconvert --version Options: From 6e8189929166f34eee4372a563587b50749dacf4 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:10:05 +1100 Subject: [PATCH 19/26] use unittest.mock instead of requiring mock --- mavedbconvert/tests/test_base.py | 6 +++--- mavedbconvert/tests/test_enrich2.py | 20 ++++++++++---------- mavedbconvert/tests/test_parsers.py | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py index a678e31..7b356b3 100644 --- a/mavedbconvert/tests/test_base.py +++ b/mavedbconvert/tests/test_base.py @@ -1,6 +1,6 @@ import os -import mock import unittest +from unittest.mock import patch from mavedbconvert import base, exceptions @@ -49,12 +49,12 @@ def test_creates_directory_tree_if_it_doesnt_exist(self): self.assertTrue(os.path.isdir(output)) self.bin.append(output) - @mock.patch("os.access") + @patch("os.access") def test_checks_read_permission(self, patch): p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA") self.assertEqual(patch.call_args_list[0][0], (p.src, os.R_OK)) - @mock.patch("os.access") + @patch("os.access") def test_checks_write_permission(self, patch): p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA") self.assertEqual(patch.call_args_list[1][0], (p.dst, os.W_OK)) diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index f2d9c20..02ac246 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -1,6 +1,6 @@ import os -import mock import unittest +from unittest.mock import patch from itertools import product import hgvsp @@ -559,7 +559,7 @@ def tearDown(self): def parse_rows(self, variants, element=None): return [self.enrich2.parse_row((v, element)) for v in list(variants)] - @mock.patch.object(pd.DataFrame, "to_csv", return_value=None) + @patch.object(pd.DataFrame, "to_csv", return_value=None) def test_saves_to_output_directory(self, patch): output = os.path.join(self.data_dir, "enrich2", "new") p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0) @@ -568,7 +568,7 @@ def test_saves_to_output_directory(self, patch): self.assertIn(output, call_args[0][0]) self.bin.append(output) - @mock.patch.object(pd.DataFrame, "to_csv", return_value=None) + @patch.object(pd.DataFrame, "to_csv", return_value=None) def test_saves_to_file_location_if_no_dst_supplied(self, patch): p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0) p.parse_input(self.enrich2.load_input_file()) @@ -578,13 +578,13 @@ def test_saves_to_file_location_if_no_dst_supplied(self, patch): for call_args in patch.call_args_list: self.assertIn(expected_base_path, call_args[0][0]) - @mock.patch("mavedbconvert.enrich2.get_replicate_score_dataframes") + @patch("mavedbconvert.enrich2.get_replicate_score_dataframes") def test_iterates_over_all_available_tables(self, patch): self.enrich2.parse_input(self.enrich2.load_input_file()) self.assertIn(constants.synonymous_table, patch.call_args_list[0][0]) self.assertIn(constants.variants_table, patch.call_args_list[1][0]) - @mock.patch( + @patch( "mavedbconvert.enrich2.drop_null", side_effect=lambda scores_df, counts_df: (scores_df, counts_df), ) @@ -928,7 +928,7 @@ def test_nt_variant_is_none_special_variant_is_from_synonymous_table(self): ), ) - @mock.patch("mavedbconvert.enrich2.apply_offset", return_value="c.3T>C (p.Thr1=)") + @patch("mavedbconvert.enrich2.apply_offset", return_value="c.3T>C (p.Thr1=)") def test_calls_apply_offset_to_variant(self, patch): variant = "c.3T>C (p.=)" self.enrich2.parse_row((variant, None)) @@ -1123,7 +1123,7 @@ def test_variant_order_maintained(self): self.assertEqual(nt, "c.[1=;6T>G;2A>T]") self.assertEqual(pro, "p.[Lys1Ile;Asn2Lys]") - @mock.patch.object( + @patch.object( enrich2.Enrich2, "infer_silent_aa_substitution", return_value="p.Lys1=" ) def test_groups_codons(self, patch): @@ -1132,7 +1132,7 @@ def test_groups_codons(self, patch): _, _ = self.enrich2.parse_mixed_variant(variant) patch.assert_called_with(*(["c.1=", "c.2="], variant)) - @mock.patch.object( + @patch.object( enrich2.Enrich2, "infer_silent_aa_substitution", return_value="p.Lys1=" ) def test_calls_infer_with_synonymous_variants_only(self, patch): @@ -1257,7 +1257,7 @@ def test_applies_offset_to_protein_variant_modulo_3(self): self.assertEqual("p.Leu7=, p.Leu10=", enrich2.apply_offset(variant, offset)) self.assertEqual("p.Leu7=", enrich2.apply_offset("p.Leu10=", offset)) - @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence") + @patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence") def test_validates_against_wt_sequence(self, patch): variant = "c.-9C>T" path = os.path.join(self.data_dir, "enrich2", "dummy.h5") @@ -1272,7 +1272,7 @@ def test_value_error_base_mismatch_after_offset_applied(self): with self.assertRaises(ValueError): enrich2.apply_offset(variant, offset=-10, enrich2=p) - @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence") + @patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence") def test_validates_against_pro_sequence(self, patch): variant = "p.Gly3Leu" path = os.path.join(self.data_dir, "enrich2", "dummy.h5") diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py index fabc836..f0e3bae 100644 --- a/mavedbconvert/tests/test_parsers.py +++ b/mavedbconvert/tests/test_parsers.py @@ -1,6 +1,6 @@ import os -import mock import unittest +from unittest.mock import patch from mavedbconvert import parsers, exceptions, constants @@ -46,7 +46,7 @@ def test_returns_string_stripped_of_ws(self): class TestParseSrc(unittest.TestCase): - @mock.patch( + @patch( "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"), ) @@ -74,7 +74,7 @@ def test_error_file_is_a_dir(self): class TestParseDst(ProgramTestCase): - @mock.patch( + @patch( "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR) ) def test_calls_parse_string(self, patch): @@ -162,7 +162,7 @@ def test_ok_divisible_by_three_enrich_empiric(self): class TestParseInputType(unittest.TestCase): - @mock.patch("mavedbconvert.parsers.parse_string", return_value="counts") + @patch("mavedbconvert.parsers.parse_string", return_value="counts") def test_calls_parse_string(self, patch): parsers.parse_input_type(constants.count_type) patch.assert_called() @@ -177,7 +177,7 @@ def test_ok_recognised_input_type(self): class TestParseScoreColumn(unittest.TestCase): - @mock.patch("mavedbconvert.parsers.parse_string", return_value="score") + @patch("mavedbconvert.parsers.parse_string", return_value="score") def test_calls_parse_string(self, patch): parsers.parse_score_column("score", constants.score_type, program="enrich") patch.assert_called() @@ -211,7 +211,7 @@ def test_ok_enrich2_and_column_not_defined(self): class TestParseOffset(unittest.TestCase): - @mock.patch("mavedbconvert.parsers.parse_numeric", return_value=0) + @patch("mavedbconvert.parsers.parse_numeric", return_value=0) def test_calls_parse_numeric(self, patch): parsers.parse_offset(0, program="enrich") patch.assert_called() From fb7f2ca03e49f618464106885b22fdc692be2c91 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:10:33 +1100 Subject: [PATCH 20/26] requirements files no longer needed --- requirements/dev.txt | 8 -------- requirements/install.txt | 12 ------------ 2 files changed, 20 deletions(-) delete mode 100644 requirements/dev.txt delete mode 100644 requirements/install.txt diff --git a/requirements/dev.txt b/requirements/dev.txt deleted file mode 100644 index e003b59..0000000 --- a/requirements/dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -black -ipython -pylint -tox -pytest -pytest-sugar -pytest-cov -mock \ No newline at end of file diff --git a/requirements/install.txt b/requirements/install.txt deleted file mode 100644 index a1f1556..0000000 --- a/requirements/install.txt +++ /dev/null @@ -1,12 +0,0 @@ -tables>=3.2.0 -pandas>=0.18.0,<=0.24.0 -xlrd >= 0.9.0 -tqdm -docopt -git+https://github.com/FowlerLab/hgvs-patterns.git -hgvs -requests -numpy -scipy -joblib -xlsxwriter \ No newline at end of file From d251b6883e97422cb0f4d2e6fcfd2f6dd9aba03e Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:17:48 +1100 Subject: [PATCH 21/26] added expected failures for lingering Enrich2 converter issues --- mavedbconvert/tests/test_enrich2.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index 02ac246..384051d 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -49,16 +49,22 @@ def tearDown(self): if os.path.isfile(self.path): os.unlink(self.path) + # TODO: pandas deprecation fix + @unittest.expectedFailure def test_column_names_combine_selection_and_timepoint(self): cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1") self.assertListEqual( list(cnd_df.columns), ["rep1_t0", "rep1_t1", "rep2_t0", "rep2_t1"] ) + # TODO: pandas deprecation fix + @unittest.expectedFailure def test_index_of_dfs_match_index_of_scores(self): cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1") assert_index_equal(self.store["/main/variants/scores/"].index, cnd_df.index) + # TODO: pandas deprecation fix + @unittest.expectedFailure def test_row_filled_with_nans_filtered_index_not_in_counts(self): cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1") self.assertTrue(np.all(cnd_df.loc["c.3A>G", :].isnull())) @@ -808,6 +814,8 @@ def test_outputs_expected_variants_scores_for_each_condition(self): ].values.astype(float) assert_frame_equal(result, expected) + # TODO: pandas deprecation fix + @unittest.expectedFailure def test_counts_and_scores_output_define_same_variants_when_input_does_not(self): self.store.close() self.store = pd.HDFStore(self.path, "w") @@ -869,6 +877,8 @@ def test_error_file_not_h5_or_tsv(self): with self.assertRaises(TypeError): p.load_input_file() + # TODO: sort out reason why hardcoding Enrich2 score column was a fix + @unittest.expectedFailure def test_scores_tsv_missing_score_column(self): path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( From dd59f2495d9dca1a91c8058787e148a9dd3d2b06 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:20:59 +1100 Subject: [PATCH 22/26] removed extraneous docs references --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index cb1cab5..205f79c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,11 +19,8 @@ before_script: - pip3 install coverage - pip3 install coveralls - if ! $NO_MYPY; then pip3 install mypy; fi - - pip3 install sphinx - - pip3 install sphinx-rtd-theme script: - coverage run --source fqfa -m unittest - if ! $NO_MYPY; then mypy fqfa tests; fi - - cd docs && make doctest after_success: - coveralls From 25cc364ddd4c260c74afcdc0787ce34b816aae69 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:23:15 +1100 Subject: [PATCH 23/26] updated name and instructions --- README.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d12bdc4..fb1d7c3 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,21 @@ -# mavedb-convert -A command line tool for converting alternate file formats into a MaveDB compliant format. +# mavedbconvert +A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format. # Installation -Download the `mavedb-convert` source and navigate to that directory. +Download the mavedbconvert source and navigate to that directory. We recommend creating a [virtual environment](https://docs.python.org/3/library/venv.html) before proceeding with the installation. -Install dependencies using the requirements file and then install the package: +Install the package using pip: - pip3 install -r requirements/install.txt pip3 install . -Additional requirements needed for running the unit tests and doing package development are in `reuirements/dev.txt` - ## Troubleshooting -If you are a OSX user, you may experience header related issues when installing `pysam`. The current workaround -is to install pysam version `0.13` manually before installing the requirements: +If you are a OSX user, you may experience header related issues when installing pysam. The current workaround +is to install pysam v0.13 manually before installing the requirements: - pip install pysam==0.13 + pip3 install pysam==0.13 This is the latest version known to compile without errors. -Although `pysam` is not required for `mavedb-convert` directly, it is installed by some of our dependencies. Until it is removed or made optional by those libraries, `mavedb-convert` will unfortunately not be installable on Windows. +Although pysam is not required for mavedbconvert directly, it is installed by some of our dependencies. +Until it is removed or made optional by those libraries, mavedbconvert will unfortunately not be installable on Windows. From 3e2a2b543d281a3beb5f333a64a73f618ef1a7ca Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:26:38 +1100 Subject: [PATCH 24/26] fixed incorrect package name for coverage and mypy --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 205f79c..ba833e5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ before_script: - pip3 install coveralls - if ! $NO_MYPY; then pip3 install mypy; fi script: - - coverage run --source fqfa -m unittest - - if ! $NO_MYPY; then mypy fqfa tests; fi + - coverage run --source mavedbconvert -m unittest + - if ! $NO_MYPY; then mypy mavedbconvert tests; fi after_success: - coveralls From 8f67a2fa4622e4791d7e3b9b91674ca7e00ccf56 Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Mon, 30 Mar 2020 17:36:34 +1100 Subject: [PATCH 25/26] added badges for master branch --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index fb1d7c3..dbc1b07 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +[![Build Status](https://travis-ci.com/VariantEffect/mavedbconvert.svg?branch=master)](https://travis-ci.com/VariantEffect/mavedbconvert) +[![Coverage Status](https://coveralls.io/repos/github/VariantEffect/mavedbconvert/badge.svg?branch=master)](https://coveralls.io/github/VariantEffect/mavedbconvert?branch=master) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + # mavedbconvert A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format. From b991b7fb29b6c69b54e949366ee8843762503ebd Mon Sep 17 00:00:00 2001 From: Alan Rubin Date: Wed, 1 Apr 2020 15:57:39 +1100 Subject: [PATCH 26/26] resolved expected failures --- mavedbconvert/enrich2.py | 7 +++++-- mavedbconvert/tests/test_enrich2.py | 10 ---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/mavedbconvert/enrich2.py b/mavedbconvert/enrich2.py index d99ffe6..2263976 100644 --- a/mavedbconvert/enrich2.py +++ b/mavedbconvert/enrich2.py @@ -241,7 +241,10 @@ def get_count_dataframe_by_condition( return None filtered = store["/main/{}/scores".format(element)].index - df = store[count_key].loc[filtered, idx[cnd, :, :]] + # TODO: revisit tests to see if preserving the all-NA rows makes sense + store_df = store[count_key] + store_df = store_df.reindex(filtered) + df = store_df.loc[filtered, idx[cnd, :, :]] df.columns = flatten_column_names(df.columns, (1, 2)) return df @@ -275,7 +278,7 @@ def __init__( skip_header_rows=skip_header_rows, skip_footer_rows=skip_footer_rows, sheet_name=sheet_name, - score_column="score", + score_column=score_column, hgvs_column=hgvs_column, input_type=input_type, ) diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py index 384051d..02ac246 100644 --- a/mavedbconvert/tests/test_enrich2.py +++ b/mavedbconvert/tests/test_enrich2.py @@ -49,22 +49,16 @@ def tearDown(self): if os.path.isfile(self.path): os.unlink(self.path) - # TODO: pandas deprecation fix - @unittest.expectedFailure def test_column_names_combine_selection_and_timepoint(self): cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1") self.assertListEqual( list(cnd_df.columns), ["rep1_t0", "rep1_t1", "rep2_t0", "rep2_t1"] ) - # TODO: pandas deprecation fix - @unittest.expectedFailure def test_index_of_dfs_match_index_of_scores(self): cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1") assert_index_equal(self.store["/main/variants/scores/"].index, cnd_df.index) - # TODO: pandas deprecation fix - @unittest.expectedFailure def test_row_filled_with_nans_filtered_index_not_in_counts(self): cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1") self.assertTrue(np.all(cnd_df.loc["c.3A>G", :].isnull())) @@ -814,8 +808,6 @@ def test_outputs_expected_variants_scores_for_each_condition(self): ].values.astype(float) assert_frame_equal(result, expected) - # TODO: pandas deprecation fix - @unittest.expectedFailure def test_counts_and_scores_output_define_same_variants_when_input_does_not(self): self.store.close() self.store = pd.HDFStore(self.path, "w") @@ -877,8 +869,6 @@ def test_error_file_not_h5_or_tsv(self): with self.assertRaises(TypeError): p.load_input_file() - # TODO: sort out reason why hardcoding Enrich2 score column was a fix - @unittest.expectedFailure def test_scores_tsv_missing_score_column(self): path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2(