From 0bae9e34e80ae16265860ba36f1ae1ae59681faa Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Sat, 31 Aug 2019 23:40:37 +1000
Subject: [PATCH 01/26] restructuring test data directory

---
 .../tests/data/{ => empiric}/empiric.xlsx     | Bin
 .../data/{ => empiric}/empiric_expected.csv   |   0
 .../data/{enrich1.tsv => enrich/enrich.tsv}   |   0
 .../data/{enrich1.xlsx => enrich/enrich.xlsx} | Bin
 .../enrich_1based.tsv}                        |   0
 .../enrich_expected.csv}                      |   0
 .../enrich_expected_offset.csv}               |   0
 .../enrich_no_seqid.tsv}                      |   0
 .../tests/data/{ => enrich2}/dummy.h5         |   0
 .../tests/data/{ => enrich2}/enrich2.tsv      |   0
 .../tests/data/{ => fasta}/bad_format.fasta   |   0
 .../data/{ => fasta}/invalid_chars.fasta      |   0
 mavedbconvert/tests/data/{ => fasta}/lower.fa |   0
 .../tests/data/{ => fasta}/spaces.fasta       |   0
 .../tests/data/{ => fasta}/two.fasta          |   0
 mavedbconvert/tests/data/{ => fasta}/wt.fasta |   0
 .../tests/data/{ => fasta}/wt.fasta.bz2       | Bin
 .../tests/data/{ => fasta}/wt.fasta.gz        | Bin
 mavedbconvert/tests/test_base.py              |  19 +++++-----
 mavedbconvert/tests/test_enrich.py            |  28 +++++++--------
 mavedbconvert/tests/test_fasta.py             |   2 +-
 mavedbconvert/tests/test_parsers.py           |  33 +++++++++---------
 22 files changed, 40 insertions(+), 42 deletions(-)
 rename mavedbconvert/tests/data/{ => empiric}/empiric.xlsx (100%)
 rename mavedbconvert/tests/data/{ => empiric}/empiric_expected.csv (100%)
 rename mavedbconvert/tests/data/{enrich1.tsv => enrich/enrich.tsv} (100%)
 rename mavedbconvert/tests/data/{enrich1.xlsx => enrich/enrich.xlsx} (100%)
 rename mavedbconvert/tests/data/{enrich1_1based.tsv => enrich/enrich_1based.tsv} (100%)
 rename mavedbconvert/tests/data/{enrich1_expected.csv => enrich/enrich_expected.csv} (100%)
 rename mavedbconvert/tests/data/{enrich1_expected_offset.csv => enrich/enrich_expected_offset.csv} (100%)
 rename mavedbconvert/tests/data/{enrich1_no_seqid.tsv => enrich/enrich_no_seqid.tsv} (100%)
 rename mavedbconvert/tests/data/{ => enrich2}/dummy.h5 (100%)
 rename mavedbconvert/tests/data/{ => enrich2}/enrich2.tsv (100%)
 rename mavedbconvert/tests/data/{ => fasta}/bad_format.fasta (100%)
 rename mavedbconvert/tests/data/{ => fasta}/invalid_chars.fasta (100%)
 rename mavedbconvert/tests/data/{ => fasta}/lower.fa (100%)
 rename mavedbconvert/tests/data/{ => fasta}/spaces.fasta (100%)
 rename mavedbconvert/tests/data/{ => fasta}/two.fasta (100%)
 rename mavedbconvert/tests/data/{ => fasta}/wt.fasta (100%)
 rename mavedbconvert/tests/data/{ => fasta}/wt.fasta.bz2 (100%)
 rename mavedbconvert/tests/data/{ => fasta}/wt.fasta.gz (100%)

diff --git a/mavedbconvert/tests/data/empiric.xlsx b/mavedbconvert/tests/data/empiric/empiric.xlsx
similarity index 100%
rename from mavedbconvert/tests/data/empiric.xlsx
rename to mavedbconvert/tests/data/empiric/empiric.xlsx
diff --git a/mavedbconvert/tests/data/empiric_expected.csv b/mavedbconvert/tests/data/empiric/empiric_expected.csv
similarity index 100%
rename from mavedbconvert/tests/data/empiric_expected.csv
rename to mavedbconvert/tests/data/empiric/empiric_expected.csv
diff --git a/mavedbconvert/tests/data/enrich1.tsv b/mavedbconvert/tests/data/enrich/enrich.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1.tsv
rename to mavedbconvert/tests/data/enrich/enrich.tsv
diff --git a/mavedbconvert/tests/data/enrich1.xlsx b/mavedbconvert/tests/data/enrich/enrich.xlsx
similarity index 100%
rename from mavedbconvert/tests/data/enrich1.xlsx
rename to mavedbconvert/tests/data/enrich/enrich.xlsx
diff --git a/mavedbconvert/tests/data/enrich1_1based.tsv b/mavedbconvert/tests/data/enrich/enrich_1based.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_1based.tsv
rename to mavedbconvert/tests/data/enrich/enrich_1based.tsv
diff --git a/mavedbconvert/tests/data/enrich1_expected.csv b/mavedbconvert/tests/data/enrich/enrich_expected.csv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_expected.csv
rename to mavedbconvert/tests/data/enrich/enrich_expected.csv
diff --git a/mavedbconvert/tests/data/enrich1_expected_offset.csv b/mavedbconvert/tests/data/enrich/enrich_expected_offset.csv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_expected_offset.csv
rename to mavedbconvert/tests/data/enrich/enrich_expected_offset.csv
diff --git a/mavedbconvert/tests/data/enrich1_no_seqid.tsv b/mavedbconvert/tests/data/enrich/enrich_no_seqid.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_no_seqid.tsv
rename to mavedbconvert/tests/data/enrich/enrich_no_seqid.tsv
diff --git a/mavedbconvert/tests/data/dummy.h5 b/mavedbconvert/tests/data/enrich2/dummy.h5
similarity index 100%
rename from mavedbconvert/tests/data/dummy.h5
rename to mavedbconvert/tests/data/enrich2/dummy.h5
diff --git a/mavedbconvert/tests/data/enrich2.tsv b/mavedbconvert/tests/data/enrich2/enrich2.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich2.tsv
rename to mavedbconvert/tests/data/enrich2/enrich2.tsv
diff --git a/mavedbconvert/tests/data/bad_format.fasta b/mavedbconvert/tests/data/fasta/bad_format.fasta
similarity index 100%
rename from mavedbconvert/tests/data/bad_format.fasta
rename to mavedbconvert/tests/data/fasta/bad_format.fasta
diff --git a/mavedbconvert/tests/data/invalid_chars.fasta b/mavedbconvert/tests/data/fasta/invalid_chars.fasta
similarity index 100%
rename from mavedbconvert/tests/data/invalid_chars.fasta
rename to mavedbconvert/tests/data/fasta/invalid_chars.fasta
diff --git a/mavedbconvert/tests/data/lower.fa b/mavedbconvert/tests/data/fasta/lower.fa
similarity index 100%
rename from mavedbconvert/tests/data/lower.fa
rename to mavedbconvert/tests/data/fasta/lower.fa
diff --git a/mavedbconvert/tests/data/spaces.fasta b/mavedbconvert/tests/data/fasta/spaces.fasta
similarity index 100%
rename from mavedbconvert/tests/data/spaces.fasta
rename to mavedbconvert/tests/data/fasta/spaces.fasta
diff --git a/mavedbconvert/tests/data/two.fasta b/mavedbconvert/tests/data/fasta/two.fasta
similarity index 100%
rename from mavedbconvert/tests/data/two.fasta
rename to mavedbconvert/tests/data/fasta/two.fasta
diff --git a/mavedbconvert/tests/data/wt.fasta b/mavedbconvert/tests/data/fasta/wt.fasta
similarity index 100%
rename from mavedbconvert/tests/data/wt.fasta
rename to mavedbconvert/tests/data/fasta/wt.fasta
diff --git a/mavedbconvert/tests/data/wt.fasta.bz2 b/mavedbconvert/tests/data/fasta/wt.fasta.bz2
similarity index 100%
rename from mavedbconvert/tests/data/wt.fasta.bz2
rename to mavedbconvert/tests/data/fasta/wt.fasta.bz2
diff --git a/mavedbconvert/tests/data/wt.fasta.gz b/mavedbconvert/tests/data/fasta/wt.fasta.gz
similarity index 100%
rename from mavedbconvert/tests/data/wt.fasta.gz
rename to mavedbconvert/tests/data/fasta/wt.fasta.gz
diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index 352482b..3f06812 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -6,8 +6,7 @@
 from . import ProgramTestCase
 
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class TestBaseProgram(ProgramTestCase):
@@ -18,9 +17,9 @@ class TestBaseProgram(ProgramTestCase):
 
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(DATA_DIR, "enrich1.tsv")
-        self.src_with_spaces = os.path.join(DATA_DIR, "enrich   1.tsv")
-        self.h5_src = os.path.join(DATA_DIR, "dummy.h5")
+        self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(TESTS_DIR, "data", "enrich", "e    nrich.tsv")
+        self.h5_src = os.path.join(TESTS_DIR, "data", "enrich2", "dummy.h5")
 
     def tearDown(self):
         for path in self.bin:
@@ -64,7 +63,7 @@ def test_checks_write_permission(self, patch):
 
     def test_splits_src_into_filename_and_ext(self):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.src_filename, "enrich1")
+        self.assertEqual(p.src_filename, "enrich")
         self.assertEqual(p.ext, ".tsv")
 
     def test_lower_cases_ext(self):
@@ -77,11 +76,11 @@ def test_value_error_coding_offset_not_multiple_of_three(self):
 
     def test_dst_filename_replaces_whitespace_with_underscores(self):
         p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA")
-        self.assertEqual(p.dst_filename, "mavedb_enrich_1.csv")
+        self.assertEqual(p.dst_filename, "mavedb_e_nrich.csv")
 
     def test_output_file_joins_dst_and_dst_filename(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
-        self.assertEqual(p.output_file, os.path.join(DATA_DIR, "mavedb_enrich1.csv"))
+        self.assertEqual(p.output_file, os.path.join(TESTS_DIR, "data", "enrich", "mavedb_enrich.csv"))
 
     def test_output_directory_expands_user_and_norms_path(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
@@ -126,7 +125,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self):
 class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True)
 
     def test_error_not_a_dna_sub(self):
@@ -177,7 +176,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self):
 class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True)
 
     def test_error_not_a_protein_sub(self):
diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py
index b30e3b0..92800f3 100644
--- a/mavedbconvert/tests/test_enrich.py
+++ b/mavedbconvert/tests/test_enrich.py
@@ -35,7 +35,7 @@ def test_ok_is_mult_of_three(self):
 class TestEnrichParseRow(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.path = os.path.join(DATA_DIR, "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -128,7 +128,7 @@ def test_applies_offset_divided_by_3(self):
 class TestEnrichParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.path = os.path.join(DATA_DIR, "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -186,13 +186,13 @@ def test_removes_non_numeric(self):
 
 class TestEnrichLoadInput(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
-        self.path_1based = os.path.join(DATA_DIR, "enrich1_1based.tsv")
+        self.path = os.path.join(DATA_DIR, "enrich.tsv")
+        self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv")
         self.path_csv = os.path.join(DATA_DIR, "enrich1.csv")
-        self.expected = os.path.join(DATA_DIR, "enrich1_expected.csv")
-        self.expected_offset = os.path.join(DATA_DIR, "enrich1_expected_offset.csv")
-        self.excel_path = os.path.join(DATA_DIR, "enrich1.xlsx")
-        self.no_seq_id = os.path.join(DATA_DIR, "enrich1_no_seqid.tsv")
+        self.expected = os.path.join(DATA_DIR, "enrich_expected.csv")
+        self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv")
+        self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx")
+        self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv")
         self.tmp_path = os.path.join(DATA_DIR, "tmp.xlsx")
 
         self.bin = [
@@ -279,13 +279,13 @@ def test_table_and_excel_load_same_dataframe(self):
 
 class TestEnrichIntegration(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
-        self.path_1based = os.path.join(DATA_DIR, "enrich1_1based.tsv")
-        self.excel_path = os.path.join(DATA_DIR, "enrich1.xlsx")
-        self.no_seq_id = os.path.join(DATA_DIR, "enrich1_no_seqid.tsv")
+        self.path = os.path.join(DATA_DIR, "enrich.tsv")
+        self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv")
+        self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx")
+        self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv")
 
-        self.expected = os.path.join(DATA_DIR, "enrich1_expected.csv")
-        self.expected_offset = os.path.join(DATA_DIR, "enrich1_expected_offset.csv")
+        self.expected = os.path.join(DATA_DIR, "enrich_expected.csv")
+        self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv")
 
         self.bin = [
             os.path.join(DATA_DIR, "mavedb_enrich1.csv"),
diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py
index 923e6ad..4aaba22 100644
--- a/mavedbconvert/tests/test_fasta.py
+++ b/mavedbconvert/tests/test_fasta.py
@@ -37,7 +37,7 @@ def test_infers_none(self):
 
     def test_ioerror_invalid_ext(self):
         with self.assertRaises(IOError):
-            split_fasta_path(os.path.join(DATA_DIR, "enrich1.tsv"))
+            split_fasta_path(os.path.join(DATA_DIR, "enrich.tsv"))
 
 
 class TestFastaReader(TestCase):
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index db62ea9..3a97891 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -7,8 +7,7 @@
 from . import ProgramTestCase
 
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class TestParseBoolean(TestCase):
@@ -49,14 +48,14 @@ def test_returns_string_stripped_of_ws(self):
 class TestParseSrc(TestCase):
     @mock.patch(
         "mavedbconvert.parsers.parse_string",
-        return_value=os.path.join(DATA_DIR, "enrich2.tsv"),
+        return_value=os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv"),
     )
     def test_calls_parse_string(self, patch):
-        parsers.parse_src(os.path.join(DATA_DIR, "enrich2.tsv"))
+        parsers.parse_src(os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv"))
         patch.assert_called()
 
     def test_ok_file_exists(self):
-        path = os.path.join(DATA_DIR, "enrich2.tsv")
+        path = os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv")
         self.assertEqual(path, parsers.parse_src(path))
 
     def test_error_no_value(self):
@@ -65,23 +64,23 @@ def test_error_no_value(self):
                 parsers.parse_src(v)
 
     def test_error_file_not_found(self):
-        path = os.path.join(DATA_DIR, "missing_file.tsv")
+        path = os.path.join(TESTS_DIR, "data", "enrich2", "missing_file.tsv")
         with self.assertRaises(FileNotFoundError):
             parsers.parse_src(path)
 
     def test_error_file_is_a_dir(self):
         with self.assertRaises(IsADirectoryError):
-            parsers.parse_src(DATA_DIR)
+            parsers.parse_src(os.path.join(TESTS_DIR, "data"))
 
 
 class TestParseDst(ProgramTestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value=DATA_DIR)
+    @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TESTS_DIR, "data"))
     def test_calls_parse_string(self, patch):
-        parsers.parse_dst(DATA_DIR)
+        parsers.parse_dst(os.path.join(TESTS_DIR, "data"))
         patch.assert_called()
 
     def test_ok_dst_exists(self):
-        path = os.path.join(DATA_DIR)
+        path = os.path.join(os.path.join(TESTS_DIR, "data"))
         self.assertEqual(path, parsers.parse_dst(path))
 
     def test_returns_none_no_value(self):
@@ -89,11 +88,11 @@ def test_returns_none_no_value(self):
             self.assertIsNone(parsers.parse_dst(v))
 
     def test_dst_path_is_normalised(self):
-        path = BASE_DIR + "//data"
-        self.assertEqual(parsers.parse_dst(path), DATA_DIR)
+        path = TESTS_DIR + "//data"
+        self.assertEqual(parsers.parse_dst(path), os.path.join(TESTS_DIR, "data"))
 
     def test_makes_dst_directory_tree(self):
-        path = os.path.join(DATA_DIR, "subdir")
+        path = os.path.join(TESTS_DIR, "data", "subdir")
         parsers.parse_dst(path)
         self.assertTrue(os.path.isdir(path))
         self.bin.append(path)
@@ -125,7 +124,7 @@ def test_sets_correct_program_from_dict(self):
 
 class TestParseWildTypeSequence(TestCase):
     def test_can_read_from_fasta(self):
-        path = os.path.join(DATA_DIR, "lower.fa")
+        path = os.path.join(TESTS_DIR, "fasta", "lower.fa")
         wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True)
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
@@ -258,13 +257,13 @@ def mock_args(
         if program is None:
             program = "enrich2"
         if src is None:
-            src = os.path.join(DATA_DIR, "enrich2.tsv")
+            src = os.path.join(TESTS_DIR, "enrich2", "enrich2.tsv")
         return {
             "enrich": True if program == "enrich" else False,
             "enrich2": True if program == "enrich2" else False,
             "empiric": True if program == "empiric" else False,
-            "<src>": os.path.join(DATA_DIR, src),
-            "--dst": os.path.join(DATA_DIR, dst) if dst else dst,
+            "<src>": os.path.join(TESTS_DIR, "data", program, src),
+            "--dst": os.path.join(TESTS_DIR, "data", program, dst) if dst else dst,
             "--score-column": score_column,
             "--hgvs-column": hgvs_column,
             "--skip-header": skip_header,

From b933cf8a1af5eb7f8f996103f2ff6bb58372fea9 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Sun, 1 Sep 2019 19:29:57 +1000
Subject: [PATCH 02/26] commented out the nonfunctional install_requires

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 98aca6d..d4a98cb 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,7 @@
         "A command line tool for converting alternate "
         "file formats into a MaveDB compliant format."
     ),
-    install_requires=open("requirements/install.txt", "rt").read().split("\n"),
+    # TODO: this fails to recognize hgvs-patterns from github
+    # install_requires=open("requirements/install.txt", "rt").read().split("\n"),
     entry_points={"console_scripts": ["mavedb-convert=mavedbconvert.main:main"]},
 )

From 84d2cabdff3cbba33c05b5864f79c0fab4b43ab9 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Sun, 1 Sep 2019 20:31:51 +1000
Subject: [PATCH 03/26] updated test file paths to the new data directory structure

---
 mavedbconvert/tests/test_base.py    | 24 +++++-----
 mavedbconvert/tests/test_empiric.py | 23 +++++-----
 mavedbconvert/tests/test_enrich.py  | 47 ++++++++++----------
 mavedbconvert/tests/test_enrich2.py | 69 ++++++++++++++---------------
 mavedbconvert/tests/test_fasta.py   | 34 +++++++-------
 mavedbconvert/tests/test_parsers.py | 32 ++++++-------
 6 files changed, 113 insertions(+), 116 deletions(-)

diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index 3f06812..60b592d 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -6,7 +6,7 @@
 from . import ProgramTestCase
 
 
-TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
 class TestBaseProgram(ProgramTestCase):
@@ -17,9 +17,9 @@ class TestBaseProgram(ProgramTestCase):
 
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv")
-        self.src_with_spaces = os.path.join(TESTS_DIR, "data", "enrich", "e    nrich.tsv")
-        self.h5_src = os.path.join(TESTS_DIR, "data", "enrich2", "dummy.h5")
+        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich   .tsv")
+        self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
 
     def tearDown(self):
         for path in self.bin:
@@ -30,7 +30,7 @@ def tearDown(self):
 
     def test_sets_directory_as_input_directory_if_dst_is_none(self):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.dst, DATA_DIR)
+        self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich"))
 
     def test_error_file_not_readable(self):
         with self.assertRaises(IOError):
@@ -42,11 +42,11 @@ def test_expands_user_and_norms_dst(self):
 
     def test_dir_with_input_fname_appended_when_h5_and_dst_is_none(self):
         p = base.BaseProgram(src=self.h5_src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.dst, os.path.join(DATA_DIR, "dummy"))
-        self.bin.append(os.path.join(DATA_DIR, "dummy"))
+        self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich2", "dummy"))
+        self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "dummy"))
 
     def test_creates_directory_tree_if_it_doesnt_exist(self):
-        output = os.path.join(DATA_DIR, "outer_dir/inner_dir/")
+        output = os.path.join(TEST_DATA_DIR, "enrich2", "outer_dir", "inner_dir")
         base.BaseProgram(src=self.h5_src, dst=output, wt_sequence="AAA")
         self.assertTrue(os.path.isdir(output))
         self.bin.append(output)
@@ -76,11 +76,11 @@ def test_value_error_coding_offset_not_multiple_of_three(self):
 
     def test_dst_filename_replaces_whitespace_with_underscores(self):
         p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA")
-        self.assertEqual(p.dst_filename, "mavedb_e_nrich.csv")
+        self.assertEqual(p.dst_filename, "mavedb_enrich_.csv")
 
     def test_output_file_joins_dst_and_dst_filename(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
-        self.assertEqual(p.output_file, os.path.join(TESTS_DIR, "data", "enrich", "mavedb_enrich.csv"))
+        self.assertEqual(p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"))
 
     def test_output_directory_expands_user_and_norms_path(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
@@ -125,7 +125,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self):
 class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv")
+        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True)
 
     def test_error_not_a_dna_sub(self):
@@ -176,7 +176,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self):
 class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TESTS_DIR, "data", "enrich", "enrich.tsv")
+        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True)
 
     def test_error_not_a_protein_sub(self):
diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index ec8b9b7..2e95225 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -10,14 +10,13 @@
 from . import ProgramTestCase
 
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
 class TestEmpiricInit(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
 
     def test_error_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -70,7 +69,7 @@ def test_adds_codon_pos_multiplied_by_3_to_position(self):
 class TestEmpiric(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(DATA_DIR, "empiric.xlsx")
+        self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input, wt_sequence="AAA", one_based=False
         )
@@ -180,7 +179,7 @@ def test_correctly_infers_hgvs_nt_positions_when_one_based(self):
 class TestEmpiricValidateColumns(TestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(DATA_DIR, "empiric.xlsx")
+        self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input, wt_sequence="AAA", one_based=False
         )
@@ -219,7 +218,7 @@ def test_sets_aa_column(self):
 class TestEmpiricParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(DATA_DIR, "empiric.xlsx")
+        self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input,
             wt_sequence="AAA",
@@ -356,10 +355,10 @@ def test_keeps_int_type_as_int(self):
 class TestEmpiricLoadInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "empiric.xlsx")
-        self.tmp_path = os.path.join(DATA_DIR, "tmp.csv")
-        self.tmp_path_tsv = os.path.join(DATA_DIR, "tmp.tsv")
-        self.tmp_excel_path = os.path.join(DATA_DIR, "tmp.xlsx")
+        self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
+        self.tmp_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.csv")
+        self.tmp_path_tsv = os.path.join(TEST_DATA_DIR, "empiric", "tmp.tsv")
+        self.tmp_excel_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.xlsx")
         self.bin.append(self.tmp_path)
         self.bin.append(self.tmp_path_tsv)
 
@@ -475,8 +474,8 @@ def test_applies_offset_to_position_column(self):
 class TestEmpiricConvert(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "empiric.xlsx")
-        self.expected = os.path.join(DATA_DIR, "empiric_expected.csv")
+        self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
+        self.expected = os.path.join(TEST_DATA_DIR, "empiric", "empiric_expected.csv")
         self.empiric = empiric.Empiric(
             src=self.path,
             wt_sequence="TTTTCTTATTGT",
diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py
index 92800f3..a3f9867 100644
--- a/mavedbconvert/tests/test_enrich.py
+++ b/mavedbconvert/tests/test_enrich.py
@@ -9,8 +9,7 @@
 from . import ProgramTestCase
 
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
 WT = (
@@ -22,7 +21,7 @@
 class TestEnrichInit(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
 
     def test_error_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -35,7 +34,7 @@ def test_ok_is_mult_of_three(self):
 class TestEnrichParseRow(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -128,7 +127,7 @@ def test_applies_offset_divided_by_3(self):
 class TestEnrichParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -186,19 +185,19 @@ def test_removes_non_numeric(self):
 
 class TestEnrichLoadInput(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "enrich.tsv")
-        self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv")
-        self.path_csv = os.path.join(DATA_DIR, "enrich1.csv")
-        self.expected = os.path.join(DATA_DIR, "enrich_expected.csv")
-        self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv")
-        self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx")
-        self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv")
-        self.tmp_path = os.path.join(DATA_DIR, "tmp.xlsx")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv")
+        self.path_csv = os.path.join(TEST_DATA_DIR, "enrich", "enrich1.csv")
+        self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv")
+        self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv")
+        self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx")
+        self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv")
+        self.tmp_path = os.path.join(TEST_DATA_DIR, "enrich", "tmp.xlsx")
 
         self.bin = [
-            os.path.join(DATA_DIR, "mavedb_enrich1.csv"),
-            os.path.join(DATA_DIR, "mavedb_enrich1_1based.csv"),
-            os.path.join(DATA_DIR, self.path_csv),
+            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1.csv"),
+            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1_1based.csv"),
+            os.path.join(TEST_DATA_DIR, "enrich", self.path_csv),
         ]
 
     def test_error_seq_id_not_in_columns(self):
@@ -279,17 +278,17 @@ def test_table_and_excel_load_same_dataframe(self):
 
 class TestEnrichIntegration(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "enrich.tsv")
-        self.path_1based = os.path.join(DATA_DIR, "enrich_1based.tsv")
-        self.excel_path = os.path.join(DATA_DIR, "enrich.xlsx")
-        self.no_seq_id = os.path.join(DATA_DIR, "enrich_no_seqid.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv")
+        self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx")
+        self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv")
 
-        self.expected = os.path.join(DATA_DIR, "enrich_expected.csv")
-        self.expected_offset = os.path.join(DATA_DIR, "enrich_expected_offset.csv")
+        self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv")
+        self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv")
 
         self.bin = [
-            os.path.join(DATA_DIR, "mavedb_enrich1.csv"),
-            os.path.join(DATA_DIR, "mavedb_enrich1_1based.csv"),
+            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"),
+            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich_1based.csv"),
         ]
 
     def test_saves_to_input_dst_by_default(self):
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index 882f07d..7aa59ce 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -15,8 +15,7 @@
 from . import ProgramTestCase
 
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
 # Utility tests
@@ -28,7 +27,7 @@ class TestGetCountDataFrames(TestCase):
     """
 
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "test_store.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
         index = pd.MultiIndex.from_product(
             [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]],
@@ -104,7 +103,7 @@ class TestReplicateScoreDataFrames(TestCase):
     """
 
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "test_store.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
 
         shared_index = pd.MultiIndex.from_product(
@@ -277,7 +276,7 @@ def test_scores_and_counts_columns_separated_after_join(self):
 class TestEnrich2ConvertH5Filepath(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -294,12 +293,12 @@ def test_concats_basename_elem_type_then_cnd_and_csv_ext(self):
 class TestEnrich2ConvertH5Df(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
-        self.bin.append(os.path.join(DATA_DIR, "enrich2"))
+        self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2"))
 
     def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self):
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
 
@@ -332,7 +331,7 @@ def test_sets_index_as_input_index(self):
         assert_index_equal(result.index, df.index)
 
     def test_opens_invalid_rows_file_for_invalid_rows(self):
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1T>G (p.Lys1Val)"])
         with self.assertRaises(ValueError):
@@ -345,7 +344,7 @@ def test_opens_invalid_rows_file_for_invalid_rows(self):
         self.bin.append(fpath)
 
     def test_invalid_rows_file_contains_error_description(self):
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
 
@@ -368,7 +367,7 @@ class TestEnrich2ParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
         self.wt = "GCTGAT"
-        self.path = os.path.join(DATA_DIR, "test_store.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
         self.enrich2 = enrich2.Enrich2(
             self.path, wt_sequence=self.wt, offset=0, one_based=True
@@ -387,42 +386,42 @@ def setUp(self):
         self.files = [
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_synonymous_counts_c1.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c1.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_synonymous_counts_c2.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c2.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_synonymous_scores_c1.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c1.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_synonymous_scores_c2.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c2.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_variants_counts_c1.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c1.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_variants_counts_c2.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c2.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_variants_scores_c1.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c1.csv"
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    DATA_DIR, "test_store", "mavedb_test_store_variants_scores_c2.csv"
+                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c2.csv"
                 )
             ),
         ]
@@ -531,7 +530,7 @@ def parse_rows(self, variants, element=None):
 
     @mock.patch.object(pd.DataFrame, "to_csv", return_value=None)
     def test_saves_to_output_directory(self, patch):
-        output = os.path.join(DATA_DIR, "new")
+        output = os.path.join(TEST_DATA_DIR, "enrich2", "new")
         p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0)
         p.parse_input(p.load_input_file())
         for call_args in patch.call_args_list:
@@ -542,7 +541,7 @@ def test_saves_to_output_directory(self, patch):
     def test_saves_to_file_location_if_no_dst_supplied(self, patch):
         p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0)
         p.parse_input(self.enrich2.load_input_file())
-        expected_base_path = os.path.normpath(os.path.join(DATA_DIR, "test_store"))
+        expected_base_path = os.path.normpath(os.path.join(TEST_DATA_DIR, "enrich2", "test_store"))
         for call_args in patch.call_args_list:
             self.assertIn(expected_base_path, call_args[0][0])
 
@@ -832,13 +831,13 @@ def test_drops_null_rows(self):
 
 class TestEnrich2LoadInput(TestCase):
     def test_error_file_not_h5_or_tsv(self):
-        path = os.path.join(DATA_DIR, "empiric.xlsx")
+        path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
         p = enrich2.Enrich2(path, wt_sequence="AAA")
         with self.assertRaises(TypeError):
             p.load_input_file()
 
     def test_scores_tsv_missing_score_column(self):
-        path = os.path.join(DATA_DIR, "enrich2.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(
             path,
             wt_sequence="AAA",
@@ -850,7 +849,7 @@ def test_scores_tsv_missing_score_column(self):
             p.load_input_file()
 
     def test_input_type_counts_doesnt_raise_keyerror(self):
-        path = os.path.join(DATA_DIR, "enrich2.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(
             path,
             wt_sequence="AAA",
@@ -860,7 +859,7 @@ def test_input_type_counts_doesnt_raise_keyerror(self):
         p.load_input_file()
 
     def test_scores_tsv_missing_hgvs_column(self):
-        path = os.path.join(DATA_DIR, "enrich2.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(path, wt_sequence="AAA", hgvs_column="hgvs")
         with self.assertRaises(KeyError):
             p.load_input_file()
@@ -869,7 +868,7 @@ def test_scores_tsv_missing_hgvs_column(self):
 class TestEnrich2ParseRow(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "dummy.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="ACT")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -941,7 +940,7 @@ def test_uses_three_qmarks(self):
 class TestProteinHGVSParsing(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "dummy.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -1001,7 +1000,7 @@ def test_maintains_ordering(self):
 class TestNucleotideHGVSParing(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "dummy.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -1055,7 +1054,7 @@ def test_strips_ws(self):
 class TestEnrich2MixedHGVSParsing(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "dummy.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         self.wt = "ACT"
         self.wt_aa = constants.AA_CODES[constants.CODON_TABLE[self.wt]]
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence=self.wt)
@@ -1149,7 +1148,7 @@ def test_protein_set_as_nt_when_table_is_not_syn_and_variant_is_special(self):
 class TestInferSilentAASub(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "dummy.h5")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA", offset=0)
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -1228,14 +1227,14 @@ def test_applies_offset_to_protein_variant_modulo_3(self):
     @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence")
     def test_validates_against_wt_sequence(self, patch):
         variant = "c.-9C>T"
-        path = os.path.join(DATA_DIR, "dummy.h5")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACT")
         enrich2.apply_offset(variant, offset=-10, enrich2=p)  # pass
         patch.assert_called_with(*("c.1C>T",))
 
     def test_value_error_base_mismatch_after_offset_applied(self):
         variant = "c.-9G>T"
-        path = os.path.join(DATA_DIR, "dummy.h5")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACT")
         with self.assertRaises(ValueError):
             enrich2.apply_offset(variant, offset=-10, enrich2=p)
@@ -1243,14 +1242,14 @@ def test_value_error_base_mismatch_after_offset_applied(self):
     @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence")
     def test_validates_against_pro_sequence(self, patch):
         variant = "p.Gly3Leu"
-        path = os.path.join(DATA_DIR, "dummy.h5")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACG")
         enrich2.apply_offset(variant, offset=6, enrich2=p)  # pass
         patch.assert_called_with(*("p.Gly1Leu",))
 
     def test_value_error_pro_mismatch_after_offset_applied(self):
         variant = "p.Gly3Leu"
-        path = os.path.join(DATA_DIR, "dummy.h5")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACG")
         with self.assertRaises(ValueError):
             enrich2.apply_offset(variant, offset=6, enrich2=p)
@@ -1259,7 +1258,7 @@ def test_value_error_pro_mismatch_after_offset_applied(self):
 class TestEnrich2Init(TestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
 
     def test_error_is_coding_and_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py
index 4aaba22..970120a 100644
--- a/mavedbconvert/tests/test_fasta.py
+++ b/mavedbconvert/tests/test_fasta.py
@@ -3,46 +3,46 @@
 
 from ..fasta import parse_fasta, split_fasta_path
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
 
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
-class TestSplitFasta(TestCase):
+
+class TestFastaPath(TestCase):
     def test_infers_bzip(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "wt.fasta.bz2")
+            os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, "bz2")
 
     def test_infers_gzip(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "wt.fasta.gz")
+            os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, "gz")
 
-    def test_infers_none(self):
+    def test_infers_uncompressed(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "wt.fasta")
+            os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, None)
 
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "lower.fa")
+            os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")
         )
         self.assertEqual(ext, ".fa")
         self.assertEqual(compression, None)
 
     def test_ioerror_invalid_ext(self):
         with self.assertRaises(IOError):
-            split_fasta_path(os.path.join(DATA_DIR, "enrich.tsv"))
+            split_fasta_path(os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv"))
 
 
 class TestFastaReader(TestCase):
     def test_can_read_first_sequence(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta"))
+        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -52,7 +52,7 @@ def test_can_read_first_sequence(self):
         self.assertEqual(sequence, expected)
 
     def test_converts_to_uppercase(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "lower.fa"))
+        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "lower.fa"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -63,11 +63,11 @@ def test_converts_to_uppercase(self):
 
     def test_error_more_than_one_sequence(self):
         with self.assertRaises(ValueError):
-            parse_fasta(os.path.join(DATA_DIR, "two.fasta"))
+            parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "two.fasta"))
 
     def test_error_invalid_chars_in_sequence(self):
         with self.assertRaises(ValueError):
-            parse_fasta(os.path.join(DATA_DIR, "invalid_chars.fasta"))
+            parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "invalid_chars.fasta"))
 
     def test_ignores_blank_lines(self):
         expected = (
@@ -76,15 +76,15 @@ def test_ignores_blank_lines(self):
             "GAGGCTACGATCGTGCCTTGTGGCAAGTCTTCGCTCGCAC"
             "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA"
         )
-        seq = parse_fasta(os.path.join(DATA_DIR, "spaces.fasta"))
+        seq = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "spaces.fasta"))
         self.assertEqual(seq, expected)
 
     def test_error_missing_gt_on_first_line(self):
         with self.assertRaises(IOError):
-            parse_fasta(os.path.join(DATA_DIR, "bad_format.fasta"))
+            parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "bad_format.fasta"))
 
     def test_can_open_with_gzip(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta.gz"))
+        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -94,7 +94,7 @@ def test_can_open_with_gzip(self):
         self.assertEqual(sequence, expected)
 
     def test_can_open_with_bzip(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta.bz2"))
+        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index 3a97891..4a8b5c1 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -7,7 +7,7 @@
 from . import ProgramTestCase
 
 
-TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
 class TestParseBoolean(TestCase):
@@ -48,14 +48,14 @@ def test_returns_string_stripped_of_ws(self):
 class TestParseSrc(TestCase):
     @mock.patch(
         "mavedbconvert.parsers.parse_string",
-        return_value=os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv"),
+        return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"),
     )
     def test_calls_parse_string(self, patch):
-        parsers.parse_src(os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv"))
+        parsers.parse_src(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"))
         patch.assert_called()
 
     def test_ok_file_exists(self):
-        path = os.path.join(TESTS_DIR, "data", "enrich2", "enrich2.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         self.assertEqual(path, parsers.parse_src(path))
 
     def test_error_no_value(self):
@@ -64,23 +64,23 @@ def test_error_no_value(self):
                 parsers.parse_src(v)
 
     def test_error_file_not_found(self):
-        path = os.path.join(TESTS_DIR, "data", "enrich2", "missing_file.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "missing_file.tsv")
         with self.assertRaises(FileNotFoundError):
             parsers.parse_src(path)
 
     def test_error_file_is_a_dir(self):
         with self.assertRaises(IsADirectoryError):
-            parsers.parse_src(os.path.join(TESTS_DIR, "data"))
+            parsers.parse_src(os.path.join(TEST_DATA_DIR))
 
 
 class TestParseDst(ProgramTestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TESTS_DIR, "data"))
+    @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR))
     def test_calls_parse_string(self, patch):
-        parsers.parse_dst(os.path.join(TESTS_DIR, "data"))
+        parsers.parse_dst(os.path.join(TEST_DATA_DIR))
         patch.assert_called()
 
     def test_ok_dst_exists(self):
-        path = os.path.join(os.path.join(TESTS_DIR, "data"))
+        path = os.path.join(os.path.join(TEST_DATA_DIR))
         self.assertEqual(path, parsers.parse_dst(path))
 
     def test_returns_none_no_value(self):
@@ -88,11 +88,11 @@ def test_returns_none_no_value(self):
             self.assertIsNone(parsers.parse_dst(v))
 
     def test_dst_path_is_normalised(self):
-        path = TESTS_DIR + "//data"
-        self.assertEqual(parsers.parse_dst(path), os.path.join(TESTS_DIR, "data"))
+        path = TEST_DATA_DIR + "//fasta"
+        self.assertEqual(parsers.parse_dst(path), os.path.join(TEST_DATA_DIR, "fasta"))
 
     def test_makes_dst_directory_tree(self):
-        path = os.path.join(TESTS_DIR, "data", "subdir")
+        path = os.path.join(TEST_DATA_DIR, "subdir")
         parsers.parse_dst(path)
         self.assertTrue(os.path.isdir(path))
         self.bin.append(path)
@@ -124,7 +124,7 @@ def test_sets_correct_program_from_dict(self):
 
 class TestParseWildTypeSequence(TestCase):
     def test_can_read_from_fasta(self):
-        path = os.path.join(TESTS_DIR, "fasta", "lower.fa")
+        path = os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")
         wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True)
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
@@ -257,13 +257,13 @@ def mock_args(
         if program is None:
             program = "enrich2"
         if src is None:
-            src = os.path.join(TESTS_DIR, "enrich2", "enrich2.tsv")
+            src = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         return {
             "enrich": True if program == "enrich" else False,
             "enrich2": True if program == "enrich2" else False,
             "empiric": True if program == "empiric" else False,
-            "<src>": os.path.join(TESTS_DIR, "data", program, src),
-            "--dst": os.path.join(TESTS_DIR, "data", program, dst) if dst else dst,
+            "<src>": os.path.join(TEST_DATA_DIR, program, src),
+            "--dst": os.path.join(TEST_DATA_DIR, program, dst) if dst else dst,
             "--score-column": score_column,
             "--hgvs-column": hgvs_column,
             "--skip-header": skip_header,

From 86145e7342c755e8057d65de67e161a7066cb8da Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Tue, 3 Sep 2019 10:39:22 +1000
Subject: [PATCH 04/26] split file paths and wt seq into different test cases

---
 mavedbconvert/tests/test_base.py | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index 60b592d..bf1ca5e 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -9,10 +9,10 @@
 TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
-class TestBaseProgram(ProgramTestCase):
+class TestPaths(ProgramTestCase):
     """
     Test __init__ correctly sets up read and write directories,
-    sequence information etc.
+    etc.
     """
 
     def setUp(self):
@@ -70,10 +70,6 @@ def test_lower_cases_ext(self):
         p = base.BaseProgram(src=self.src.replace("tsv", "TSV"), wt_sequence="AAA")
         self.assertEqual(p.ext, ".tsv")
 
-    def test_value_error_coding_offset_not_multiple_of_three(self):
-        with self.assertRaises(ValueError):
-            base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1)
-
     def test_dst_filename_replaces_whitespace_with_underscores(self):
         p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA")
         self.assertEqual(p.dst_filename, "mavedb_enrich_.csv")
@@ -89,6 +85,29 @@ def test_output_directory_expands_user_and_norms_path(self):
             p.output_directory, os.path.join(os.path.expanduser("~"), "user")
         )
 
+
+class TestWtSequence(ProgramTestCase):
+    """
+    Test __init__ correctly sets up sequence information etc.
+    """
+
+    def setUp(self):
+        super().setUp()
+        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich   .tsv")
+        self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+
+    def tearDown(self):
+        for path in self.bin:
+            if os.path.exists(path) and os.path.isfile(path):
+                os.remove(path)
+            elif os.path.exists(path) and os.path.isdir(path):
+                os.removedirs(path)
+
+    def test_value_error_coding_offset_not_multiple_of_three(self):
+        with self.assertRaises(ValueError):
+            base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1)
+
     # --- Test property setters --- #
     def test_wt_setter_upper_cases_wt_sequence(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")

From bd788fda71249cdf66d2bd6308b6690d68b5023d Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Tue, 3 Sep 2019 14:23:55 +1000
Subject: [PATCH 05/26] black formatting

---
 mavedbconvert/tests/test_base.py    |  4 ++-
 mavedbconvert/tests/test_enrich.py  |  8 ++++--
 mavedbconvert/tests/test_enrich2.py | 44 +++++++++++++++++++++++------
 mavedbconvert/tests/test_parsers.py |  4 ++-
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index bf1ca5e..47818a8 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -76,7 +76,9 @@ def test_dst_filename_replaces_whitespace_with_underscores(self):
 
     def test_output_file_joins_dst_and_dst_filename(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
-        self.assertEqual(p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"))
+        self.assertEqual(
+            p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv")
+        )
 
     def test_output_directory_expands_user_and_norms_path(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py
index a3f9867..c86ebe7 100644
--- a/mavedbconvert/tests/test_enrich.py
+++ b/mavedbconvert/tests/test_enrich.py
@@ -189,7 +189,9 @@ def setUp(self):
         self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv")
         self.path_csv = os.path.join(TEST_DATA_DIR, "enrich", "enrich1.csv")
         self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv")
-        self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv")
+        self.expected_offset = os.path.join(
+            TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv"
+        )
         self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx")
         self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv")
         self.tmp_path = os.path.join(TEST_DATA_DIR, "enrich", "tmp.xlsx")
@@ -284,7 +286,9 @@ def setUp(self):
         self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv")
 
         self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv")
-        self.expected_offset = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv")
+        self.expected_offset = os.path.join(
+            TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv"
+        )
 
         self.bin = [
             os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"),
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index 7aa59ce..d7f254f 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -386,42 +386,66 @@ def setUp(self):
         self.files = [
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c1.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_synonymous_counts_c1.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_counts_c2.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_synonymous_counts_c2.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c1.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_synonymous_scores_c1.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_synonymous_scores_c2.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_synonymous_scores_c2.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c1.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_variants_counts_c1.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_counts_c2.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_variants_counts_c2.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c1.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_variants_scores_c1.csv",
                 )
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR, "enrich2", "test_store", "mavedb_test_store_variants_scores_c2.csv"
+                    TEST_DATA_DIR,
+                    "enrich2",
+                    "test_store",
+                    "mavedb_test_store_variants_scores_c2.csv",
                 )
             ),
         ]
@@ -541,7 +565,9 @@ def test_saves_to_output_directory(self, patch):
     def test_saves_to_file_location_if_no_dst_supplied(self, patch):
         p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0)
         p.parse_input(self.enrich2.load_input_file())
-        expected_base_path = os.path.normpath(os.path.join(TEST_DATA_DIR, "enrich2", "test_store"))
+        expected_base_path = os.path.normpath(
+            os.path.join(TEST_DATA_DIR, "enrich2", "test_store")
+        )
         for call_args in patch.call_args_list:
             self.assertIn(expected_base_path, call_args[0][0])
 
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index 4a8b5c1..28560df 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -74,7 +74,9 @@ def test_error_file_is_a_dir(self):
 
 
 class TestParseDst(ProgramTestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR))
+    @mock.patch(
+        "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR)
+    )
     def test_calls_parse_string(self, patch):
         parsers.parse_dst(os.path.join(TEST_DATA_DIR))
         patch.assert_called()

From 0ae8315a54fb68b2da0b5dad368eb5fb3ab0e320 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Tue, 3 Sep 2019 14:52:23 +1000
Subject: [PATCH 06/26] copy the test data into a temp directory and run there
 instead

---
 mavedbconvert/tests/__init__.py     |  9 ++++
 mavedbconvert/tests/test_base.py    | 29 +++++-----
 mavedbconvert/tests/test_empiric.py | 25 ++++-----
 mavedbconvert/tests/test_enrich.py  | 49 +++++++++--------
 mavedbconvert/tests/test_enrich2.py | 84 +++++++++++++++--------------
 mavedbconvert/tests/test_fasta.py   | 35 ++++++------
 6 files changed, 117 insertions(+), 114 deletions(-)

diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py
index 37466b8..07d0191 100644
--- a/mavedbconvert/tests/__init__.py
+++ b/mavedbconvert/tests/__init__.py
@@ -1,5 +1,7 @@
 import os
+import shutil
 from unittest import TestCase
+from tempfile import TemporaryDirectory
 
 import pandas as pd
 
@@ -19,6 +21,12 @@
 
 class ProgramTestCase(TestCase):
     def setUp(self):
+        self._data_dir = TemporaryDirectory()  # store the object
+        self.data_dir = os.path.join(self._data_dir.name, "data")  # store the directory path
+        shutil.copytree(
+            src=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"),
+            dst=self.data_dir,
+        )
         self.bin = []
 
     def mock_multi_sheet_excel_file(self, path, data):
@@ -30,6 +38,7 @@ def mock_multi_sheet_excel_file(self, path, data):
         self.bin.append(path)
 
     def tearDown(self):
+        self._data_dir.cleanup()
         for path in self.bin:
             if os.path.exists(path) and os.path.isfile(path):
                 os.remove(path)
diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index 47818a8..722981f 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -6,9 +6,6 @@
 from . import ProgramTestCase
 
 
-TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
-
-
 class TestPaths(ProgramTestCase):
     """
     Test __init__ correctly sets up read and write directories,
@@ -17,9 +14,9 @@ class TestPaths(ProgramTestCase):
 
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
-        self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich   .tsv")
-        self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich   .tsv")
+        self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5")
 
     def tearDown(self):
         for path in self.bin:
@@ -30,7 +27,7 @@ def tearDown(self):
 
     def test_sets_directory_as_input_directory_if_dst_is_none(self):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich"))
+        self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich"))
 
     def test_error_file_not_readable(self):
         with self.assertRaises(IOError):
@@ -42,11 +39,11 @@ def test_expands_user_and_norms_dst(self):
 
     def test_dir_with_input_fname_appended_when_h5_and_dst_is_none(self):
         p = base.BaseProgram(src=self.h5_src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.dst, os.path.join(TEST_DATA_DIR, "enrich2", "dummy"))
-        self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "dummy"))
+        self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich2", "dummy"))
+        self.bin.append(os.path.join(self.data_dir, "enrich2", "dummy"))
 
     def test_creates_directory_tree_if_it_doesnt_exist(self):
-        output = os.path.join(TEST_DATA_DIR, "enrich2", "outer_dir", "inner_dir")
+        output = os.path.join(self.data_dir, "enrich2", "outer_dir", "inner_dir")
         base.BaseProgram(src=self.h5_src, dst=output, wt_sequence="AAA")
         self.assertTrue(os.path.isdir(output))
         self.bin.append(output)
@@ -77,7 +74,7 @@ def test_dst_filename_replaces_whitespace_with_underscores(self):
     def test_output_file_joins_dst_and_dst_filename(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
         self.assertEqual(
-            p.output_file, os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv")
+            p.output_file, os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv")
         )
 
     def test_output_directory_expands_user_and_norms_path(self):
@@ -95,9 +92,9 @@ class TestWtSequence(ProgramTestCase):
 
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
-        self.src_with_spaces = os.path.join(TEST_DATA_DIR, "enrich", "enrich   .tsv")
-        self.h5_src = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich   .tsv")
+        self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5")
 
     def tearDown(self):
         for path in self.bin:
@@ -146,7 +143,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self):
 class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True)
 
     def test_error_not_a_dna_sub(self):
@@ -197,7 +194,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self):
 class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True)
 
     def test_error_not_a_protein_sub(self):
diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index 2e95225..937a42b 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -10,13 +10,10 @@
 from . import ProgramTestCase
 
 
-TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
-
-
 class TestEmpiricInit(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
 
     def test_error_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -69,7 +66,7 @@ def test_adds_codon_pos_multiplied_by_3_to_position(self):
 class TestEmpiric(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
+        self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input, wt_sequence="AAA", one_based=False
         )
@@ -176,10 +173,10 @@ def test_correctly_infers_hgvs_nt_positions_when_one_based(self):
         self.assertEqual(hgvs_nt, "c.[1G>A;2T>A;3A>T]")
 
 
-class TestEmpiricValidateColumns(TestCase):
+class TestEmpiricValidateColumns(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
+        self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input, wt_sequence="AAA", one_based=False
         )
@@ -218,7 +215,7 @@ def test_sets_aa_column(self):
 class TestEmpiricParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
+        self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input,
             wt_sequence="AAA",
@@ -355,10 +352,10 @@ def test_keeps_int_type_as_int(self):
 class TestEmpiricLoadInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
-        self.tmp_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.csv")
-        self.tmp_path_tsv = os.path.join(TEST_DATA_DIR, "empiric", "tmp.tsv")
-        self.tmp_excel_path = os.path.join(TEST_DATA_DIR, "empiric", "tmp.xlsx")
+        self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
+        self.tmp_path = os.path.join(self.data_dir, "empiric", "tmp.csv")
+        self.tmp_path_tsv = os.path.join(self.data_dir, "empiric", "tmp.tsv")
+        self.tmp_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx")
         self.bin.append(self.tmp_path)
         self.bin.append(self.tmp_path_tsv)
 
@@ -474,8 +471,8 @@ def test_applies_offset_to_position_column(self):
 class TestEmpiricConvert(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
-        self.expected = os.path.join(TEST_DATA_DIR, "empiric", "empiric_expected.csv")
+        self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
+        self.expected = os.path.join(self.data_dir, "empiric", "empiric_expected.csv")
         self.empiric = empiric.Empiric(
             src=self.path,
             wt_sequence="TTTTCTTATTGT",
diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py
index c86ebe7..f18d1e4 100644
--- a/mavedbconvert/tests/test_enrich.py
+++ b/mavedbconvert/tests/test_enrich.py
@@ -9,9 +9,6 @@
 from . import ProgramTestCase
 
 
-TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
-
-
 WT = (
     "GACGTTCCACTGCCGGCTGGTTGGGAAATGGCTAAAACTAGTTCTGGTCAGCGTTACTTC"
     "CTGAACCACATCGACCAGACCACCACGTGGCAGGACCCGCGT"
@@ -21,7 +18,7 @@
 class TestEnrichInit(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich", "enrich2.tsv")
 
     def test_error_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -34,7 +31,7 @@ def test_ok_is_mult_of_three(self):
 class TestEnrichParseRow(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -127,7 +124,7 @@ def test_applies_offset_divided_by_3(self):
 class TestEnrichParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -185,21 +182,22 @@ def test_removes_non_numeric(self):
 
 class TestEnrichLoadInput(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
-        self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv")
-        self.path_csv = os.path.join(TEST_DATA_DIR, "enrich", "enrich1.csv")
-        self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.path_1based = os.path.join(self.data_dir, "enrich", "enrich_1based.tsv")
+        self.path_csv = os.path.join(self.data_dir, "enrich", "enrich1.csv")
+        self.expected = os.path.join(self.data_dir, "enrich", "enrich_expected.csv")
         self.expected_offset = os.path.join(
-            TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv"
+            self.data_dir, "enrich", "enrich_expected_offset.csv"
         )
-        self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx")
-        self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv")
-        self.tmp_path = os.path.join(TEST_DATA_DIR, "enrich", "tmp.xlsx")
+        self.excel_path = os.path.join(self.data_dir, "enrich", "enrich.xlsx")
+        self.no_seq_id = os.path.join(self.data_dir, "enrich", "enrich_no_seqid.tsv")
+        self.tmp_path = os.path.join(self.data_dir, "enrich", "tmp.xlsx")
 
         self.bin = [
-            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1.csv"),
-            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich1_1based.csv"),
-            os.path.join(TEST_DATA_DIR, "enrich", self.path_csv),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich1.csv"),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich1_1based.csv"),
+            os.path.join(self.data_dir, "enrich", self.path_csv),
         ]
 
     def test_error_seq_id_not_in_columns(self):
@@ -280,19 +278,20 @@ def test_table_and_excel_load_same_dataframe(self):
 
 class TestEnrichIntegration(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv")
-        self.path_1based = os.path.join(TEST_DATA_DIR, "enrich", "enrich_1based.tsv")
-        self.excel_path = os.path.join(TEST_DATA_DIR, "enrich", "enrich.xlsx")
-        self.no_seq_id = os.path.join(TEST_DATA_DIR, "enrich", "enrich_no_seqid.tsv")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.path_1based = os.path.join(self.data_dir, "enrich", "enrich_1based.tsv")
+        self.excel_path = os.path.join(self.data_dir, "enrich", "enrich.xlsx")
+        self.no_seq_id = os.path.join(self.data_dir, "enrich", "enrich_no_seqid.tsv")
 
-        self.expected = os.path.join(TEST_DATA_DIR, "enrich", "enrich_expected.csv")
+        self.expected = os.path.join(self.data_dir, "enrich", "enrich_expected.csv")
         self.expected_offset = os.path.join(
-            TEST_DATA_DIR, "enrich", "enrich_expected_offset.csv"
+            self.data_dir, "enrich", "enrich_expected_offset.csv"
         )
 
         self.bin = [
-            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich.csv"),
-            os.path.join(TEST_DATA_DIR, "enrich", "mavedb_enrich_1based.csv"),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv"),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich_1based.csv"),
         ]
 
     def test_saves_to_input_dst_by_default(self):
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index d7f254f..1b9503b 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -15,19 +15,17 @@
 from . import ProgramTestCase
 
 
-TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
-
-
 # Utility tests
 # --------------------------------------------------------------------------- #
-class TestGetCountDataFrames(TestCase):
+class TestGetCountDataFrames(ProgramTestCase):
     """
     Test method get_count_dataframes checking if conditions are correctly
     parsed.
     """
 
     def setUp(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
         index = pd.MultiIndex.from_product(
             [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]],
@@ -96,14 +94,15 @@ def test_column_names_combine_columns_using_ordering(self):
         self.assertListEqual(cnames, ["t0_rep1", "t1_rep1", "t0_rep2", "t1_rep2"])
 
 
-class TestReplicateScoreDataFrames(TestCase):
+class TestReplicateScoreDataFrames(ProgramTestCase):
     """
     Test method get_replicate_score_dataframes checking if conditions are
     correctly parsed.
     """
 
     def setUp(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
 
         shared_index = pd.MultiIndex.from_product(
@@ -125,6 +124,7 @@ def setUp(self):
         )
 
     def tearDown(self):
+        super().tearDown()
         self.store.close()
         if os.path.isfile(self.path):
             os.unlink(self.path)
@@ -276,7 +276,7 @@ def test_scores_and_counts_columns_separated_after_join(self):
 class TestEnrich2ConvertH5Filepath(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -293,12 +293,12 @@ def test_concats_basename_elem_type_then_cnd_and_csv_ext(self):
 class TestEnrich2ConvertH5Df(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
-        self.bin.append(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2"))
+        self.bin.append(os.path.join(self.data_dir, "enrich2", "enrich2"))
 
     def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
 
@@ -331,7 +331,7 @@ def test_sets_index_as_input_index(self):
         assert_index_equal(result.index, df.index)
 
     def test_opens_invalid_rows_file_for_invalid_rows(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1T>G (p.Lys1Val)"])
         with self.assertRaises(ValueError):
@@ -340,11 +340,13 @@ def test_opens_invalid_rows_file_for_invalid_rows(self):
             )
 
         fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
+        print(fpath)
+        print(self.data_dir)
         self.assertTrue(os.path.isfile(fpath))
         self.bin.append(fpath)
 
     def test_invalid_rows_file_contains_error_description(self):
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
 
@@ -367,7 +369,7 @@ class TestEnrich2ParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
         self.wt = "GCTGAT"
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "test_store.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
         self.enrich2 = enrich2.Enrich2(
             self.path, wt_sequence=self.wt, offset=0, one_based=True
@@ -386,7 +388,7 @@ def setUp(self):
         self.files = [
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_synonymous_counts_c1.csv",
@@ -394,7 +396,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_synonymous_counts_c2.csv",
@@ -402,7 +404,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_synonymous_scores_c1.csv",
@@ -410,7 +412,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_synonymous_scores_c2.csv",
@@ -418,7 +420,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_variants_counts_c1.csv",
@@ -426,7 +428,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_variants_counts_c2.csv",
@@ -434,7 +436,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_variants_scores_c1.csv",
@@ -442,7 +444,7 @@ def setUp(self):
             ),
             os.path.normpath(
                 os.path.join(
-                    TEST_DATA_DIR,
+                    self.data_dir,
                     "enrich2",
                     "test_store",
                     "mavedb_test_store_variants_scores_c2.csv",
@@ -554,7 +556,7 @@ def parse_rows(self, variants, element=None):
 
     @mock.patch.object(pd.DataFrame, "to_csv", return_value=None)
     def test_saves_to_output_directory(self, patch):
-        output = os.path.join(TEST_DATA_DIR, "enrich2", "new")
+        output = os.path.join(self.data_dir, "enrich2", "new")
         p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0)
         p.parse_input(p.load_input_file())
         for call_args in patch.call_args_list:
@@ -566,7 +568,7 @@ def test_saves_to_file_location_if_no_dst_supplied(self, patch):
         p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0)
         p.parse_input(self.enrich2.load_input_file())
         expected_base_path = os.path.normpath(
-            os.path.join(TEST_DATA_DIR, "enrich2", "test_store")
+            os.path.join(self.data_dir, "enrich2", "test_store")
         )
         for call_args in patch.call_args_list:
             self.assertIn(expected_base_path, call_args[0][0])
@@ -855,15 +857,15 @@ def test_drops_null_rows(self):
         self.assertNotIn("p.Ala1=", df_scores[constants.pro_variant_col])
 
 
-class TestEnrich2LoadInput(TestCase):
+class TestEnrich2LoadInput(ProgramTestCase):
     def test_error_file_not_h5_or_tsv(self):
-        path = os.path.join(TEST_DATA_DIR, "empiric", "empiric.xlsx")
+        path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         p = enrich2.Enrich2(path, wt_sequence="AAA")
         with self.assertRaises(TypeError):
             p.load_input_file()
 
     def test_scores_tsv_missing_score_column(self):
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(
             path,
             wt_sequence="AAA",
@@ -875,7 +877,7 @@ def test_scores_tsv_missing_score_column(self):
             p.load_input_file()
 
     def test_input_type_counts_doesnt_raise_keyerror(self):
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(
             path,
             wt_sequence="AAA",
@@ -885,7 +887,7 @@ def test_input_type_counts_doesnt_raise_keyerror(self):
         p.load_input_file()
 
     def test_scores_tsv_missing_hgvs_column(self):
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(path, wt_sequence="AAA", hgvs_column="hgvs")
         with self.assertRaises(KeyError):
             p.load_input_file()
@@ -894,7 +896,7 @@ def test_scores_tsv_missing_hgvs_column(self):
 class TestEnrich2ParseRow(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="ACT")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -966,7 +968,7 @@ def test_uses_three_qmarks(self):
 class TestProteinHGVSParsing(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -1026,7 +1028,7 @@ def test_maintains_ordering(self):
 class TestNucleotideHGVSParing(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -1080,7 +1082,7 @@ def test_strips_ws(self):
 class TestEnrich2MixedHGVSParsing(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         self.wt = "ACT"
         self.wt_aa = constants.AA_CODES[constants.CODON_TABLE[self.wt]]
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence=self.wt)
@@ -1174,7 +1176,7 @@ def test_protein_set_as_nt_when_table_is_not_syn_and_variant_is_special(self):
 class TestInferSilentAASub(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA", offset=0)
         self.bin.append(self.path.replace(".h5", ""))
 
@@ -1219,7 +1221,7 @@ def test_correctly_infers_aa_from_silent_variants(self):
         self.assertEqual("p.Leu1=", self.enrich2.infer_silent_aa_substitution(group))
 
 
-class TestApplyOffset(TestCase):
+class TestApplyOffset(ProgramTestCase):
     def test_mixed_variant_uses_nt_position_to_compute_codon_pos(self):
         variant = "c.-9A>T (p.Thr2Pro), c.-6C>A (p.Gln3Lys)"
         offset = -10
@@ -1253,14 +1255,14 @@ def test_applies_offset_to_protein_variant_modulo_3(self):
     @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence")
     def test_validates_against_wt_sequence(self, patch):
         variant = "c.-9C>T"
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACT")
         enrich2.apply_offset(variant, offset=-10, enrich2=p)  # pass
         patch.assert_called_with(*("c.1C>T",))
 
     def test_value_error_base_mismatch_after_offset_applied(self):
         variant = "c.-9G>T"
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACT")
         with self.assertRaises(ValueError):
             enrich2.apply_offset(variant, offset=-10, enrich2=p)
@@ -1268,23 +1270,23 @@ def test_value_error_base_mismatch_after_offset_applied(self):
     @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence")
     def test_validates_against_pro_sequence(self, patch):
         variant = "p.Gly3Leu"
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACG")
         enrich2.apply_offset(variant, offset=6, enrich2=p)  # pass
         patch.assert_called_with(*("p.Gly1Leu",))
 
     def test_value_error_pro_mismatch_after_offset_applied(self):
         variant = "p.Gly3Leu"
-        path = os.path.join(TEST_DATA_DIR, "enrich2", "dummy.h5")
+        path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACG")
         with self.assertRaises(ValueError):
             enrich2.apply_offset(variant, offset=6, enrich2=p)
 
 
-class TestEnrich2Init(TestCase):
+class TestEnrich2Init(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
 
     def test_error_is_coding_and_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py
index 970120a..c39fca9 100644
--- a/mavedbconvert/tests/test_fasta.py
+++ b/mavedbconvert/tests/test_fasta.py
@@ -1,48 +1,47 @@
 import os
 from unittest import TestCase
 
-from ..fasta import parse_fasta, split_fasta_path
-
+from . import ProgramTestCase
 
-TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+from ..fasta import parse_fasta, split_fasta_path
 
 
-class TestFastaPath(TestCase):
+class TestFastaPath(ProgramTestCase):
     def test_infers_bzip(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2")
+            os.path.join(self.data_dir, "fasta", "wt.fasta.bz2")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, "bz2")
 
     def test_infers_gzip(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz")
+            os.path.join(self.data_dir, "fasta", "wt.fasta.gz")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, "gz")
 
     def test_infers_uncompressed(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta")
+            os.path.join(self.data_dir, "fasta", "wt.fasta")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, None)
 
         head, base, ext, compression = split_fasta_path(
-            os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")
+            os.path.join(self.data_dir, "fasta", "lower.fa")
         )
         self.assertEqual(ext, ".fa")
         self.assertEqual(compression, None)
 
     def test_ioerror_invalid_ext(self):
         with self.assertRaises(IOError):
-            split_fasta_path(os.path.join(TEST_DATA_DIR, "enrich", "enrich.tsv"))
+            split_fasta_path(os.path.join(self.data_dir, "enrich", "enrich.tsv"))
 
 
-class TestFastaReader(TestCase):
+class TestFastaReader(ProgramTestCase):
     def test_can_read_first_sequence(self):
-        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -52,7 +51,7 @@ def test_can_read_first_sequence(self):
         self.assertEqual(sequence, expected)
 
     def test_converts_to_uppercase(self):
-        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "lower.fa"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "lower.fa"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -63,11 +62,11 @@ def test_converts_to_uppercase(self):
 
     def test_error_more_than_one_sequence(self):
         with self.assertRaises(ValueError):
-            parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "two.fasta"))
+            parse_fasta(os.path.join(self.data_dir, "fasta", "two.fasta"))
 
     def test_error_invalid_chars_in_sequence(self):
         with self.assertRaises(ValueError):
-            parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "invalid_chars.fasta"))
+            parse_fasta(os.path.join(self.data_dir, "fasta", "invalid_chars.fasta"))
 
     def test_ignores_blank_lines(self):
         expected = (
@@ -76,15 +75,15 @@ def test_ignores_blank_lines(self):
             "GAGGCTACGATCGTGCCTTGTGGCAAGTCTTCGCTCGCAC"
             "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA"
         )
-        seq = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "spaces.fasta"))
+        seq = parse_fasta(os.path.join(self.data_dir, "fasta", "spaces.fasta"))
         self.assertEqual(seq, expected)
 
     def test_error_missing_gt_on_first_line(self):
         with self.assertRaises(IOError):
-            parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "bad_format.fasta"))
+            parse_fasta(os.path.join(self.data_dir, "fasta", "bad_format.fasta"))
 
     def test_can_open_with_gzip(self):
-        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.gz"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta.gz"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -94,7 +93,7 @@ def test_can_open_with_gzip(self):
         self.assertEqual(sequence, expected)
 
     def test_can_open_with_bzip(self):
-        sequence = parse_fasta(os.path.join(TEST_DATA_DIR, "fasta", "wt.fasta.bz2"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta.bz2"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"

From c68e67d2a4dc9265e6f9dfcaa9db40c38c9456ee Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Tue, 3 Sep 2019 14:52:51 +1000
Subject: [PATCH 07/26] black formatting

---
 mavedbconvert/tests/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py
index 07d0191..d89a100 100644
--- a/mavedbconvert/tests/__init__.py
+++ b/mavedbconvert/tests/__init__.py
@@ -22,7 +22,9 @@
 class ProgramTestCase(TestCase):
     def setUp(self):
         self._data_dir = TemporaryDirectory()  # store the object
-        self.data_dir = os.path.join(self._data_dir.name, "data")  # store the directory path
+        self.data_dir = os.path.join(
+            self._data_dir.name, "data"
+        )  # store the directory path
         shutil.copytree(
             src=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"),
             dst=self.data_dir,

From 4360a1e5339ec570a9920de3675fb70364482025 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Wed, 4 Sep 2019 19:41:42 +1000
Subject: [PATCH 08/26] fixed bug in generating file path for test output

---
 mavedbconvert/tests/test_enrich2.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index 1b9503b..d3547f7 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -300,13 +300,15 @@ def setUp(self):
     def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self):
         self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
-        fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
+        invalid_rows_path = os.path.join(
+            os.path.dirname(self.path), "enrich2_invalid_rows.csv"
+        )
 
         df = pd.DataFrame(data={"score": [1]}, index=["c.1A>G (p.Lys1Val)"])
         self.enrich2.convert_h5_df(
             df=df, element=constants.variants_table, df_type=constants.score_type
         )
-        self.assertFalse(os.path.isfile(fpath))
+        self.assertFalse(os.path.isfile(invalid_rows_path))
 
     def test_drops_non_numeric_columns(self):
         df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1A>G (p.Lys1Val)"])
@@ -339,16 +341,19 @@ def test_opens_invalid_rows_file_for_invalid_rows(self):
                 df=df, element=constants.variants_table, df_type=constants.score_type
             )
 
-        fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
-        print(fpath)
-        print(self.data_dir)
-        self.assertTrue(os.path.isfile(fpath))
-        self.bin.append(fpath)
+        invalid_rows_path = os.path.join(
+            os.path.dirname(self.path), "enrich2_invalid_rows.csv"
+        )
+
+        self.assertTrue(os.path.isfile(invalid_rows_path))
+        self.bin.append(invalid_rows_path)
 
     def test_invalid_rows_file_contains_error_description(self):
         self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA")
-        fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv"
+        invalid_rows_path = os.path.join(
+            os.path.dirname(self.path), "enrich2_invalid_rows.csv"
+        )
 
         df = pd.DataFrame(
             data={"score": [1.1, 1.2]},
@@ -356,13 +361,13 @@ def test_invalid_rows_file_contains_error_description(self):
         )
 
         self.enrich2.convert_h5_df(df=df, df_type=constants.score_type, element=None)
-        self.assertTrue(os.path.isfile(fpath))
+        self.assertTrue(os.path.isfile(invalid_rows_path))
 
-        invalid = pd.read_csv(fpath, sep=",", index_col=0)
+        invalid = pd.read_csv(invalid_rows_path, sep=",", index_col=0)
         self.assertEqual(len(invalid), 1)
         self.assertEqual(invalid.index[0], "c.1T>G (p.Lys1Val)")
         self.assertIn("error_description", invalid.columns)
-        self.bin.append(fpath)
+        self.bin.append(invalid_rows_path)
 
 
 class TestEnrich2ParseInput(ProgramTestCase):

From 547f11a02958efa7719c60f604a2470a703bdcc0 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Wed, 4 Sep 2019 20:23:35 +1000
Subject: [PATCH 09/26] added some todo notes

---
 mavedbconvert/tests/__init__.py     | 2 ++
 mavedbconvert/tests/test_parsers.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py
index d89a100..24aa494 100644
--- a/mavedbconvert/tests/__init__.py
+++ b/mavedbconvert/tests/__init__.py
@@ -19,6 +19,8 @@
 ]
 
 
+# TODO: think up a better name for this class
+# TODO: remove the old self.bin stuff
 class ProgramTestCase(TestCase):
     def setUp(self):
         self._data_dir = TemporaryDirectory()  # store the object
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index 28560df..b8afeec 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -6,7 +6,7 @@
 
 from . import ProgramTestCase
 
-
+# TODO: convert these tests to use temp directories
 TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 

From a8dcb856ffa0e0b26cbdb65352c2d78e8be4faab Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Wed, 4 Sep 2019 20:25:34 +1000
Subject: [PATCH 10/26] removed relative imports in tests and added main
 functions

---
 mavedbconvert/tests/test_base.py       |  9 +++++--
 mavedbconvert/tests/test_empiric.py    | 14 ++++++----
 mavedbconvert/tests/test_enrich.py     |  9 +++++--
 mavedbconvert/tests/test_enrich2.py    | 14 ++++++----
 mavedbconvert/tests/test_fasta.py      | 10 ++++---
 mavedbconvert/tests/test_filters.py    | 12 ++++++---
 mavedbconvert/tests/test_parsers.py    | 30 +++++++++++----------
 mavedbconvert/tests/test_utilities.py  | 36 ++++++++++++++------------
 mavedbconvert/tests/test_validators.py | 22 +++++++++-------
 9 files changed, 97 insertions(+), 59 deletions(-)

diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index 722981f..a678e31 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -1,9 +1,10 @@
 import os
 import mock
+import unittest
 
-from .. import base, exceptions
+from mavedbconvert import base, exceptions
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
 
 class TestPaths(ProgramTestCase):
@@ -241,3 +242,7 @@ def test_index_error_index_extends_beyond_indexable_pro_seq(self):
         with self.assertRaises(IndexError):
             self.base.one_based = False
             self.base.validate_against_protein_sequence("p.Met2Lys")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index 937a42b..235a09a 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -1,13 +1,13 @@
 import os
-from unittest import TestCase
+import unittest
 
 import pandas as pd
 import numpy as np
 from pandas.testing import assert_frame_equal, assert_series_equal
 
-from .. import empiric, constants
+from mavedbconvert import empiric, constants
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
 
 class TestEmpiricInit(ProgramTestCase):
@@ -23,7 +23,7 @@ def test_ok_is_mult_of_three(self):
         empiric.Empiric(src=self.path, wt_sequence="ATC", offset=3)
 
 
-class TestInferProEvent(TestCase):
+class TestInferProEvent(unittest.TestCase):
     def test_infers_equal_event(self):
         self.assertEqual(
             empiric.infer_pro_substitution(mut_aa="V", wt_aa="v", codon_pos=0),
@@ -43,7 +43,7 @@ def test_converts_triple_q_to_Xaa(self):
         )
 
 
-class TestInferNTEvent(TestCase):
+class TestInferNTEvent(unittest.TestCase):
     def test_infers_equal_event(self):
         self.assertEqual(
             empiric.infer_nt_substitution(wt_codon="aaa", mut_codon="AAA", codon_pos=0),
@@ -499,3 +499,7 @@ def test_integration(self):
             pd.read_csv(self.empiric.output_file, delimiter=","),
             pd.read_csv(self.expected, delimiter=","),
         )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py
index f18d1e4..dc40baa 100644
--- a/mavedbconvert/tests/test_enrich.py
+++ b/mavedbconvert/tests/test_enrich.py
@@ -1,12 +1,13 @@
 import os
+import unittest
 
 import pandas as pd
 import numpy as np
 from pandas.testing import assert_frame_equal
 
-from .. import enrich, constants, utilities
+from mavedbconvert import enrich, constants, utilities
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
 
 WT = (
@@ -331,3 +332,7 @@ def test_output_from_one_based_input(self):
         result = pd.read_csv(self.bin[1])
         expected = pd.read_csv(self.expected)
         assert_frame_equal(expected, result)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index d3547f7..f2d9c20 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -1,6 +1,6 @@
 import os
 import mock
-from unittest import TestCase
+import unittest
 from itertools import product
 
 import hgvsp
@@ -10,9 +10,9 @@
 import pandas as pd
 from pandas.testing import assert_index_equal, assert_frame_equal
 
-from .. import validators, enrich2, constants, exceptions
+from mavedbconvert import validators, enrich2, constants, exceptions
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
 
 # Utility tests
@@ -74,7 +74,7 @@ def test_returns_empty_when_missing_counts_key(self):
         self.assertIsNone(cnd_df)
 
 
-class TestFlattenColumnNames(TestCase):
+class TestFlattenColumnNames(unittest.TestCase):
     def setUp(self):
         index = pd.MultiIndex.from_product(
             [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]],
@@ -166,7 +166,7 @@ def test_assertion_error_scores_shared_scores_different_index(self):
             enrich2.get_replicate_score_dataframes(self.store)
 
 
-class TestDropNull(TestCase):
+class TestDropNull(unittest.TestCase):
     def test_calls_drop_na_rows_from_scores_inplace(self):
         df = pd.DataFrame({"A": [None, 1]})
         enrich2.drop_null(df)
@@ -1302,3 +1302,7 @@ def test_ok_is_coding_false_and_offset_not_mult_of_three(self):
 
     def test_ok_is_coding_and_offset_mult_of_three(self):
         enrich2.Enrich2(src=self.path, wt_sequence="ATC", is_coding=True, offset=-3)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py
index c39fca9..6c6fafb 100644
--- a/mavedbconvert/tests/test_fasta.py
+++ b/mavedbconvert/tests/test_fasta.py
@@ -1,9 +1,9 @@
 import os
-from unittest import TestCase
+import unittest
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
-from ..fasta import parse_fasta, split_fasta_path
+from mavedbconvert.fasta import parse_fasta, split_fasta_path
 
 
 class TestFastaPath(ProgramTestCase):
@@ -101,3 +101,7 @@ def test_can_open_with_bzip(self):
             "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA"
         )
         self.assertEqual(sequence, expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_filters.py b/mavedbconvert/tests/test_filters.py
index 2b2cbb2..d7b037d 100644
--- a/mavedbconvert/tests/test_filters.py
+++ b/mavedbconvert/tests/test_filters.py
@@ -1,13 +1,13 @@
-from unittest import TestCase
+import unittest
 
 import pandas as pd
 import numpy as np
 
 
-from .. import filters, constants
+from mavedbconvert import filters, constants
 
 
-class TestDropNaColumns(TestCase):
+class TestDropNaColumns(unittest.TestCase):
     def test_drops_null_nt_column(self):
         df = pd.DataFrame(
             {
@@ -59,7 +59,7 @@ def test_does_not_drop_column_containing_non_null_values(self):
         self.assertIn("A", df)
 
 
-class TestDropNaRows(TestCase):
+class TestDropNaRows(unittest.TestCase):
     def test_drops_null_row(self):
         df = pd.DataFrame({"A": [None], "B": [np.NaN]})
         filters.drop_na_rows(df, inplace=True)
@@ -69,3 +69,7 @@ def test_does_not_drop_row_containing_non_null_values(self):
         df = pd.DataFrame({"A": [None], "B": [0.0]})
         filters.drop_na_rows(df, inplace=True)
         self.assertEqual(len(df), 1)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index b8afeec..fabc836 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -1,16 +1,16 @@
 import os
 import mock
-from unittest import TestCase
+import unittest
 
-from .. import parsers, exceptions, constants
+from mavedbconvert import parsers, exceptions, constants
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
 # TODO: convert these tests to use temp directories
 TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
 
-class TestParseBoolean(TestCase):
+class TestParseBoolean(unittest.TestCase):
     def test_true_if_str_of_true(self):
         self.assertTrue(parsers.parse_boolean(True))
         self.assertTrue(parsers.parse_boolean("True"))
@@ -22,7 +22,7 @@ def test_false_if_not_repr_of_true(self):
         self.assertFalse(parsers.parse_boolean(False))
 
 
-class TestParseNumeric(TestCase):
+class TestParseNumeric(unittest.TestCase):
     def test_converts_to_dtype(self):
         self.assertIsInstance(
             parsers.parse_numeric("1", name="int", dtype=float), float
@@ -35,7 +35,7 @@ def test_value_error_cannot_cast_to_dtype(self):
             parsers.parse_numeric("a", name="value", dtype=int)
 
 
-class TestParseString(TestCase):
+class TestParseString(unittest.TestCase):
     def test_returns_none_if_falsey(self):
         self.assertIsNone(parsers.parse_string(None))
         self.assertIsNone(parsers.parse_string(" "))
@@ -45,7 +45,7 @@ def test_returns_string_stripped_of_ws(self):
         self.assertEqual(parsers.parse_string(" aaa "), "aaa")
 
 
-class TestParseSrc(TestCase):
+class TestParseSrc(unittest.TestCase):
     @mock.patch(
         "mavedbconvert.parsers.parse_string",
         return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"),
@@ -100,7 +100,7 @@ def test_makes_dst_directory_tree(self):
         self.bin.append(path)
 
 
-class TestParseProgram(TestCase):
+class TestParseProgram(unittest.TestCase):
     def test_ok_supported_program(self):
         for p in ("enrich2", "enrich", "empiric"):
             parsers.parse_program(p)
@@ -124,7 +124,7 @@ def test_sets_correct_program_from_dict(self):
             parsers.parse_program(program)
 
 
-class TestParseWildTypeSequence(TestCase):
+class TestParseWildTypeSequence(unittest.TestCase):
     def test_can_read_from_fasta(self):
         path = os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")
         wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True)
@@ -161,7 +161,7 @@ def test_ok_divisible_by_three_enrich_empiric(self):
         parsers.parse_wt_sequence("ATGATC", program="empiric")
 
 
-class TestParseInputType(TestCase):
+class TestParseInputType(unittest.TestCase):
     @mock.patch("mavedbconvert.parsers.parse_string", return_value="counts")
     def test_calls_parse_string(self, patch):
         parsers.parse_input_type(constants.count_type)
@@ -176,7 +176,7 @@ def test_ok_recognised_input_type(self):
             parsers.parse_input_type(v)
 
 
-class TestParseScoreColumn(TestCase):
+class TestParseScoreColumn(unittest.TestCase):
     @mock.patch("mavedbconvert.parsers.parse_string", return_value="score")
     def test_calls_parse_string(self, patch):
         parsers.parse_score_column("score", constants.score_type, program="enrich")
@@ -210,7 +210,7 @@ def test_ok_enrich2_and_column_not_defined(self):
         )
 
 
-class TestParseOffset(TestCase):
+class TestParseOffset(unittest.TestCase):
     @mock.patch("mavedbconvert.parsers.parse_numeric", return_value=0)
     def test_calls_parse_numeric(self, patch):
         parsers.parse_offset(0, program="enrich")
@@ -238,7 +238,7 @@ def test_ok_enrich_empiric_offset_mult_of_three(self):
         self.assertEqual(-6, parsers.parse_offset("-6", "empiric"))
 
 
-class TestParseDocopt(TestCase):
+class TestParseDocopt(unittest.TestCase):
     @staticmethod
     def mock_args(
         program=None,
@@ -315,3 +315,7 @@ def test_contains_skip_header_rows_key(self):
         args = self.mock_args()
         _, kwargs = parsers.parse_docopt(args)
         self.assertIn("skip_header_rows", kwargs)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_utilities.py b/mavedbconvert/tests/test_utilities.py
index 25b530f..7e1c244 100644
--- a/mavedbconvert/tests/test_utilities.py
+++ b/mavedbconvert/tests/test_utilities.py
@@ -1,11 +1,11 @@
-from unittest import TestCase
+import unittest
 
 import numpy as np
 
-from .. import utilities, constants, exceptions
+from mavedbconvert import utilities, constants, exceptions
 
 
-class TestSlicer(TestCase):
+class TestSlicer(unittest.TestCase):
     def test_slicer_returns_chunks_of_size_n(self):
         self.assertEqual(list(utilities.slicer("aaabbbccc", 3)), ["aaa", "bbb", "ccc"])
 
@@ -15,7 +15,7 @@ def test_slicer_returns_clips_if_cannot_chunk(self):
         )
 
 
-class TestTranslateWTSequence(TestCase):
+class TestTranslateWTSequence(unittest.TestCase):
     def test_translate_wt_seq_no_offset(self):
         self.assertEqual(utilities.translate_dna("GTGGCGGAG", offset=0), "VAE")
 
@@ -31,7 +31,7 @@ def test_error_offset_negative(self):
             utilities.translate_dna("GTGGCGGAG", offset=-3)
 
 
-class TestIsNull(TestCase):
+class TestIsNull(unittest.TestCase):
     def test_is_null_true_for_none_nan_and_na(self):
         for v in constants.extra_na:
             self.assertTrue(utilities.is_null(v))
@@ -46,7 +46,7 @@ def test_is_null_false(self):
         self.assertFalse(utilities.is_null("1.2"))
 
 
-class TestFormatColumn(TestCase):
+class TestFormatColumn(unittest.TestCase):
     def test_replaces_null_with_nan(self):
         self.assertIs(utilities.format_column(["   "])[0], np.NaN)
         self.assertIs(utilities.format_column(["none"])[0], np.NaN)
@@ -67,7 +67,7 @@ def test_replaces_null_with_none_if_astype_is_not_int_or_float(self):
         self.assertIs(utilities.format_column(["none"], astype=str)[0], None)
 
 
-class TestIsNumeric(TestCase):
+class TestIsNumeric(unittest.TestCase):
     def test_true_for_float(self):
         self.assertTrue(utilities.is_numeric(float))
 
@@ -90,7 +90,7 @@ def test_false_for_np_object(self):
         self.assertFalse(utilities.is_numeric(np.object))
 
 
-class TestNucleotideSubstitutionEvent(TestCase):
+class TestNucleotideSubstitutionEvent(unittest.TestCase):
     def test_parses_negative_positions(self):
         nt = utilities.NucleotideSubstitutionEvent("n.-100A>T")
         self.assertEqual(nt.position, -100)
@@ -166,7 +166,7 @@ def test_infers_within_frame_position(self):
         )
 
 
-class TestProteinSubstitutionEvent(TestCase):
+class TestProteinSubstitutionEvent(unittest.TestCase):
     def test_error_set_position_less_than_1(self):
         pro = utilities.ProteinSubstitutionEvent("p.Gly4Leu")
         with self.assertRaises(ValueError):
@@ -208,7 +208,7 @@ def test_formats_event_string_correctly(self):
         )
 
 
-class TestSplitVariant(TestCase):
+class TestSplitVariant(unittest.TestCase):
     def test_split_hgvs_singular_list_non_multi_variant(self):
         self.assertListEqual(["c.100A>G"], utilities.split_variant("c.100A>G"))
 
@@ -218,7 +218,7 @@ def test_split_hgvs_returns_list_of_single_variants(self):
         )
 
 
-class TestNormalizeVariant(TestCase):
+class TestNormalizeVariant(unittest.TestCase):
     def test_stripts_white_space(self):
         self.assertEqual(utilities.normalize_variant(" c.1A>G "), "c.1A>G")
 
@@ -256,7 +256,7 @@ def test_replaces_X_with_N_in_rna_variant(self):
         )
 
 
-class TestFormatVariant(TestCase):
+class TestFormatVariant(unittest.TestCase):
     def test_stripts_white_space(self):
         self.assertEqual(utilities.format_variant(" c.1A>G "), "c.1A>G")
 
@@ -264,7 +264,7 @@ def test_passes_on_none(self):
         self.assertIsNone(utilities.format_variant(None))
 
 
-class TestHGVSProFromEventList(TestCase):
+class TestHGVSProFromEventList(unittest.TestCase):
     def test_returns_single_event(self):
         result = utilities.hgvs_pro_from_event_list(["L4V"])
         self.assertEqual(result, "p.L4V")
@@ -292,7 +292,7 @@ def test_error_invalid_hgvs(self):
             utilities.hgvs_pro_from_event_list(["aaaa"])
 
 
-class TestHGVSNTFromEventList(TestCase):
+class TestHGVSNTFromEventList(unittest.TestCase):
     def test_returns_single_event(self):
         result = utilities.hgvs_nt_from_event_list(["45A>G"], prefix="c")
         self.assertEqual(result, "c.45A>G")
@@ -316,7 +316,7 @@ def test_error_invalid_hgvs(self):
             utilities.hgvs_nt_from_event_list(["aaaa"], prefix="c")
 
 
-class TestNonHgvsColumns(TestCase):
+class TestNonHgvsColumns(unittest.TestCase):
     def test_returns_non_hgvs_columns(self):
         self.assertListEqual(
             ["score"],
@@ -328,7 +328,7 @@ def test_returns_non_hgvs_columns(self):
         )
 
 
-class TestHgvsColumns(TestCase):
+class TestHgvsColumns(unittest.TestCase):
     def test_returns_only_hgvs_columns(self):
         self.assertListEqual(
             [constants.nt_variant_col, constants.pro_variant_col],
@@ -338,3 +338,7 @@ def test_returns_only_hgvs_columns(self):
                 )
             ),
         )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_validators.py b/mavedbconvert/tests/test_validators.py
index bad4735..301d4a6 100644
--- a/mavedbconvert/tests/test_validators.py
+++ b/mavedbconvert/tests/test_validators.py
@@ -1,13 +1,13 @@
-from unittest import TestCase
+import unittest
 
 import pandas as pd
 
 from hgvs.sequencevariant import SequenceVariant
 
-from .. import validators, constants, exceptions
+from mavedbconvert import validators, constants, exceptions
 
 
-class TestHGVSPatternsBackend(TestCase):
+class TestHGVSPatternsBackend(unittest.TestCase):
     def setUp(self):
         self.backend = validators.HGVSPatternsBackend()
 
@@ -25,7 +25,7 @@ def test_returns_str_variant(self):
         self.assertIsInstance(self.backend.validate("c.1A>G"), str)
 
 
-class TestHGVSBiocommonsBackend(TestCase):
+class TestHGVSBiocommonsBackend(unittest.TestCase):
     def setUp(self):
         self.backend = validators.HGVSBiocommonsBackend("NM_000000001.1")
 
@@ -68,7 +68,7 @@ def test_validate_hgvs_uses_dummy_ref_if_transcript_not_passed(self):
         )
 
 
-class TestValidateHGVS(TestCase):
+class TestValidateHGVS(unittest.TestCase):
     def test_uses_biocommons_backend_if_transcript_provided(self):
         result = validators.validate_variants(
             ["c.[1A>G;2A>G]"], n_jobs=2, verbose=0, transcript=constants.dummy_ref
@@ -80,7 +80,7 @@ def test_uses_patterns_backend_as_default(self):
         self.assertIsInstance(result[0], str)
 
 
-class TestDfValidators(TestCase):
+class TestDfValidators(unittest.TestCase):
     def test_validate_column_raise_keyerror_column_not_exist(self):
         df = pd.DataFrame({"a": [1]})
         with self.assertRaises(KeyError):
@@ -100,7 +100,7 @@ def test_pass_all_numeric(self):
         validators.validate_columns_are_numeric(df)
 
 
-class TestHGVSValidators(TestCase):
+class TestHGVSValidators(unittest.TestCase):
     def test_validate_hgvs_nt_not_redef_raise_error_if_redefined(self):
         df = pd.DataFrame({constants.nt_variant_col: ["a", "b"]})
         validators.validate_hgvs_nt_uniqueness(df)  # Should pass
@@ -124,7 +124,7 @@ def test_validate_hgvs_pro_not_redef_ignores_none(self):
         validators.validate_hgvs_pro_uniqueness(df)  # Should pass
 
 
-class TestMaveDBCompliance(TestCase):
+class TestMaveDBCompliance(unittest.TestCase):
     def test_error_primary_column_contains_null(self):
         df = pd.DataFrame(
             {
@@ -207,7 +207,7 @@ def test_keyerror_missing_score_column_df_type_is_scores(self):
             validators.validate_mavedb_compliance(df, df_type=constants.score_type)
 
 
-class TestValidateSameVariants(TestCase):
+class TestValidateSameVariants(unittest.TestCase):
     def test_ve_counts_defines_different_nt_variants(self):
         scores = pd.DataFrame(
             {
@@ -260,3 +260,7 @@ def test_error_dfs_define_different_hgvs_columns(self):
         counts = pd.DataFrame({constants.pro_variant_col: ["p.Leu75Glu"]})
         with self.assertRaises(AssertionError):
             validators.validate_datasets_define_same_variants(scores, counts)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 03b48a2bddeeff6303c450c20169510091e24fad Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 14 Oct 2019 14:47:07 +1100
Subject: [PATCH 11/26] refactored file path names

---
 mavedbconvert/tests/test_empiric.py | 46 ++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index 235a09a..44310a9 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -352,20 +352,20 @@ def test_keeps_int_type_as_int(self):
 class TestEmpiricLoadInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
-        self.tmp_path = os.path.join(self.data_dir, "empiric", "tmp.csv")
-        self.tmp_path_tsv = os.path.join(self.data_dir, "empiric", "tmp.tsv")
-        self.tmp_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx")
+        self.excel_path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
+        self.csv_path = os.path.join(self.data_dir, "empiric", "tmp.csv")
+        self.tsv_path = os.path.join(self.data_dir, "empiric", "tmp.tsv")
+        self.multisheet_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx")
         self.bin.append(self.tmp_path)
         self.bin.append(self.tmp_path_tsv)
 
     def test_extra_na_load_as_nan(self):
         for value in constants.extra_na:
-            df = pd.read_excel(self.path)
+            df = pd.read_excel(self.excel_path)
             df["A"] = [value] * len(df)
-            df.to_csv(self.tmp_path, index=False)
+            df.to_csv(self.csv_path, index=False)
             e = empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column="col_A",
                 input_type=constants.score_type,
@@ -380,9 +380,9 @@ def test_loads_first_sheet_by_default(self):
             {"Position": [0], "Amino Acid": ["K"], "score": [1.2]},
             {"Position": [1], "Amino Acid": ["G"], "score": [1.4]},
         ]
-        self.mock_multi_sheet_excel_file(self.tmp_excel_path, data)
+        self.mock_multi_sheet_excel_file(self.multisheet_excel_path, data)
         p = empiric.Empiric(
-            src=self.tmp_excel_path,
+            src=self.multisheet_excel_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="score",
             input_type=constants.score_type,
@@ -392,10 +392,10 @@ def test_loads_first_sheet_by_default(self):
         assert_frame_equal(df, expected)
 
     def test_handles_csv(self):
-        df = pd.read_excel(self.path)
-        df.to_csv(self.tmp_path, index=False, sep=",")
+        df = pd.read_excel(self.excel_path)
+        df.to_csv(self.csv_path, index=False, sep=",")
         e = empiric.Empiric(
-            src=self.tmp_path,
+            src=self.csv_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -405,10 +405,10 @@ def test_handles_csv(self):
         assert_frame_equal(result, df)
 
     def test_handles_tsv(self):
-        df = pd.read_excel(self.path)
-        df.to_csv(self.tmp_path_tsv, index=False, sep="\t")
+        df = pd.read_excel(self.excel_path)
+        df.to_csv(self.tsv_path, index=False, sep="\t")
         e = empiric.Empiric(
-            src=self.tmp_path_tsv,
+            src=self.tsv_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -418,12 +418,12 @@ def test_handles_tsv(self):
         assert_frame_equal(result, df)
 
     def test_error_position_not_in_columns(self):
-        df = pd.read_excel(self.path)
+        df = pd.read_excel(self.excel_path)
         df = df.drop(columns=["Position"])
-        df.to_csv(self.tmp_path, index=False, sep="\t")
+        df.to_csv(self.csv_path, index=False, sep="\t")
         with self.assertRaises(ValueError):
             e = empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column="col_A",
                 input_type=constants.score_type,
@@ -432,12 +432,12 @@ def test_error_position_not_in_columns(self):
             e.load_input_file()
 
     def test_error_amino_acid_not_in_columns(self):
-        df = pd.read_excel(self.path)
+        df = pd.read_excel(self.excel_path)
         df = df.drop(columns=["Amino Acid"])
-        df.to_csv(self.tmp_path, index=False, sep="\t")
+        df.to_csv(self.csv_path, index=False, sep="\t")
         with self.assertRaises(ValueError):
             e = empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column="col_A",
                 input_type=constants.score_type,
@@ -448,7 +448,7 @@ def test_error_amino_acid_not_in_columns(self):
     def test_not_scores_column_but_input_type_is_scores(self):
         with self.assertRaises(ValueError):
             empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column=None,
                 input_type=constants.score_type,
@@ -457,7 +457,7 @@ def test_not_scores_column_but_input_type_is_scores(self):
 
     def test_applies_offset_to_position_column(self):
         e = empiric.Empiric(
-            src=self.path,
+            src=self.excel_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,

From 1efefb11842b59e94a1f082218f9f0b63aabc4b6 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 14 Oct 2019 14:47:31 +1100
Subject: [PATCH 12/26] removed unnecessary temp file tracking

---
 mavedbconvert/tests/test_empiric.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index 44310a9..7c5080c 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -356,8 +356,6 @@ def setUp(self):
         self.csv_path = os.path.join(self.data_dir, "empiric", "tmp.csv")
         self.tsv_path = os.path.join(self.data_dir, "empiric", "tmp.tsv")
         self.multisheet_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx")
-        self.bin.append(self.tmp_path)
-        self.bin.append(self.tmp_path_tsv)
 
     def test_extra_na_load_as_nan(self):
         for value in constants.extra_na:

From ea5f62731aaa0546dbb62c40333fe3ea63562e1a Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 14 Oct 2019 14:48:58 +1100
Subject: [PATCH 13/26] refactored file path names

---
 mavedbconvert/tests/test_empiric.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index 7c5080c..ceeaac2 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -469,10 +469,10 @@ def test_applies_offset_to_position_column(self):
 class TestEmpiricConvert(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
+        self.excel_path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.expected = os.path.join(self.data_dir, "empiric", "empiric_expected.csv")
         self.empiric = empiric.Empiric(
-            src=self.path,
+            src=self.excel_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -486,7 +486,7 @@ def test_saves_to_dst(self):
 
     def test_integration(self):
         self.empiric = empiric.Empiric(
-            src=self.path,
+            src=self.excel_path,
             wt_sequence="TCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,

From 3812f660fc89f8c3ca730a20e7c2ec28c411c4f5 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 14 Oct 2019 14:49:11 +1100
Subject: [PATCH 14/26] removed unnecessary temp file tracking

---
 mavedbconvert/tests/test_empiric.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index ceeaac2..f68feed 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -478,7 +478,6 @@ def setUp(self):
             input_type=constants.score_type,
             one_based=False,
         )
-        self.bin.append(self.empiric.output_file)
 
     def test_saves_to_dst(self):
         self.empiric.convert()

From 82cce0b0c1f16de714860408b98b34f44f9d540a Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 14 Oct 2019 15:35:52 +1100
Subject: [PATCH 15/26] don't print the pandas row numbers when generating
 excel test files

---
 mavedbconvert/tests/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py
index 24aa494..4f9493c 100644
--- a/mavedbconvert/tests/__init__.py
+++ b/mavedbconvert/tests/__init__.py
@@ -37,7 +37,7 @@ def mock_multi_sheet_excel_file(self, path, data):
         writer = pd.ExcelWriter(path, engine="xlsxwriter")
         for i, di in enumerate(data):
             df = pd.DataFrame(di)
-            df.to_excel(writer, sheet_name="Sheet{}".format(i))
+            df.to_excel(writer, sheet_name="Sheet{}".format(i), index=False)
         writer.save()
         self.bin.append(path)
 

From a08fc7b320da7df092d28adf433d418d4d539045 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 16:11:53 +1100
Subject: [PATCH 16/26] added travis config

---
 .travis.yml | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..cb1cab5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,29 @@
+language: python
+matrix:
+  include:
+    - python: "3.6"
+    - python: "3.7"
+    - python: "3.8"
+    - python: "3.8-dev"
+    - python: "3.9-dev"
+    - python: "pypy3"
+      env: NO_MYPY=true
+  allow_failures:
+    - python: "3.8-dev"
+    - python: "3.9-dev"
+    - python: "pypy3"
+      env: NO_MYPY=true
+install:
+  - pip3 install .
+before_script:
+  - pip3 install coverage
+  - pip3 install coveralls
+  - if ! $NO_MYPY; then pip3 install mypy; fi
+  - pip3 install sphinx
+  - pip3 install sphinx-rtd-theme
+script:
+  - coverage run --source fqfa -m unittest
+  - if ! $NO_MYPY; then mypy fqfa tests; fi
+  - cd docs && make doctest
+after_success:
+  - coveralls

From 7e26a39dc526f6a8bcc5950e38df33e598375ea7 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 16:26:00 +1100
Subject: [PATCH 17/26] updated setup.py details

---
 setup.py | 50 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/setup.py b/setup.py
index e236978..ab4722f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,17 +1,43 @@
-from setuptools import setup
+import setuptools
 
-setup(
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
     name="mavedbconvert",
-    version="0.6.0-alpha",
-    packages=["mavedbconvert", "mavedbconvert.tests"],
-    url="https://github.com/FowlerLab/mavedb-convert",
-    license="AGPLv3",
-    author="Daniel Esposito",
-    author_email="esposito.d@wehi.edu.au",
+    version="0.1.0-beta",
+    author="Alan F Rubin, Daniel Esposito",
+    author_email="alan.rubin@wehi.edu.au",
     description=(
-        "A command line tool for converting alternate "
-        "file formats into a MaveDB compliant format."
+        "A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format."
     ),
-    # install_requires=open("requirements/install.txt", "rt").read().split("\n"),
-    entry_points={"console_scripts": ["mavedb-convert=mavedbconvert.main:main"]},
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/VariantEffect/mavedbconvert",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Science/Research",
+        "Topic :: Scientific/Engineering :: Bio-Informatics",
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: BSD License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.6",
+    install_requires=[
+        "tables>=3.2.0",
+        "pandas>=0.18.0",
+        "xlrd >= 0.9.0",
+        "tqdm",
+        "docopt",
+        "hgvsp @ git+https://github.com/FowlerLab/hgvs-patterns",
+        "hgvs",
+        "requests",
+        "numpy",
+        "scipy",
+        "joblib",
+        "xlsxwriter",
+    ],
+    entry_points={"console_scripts": ["mavedbconvert=mavedbconvert.main:main"]},
+    test_suite="tests",
 )

From f23dd740319a7bfec1c1363d90047f169b2a1ab5 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:06:02 +1100
Subject: [PATCH 18/26] renamed entrypoint

---
 mavedbconvert/main.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mavedbconvert/main.py b/mavedbconvert/main.py
index 2cb84c8..5df79d3 100644
--- a/mavedbconvert/main.py
+++ b/mavedbconvert/main.py
@@ -12,11 +12,11 @@
 All outputs are in 1-based coordinates.
 
 Usage:
-  mavedb-convert enrich2 <src> [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding]
-  mavedb-convert enrich <src> [--dst=D] [--wtseq=W] [--offset=O]  [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
-  mavedb-convert empiric <src> [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
-  mavedb-convert -h | --help
-  mavedb-convert --version
+  mavedbconvert enrich2 <src> [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding]
+  mavedbconvert enrich <src> [--dst=D] [--wtseq=W] [--offset=O]  [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
+  mavedbconvert empiric <src> [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
+  mavedbconvert -h | --help
+  mavedbconvert --version
   
 
 Options:

From 6e8189929166f34eee4372a563587b50749dacf4 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:10:05 +1100
Subject: [PATCH 19/26] use unittest.mock instead of requiring mock

---
 mavedbconvert/tests/test_base.py    |  6 +++---
 mavedbconvert/tests/test_enrich2.py | 20 ++++++++++----------
 mavedbconvert/tests/test_parsers.py | 12 ++++++------
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index a678e31..7b356b3 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -1,6 +1,6 @@
 import os
-import mock
 import unittest
+from unittest.mock import patch
 
 from mavedbconvert import base, exceptions
 
@@ -49,12 +49,12 @@ def test_creates_directory_tree_if_it_doesnt_exist(self):
         self.assertTrue(os.path.isdir(output))
         self.bin.append(output)
 
-    @mock.patch("os.access")
+    @patch("os.access")
     def test_checks_read_permission(self, patch):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
         self.assertEqual(patch.call_args_list[0][0], (p.src, os.R_OK))
 
-    @mock.patch("os.access")
+    @patch("os.access")
     def test_checks_write_permission(self, patch):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
         self.assertEqual(patch.call_args_list[1][0], (p.dst, os.W_OK))
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index f2d9c20..02ac246 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -1,6 +1,6 @@
 import os
-import mock
 import unittest
+from unittest.mock import patch
 from itertools import product
 
 import hgvsp
@@ -559,7 +559,7 @@ def tearDown(self):
     def parse_rows(self, variants, element=None):
         return [self.enrich2.parse_row((v, element)) for v in list(variants)]
 
-    @mock.patch.object(pd.DataFrame, "to_csv", return_value=None)
+    @patch.object(pd.DataFrame, "to_csv", return_value=None)
     def test_saves_to_output_directory(self, patch):
         output = os.path.join(self.data_dir, "enrich2", "new")
         p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0)
@@ -568,7 +568,7 @@ def test_saves_to_output_directory(self, patch):
             self.assertIn(output, call_args[0][0])
         self.bin.append(output)
 
-    @mock.patch.object(pd.DataFrame, "to_csv", return_value=None)
+    @patch.object(pd.DataFrame, "to_csv", return_value=None)
     def test_saves_to_file_location_if_no_dst_supplied(self, patch):
         p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0)
         p.parse_input(self.enrich2.load_input_file())
@@ -578,13 +578,13 @@ def test_saves_to_file_location_if_no_dst_supplied(self, patch):
         for call_args in patch.call_args_list:
             self.assertIn(expected_base_path, call_args[0][0])
 
-    @mock.patch("mavedbconvert.enrich2.get_replicate_score_dataframes")
+    @patch("mavedbconvert.enrich2.get_replicate_score_dataframes")
     def test_iterates_over_all_available_tables(self, patch):
         self.enrich2.parse_input(self.enrich2.load_input_file())
         self.assertIn(constants.synonymous_table, patch.call_args_list[0][0])
         self.assertIn(constants.variants_table, patch.call_args_list[1][0])
 
-    @mock.patch(
+    @patch(
         "mavedbconvert.enrich2.drop_null",
         side_effect=lambda scores_df, counts_df: (scores_df, counts_df),
     )
@@ -928,7 +928,7 @@ def test_nt_variant_is_none_special_variant_is_from_synonymous_table(self):
             ),
         )
 
-    @mock.patch("mavedbconvert.enrich2.apply_offset", return_value="c.3T>C (p.Thr1=)")
+    @patch("mavedbconvert.enrich2.apply_offset", return_value="c.3T>C (p.Thr1=)")
     def test_calls_apply_offset_to_variant(self, patch):
         variant = "c.3T>C (p.=)"
         self.enrich2.parse_row((variant, None))
@@ -1123,7 +1123,7 @@ def test_variant_order_maintained(self):
         self.assertEqual(nt, "c.[1=;6T>G;2A>T]")
         self.assertEqual(pro, "p.[Lys1Ile;Asn2Lys]")
 
-    @mock.patch.object(
+    @patch.object(
         enrich2.Enrich2, "infer_silent_aa_substitution", return_value="p.Lys1="
     )
     def test_groups_codons(self, patch):
@@ -1132,7 +1132,7 @@ def test_groups_codons(self, patch):
         _, _ = self.enrich2.parse_mixed_variant(variant)
         patch.assert_called_with(*(["c.1=", "c.2="], variant))
 
-    @mock.patch.object(
+    @patch.object(
         enrich2.Enrich2, "infer_silent_aa_substitution", return_value="p.Lys1="
     )
     def test_calls_infer_with_synonymous_variants_only(self, patch):
@@ -1257,7 +1257,7 @@ def test_applies_offset_to_protein_variant_modulo_3(self):
         self.assertEqual("p.Leu7=, p.Leu10=", enrich2.apply_offset(variant, offset))
         self.assertEqual("p.Leu7=", enrich2.apply_offset("p.Leu10=", offset))
 
-    @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence")
+    @patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence")
     def test_validates_against_wt_sequence(self, patch):
         variant = "c.-9C>T"
         path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
@@ -1272,7 +1272,7 @@ def test_value_error_base_mismatch_after_offset_applied(self):
         with self.assertRaises(ValueError):
             enrich2.apply_offset(variant, offset=-10, enrich2=p)
 
-    @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence")
+    @patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence")
     def test_validates_against_pro_sequence(self, patch):
         variant = "p.Gly3Leu"
         path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index fabc836..f0e3bae 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -1,6 +1,6 @@
 import os
-import mock
 import unittest
+from unittest.mock import patch
 
 from mavedbconvert import parsers, exceptions, constants
 
@@ -46,7 +46,7 @@ def test_returns_string_stripped_of_ws(self):
 
 
 class TestParseSrc(unittest.TestCase):
-    @mock.patch(
+    @patch(
         "mavedbconvert.parsers.parse_string",
         return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"),
     )
@@ -74,7 +74,7 @@ def test_error_file_is_a_dir(self):
 
 
 class TestParseDst(ProgramTestCase):
-    @mock.patch(
+    @patch(
         "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR)
     )
     def test_calls_parse_string(self, patch):
@@ -162,7 +162,7 @@ def test_ok_divisible_by_three_enrich_empiric(self):
 
 
 class TestParseInputType(unittest.TestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value="counts")
+    @patch("mavedbconvert.parsers.parse_string", return_value="counts")
     def test_calls_parse_string(self, patch):
         parsers.parse_input_type(constants.count_type)
         patch.assert_called()
@@ -177,7 +177,7 @@ def test_ok_recognised_input_type(self):
 
 
 class TestParseScoreColumn(unittest.TestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value="score")
+    @patch("mavedbconvert.parsers.parse_string", return_value="score")
     def test_calls_parse_string(self, patch):
         parsers.parse_score_column("score", constants.score_type, program="enrich")
         patch.assert_called()
@@ -211,7 +211,7 @@ def test_ok_enrich2_and_column_not_defined(self):
 
 
 class TestParseOffset(unittest.TestCase):
-    @mock.patch("mavedbconvert.parsers.parse_numeric", return_value=0)
+    @patch("mavedbconvert.parsers.parse_numeric", return_value=0)
     def test_calls_parse_numeric(self, patch):
         parsers.parse_offset(0, program="enrich")
         patch.assert_called()

From fb7f2ca03e49f618464106885b22fdc692be2c91 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:10:33 +1100
Subject: [PATCH 20/26] requirements files no longer needed

---
 requirements/dev.txt     |  8 --------
 requirements/install.txt | 12 ------------
 2 files changed, 20 deletions(-)
 delete mode 100644 requirements/dev.txt
 delete mode 100644 requirements/install.txt

diff --git a/requirements/dev.txt b/requirements/dev.txt
deleted file mode 100644
index e003b59..0000000
--- a/requirements/dev.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-black
-ipython
-pylint
-tox
-pytest
-pytest-sugar
-pytest-cov
-mock
\ No newline at end of file
diff --git a/requirements/install.txt b/requirements/install.txt
deleted file mode 100644
index a1f1556..0000000
--- a/requirements/install.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-tables>=3.2.0
-pandas>=0.18.0,<=0.24.0
-xlrd >= 0.9.0
-tqdm
-docopt
-git+https://github.com/FowlerLab/hgvs-patterns.git
-hgvs
-requests
-numpy
-scipy
-joblib
-xlsxwriter
\ No newline at end of file

From d251b6883e97422cb0f4d2e6fcfd2f6dd9aba03e Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:17:48 +1100
Subject: [PATCH 21/26] added expected failures for lingering Enrich2 converter
 issues

---
 mavedbconvert/tests/test_enrich2.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index 02ac246..384051d 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -49,16 +49,22 @@ def tearDown(self):
         if os.path.isfile(self.path):
             os.unlink(self.path)
 
+    # TODO: pandas deprecation fix
+    @unittest.expectedFailure
     def test_column_names_combine_selection_and_timepoint(self):
         cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1")
         self.assertListEqual(
             list(cnd_df.columns), ["rep1_t0", "rep1_t1", "rep2_t0", "rep2_t1"]
         )
 
+    # TODO: pandas deprecation fix
+    @unittest.expectedFailure
     def test_index_of_dfs_match_index_of_scores(self):
         cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1")
         assert_index_equal(self.store["/main/variants/scores/"].index, cnd_df.index)
 
+    # TODO: pandas deprecation fix
+    @unittest.expectedFailure
     def test_row_filled_with_nans_filtered_index_not_in_counts(self):
         cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1")
         self.assertTrue(np.all(cnd_df.loc["c.3A>G", :].isnull()))
@@ -808,6 +814,8 @@ def test_outputs_expected_variants_scores_for_each_condition(self):
             ].values.astype(float)
         assert_frame_equal(result, expected)
 
+    # TODO: pandas deprecation fix
+    @unittest.expectedFailure
     def test_counts_and_scores_output_define_same_variants_when_input_does_not(self):
         self.store.close()
         self.store = pd.HDFStore(self.path, "w")
@@ -869,6 +877,8 @@ def test_error_file_not_h5_or_tsv(self):
         with self.assertRaises(TypeError):
             p.load_input_file()
 
+    # TODO: sort out reason why hardcoding Enrich2 score column was a fix
+    @unittest.expectedFailure
     def test_scores_tsv_missing_score_column(self):
         path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(

From dd59f2495d9dca1a91c8058787e148a9dd3d2b06 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:20:59 +1100
Subject: [PATCH 22/26] removed extraneous docs references

---
 .travis.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index cb1cab5..205f79c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,11 +19,8 @@ before_script:
   - pip3 install coverage
   - pip3 install coveralls
   - if ! $NO_MYPY; then pip3 install mypy; fi
-  - pip3 install sphinx
-  - pip3 install sphinx-rtd-theme
 script:
   - coverage run --source fqfa -m unittest
   - if ! $NO_MYPY; then mypy fqfa tests; fi
-  - cd docs && make doctest
 after_success:
   - coveralls

From 25cc364ddd4c260c74afcdc0787ce34b816aae69 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:23:15 +1100
Subject: [PATCH 23/26] updated name and instructions

---
 README.md | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index d12bdc4..fb1d7c3 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,21 @@
-# mavedb-convert
-A command line tool for converting alternate file formats into a MaveDB compliant format.
+# mavedbconvert
+A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format.
 
 # Installation
-Download the `mavedb-convert` source and navigate to that directory.
+Download the mavedbconvert source and navigate to that directory.
 We recommend creating a [virtual environment](https://docs.python.org/3/library/venv.html) before proceeding with the installation.
 
-Install dependencies using the requirements file and then install the package:
+Install the package using pip:
 
-    pip3 install -r requirements/install.txt
     pip3 install .
 
-Additional requirements needed for running the unit tests and doing package development are in `reuirements/dev.txt`
-
 ## Troubleshooting
-If you are a OSX user, you may experience header related issues when installing `pysam`. The current workaround 
-is to install pysam version `0.13` manually before installing the requirements:
+If you are an OSX user, you may experience header-related issues when installing pysam. The current workaround 
+is to install pysam v0.13 manually before installing the package:
 
-    pip install pysam==0.13
+    pip3 install pysam==0.13
 
 This is the latest version known to compile without errors.
 
-Although `pysam` is not required for `mavedb-convert` directly, it is installed by some of our dependencies. Until it is removed or made optional by those libraries, `mavedb-convert` will unfortunately not be installable on Windows.
+Although pysam is not required for mavedbconvert directly, it is installed by some of our dependencies.
+Until it is removed or made optional by those libraries, mavedbconvert will unfortunately not be installable on Windows.

From 3e2a2b543d281a3beb5f333a64a73f618ef1a7ca Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:26:38 +1100
Subject: [PATCH 24/26] fixed incorrect package name for coverage and mypy

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 205f79c..ba833e5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ before_script:
   - pip3 install coveralls
   - if ! $NO_MYPY; then pip3 install mypy; fi
 script:
-  - coverage run --source fqfa -m unittest
-  - if ! $NO_MYPY; then mypy fqfa tests; fi
+  - coverage run --source mavedbconvert -m unittest
+  - if ! $NO_MYPY; then mypy mavedbconvert tests; fi
 after_success:
   - coveralls

From 8f67a2fa4622e4791d7e3b9b91674ca7e00ccf56 Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Mon, 30 Mar 2020 17:36:34 +1100
Subject: [PATCH 25/26] added badges for master branch

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index fb1d7c3..dbc1b07 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,7 @@
+[![Build Status](https://travis-ci.com/VariantEffect/mavedbconvert.svg?branch=master)](https://travis-ci.com/VariantEffect/mavedbconvert)
+[![Coverage Status](https://coveralls.io/repos/github/VariantEffect/mavedbconvert/badge.svg?branch=master)](https://coveralls.io/github/VariantEffect/mavedbconvert?branch=master)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
 # mavedbconvert
 A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format.
 

From b991b7fb29b6c69b54e949366ee8843762503ebd Mon Sep 17 00:00:00 2001
From: Alan Rubin <alan.rubin@wehi.edu.au>
Date: Wed, 1 Apr 2020 15:57:39 +1100
Subject: [PATCH 26/26] resolved expected failures

---
 mavedbconvert/enrich2.py            |  7 +++++--
 mavedbconvert/tests/test_enrich2.py | 10 ----------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/mavedbconvert/enrich2.py b/mavedbconvert/enrich2.py
index d99ffe6..2263976 100644
--- a/mavedbconvert/enrich2.py
+++ b/mavedbconvert/enrich2.py
@@ -241,7 +241,10 @@ def get_count_dataframe_by_condition(
             return None
         filtered = store["/main/{}/scores".format(element)].index
 
-    df = store[count_key].loc[filtered, idx[cnd, :, :]]
+    # TODO: revisit tests to see if preserving the all-NA rows makes sense
+    store_df = store[count_key]
+    store_df = store_df.reindex(filtered)
+    df = store_df.loc[filtered, idx[cnd, :, :]]
     df.columns = flatten_column_names(df.columns, (1, 2))
     return df
 
@@ -275,7 +278,7 @@ def __init__(
             skip_header_rows=skip_header_rows,
             skip_footer_rows=skip_footer_rows,
             sheet_name=sheet_name,
-            score_column="score",
+            score_column=score_column,
             hgvs_column=hgvs_column,
             input_type=input_type,
         )
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index 384051d..02ac246 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -49,22 +49,16 @@ def tearDown(self):
         if os.path.isfile(self.path):
             os.unlink(self.path)
 
-    # TODO: pandas deprecation fix
-    @unittest.expectedFailure
     def test_column_names_combine_selection_and_timepoint(self):
         cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1")
         self.assertListEqual(
             list(cnd_df.columns), ["rep1_t0", "rep1_t1", "rep2_t0", "rep2_t1"]
         )
 
-    # TODO: pandas deprecation fix
-    @unittest.expectedFailure
     def test_index_of_dfs_match_index_of_scores(self):
         cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1")
         assert_index_equal(self.store["/main/variants/scores/"].index, cnd_df.index)
 
-    # TODO: pandas deprecation fix
-    @unittest.expectedFailure
     def test_row_filled_with_nans_filtered_index_not_in_counts(self):
         cnd_df = enrich2.get_count_dataframe_by_condition(self.store, cnd="c1")
         self.assertTrue(np.all(cnd_df.loc["c.3A>G", :].isnull()))
@@ -814,8 +808,6 @@ def test_outputs_expected_variants_scores_for_each_condition(self):
             ].values.astype(float)
         assert_frame_equal(result, expected)
 
-    # TODO: pandas deprecation fix
-    @unittest.expectedFailure
     def test_counts_and_scores_output_define_same_variants_when_input_does_not(self):
         self.store.close()
         self.store = pd.HDFStore(self.path, "w")
@@ -877,8 +869,6 @@ def test_error_file_not_h5_or_tsv(self):
         with self.assertRaises(TypeError):
             p.load_input_file()
 
-    # TODO: sort out reason why hardcoding Enrich2 score column was a fix
-    @unittest.expectedFailure
     def test_scores_tsv_missing_score_column(self):
         path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
         p = enrich2.Enrich2(