diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..ba833e5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+language: python
+matrix:
+  include:
+    - python: "3.6"
+    - python: "3.7"
+    - python: "3.8"
+    - python: "3.8-dev"
+    - python: "3.9-dev"
+    - python: "pypy3"
+      env: NO_MYPY=true
+  allow_failures:
+    - python: "3.8-dev"
+    - python: "3.9-dev"
+    - python: "pypy3"
+      env: NO_MYPY=true
+install:
+  - pip3 install .
+before_script:
+  - pip3 install coverage
+  - pip3 install coveralls
+  - if ! $NO_MYPY; then pip3 install mypy; fi
+script:
+  - coverage run --source mavedbconvert -m unittest
+  - if ! $NO_MYPY; then mypy mavedbconvert tests; fi
+after_success:
+  - coveralls
diff --git a/README.md b/README.md
index d12bdc4..dbc1b07 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,25 @@
-# mavedb-convert
-A command line tool for converting alternate file formats into a MaveDB compliant format.
+[![Build Status](https://travis-ci.com/VariantEffect/mavedbconvert.svg?branch=master)](https://travis-ci.com/VariantEffect/mavedbconvert)
+[![Coverage Status](https://coveralls.io/repos/github/VariantEffect/mavedbconvert/badge.svg?branch=master)](https://coveralls.io/github/VariantEffect/mavedbconvert?branch=master)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+# mavedbconvert
+A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format.
 
 # Installation
-Download the `mavedb-convert` source and navigate to that directory.
+Download the mavedbconvert source and navigate to that directory.
 We recommend creating a [virtual environment](https://docs.python.org/3/library/venv.html) before proceeding with the installation.
 
-Install dependencies using the requirements file and then install the package:
+Install the package using pip:
 
-    pip3 install -r requirements/install.txt
     pip3 install .
 
-Additional requirements needed for running the unit tests and doing package development are in `reuirements/dev.txt`
-
 ## Troubleshooting
-If you are a OSX user, you may experience header related issues when installing `pysam`. The current workaround
-is to install pysam version `0.13` manually before installing the requirements:
+If you are an OSX user, you may experience header-related issues when installing pysam. The current workaround
+is to install pysam v0.13 manually before installing the package:
 
-    pip install pysam==0.13
+    pip3 install pysam==0.13
 
 This is the latest version known to compile without errors.
 
-Although `pysam` is not required for `mavedb-convert` directly, it is installed by some of our dependencies. Until it is removed or made optional by those libraries, `mavedb-convert` will unfortunately not be installable on Windows.
+Although pysam is not required for mavedbconvert directly, it is installed by some of our dependencies.
+Until it is removed or made optional by those libraries, mavedbconvert will unfortunately not be installable on Windows.
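A minimal sketch of the install flow recommended in the README above, assuming a POSIX shell and the standard-library venv module (on OSX, run `pip3 install pysam==0.13` first if the pysam build fails, per the troubleshooting note):

    python3 -m venv .venv
    source .venv/bin/activate
    pip3 install .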
diff --git a/mavedbconvert/enrich2.py b/mavedbconvert/enrich2.py
index d99ffe6..2263976 100644
--- a/mavedbconvert/enrich2.py
+++ b/mavedbconvert/enrich2.py
@@ -241,7 +241,10 @@ def get_count_dataframe_by_condition(
         return None
 
     filtered = store["/main/{}/scores".format(element)].index
-    df = store[count_key].loc[filtered, idx[cnd, :, :]]
+    # TODO: revisit tests to see if preserving the all-NA rows makes sense
+    store_df = store[count_key]
+    store_df = store_df.reindex(filtered)
+    df = store_df.loc[filtered, idx[cnd, :, :]]
     df.columns = flatten_column_names(df.columns, (1, 2))
     return df
 
@@ -275,7 +278,7 @@ def __init__(
             skip_header_rows=skip_header_rows,
             skip_footer_rows=skip_footer_rows,
             sheet_name=sheet_name,
-            score_column="score",
+            score_column=score_column,
             hgvs_column=hgvs_column,
             input_type=input_type,
         )
diff --git a/mavedbconvert/main.py b/mavedbconvert/main.py
index 2cb84c8..5df79d3 100644
--- a/mavedbconvert/main.py
+++ b/mavedbconvert/main.py
@@ -12,11 +12,11 @@
 All outputs are in 1-based coordinates.
 
 Usage:
-  mavedb-convert enrich2 [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding]
-  mavedb-convert enrich [--dst=D] [--wtseq=W] [--offset=O] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
-  mavedb-convert empiric [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
-  mavedb-convert -h | --help
-  mavedb-convert --version
+  mavedbconvert enrich2 [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding]
+  mavedbconvert enrich [--dst=D] [--wtseq=W] [--offset=O] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
+  mavedbconvert empiric [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
+  mavedbconvert -h | --help
+  mavedbconvert --version
 
 Options:
diff --git a/mavedbconvert/tests/__init__.py b/mavedbconvert/tests/__init__.py
index 37466b8..4f9493c 100644
--- a/mavedbconvert/tests/__init__.py
+++ b/mavedbconvert/tests/__init__.py
@@ -1,5 +1,7 @@
 import os
+import shutil
 from unittest import TestCase
+from tempfile import TemporaryDirectory
 
 import pandas as pd
 
@@ -17,19 +19,30 @@
 ]
 
 
+# TODO: think up a better name for this class
+# TODO: remove the old self.bin stuff
 class ProgramTestCase(TestCase):
     def setUp(self):
+        self._data_dir = TemporaryDirectory()  # store the object
+        self.data_dir = os.path.join(
+            self._data_dir.name, "data"
+        )  # store the directory path
+        shutil.copytree(
+            src=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"),
+            dst=self.data_dir,
+        )
         self.bin = []
 
     def mock_multi_sheet_excel_file(self, path, data):
         writer = pd.ExcelWriter(path, engine="xlsxwriter")
         for i, di in enumerate(data):
             df = pd.DataFrame(di)
-            df.to_excel(writer, sheet_name="Sheet{}".format(i))
+            df.to_excel(writer, sheet_name="Sheet{}".format(i), index=False)
         writer.save()
         self.bin.append(path)
 
     def tearDown(self):
+        self._data_dir.cleanup()
         for path in self.bin:
             if os.path.exists(path) and os.path.isfile(path):
                 os.remove(path)
diff --git a/mavedbconvert/tests/data/empiric.xlsx b/mavedbconvert/tests/data/empiric/empiric.xlsx
similarity index 100%
rename from mavedbconvert/tests/data/empiric.xlsx
rename to mavedbconvert/tests/data/empiric/empiric.xlsx
diff --git a/mavedbconvert/tests/data/empiric_expected.csv b/mavedbconvert/tests/data/empiric/empiric_expected.csv
similarity index 100%
rename from mavedbconvert/tests/data/empiric_expected.csv
rename to mavedbconvert/tests/data/empiric/empiric_expected.csv
diff --git a/mavedbconvert/tests/data/enrich1.tsv b/mavedbconvert/tests/data/enrich/enrich.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1.tsv
rename to mavedbconvert/tests/data/enrich/enrich.tsv
diff --git a/mavedbconvert/tests/data/enrich1.xlsx b/mavedbconvert/tests/data/enrich/enrich.xlsx
similarity index 100%
rename from mavedbconvert/tests/data/enrich1.xlsx
rename to mavedbconvert/tests/data/enrich/enrich.xlsx
diff --git a/mavedbconvert/tests/data/enrich1_1based.tsv b/mavedbconvert/tests/data/enrich/enrich_1based.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_1based.tsv
rename to mavedbconvert/tests/data/enrich/enrich_1based.tsv
diff --git a/mavedbconvert/tests/data/enrich1_expected.csv b/mavedbconvert/tests/data/enrich/enrich_expected.csv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_expected.csv
rename to mavedbconvert/tests/data/enrich/enrich_expected.csv
diff --git a/mavedbconvert/tests/data/enrich1_expected_offset.csv b/mavedbconvert/tests/data/enrich/enrich_expected_offset.csv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_expected_offset.csv
rename to mavedbconvert/tests/data/enrich/enrich_expected_offset.csv
diff --git a/mavedbconvert/tests/data/enrich1_no_seqid.tsv b/mavedbconvert/tests/data/enrich/enrich_no_seqid.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich1_no_seqid.tsv
rename to mavedbconvert/tests/data/enrich/enrich_no_seqid.tsv
diff --git a/mavedbconvert/tests/data/dummy.h5 b/mavedbconvert/tests/data/enrich2/dummy.h5
similarity index 100%
rename from mavedbconvert/tests/data/dummy.h5
rename to mavedbconvert/tests/data/enrich2/dummy.h5
diff --git a/mavedbconvert/tests/data/enrich2.tsv b/mavedbconvert/tests/data/enrich2/enrich2.tsv
similarity index 100%
rename from mavedbconvert/tests/data/enrich2.tsv
rename to mavedbconvert/tests/data/enrich2/enrich2.tsv
diff --git a/mavedbconvert/tests/data/bad_format.fasta b/mavedbconvert/tests/data/fasta/bad_format.fasta
similarity index 100%
rename from mavedbconvert/tests/data/bad_format.fasta
rename to mavedbconvert/tests/data/fasta/bad_format.fasta
diff --git a/mavedbconvert/tests/data/invalid_chars.fasta b/mavedbconvert/tests/data/fasta/invalid_chars.fasta
similarity index 100%
rename from mavedbconvert/tests/data/invalid_chars.fasta
rename to mavedbconvert/tests/data/fasta/invalid_chars.fasta
diff --git a/mavedbconvert/tests/data/lower.fa b/mavedbconvert/tests/data/fasta/lower.fa
similarity index 100%
rename from mavedbconvert/tests/data/lower.fa
rename to mavedbconvert/tests/data/fasta/lower.fa
diff --git a/mavedbconvert/tests/data/spaces.fasta b/mavedbconvert/tests/data/fasta/spaces.fasta
similarity index 100%
rename from mavedbconvert/tests/data/spaces.fasta
rename to mavedbconvert/tests/data/fasta/spaces.fasta
diff --git a/mavedbconvert/tests/data/two.fasta b/mavedbconvert/tests/data/fasta/two.fasta
similarity index 100%
rename from mavedbconvert/tests/data/two.fasta
rename to mavedbconvert/tests/data/fasta/two.fasta
diff --git a/mavedbconvert/tests/data/wt.fasta b/mavedbconvert/tests/data/fasta/wt.fasta
similarity index 100%
rename from mavedbconvert/tests/data/wt.fasta
rename to mavedbconvert/tests/data/fasta/wt.fasta
diff --git a/mavedbconvert/tests/data/wt.fasta.bz2 b/mavedbconvert/tests/data/fasta/wt.fasta.bz2
similarity index 100%
rename from mavedbconvert/tests/data/wt.fasta.bz2
rename to mavedbconvert/tests/data/fasta/wt.fasta.bz2
diff --git a/mavedbconvert/tests/data/wt.fasta.gz b/mavedbconvert/tests/data/fasta/wt.fasta.gz
similarity index 100%
rename from mavedbconvert/tests/data/wt.fasta.gz
rename to mavedbconvert/tests/data/fasta/wt.fasta.gz
diff --git a/mavedbconvert/tests/test_base.py b/mavedbconvert/tests/test_base.py
index 352482b..7b356b3 100644
--- a/mavedbconvert/tests/test_base.py
+++ b/mavedbconvert/tests/test_base.py
@@ -1,26 +1,23 @@
 import os
-import mock
+import unittest
+from unittest.mock import patch
 
-from .. import base, exceptions
+from mavedbconvert import base, exceptions
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
-
-
-class TestBaseProgram(ProgramTestCase):
+class TestPaths(ProgramTestCase):
     """
     Test __init__ correctly sets up read and write directories,
-    sequence information etc.
+    etc.
     """
 
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(DATA_DIR, "enrich1.tsv")
-        self.src_with_spaces = os.path.join(DATA_DIR, "enrich 1.tsv")
-        self.h5_src = os.path.join(DATA_DIR, "dummy.h5")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich .tsv")
+        self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5")
 
     def tearDown(self):
         for path in self.bin:
@@ -31,7 +28,7 @@ def test_sets_directory_as_input_directory_if_dst_is_none(self):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.dst, DATA_DIR)
+        self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich"))
 
     def test_error_file_not_readable(self):
         with self.assertRaises(IOError):
@@ -43,45 +40,43 @@ def test_expands_user_and_norms_dst(self):
 
     def test_dir_with_input_fname_appended_when_h5_and_dst_is_none(self):
         p = base.BaseProgram(src=self.h5_src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.dst, os.path.join(DATA_DIR, "dummy"))
-        self.bin.append(os.path.join(DATA_DIR, "dummy"))
+        self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich2", "dummy"))
+        self.bin.append(os.path.join(self.data_dir, "enrich2", "dummy"))
 
     def test_creates_directory_tree_if_it_doesnt_exist(self):
-        output = os.path.join(DATA_DIR, "outer_dir/inner_dir/")
+        output = os.path.join(self.data_dir, "enrich2", "outer_dir", "inner_dir")
         base.BaseProgram(src=self.h5_src, dst=output, wt_sequence="AAA")
         self.assertTrue(os.path.isdir(output))
         self.bin.append(output)
 
-    @mock.patch("os.access")
+    @patch("os.access")
     def test_checks_read_permission(self, patch):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
         self.assertEqual(patch.call_args_list[0][0], (p.src, os.R_OK))
 
-    @mock.patch("os.access")
+    @patch("os.access")
    def test_checks_write_permission(self, patch):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
         self.assertEqual(patch.call_args_list[1][0], (p.dst, os.W_OK))
 
     def test_splits_src_into_filename_and_ext(self):
         p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
-        self.assertEqual(p.src_filename, "enrich1")
+        self.assertEqual(p.src_filename, "enrich")
         self.assertEqual(p.ext, ".tsv")
 
     def test_lower_cases_ext(self):
         p = base.BaseProgram(src=self.src.replace("tsv", "TSV"), wt_sequence="AAA")
         self.assertEqual(p.ext, ".tsv")
 
-    def test_value_error_coding_offset_not_multiple_of_three(self):
-        with self.assertRaises(ValueError):
-            base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1)
-
     def test_dst_filename_replaces_whitespace_with_underscores(self):
         p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA")
-        self.assertEqual(p.dst_filename, "mavedb_enrich_1.csv")
+        self.assertEqual(p.dst_filename, "mavedb_enrich_.csv")
 
     def test_output_file_joins_dst_and_dst_filename(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
-        self.assertEqual(p.output_file, os.path.join(DATA_DIR, "mavedb_enrich1.csv"))
+        self.assertEqual(
+            p.output_file, os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv")
+        )
 
     def test_output_directory_expands_user_and_norms_path(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
@@ -90,6 +85,29 @@ def test_output_directory_expands_user_and_norms_path(self):
             p.output_directory, os.path.join(os.path.expanduser("~"), "user")
         )
 
+
+class TestWtSequence(ProgramTestCase):
+    """
+    Test __init__ correctly sets up sequence information etc.
+    """
+
+    def setUp(self):
+        super().setUp()
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich .tsv")
+        self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5")
+
+    def tearDown(self):
+        for path in self.bin:
+            if os.path.exists(path) and os.path.isfile(path):
+                os.remove(path)
+            elif os.path.exists(path) and os.path.isdir(path):
+                os.removedirs(path)
+
+    def test_value_error_coding_offset_not_multiple_of_three(self):
+        with self.assertRaises(ValueError):
+            base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1)
+
     # --- Test property setters --- #
     def test_wt_setter_upper_cases_wt_sequence(self):
         p = base.BaseProgram(src=self.src, wt_sequence="AAA")
@@ -126,7 +144,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self):
 
 class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True)
 
     def test_error_not_a_dna_sub(self):
@@ -177,7 +195,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self):
 
 class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.src = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True)
 
     def test_error_not_a_protein_sub(self):
@@ -224,3 +242,7 @@ def test_index_error_index_extends_beyond_indexable_pro_seq(self):
         with self.assertRaises(IndexError):
             self.base.one_based = False
             self.base.validate_against_protein_sequence("p.Met2Lys")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_empiric.py b/mavedbconvert/tests/test_empiric.py
index ec8b9b7..f68feed 100644
--- a/mavedbconvert/tests/test_empiric.py
+++ b/mavedbconvert/tests/test_empiric.py
@@ -1,23 +1,19 @@
 import os
-from unittest import TestCase
+import unittest
 
 import pandas as pd
 import numpy as np
 from pandas.testing import assert_frame_equal, assert_series_equal
 
-from .. import empiric, constants
+from mavedbconvert import empiric, constants
 
-from . import ProgramTestCase
-
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+from mavedbconvert.tests import ProgramTestCase
 
 
 class TestEmpiricInit(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
 
     def test_error_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -27,7 +23,7 @@ def test_ok_is_mult_of_three(self):
         empiric.Empiric(src=self.path, wt_sequence="ATC", offset=3)
 
 
-class TestInferProEvent(TestCase):
+class TestInferProEvent(unittest.TestCase):
     def test_infers_equal_event(self):
         self.assertEqual(
             empiric.infer_pro_substitution(mut_aa="V", wt_aa="v", codon_pos=0),
@@ -47,7 +43,7 @@ def test_converts_triple_q_to_Xaa(self):
         )
 
 
-class TestInferNTEvent(TestCase):
+class TestInferNTEvent(unittest.TestCase):
     def test_infers_equal_event(self):
         self.assertEqual(
             empiric.infer_nt_substitution(wt_codon="aaa", mut_codon="AAA", codon_pos=0),
@@ -70,7 +66,7 @@ def test_adds_codon_pos_multiplied_by_3_to_position(self):
 class TestEmpiric(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(DATA_DIR, "empiric.xlsx")
+        self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input, wt_sequence="AAA", one_based=False
         )
@@ -177,10 +173,10 @@ def test_correctly_infers_hgvs_nt_positions_when_one_based(self):
         self.assertEqual(hgvs_nt, "c.[1G>A;2T>A;3A>T]")
 
 
-class TestEmpiricValidateColumns(TestCase):
+class TestEmpiricValidateColumns(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(DATA_DIR, "empiric.xlsx")
+        self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input, wt_sequence="AAA", one_based=False
         )
@@ -219,7 +215,7 @@ def test_sets_aa_column(self):
 class TestEmpiricParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.input = os.path.join(DATA_DIR, "empiric.xlsx")
+        self.input = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
         self.empiric = empiric.Empiric(
             src=self.input,
             wt_sequence="AAA",
@@ -356,20 +352,18 @@ def test_keeps_int_type_as_int(self):
 class TestEmpiricLoadInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "empiric.xlsx")
-        self.tmp_path = os.path.join(DATA_DIR, "tmp.csv")
-        self.tmp_path_tsv = os.path.join(DATA_DIR, "tmp.tsv")
-        self.tmp_excel_path = os.path.join(DATA_DIR, "tmp.xlsx")
-        self.bin.append(self.tmp_path)
-        self.bin.append(self.tmp_path_tsv)
+        self.excel_path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
+        self.csv_path = os.path.join(self.data_dir, "empiric", "tmp.csv")
+        self.tsv_path = os.path.join(self.data_dir, "empiric", "tmp.tsv")
+        self.multisheet_excel_path = os.path.join(self.data_dir, "empiric", "tmp.xlsx")
 
     def test_extra_na_load_as_nan(self):
         for value in constants.extra_na:
-            df = pd.read_excel(self.path)
+            df = pd.read_excel(self.excel_path)
             df["A"] = [value] * len(df)
-            df.to_csv(self.tmp_path, index=False)
+            df.to_csv(self.csv_path, index=False)
             e = empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column="col_A",
                 input_type=constants.score_type,
@@ -384,9 +378,9 @@ def test_loads_first_sheet_by_default(self):
             {"Position": [0], "Amino Acid": ["K"], "score": [1.2]},
             {"Position": [1], "Amino Acid": ["G"], "score": [1.4]},
         ]
-        self.mock_multi_sheet_excel_file(self.tmp_excel_path, data)
+        self.mock_multi_sheet_excel_file(self.multisheet_excel_path, data)
         p = empiric.Empiric(
-            src=self.tmp_excel_path,
+            src=self.multisheet_excel_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="score",
             input_type=constants.score_type,
@@ -396,10 +390,10 @@ def test_loads_first_sheet_by_default(self):
         assert_frame_equal(df, expected)
 
     def test_handles_csv(self):
-        df = pd.read_excel(self.path)
-        df.to_csv(self.tmp_path, index=False, sep=",")
+        df = pd.read_excel(self.excel_path)
+        df.to_csv(self.csv_path, index=False, sep=",")
         e = empiric.Empiric(
-            src=self.tmp_path,
+            src=self.csv_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -409,10 +403,10 @@ def test_handles_csv(self):
         assert_frame_equal(result, df)
 
     def test_handles_tsv(self):
-        df = pd.read_excel(self.path)
-        df.to_csv(self.tmp_path_tsv, index=False, sep="\t")
+        df = pd.read_excel(self.excel_path)
+        df.to_csv(self.tsv_path, index=False, sep="\t")
         e = empiric.Empiric(
-            src=self.tmp_path_tsv,
+            src=self.tsv_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -422,12 +416,12 @@ def test_handles_tsv(self):
         assert_frame_equal(result, df)
 
     def test_error_position_not_in_columns(self):
-        df = pd.read_excel(self.path)
+        df = pd.read_excel(self.excel_path)
         df = df.drop(columns=["Position"])
-        df.to_csv(self.tmp_path, index=False, sep="\t")
+        df.to_csv(self.csv_path, index=False, sep="\t")
         with self.assertRaises(ValueError):
             e = empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column="col_A",
                 input_type=constants.score_type,
@@ -436,12 +430,12 @@ def test_error_position_not_in_columns(self):
             e.load_input_file()
 
     def test_error_amino_acid_not_in_columns(self):
-        df = pd.read_excel(self.path)
+        df = pd.read_excel(self.excel_path)
         df = df.drop(columns=["Amino Acid"])
-        df.to_csv(self.tmp_path, index=False, sep="\t")
+        df.to_csv(self.csv_path, index=False, sep="\t")
         with self.assertRaises(ValueError):
             e = empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column="col_A",
                 input_type=constants.score_type,
@@ -452,7 +446,7 @@ def test_error_amino_acid_not_in_columns(self):
     def test_not_scores_column_but_input_type_is_scores(self):
         with self.assertRaises(ValueError):
             empiric.Empiric(
-                src=self.tmp_path,
+                src=self.csv_path,
                 wt_sequence="TTTTCTTATTGT",
                 score_column=None,
                 input_type=constants.score_type,
@@ -461,7 +455,7 @@ def test_not_scores_column_but_input_type_is_scores(self):
     def test_applies_offset_to_position_column(self):
         e = empiric.Empiric(
-            src=self.path,
+            src=self.excel_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -475,16 +469,15 @@ def test_applies_offset_to_position_column(self):
 class TestEmpiricConvert(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "empiric.xlsx")
-        self.expected = os.path.join(DATA_DIR, "empiric_expected.csv")
+        self.excel_path = os.path.join(self.data_dir, "empiric", "empiric.xlsx")
+        self.expected = os.path.join(self.data_dir, "empiric", "empiric_expected.csv")
         self.empiric = empiric.Empiric(
-            src=self.path,
+            src=self.excel_path,
             wt_sequence="TTTTCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
             one_based=False,
         )
-        self.bin.append(self.empiric.output_file)
 
     def test_saves_to_dst(self):
         self.empiric.convert()
@@ -492,7 +485,7 @@ def test_saves_to_dst(self):
 
     def test_integration(self):
         self.empiric = empiric.Empiric(
-            src=self.path,
+            src=self.excel_path,
             wt_sequence="TCTTATTGT",
             score_column="col_A",
             input_type=constants.score_type,
@@ -503,3 +496,7 @@
             pd.read_csv(self.empiric.output_file, delimiter=","),
             pd.read_csv(self.expected, delimiter=","),
         )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_enrich.py b/mavedbconvert/tests/test_enrich.py
index b30e3b0..dc40baa 100644
--- a/mavedbconvert/tests/test_enrich.py
+++ b/mavedbconvert/tests/test_enrich.py
@@ -1,16 +1,13 @@
 import os
+import unittest
 
 import pandas as pd
 import numpy as np
 from pandas.testing import assert_frame_equal
 
-from .. import enrich, constants, utilities
+from mavedbconvert import enrich, constants, utilities
 
-from . import ProgramTestCase
-
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+from mavedbconvert.tests import ProgramTestCase
 
 
 WT = (
@@ -22,7 +19,7 @@ class TestEnrichInit(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich", "enrich2.tsv")
 
     def test_error_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -35,7 +32,7 @@ def test_ok_is_mult_of_three(self):
 
 class TestEnrichParseRow(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -128,7 +125,7 @@ def test_applies_offset_divided_by_3(self):
 
 class TestEnrichParseInput(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
         self.enrich = enrich.Enrich(
             src=self.path,
             wt_sequence=WT,
@@ -186,19 +183,22 @@ def test_removes_non_numeric(self):
 
 class TestEnrichLoadInput(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
-        self.path_1based = os.path.join(DATA_DIR, "enrich1_1based.tsv")
-        self.path_csv = os.path.join(DATA_DIR, "enrich1.csv")
-        self.expected = os.path.join(DATA_DIR, "enrich1_expected.csv")
-        self.expected_offset = os.path.join(DATA_DIR, "enrich1_expected_offset.csv")
-        self.excel_path = os.path.join(DATA_DIR, "enrich1.xlsx")
-        self.no_seq_id = os.path.join(DATA_DIR, "enrich1_no_seqid.tsv")
-        self.tmp_path = os.path.join(DATA_DIR, "tmp.xlsx")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.path_1based = os.path.join(self.data_dir, "enrich", "enrich_1based.tsv")
+        self.path_csv = os.path.join(self.data_dir, "enrich", "enrich1.csv")
+        self.expected = os.path.join(self.data_dir, "enrich", "enrich_expected.csv")
+        self.expected_offset = os.path.join(
+            self.data_dir, "enrich", "enrich_expected_offset.csv"
+        )
+        self.excel_path = os.path.join(self.data_dir, "enrich", "enrich.xlsx")
+        self.no_seq_id = os.path.join(self.data_dir, "enrich", "enrich_no_seqid.tsv")
+        self.tmp_path = os.path.join(self.data_dir, "enrich", "tmp.xlsx")
         self.bin = [
-            os.path.join(DATA_DIR, "mavedb_enrich1.csv"),
-            os.path.join(DATA_DIR, "mavedb_enrich1_1based.csv"),
-            os.path.join(DATA_DIR, self.path_csv),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich1.csv"),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich1_1based.csv"),
+            os.path.join(self.data_dir, "enrich", self.path_csv),
         ]
 
     def test_error_seq_id_not_in_columns(self):
@@ -279,17 +279,20 @@ def test_table_and_excel_load_same_dataframe(self):
 
 class TestEnrichIntegration(ProgramTestCase):
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "enrich1.tsv")
-        self.path_1based = os.path.join(DATA_DIR, "enrich1_1based.tsv")
-        self.excel_path = os.path.join(DATA_DIR, "enrich1.xlsx")
-        self.no_seq_id = os.path.join(DATA_DIR, "enrich1_no_seqid.tsv")
-
-        self.expected = os.path.join(DATA_DIR, "enrich1_expected.csv")
-        self.expected_offset = os.path.join(DATA_DIR, "enrich1_expected_offset.csv")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich", "enrich.tsv")
+        self.path_1based = os.path.join(self.data_dir, "enrich", "enrich_1based.tsv")
+        self.excel_path = os.path.join(self.data_dir, "enrich", "enrich.xlsx")
+        self.no_seq_id = os.path.join(self.data_dir, "enrich", "enrich_no_seqid.tsv")
+
+        self.expected = os.path.join(self.data_dir, "enrich", "enrich_expected.csv")
+        self.expected_offset = os.path.join(
+            self.data_dir, "enrich", "enrich_expected_offset.csv"
+        )
         self.bin = [
-            os.path.join(DATA_DIR, "mavedb_enrich1.csv"),
-            os.path.join(DATA_DIR, "mavedb_enrich1_1based.csv"),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv"),
+            os.path.join(self.data_dir, "enrich", "mavedb_enrich_1based.csv"),
         ]
 
     def test_saves_to_input_dst_by_default(self):
@@ -329,3 +332,7 @@ def test_output_from_one_based_input(self):
         result = pd.read_csv(self.bin[1])
         expected = pd.read_csv(self.expected)
         assert_frame_equal(expected, result)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_enrich2.py b/mavedbconvert/tests/test_enrich2.py
index 882f07d..02ac246 100644
--- a/mavedbconvert/tests/test_enrich2.py
+++ b/mavedbconvert/tests/test_enrich2.py
@@ -1,6 +1,6 @@
 import os
-import mock
-from unittest import TestCase
+import unittest
+from unittest.mock import patch
 from itertools import product
 
 import hgvsp
@@ -10,25 +10,22 @@
 import pandas as pd
 from pandas.testing import assert_index_equal, assert_frame_equal
 
-from .. import validators, enrich2, constants, exceptions
+from mavedbconvert import validators, enrich2, constants, exceptions
 
-from . import ProgramTestCase
-
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+from mavedbconvert.tests import ProgramTestCase
 
 
 # Utility tests
 # --------------------------------------------------------------------------- #
-class TestGetCountDataFrames(TestCase):
+class TestGetCountDataFrames(ProgramTestCase):
     """
    Test method get_count_dataframes checking if conditions are correctly parsed.
     """
 
     def setUp(self):
-        self.path = os.path.join(DATA_DIR, "test_store.h5")
+        super().setUp()
+        self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5")
         self.store = pd.HDFStore(self.path, "w")
         index = pd.MultiIndex.from_product(
             [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]],
@@ -77,7 +74,7 @@ def test_returns_empty_when_missing_counts_key(self):
         self.assertIsNone(cnd_df)
 
 
-class TestFlattenColumnNames(TestCase):
+class TestFlattenColumnNames(unittest.TestCase):
     def setUp(self):
         index = pd.MultiIndex.from_product(
             [["c1", "c2"], ["rep1", "rep2"], ["t0", "t1"]],
@@ -97,14 +94,15 @@ def test_column_names_combine_columns_using_ordering(self):
         self.assertListEqual(cnames, ["t0_rep1", "t1_rep1", "t0_rep2", "t1_rep2"])
 
 
-class TestReplicateScoreDataFrames(TestCase):
+class TestReplicateScoreDataFrames(ProgramTestCase):
     """
     Test method get_replicate_score_dataframes checking if conditions
     are correctly parsed.
""" def setUp(self): - self.path = os.path.join(DATA_DIR, "test_store.h5") + super().setUp() + self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") shared_index = pd.MultiIndex.from_product( @@ -126,6 +124,7 @@ def setUp(self): ) def tearDown(self): + super().tearDown() self.store.close() if os.path.isfile(self.path): os.unlink(self.path) @@ -167,7 +166,7 @@ def test_assertion_error_scores_shared_scores_different_index(self): enrich2.get_replicate_score_dataframes(self.store) -class TestDropNull(TestCase): +class TestDropNull(unittest.TestCase): def test_calls_drop_na_rows_from_scores_inplace(self): df = pd.DataFrame({"A": [None, 1]}) enrich2.drop_null(df) @@ -277,7 +276,7 @@ def test_scores_and_counts_columns_separated_after_join(self): class TestEnrich2ConvertH5Filepath(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.h5") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -294,20 +293,22 @@ def test_concats_basename_elem_type_then_cnd_and_csv_ext(self): class TestEnrich2ConvertH5Df(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "enrich2.h5") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - self.bin.append(os.path.join(DATA_DIR, "enrich2")) + self.bin.append(os.path.join(self.data_dir, "enrich2", "enrich2")) def test_doesnt_open_invalid_rows_file_if_there_are_no_invalid_rows(self): - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" + invalid_rows_path = os.path.join( + os.path.dirname(self.path), "enrich2_invalid_rows.csv" + ) df = pd.DataFrame(data={"score": [1]}, index=["c.1A>G (p.Lys1Val)"]) self.enrich2.convert_h5_df( df=df, element=constants.variants_table, df_type=constants.score_type ) - self.assertFalse(os.path.isfile(fpath)) + self.assertFalse(os.path.isfile(invalid_rows_path)) def test_drops_non_numeric_columns(self): df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1A>G (p.Lys1Val)"]) @@ -332,7 +333,7 @@ def test_sets_index_as_input_index(self): assert_index_equal(result.index, df.index) def test_opens_invalid_rows_file_for_invalid_rows(self): - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") df = pd.DataFrame(data={"score": [1], "B": ["a"]}, index=["c.1T>G (p.Lys1Val)"]) with self.assertRaises(ValueError): @@ -340,14 +341,19 @@ def test_opens_invalid_rows_file_for_invalid_rows(self): df=df, element=constants.variants_table, df_type=constants.score_type ) - fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" - self.assertTrue(os.path.isfile(fpath)) - self.bin.append(fpath) + invalid_rows_path = os.path.join( + os.path.dirname(self.path), "enrich2_invalid_rows.csv" + ) + + self.assertTrue(os.path.isfile(invalid_rows_path)) + self.bin.append(invalid_rows_path) def test_invalid_rows_file_contains_error_description(self): - self.path = os.path.join(DATA_DIR, "enrich2.tsv") + self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") - 
fpath = str(self.path.split(".")[0]) + "_invalid_rows.csv" + invalid_rows_path = os.path.join( + os.path.dirname(self.path), "enrich2_invalid_rows.csv" + ) df = pd.DataFrame( data={"score": [1.1, 1.2]}, @@ -355,20 +361,20 @@ def test_invalid_rows_file_contains_error_description(self): ) self.enrich2.convert_h5_df(df=df, df_type=constants.score_type, element=None) - self.assertTrue(os.path.isfile(fpath)) + self.assertTrue(os.path.isfile(invalid_rows_path)) - invalid = pd.read_csv(fpath, sep=",", index_col=0) + invalid = pd.read_csv(invalid_rows_path, sep=",", index_col=0) self.assertEqual(len(invalid), 1) self.assertEqual(invalid.index[0], "c.1T>G (p.Lys1Val)") self.assertIn("error_description", invalid.columns) - self.bin.append(fpath) + self.bin.append(invalid_rows_path) class TestEnrich2ParseInput(ProgramTestCase): def setUp(self): super().setUp() self.wt = "GCTGAT" - self.path = os.path.join(DATA_DIR, "test_store.h5") + self.path = os.path.join(self.data_dir, "enrich2", "test_store.h5") self.store = pd.HDFStore(self.path, "w") self.enrich2 = enrich2.Enrich2( self.path, wt_sequence=self.wt, offset=0, one_based=True @@ -387,42 +393,66 @@ def setUp(self): self.files = [ os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_counts_c1.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_counts_c1.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_counts_c2.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_counts_c2.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_scores_c1.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_scores_c1.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_synonymous_scores_c2.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_synonymous_scores_c2.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_counts_c1.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_variants_counts_c1.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_counts_c2.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_variants_counts_c2.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_scores_c1.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_variants_scores_c1.csv", ) ), os.path.normpath( os.path.join( - DATA_DIR, "test_store", "mavedb_test_store_variants_scores_c2.csv" + self.data_dir, + "enrich2", + "test_store", + "mavedb_test_store_variants_scores_c2.csv", ) ), ] @@ -529,30 +559,32 @@ def tearDown(self): def parse_rows(self, variants, element=None): return [self.enrich2.parse_row((v, element)) for v in list(variants)] - @mock.patch.object(pd.DataFrame, "to_csv", return_value=None) + @patch.object(pd.DataFrame, "to_csv", return_value=None) def test_saves_to_output_directory(self, patch): - output = os.path.join(DATA_DIR, "new") + output = os.path.join(self.data_dir, "enrich2", "new") p = enrich2.Enrich2(src=self.store, dst=output, wt_sequence=self.wt, offset=0) p.parse_input(p.load_input_file()) for call_args in patch.call_args_list: self.assertIn(output, call_args[0][0]) self.bin.append(output) - @mock.patch.object(pd.DataFrame, "to_csv", return_value=None) + @patch.object(pd.DataFrame, "to_csv", 
return_value=None) def test_saves_to_file_location_if_no_dst_supplied(self, patch): p = enrich2.Enrich2(src=self.store, wt_sequence=self.wt, offset=0) p.parse_input(self.enrich2.load_input_file()) - expected_base_path = os.path.normpath(os.path.join(DATA_DIR, "test_store")) + expected_base_path = os.path.normpath( + os.path.join(self.data_dir, "enrich2", "test_store") + ) for call_args in patch.call_args_list: self.assertIn(expected_base_path, call_args[0][0]) - @mock.patch("mavedbconvert.enrich2.get_replicate_score_dataframes") + @patch("mavedbconvert.enrich2.get_replicate_score_dataframes") def test_iterates_over_all_available_tables(self, patch): self.enrich2.parse_input(self.enrich2.load_input_file()) self.assertIn(constants.synonymous_table, patch.call_args_list[0][0]) self.assertIn(constants.variants_table, patch.call_args_list[1][0]) - @mock.patch( + @patch( "mavedbconvert.enrich2.drop_null", side_effect=lambda scores_df, counts_df: (scores_df, counts_df), ) @@ -830,15 +862,15 @@ def test_drops_null_rows(self): self.assertNotIn("p.Ala1=", df_scores[constants.pro_variant_col]) -class TestEnrich2LoadInput(TestCase): +class TestEnrich2LoadInput(ProgramTestCase): def test_error_file_not_h5_or_tsv(self): - path = os.path.join(DATA_DIR, "empiric.xlsx") + path = os.path.join(self.data_dir, "empiric", "empiric.xlsx") p = enrich2.Enrich2(path, wt_sequence="AAA") with self.assertRaises(TypeError): p.load_input_file() def test_scores_tsv_missing_score_column(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( path, wt_sequence="AAA", @@ -850,7 +882,7 @@ def test_scores_tsv_missing_score_column(self): p.load_input_file() def test_input_type_counts_doesnt_raise_keyerror(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2( path, wt_sequence="AAA", @@ -860,7 +892,7 @@ def test_input_type_counts_doesnt_raise_keyerror(self): p.load_input_file() def test_scores_tsv_missing_hgvs_column(self): - path = os.path.join(DATA_DIR, "enrich2.tsv") + path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv") p = enrich2.Enrich2(path, wt_sequence="AAA", hgvs_column="hgvs") with self.assertRaises(KeyError): p.load_input_file() @@ -869,7 +901,7 @@ def test_scores_tsv_missing_hgvs_column(self): class TestEnrich2ParseRow(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="ACT") self.bin.append(self.path.replace(".h5", "")) @@ -896,7 +928,7 @@ def test_nt_variant_is_none_special_variant_is_from_synonymous_table(self): ), ) - @mock.patch("mavedbconvert.enrich2.apply_offset", return_value="c.3T>C (p.Thr1=)") + @patch("mavedbconvert.enrich2.apply_offset", return_value="c.3T>C (p.Thr1=)") def test_calls_apply_offset_to_variant(self, patch): variant = "c.3T>C (p.=)" self.enrich2.parse_row((variant, None)) @@ -941,7 +973,7 @@ def test_uses_three_qmarks(self): class TestProteinHGVSParsing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -1001,7 +1033,7 @@ def test_maintains_ordering(self): class TestNucleotideHGVSParing(ProgramTestCase): def setUp(self): 
super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA") self.bin.append(self.path.replace(".h5", "")) @@ -1055,7 +1087,7 @@ def test_strips_ws(self): class TestEnrich2MixedHGVSParsing(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.wt = "ACT" self.wt_aa = constants.AA_CODES[constants.CODON_TABLE[self.wt]] self.enrich2 = enrich2.Enrich2(self.path, wt_sequence=self.wt) @@ -1091,7 +1123,7 @@ def test_variant_order_maintained(self): self.assertEqual(nt, "c.[1=;6T>G;2A>T]") self.assertEqual(pro, "p.[Lys1Ile;Asn2Lys]") - @mock.patch.object( + @patch.object( enrich2.Enrich2, "infer_silent_aa_substitution", return_value="p.Lys1=" ) def test_groups_codons(self, patch): @@ -1100,7 +1132,7 @@ def test_groups_codons(self, patch): _, _ = self.enrich2.parse_mixed_variant(variant) patch.assert_called_with(*(["c.1=", "c.2="], variant)) - @mock.patch.object( + @patch.object( enrich2.Enrich2, "infer_silent_aa_substitution", return_value="p.Lys1=" ) def test_calls_infer_with_synonymous_variants_only(self, patch): @@ -1149,7 +1181,7 @@ def test_protein_set_as_nt_when_table_is_not_syn_and_variant_is_special(self): class TestInferSilentAASub(ProgramTestCase): def setUp(self): super().setUp() - self.path = os.path.join(DATA_DIR, "dummy.h5") + self.path = os.path.join(self.data_dir, "enrich2", "dummy.h5") self.enrich2 = enrich2.Enrich2(self.path, wt_sequence="AAA", offset=0) self.bin.append(self.path.replace(".h5", "")) @@ -1194,7 +1226,7 @@ def test_correctly_infers_aa_from_silent_variants(self): self.assertEqual("p.Leu1=", self.enrich2.infer_silent_aa_substitution(group)) -class TestApplyOffset(TestCase): +class TestApplyOffset(ProgramTestCase): def test_mixed_variant_uses_nt_position_to_compute_codon_pos(self): variant = "c.-9A>T (p.Thr2Pro), c.-6C>A (p.Gln3Lys)" offset = -10 @@ -1225,41 +1257,41 @@ def test_applies_offset_to_protein_variant_modulo_3(self): self.assertEqual("p.Leu7=, p.Leu10=", enrich2.apply_offset(variant, offset)) self.assertEqual("p.Leu7=", enrich2.apply_offset("p.Leu10=", offset)) - @mock.patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence") + @patch.object(enrich2.base.BaseProgram, "validate_against_wt_sequence") def test_validates_against_wt_sequence(self, patch): variant = "c.-9C>T" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACT") enrich2.apply_offset(variant, offset=-10, enrich2=p) # pass patch.assert_called_with(*("c.1C>T",)) def test_value_error_base_mismatch_after_offset_applied(self): variant = "c.-9G>T" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACT") with self.assertRaises(ValueError): enrich2.apply_offset(variant, offset=-10, enrich2=p) - @mock.patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence") + @patch.object(enrich2.base.BaseProgram, "validate_against_protein_sequence") def test_validates_against_pro_sequence(self, patch): variant = "p.Gly3Leu" - path = os.path.join(DATA_DIR, "dummy.h5") + path = os.path.join(self.data_dir, "enrich2", "dummy.h5") p = enrich2.Enrich2(path, wt_sequence="ACG") enrich2.apply_offset(variant, offset=6, enrich2=p) # pass 
         patch.assert_called_with(*("p.Gly1Leu",))
 
     def test_value_error_pro_mismatch_after_offset_applied(self):
         variant = "p.Gly3Leu"
-        path = os.path.join(DATA_DIR, "dummy.h5")
+        path = os.path.join(self.data_dir, "enrich2", "dummy.h5")
         p = enrich2.Enrich2(path, wt_sequence="ACG")
         with self.assertRaises(ValueError):
             enrich2.apply_offset(variant, offset=6, enrich2=p)
 
 
-class TestEnrich2Init(TestCase):
+class TestEnrich2Init(ProgramTestCase):
     def setUp(self):
         super().setUp()
-        self.path = os.path.join(DATA_DIR, "enrich2.tsv")
+        self.path = os.path.join(self.data_dir, "enrich2", "enrich2.tsv")
 
     def test_error_is_coding_and_offset_not_mult_of_three(self):
         with self.assertRaises(ValueError):
@@ -1270,3 +1302,7 @@ def test_ok_is_coding_false_and_offset_not_mult_of_three(self):
 
     def test_ok_is_coding_and_offset_mult_of_three(self):
         enrich2.Enrich2(src=self.path, wt_sequence="ATC", is_coding=True, offset=-3)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_fasta.py b/mavedbconvert/tests/test_fasta.py
index 923e6ad..6c6fafb 100644
--- a/mavedbconvert/tests/test_fasta.py
+++ b/mavedbconvert/tests/test_fasta.py
@@ -1,48 +1,47 @@
 import os
-from unittest import TestCase
+import unittest
 
-from ..fasta import parse_fasta, split_fasta_path
+from mavedbconvert.tests import ProgramTestCase
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
+from mavedbconvert.fasta import parse_fasta, split_fasta_path
 
 
-class TestSplitFasta(TestCase):
+class TestFastaPath(ProgramTestCase):
     def test_infers_bzip(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "wt.fasta.bz2")
+            os.path.join(self.data_dir, "fasta", "wt.fasta.bz2")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, "bz2")
 
     def test_infers_gzip(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "wt.fasta.gz")
+            os.path.join(self.data_dir, "fasta", "wt.fasta.gz")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, "gz")
 
-    def test_infers_none(self):
+    def test_infers_uncompressed(self):
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "wt.fasta")
+            os.path.join(self.data_dir, "fasta", "wt.fasta")
         )
         self.assertEqual(ext, ".fasta")
         self.assertEqual(compression, None)
 
         head, base, ext, compression = split_fasta_path(
-            os.path.join(DATA_DIR, "lower.fa")
+            os.path.join(self.data_dir, "fasta", "lower.fa")
         )
         self.assertEqual(ext, ".fa")
         self.assertEqual(compression, None)
 
     def test_ioerror_invalid_ext(self):
         with self.assertRaises(IOError):
-            split_fasta_path(os.path.join(DATA_DIR, "enrich1.tsv"))
+            split_fasta_path(os.path.join(self.data_dir, "enrich", "enrich.tsv"))
 
 
-class TestFastaReader(TestCase):
+class TestFastaReader(ProgramTestCase):
     def test_can_read_first_sequence(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -52,7 +51,7 @@ def test_can_read_first_sequence(self):
         self.assertEqual(sequence, expected)
 
     def test_converts_to_uppercase(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "lower.fa"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "lower.fa"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -63,11 +62,11 @@ def test_converts_to_uppercase(self):
 
     def test_error_more_than_one_sequence(self):
         with self.assertRaises(ValueError):
-            parse_fasta(os.path.join(DATA_DIR, "two.fasta"))
+            parse_fasta(os.path.join(self.data_dir, "fasta", "two.fasta"))
 
     def test_error_invalid_chars_in_sequence(self):
         with self.assertRaises(ValueError):
-            parse_fasta(os.path.join(DATA_DIR, "invalid_chars.fasta"))
+            parse_fasta(os.path.join(self.data_dir, "fasta", "invalid_chars.fasta"))
 
     def test_ignores_blank_lines(self):
         expected = (
@@ -76,15 +75,15 @@ def test_ignores_blank_lines(self):
             "GAGGCTACGATCGTGCCTTGTGGCAAGTCTTCGCTCGCAC"
             "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA"
         )
-        seq = parse_fasta(os.path.join(DATA_DIR, "spaces.fasta"))
+        seq = parse_fasta(os.path.join(self.data_dir, "fasta", "spaces.fasta"))
         self.assertEqual(seq, expected)
 
     def test_error_missing_gt_on_first_line(self):
         with self.assertRaises(IOError):
-            parse_fasta(os.path.join(DATA_DIR, "bad_format.fasta"))
+            parse_fasta(os.path.join(self.data_dir, "fasta", "bad_format.fasta"))
 
     def test_can_open_with_gzip(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta.gz"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta.gz"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -94,7 +93,7 @@ def test_can_open_with_gzip(self):
         self.assertEqual(sequence, expected)
 
     def test_can_open_with_bzip(self):
-        sequence = parse_fasta(os.path.join(DATA_DIR, "wt.fasta.bz2"))
+        sequence = parse_fasta(os.path.join(self.data_dir, "fasta", "wt.fasta.bz2"))
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
             "CGCCAGCGCATACCAGCATAGTAAAGGCAACGGCCTCTGA"
@@ -102,3 +101,7 @@ def test_can_open_with_bzip(self):
             "GCCCTTCCTACCGTGCTATGAGAGGAAATCTCGGGCGTAA"
         )
         self.assertEqual(sequence, expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_filters.py b/mavedbconvert/tests/test_filters.py
index 2b2cbb2..d7b037d 100644
--- a/mavedbconvert/tests/test_filters.py
+++ b/mavedbconvert/tests/test_filters.py
@@ -1,13 +1,13 @@
-from unittest import TestCase
+import unittest
 
 import pandas as pd
 import numpy as np
 
-from .. import filters, constants
+from mavedbconvert import filters, constants
 
 
-class TestDropNaColumns(TestCase):
+class TestDropNaColumns(unittest.TestCase):
     def test_drops_null_nt_column(self):
         df = pd.DataFrame(
             {
@@ -59,7 +59,7 @@ def test_does_not_drop_column_containing_non_null_values(self):
         self.assertIn("A", df)
 
 
-class TestDropNaRows(TestCase):
+class TestDropNaRows(unittest.TestCase):
     def test_drops_null_row(self):
         df = pd.DataFrame({"A": [None], "B": [np.NaN]})
         filters.drop_na_rows(df, inplace=True)
@@ -69,3 +69,7 @@ def test_does_not_drop_row_containing_non_null_values(self):
         df = pd.DataFrame({"A": [None], "B": [0.0]})
         filters.drop_na_rows(df, inplace=True)
         self.assertEqual(len(df), 1)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_parsers.py b/mavedbconvert/tests/test_parsers.py
index db62ea9..f0e3bae 100644
--- a/mavedbconvert/tests/test_parsers.py
+++ b/mavedbconvert/tests/test_parsers.py
@@ -1,17 +1,16 @@
 import os
-import mock
-from unittest import TestCase
+import unittest
+from unittest.mock import patch
 
-from .. import parsers, exceptions, constants
+from mavedbconvert import parsers, exceptions, constants
 
-from . import ProgramTestCase
+from mavedbconvert.tests import ProgramTestCase
 
+# TODO: convert these tests to use temp directories
+TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
 
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-DATA_DIR = os.path.normpath(BASE_DIR + "/data/")
-
 
-class TestParseBoolean(TestCase):
+class TestParseBoolean(unittest.TestCase):
     def test_true_if_str_of_true(self):
         self.assertTrue(parsers.parse_boolean(True))
         self.assertTrue(parsers.parse_boolean("True"))
@@ -23,7 +22,7 @@ def test_false_if_not_repr_of_true(self):
         self.assertFalse(parsers.parse_boolean(False))
 
 
-class TestParseNumeric(TestCase):
+class TestParseNumeric(unittest.TestCase):
     def test_converts_to_dtype(self):
         self.assertIsInstance(
             parsers.parse_numeric("1", name="int", dtype=float), float
         )
@@ -36,7 +35,7 @@ def test_value_error_cannot_cast_to_dtype(self):
             parsers.parse_numeric("a", name="value", dtype=int)
 
 
-class TestParseString(TestCase):
+class TestParseString(unittest.TestCase):
     def test_returns_none_if_falsey(self):
         self.assertIsNone(parsers.parse_string(None))
         self.assertIsNone(parsers.parse_string(" "))
@@ -46,17 +45,17 @@ def test_returns_string_stripped_of_ws(self):
         self.assertEqual(parsers.parse_string(" aaa "), "aaa")
 
 
-class TestParseSrc(TestCase):
-    @mock.patch(
+class TestParseSrc(unittest.TestCase):
+    @patch(
         "mavedbconvert.parsers.parse_string",
-        return_value=os.path.join(DATA_DIR, "enrich2.tsv"),
+        return_value=os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"),
     )
     def test_calls_parse_string(self, patch):
-        parsers.parse_src(os.path.join(DATA_DIR, "enrich2.tsv"))
+        parsers.parse_src(os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv"))
         patch.assert_called()
 
     def test_ok_file_exists(self):
-        path = os.path.join(DATA_DIR, "enrich2.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         self.assertEqual(path, parsers.parse_src(path))
 
     def test_error_no_value(self):
@@ -65,23 +64,25 @@ def test_error_no_value(self):
             parsers.parse_src(v)
 
     def test_error_file_not_found(self):
-        path = os.path.join(DATA_DIR, "missing_file.tsv")
+        path = os.path.join(TEST_DATA_DIR, "enrich2", "missing_file.tsv")
         with self.assertRaises(FileNotFoundError):
             parsers.parse_src(path)
 
     def test_error_file_is_a_dir(self):
         with self.assertRaises(IsADirectoryError):
-            parsers.parse_src(DATA_DIR)
+            parsers.parse_src(os.path.join(TEST_DATA_DIR))
 
 
 class TestParseDst(ProgramTestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value=DATA_DIR)
+    @patch(
+        "mavedbconvert.parsers.parse_string", return_value=os.path.join(TEST_DATA_DIR)
+    )
     def test_calls_parse_string(self, patch):
-        parsers.parse_dst(DATA_DIR)
+        parsers.parse_dst(os.path.join(TEST_DATA_DIR))
         patch.assert_called()
 
     def test_ok_dst_exists(self):
-        path = os.path.join(DATA_DIR)
+        path = os.path.join(os.path.join(TEST_DATA_DIR))
         self.assertEqual(path, parsers.parse_dst(path))
 
     def test_returns_none_no_value(self):
@@ -89,17 +90,17 @@ def test_returns_none_no_value(self):
             self.assertIsNone(parsers.parse_dst(v))
 
     def test_dst_path_is_normalised(self):
-        path = BASE_DIR + "//data"
-        self.assertEqual(parsers.parse_dst(path), DATA_DIR)
+        path = TEST_DATA_DIR + "//fasta"
+        self.assertEqual(parsers.parse_dst(path), os.path.join(TEST_DATA_DIR, "fasta"))
 
     def test_makes_dst_directory_tree(self):
-        path = os.path.join(DATA_DIR, "subdir")
+        path = os.path.join(TEST_DATA_DIR, "subdir")
         parsers.parse_dst(path)
         self.assertTrue(os.path.isdir(path))
         self.bin.append(path)
 
 
-class TestParseProgram(TestCase):
+class TestParseProgram(unittest.TestCase):
     def test_ok_supported_program(self):
         for p in ("enrich2", "enrich", "empiric"):
             parsers.parse_program(p)
@@ -123,9 +124,9 @@ def test_sets_correct_program_from_dict(self):
         parsers.parse_program(program)
 
 
-class TestParseWildTypeSequence(TestCase):
+class TestParseWildTypeSequence(unittest.TestCase):
     def test_can_read_from_fasta(self):
-        path = os.path.join(DATA_DIR, "lower.fa")
+        path = os.path.join(TEST_DATA_DIR, "fasta", "lower.fa")
         wtseq = parsers.parse_wt_sequence(path, program="enrich2", non_coding=True)
         expected = (
             "ACAGTTGGATATAGTAGTTTGTACGAGTTGCTTGTGGCTT"
@@ -160,8 +161,8 @@ def test_ok_divisible_by_three_enrich_empiric(self):
         parsers.parse_wt_sequence("ATGATC", program="empiric")
 
 
-class TestParseInputType(TestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value="counts")
+class TestParseInputType(unittest.TestCase):
+    @patch("mavedbconvert.parsers.parse_string", return_value="counts")
     def test_calls_parse_string(self, patch):
         parsers.parse_input_type(constants.count_type)
         patch.assert_called()
@@ -175,8 +176,8 @@ def test_ok_recognised_input_type(self):
             parsers.parse_input_type(v)
 
 
-class TestParseScoreColumn(TestCase):
-    @mock.patch("mavedbconvert.parsers.parse_string", return_value="score")
+class TestParseScoreColumn(unittest.TestCase):
+    @patch("mavedbconvert.parsers.parse_string", return_value="score")
     def test_calls_parse_string(self, patch):
         parsers.parse_score_column("score", constants.score_type, program="enrich")
         patch.assert_called()
@@ -209,8 +210,8 @@ def test_ok_enrich2_and_column_not_defined(self):
         )
 
 
-class TestParseOffset(TestCase):
-    @mock.patch("mavedbconvert.parsers.parse_numeric", return_value=0)
+class TestParseOffset(unittest.TestCase):
+    @patch("mavedbconvert.parsers.parse_numeric", return_value=0)
     def test_calls_parse_numeric(self, patch):
         parsers.parse_offset(0, program="enrich")
         patch.assert_called()
@@ -237,7 +238,7 @@ def test_ok_enrich_empiric_offset_mult_of_three(self):
         self.assertEqual(-6, parsers.parse_offset("-6", "empiric"))
 
 
-class TestParseDocopt(TestCase):
+class TestParseDocopt(unittest.TestCase):
     @staticmethod
     def mock_args(
         program=None,
@@ -258,13 +259,13 @@ def mock_args(
         if program is None:
             program = "enrich2"
         if src is None:
-            src = os.path.join(DATA_DIR, "enrich2.tsv")
+            src = os.path.join(TEST_DATA_DIR, "enrich2", "enrich2.tsv")
         return {
             "enrich": True if program == "enrich" else False,
             "enrich2": True if program == "enrich2" else False,
             "empiric": True if program == "empiric" else False,
-            "": os.path.join(DATA_DIR, src),
-            "--dst": os.path.join(DATA_DIR, dst) if dst else dst,
+            "": os.path.join(TEST_DATA_DIR, program, src),
+            "--dst": os.path.join(TEST_DATA_DIR, program, dst) if dst else dst,
             "--score-column": score_column,
             "--hgvs-column": hgvs_column,
             "--skip-header": skip_header,
@@ -314,3 +315,7 @@ def test_contains_skip_header_rows_key(self):
         args = self.mock_args()
         _, kwargs = parsers.parse_docopt(args)
         self.assertIn("skip_header_rows", kwargs)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_utilities.py b/mavedbconvert/tests/test_utilities.py
index 25b530f..7e1c244 100644
--- a/mavedbconvert/tests/test_utilities.py
+++ b/mavedbconvert/tests/test_utilities.py
@@ -1,11 +1,11 @@
-from unittest import TestCase
+import unittest
 
 import numpy as np
 
-from .. import utilities, constants, exceptions
+from mavedbconvert import utilities, constants, exceptions
 
 
-class TestSlicer(TestCase):
+class TestSlicer(unittest.TestCase):
     def test_slicer_returns_chunks_of_size_n(self):
         self.assertEqual(list(utilities.slicer("aaabbbccc", 3)), ["aaa", "bbb", "ccc"])
 
@@ -15,7 +15,7 @@ def test_slicer_returns_clips_if_cannot_chunk(self):
         )
 
 
-class TestTranslateWTSequence(TestCase):
+class TestTranslateWTSequence(unittest.TestCase):
     def test_translate_wt_seq_no_offset(self):
         self.assertEqual(utilities.translate_dna("GTGGCGGAG", offset=0), "VAE")
 
@@ -31,7 +31,7 @@ def test_error_offset_negative(self):
             utilities.translate_dna("GTGGCGGAG", offset=-3)
 
 
-class TestIsNull(TestCase):
+class TestIsNull(unittest.TestCase):
     def test_is_null_true_for_none_nan_and_na(self):
         for v in constants.extra_na:
             self.assertTrue(utilities.is_null(v))
@@ -46,7 +46,7 @@ def test_is_null_false(self):
         self.assertFalse(utilities.is_null("1.2"))
 
 
-class TestFormatColumn(TestCase):
+class TestFormatColumn(unittest.TestCase):
     def test_replaces_null_with_nan(self):
         self.assertIs(utilities.format_column([" "])[0], np.NaN)
         self.assertIs(utilities.format_column(["none"])[0], np.NaN)
@@ -67,7 +67,7 @@ def test_replaces_null_with_none_if_astype_is_not_int_or_float(self):
         self.assertIs(utilities.format_column(["none"], astype=str)[0], None)
 
 
-class TestIsNumeric(TestCase):
+class TestIsNumeric(unittest.TestCase):
     def test_true_for_float(self):
         self.assertTrue(utilities.is_numeric(float))
 
@@ -90,7 +90,7 @@ def test_false_for_np_object(self):
         self.assertFalse(utilities.is_numeric(np.object))
 
 
-class TestNucleotideSubstitutionEvent(TestCase):
+class TestNucleotideSubstitutionEvent(unittest.TestCase):
     def test_parses_negative_positions(self):
         nt = utilities.NucleotideSubstitutionEvent("n.-100A>T")
         self.assertEqual(nt.position, -100)
@@ -166,7 +166,7 @@ def test_infers_within_frame_position(self):
         )
 
 
-class TestProteinSubstitutionEvent(TestCase):
+class TestProteinSubstitutionEvent(unittest.TestCase):
     def test_error_set_position_less_than_1(self):
         pro = utilities.ProteinSubstitutionEvent("p.Gly4Leu")
         with self.assertRaises(ValueError):
@@ -208,7 +208,7 @@ def test_formats_event_string_correctly(self):
         )
 
 
-class TestSplitVariant(TestCase):
+class TestSplitVariant(unittest.TestCase):
     def test_split_hgvs_singular_list_non_multi_variant(self):
         self.assertListEqual(["c.100A>G"], utilities.split_variant("c.100A>G"))
 
@@ -218,7 +218,7 @@ def test_split_hgvs_returns_list_of_single_variants(self):
         )
 
 
-class TestNormalizeVariant(TestCase):
+class TestNormalizeVariant(unittest.TestCase):
     def test_stripts_white_space(self):
         self.assertEqual(utilities.normalize_variant(" c.1A>G "), "c.1A>G")
 
@@ -256,7 +256,7 @@ def test_replaces_X_with_N_in_rna_variant(self):
         )
 
 
-class TestFormatVariant(TestCase):
+class TestFormatVariant(unittest.TestCase):
     def test_stripts_white_space(self):
         self.assertEqual(utilities.format_variant(" c.1A>G "), "c.1A>G")
 
@@ -264,7 +264,7 @@ def test_passes_on_none(self):
         self.assertIsNone(utilities.format_variant(None))
 
 
-class TestHGVSProFromEventList(TestCase):
+class TestHGVSProFromEventList(unittest.TestCase):
     def test_returns_single_event(self):
         result = utilities.hgvs_pro_from_event_list(["L4V"])
         self.assertEqual(result, "p.L4V")
@@ -292,7 +292,7 @@ def test_error_invalid_hgvs(self):
             utilities.hgvs_pro_from_event_list(["aaaa"])
 
 
-class TestHGVSNTFromEventList(TestCase):
+class TestHGVSNTFromEventList(unittest.TestCase):
     def test_returns_single_event(self):
         result = utilities.hgvs_nt_from_event_list(["45A>G"], prefix="c")
         self.assertEqual(result, "c.45A>G")
@@ -316,7 +316,7 @@ def test_error_invalid_hgvs(self):
             utilities.hgvs_nt_from_event_list(["aaaa"], prefix="c")
 
 
-class TestNonHgvsColumns(TestCase):
+class TestNonHgvsColumns(unittest.TestCase):
     def test_returns_non_hgvs_columns(self):
         self.assertListEqual(
             ["score"],
@@ -328,7 +328,7 @@ def test_returns_non_hgvs_columns(self):
         )
 
 
-class TestHgvsColumns(TestCase):
+class TestHgvsColumns(unittest.TestCase):
     def test_returns_only_hgvs_columns(self):
         self.assertListEqual(
             [constants.nt_variant_col, constants.pro_variant_col],
@@ -338,3 +338,7 @@ def test_returns_only_hgvs_columns(self):
                 )
             ),
         )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mavedbconvert/tests/test_validators.py b/mavedbconvert/tests/test_validators.py
index bad4735..301d4a6 100644
--- a/mavedbconvert/tests/test_validators.py
+++ b/mavedbconvert/tests/test_validators.py
@@ -1,13 +1,13 @@
-from unittest import TestCase
+import unittest
 
 import pandas as pd
 from hgvs.sequencevariant import SequenceVariant
 
-from .. import validators, constants, exceptions
+from mavedbconvert import validators, constants, exceptions
 
 
-class TestHGVSPatternsBackend(TestCase):
+class TestHGVSPatternsBackend(unittest.TestCase):
     def setUp(self):
         self.backend = validators.HGVSPatternsBackend()
 
@@ -25,7 +25,7 @@ def test_returns_str_variant(self):
         self.assertIsInstance(self.backend.validate("c.1A>G"), str)
 
 
-class TestHGVSBiocommonsBackend(TestCase):
+class TestHGVSBiocommonsBackend(unittest.TestCase):
     def setUp(self):
         self.backend = validators.HGVSBiocommonsBackend("NM_000000001.1")
 
@@ -68,7 +68,7 @@ def test_validate_hgvs_uses_dummy_ref_if_transcript_not_passed(self):
         )
 
 
-class TestValidateHGVS(TestCase):
+class TestValidateHGVS(unittest.TestCase):
     def test_uses_biocommons_backend_if_transcript_provided(self):
         result = validators.validate_variants(
             ["c.[1A>G;2A>G]"], n_jobs=2, verbose=0, transcript=constants.dummy_ref
         )
@@ -80,7 +80,7 @@ def test_uses_patterns_backend_as_default(self):
         self.assertIsInstance(result[0], str)
 
 
-class TestDfValidators(TestCase):
+class TestDfValidators(unittest.TestCase):
     def test_validate_column_raise_keyerror_column_not_exist(self):
         df = pd.DataFrame({"a": [1]})
         with self.assertRaises(KeyError):
@@ -100,7 +100,7 @@ def test_pass_all_numeric(self):
         validators.validate_columns_are_numeric(df)
 
 
-class TestHGVSValidators(TestCase):
+class TestHGVSValidators(unittest.TestCase):
     def test_validate_hgvs_nt_not_redef_raise_error_if_redefined(self):
         df = pd.DataFrame({constants.nt_variant_col: ["a", "b"]})
         validators.validate_hgvs_nt_uniqueness(df)  # Should pass
@@ -124,7 +124,7 @@ def test_validate_hgvs_pro_not_redef_ignores_none(self):
         validators.validate_hgvs_pro_uniqueness(df)  # Should pass
 
 
-class TestMaveDBCompliance(TestCase):
+class TestMaveDBCompliance(unittest.TestCase):
     def test_error_primary_column_contains_null(self):
         df = pd.DataFrame(
             {
@@ -207,7 +207,7 @@ def test_keyerror_missing_score_column_df_type_is_scores(self):
         validators.validate_mavedb_compliance(df, df_type=constants.score_type)
 
 
-class TestValidateSameVariants(TestCase):
+class TestValidateSameVariants(unittest.TestCase):
     def test_ve_counts_defines_different_nt_variants(self):
         scores = pd.DataFrame(
             {
@@ -260,3 +260,7 @@ def test_error_dfs_define_different_hgvs_columns(self):
         counts = pd.DataFrame({constants.pro_variant_col: ["p.Leu75Glu"]})
         with self.assertRaises(AssertionError):
             validators.validate_datasets_define_same_variants(scores, counts)
+
+
+if 
__name__ == "__main__": + unittest.main() diff --git a/requirements/dev.txt b/requirements/dev.txt deleted file mode 100644 index e003b59..0000000 --- a/requirements/dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -black -ipython -pylint -tox -pytest -pytest-sugar -pytest-cov -mock \ No newline at end of file diff --git a/requirements/install.txt b/requirements/install.txt deleted file mode 100644 index a1f1556..0000000 --- a/requirements/install.txt +++ /dev/null @@ -1,12 +0,0 @@ -tables>=3.2.0 -pandas>=0.18.0,<=0.24.0 -xlrd >= 0.9.0 -tqdm -docopt -git+https://github.com/FowlerLab/hgvs-patterns.git -hgvs -requests -numpy -scipy -joblib -xlsxwriter \ No newline at end of file diff --git a/setup.py b/setup.py index 31edffb..ab4722f 100644 --- a/setup.py +++ b/setup.py @@ -1,17 +1,43 @@ -from setuptools import setup +import setuptools -setup( +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( name="mavedbconvert", - version="0.6.0-alpha", - packages=["mavedbconvert", "mavedbconvert.tests"], - url="https://github.com/FowlerLab/mavedb-convert", - license="AGPLv3", - author="Daniel Esposito", - author_email="esposito.d@wehi.edu.au", + version="0.1.0-beta", + author="Alan F Rubin, Daniel Esposito", + author_email="alan.rubin@wehi.edu.au", description=( - "A command line tool for converting alternate " - "file formats into a MaveDB compliant format." + "A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format." ), - #install_requires=open("requirements/install.txt", "rt").read().split("\n"), - entry_points={"console_scripts": ["mavedb-convert=mavedbconvert.main:main"]}, + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/VariantEffect/mavedbconvert", + packages=setuptools.find_packages(), + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + ], + python_requires=">=3.6", + install_requires=[ + "tables>=3.2.0", + "pandas>=0.18.0", + "xlrd >= 0.9.0", + "tqdm", + "docopt", + "hgvsp @ git+https://github.com/FowlerLab/hgvs-patterns", + "hgvs", + "requests", + "numpy", + "scipy", + "joblib", + "xlsxwriter", + ], + entry_points={"console_scripts": ["mavedbconvert=mavedbconvert.main:main"]}, + test_suite="tests", )