Skip to content

Commit

Permalink
Merge b991b7f into b2a8b74
Browse files Browse the repository at this point in the history
  • Loading branch information
afrubin committed Apr 1, 2020
2 parents b2a8b74 + b991b7f commit d7ae1b1
Show file tree
Hide file tree
Showing 35 changed files with 434 additions and 302 deletions.
26 changes: 26 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
language: python
matrix:
include:
- python: "3.6"
- python: "3.7"
- python: "3.8"
- python: "3.8-dev"
- python: "3.9-dev"
- python: "pypy3"
env: NO_MYPY=true
allow_failures:
- python: "3.8-dev"
- python: "3.9-dev"
- python: "pypy3"
env: NO_MYPY=true
install:
- pip3 install .
before_script:
- pip3 install coverage
- pip3 install coveralls
# NO_MYPY is only set (to "true") for the pypy3 job. Compare the value
# explicitly: an unset variable expands to an empty command that exits 0,
# so the previous `! $NO_MYPY` test was false on every job and mypy never ran.
- if [ "$NO_MYPY" != "true" ]; then pip3 install mypy; fi
script:
- coverage run --source mavedbconvert -m unittest
- if [ "$NO_MYPY" != "true" ]; then mypy mavedbconvert tests; fi
after_success:
- coveralls
24 changes: 13 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
# mavedb-convert
A command line tool for converting alternate file formats into a MaveDB compliant format.
[![Build Status](https://travis-ci.com/VariantEffect/mavedbconvert.svg?branch=master)](https://travis-ci.com/VariantEffect/mavedbconvert)
[![Coverage Status](https://coveralls.io/repos/github/VariantEffect/mavedbconvert/badge.svg?branch=master)](https://coveralls.io/github/VariantEffect/mavedbconvert?branch=master)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

# mavedbconvert
A command line tool for converting Multiplex Assay of Variant Effect datasets into a MaveDB-ready format.

# Installation
Download the `mavedb-convert` source and navigate to that directory.
Download the mavedbconvert source and navigate to that directory.
We recommend creating a [virtual environment](https://docs.python.org/3/library/venv.html) before proceeding with the installation.

Install dependencies using the requirements file and then install the package:
Install the package using pip:

pip3 install -r requirements/install.txt
pip3 install .

Additional requirements needed for running the unit tests and doing package development are in `requirements/dev.txt`

## Troubleshooting
If you are an OSX user, you may experience header-related issues when installing `pysam`. The current workaround
is to install pysam version `0.13` manually before installing the requirements:
If you are an OSX user, you may experience header-related issues when installing pysam. The current workaround
is to install pysam v0.13 manually before installing the requirements:

pip install pysam==0.13
pip3 install pysam==0.13

This is the latest version known to compile without errors.

Although `pysam` is not required for `mavedb-convert` directly, it is installed by some of our dependencies. Until it is removed or made optional by those libraries, `mavedb-convert` will unfortunately not be installable on Windows.
Although pysam is not required for mavedbconvert directly, it is installed by some of our dependencies.
Until it is removed or made optional by those libraries, mavedbconvert will unfortunately not be installable on Windows.
7 changes: 5 additions & 2 deletions mavedbconvert/enrich2.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def get_count_dataframe_by_condition(
return None
filtered = store["/main/{}/scores".format(element)].index

df = store[count_key].loc[filtered, idx[cnd, :, :]]
# TODO: revisit tests to see if preserving the all-NA rows makes sense
store_df = store[count_key]
store_df = store_df.reindex(filtered)
df = store_df.loc[filtered, idx[cnd, :, :]]
df.columns = flatten_column_names(df.columns, (1, 2))
return df

Expand Down Expand Up @@ -275,7 +278,7 @@ def __init__(
skip_header_rows=skip_header_rows,
skip_footer_rows=skip_footer_rows,
sheet_name=sheet_name,
score_column="score",
score_column=score_column,
hgvs_column=hgvs_column,
input_type=input_type,
)
Expand Down
10 changes: 5 additions & 5 deletions mavedbconvert/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
All outputs are in 1-based coordinates.
Usage:
mavedb-convert enrich2 <src> [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding]
mavedb-convert enrich <src> [--dst=D] [--wtseq=W] [--offset=O] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
mavedb-convert empiric <src> [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
mavedb-convert -h | --help
mavedb-convert --version
mavedbconvert enrich2 <src> [--dst=D] [--wtseq=W] [--offset=O] [--hgvs-column=A] [--input-type=T] [--skip-header=H] [--skip-footer=H] [--non-coding]
mavedbconvert enrich <src> [--dst=D] [--wtseq=W] [--offset=O] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
mavedbconvert empiric <src> [--dst=D] [--wtseq=W] [--offset=O] [--zero-based] [--score-column=C] [--input-type=T] [--sheet-name=S] [--skip-header=H] [--skip-footer=H]
mavedbconvert -h | --help
mavedbconvert --version
Options:
Expand Down
15 changes: 14 additions & 1 deletion mavedbconvert/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
import shutil
from unittest import TestCase
from tempfile import TemporaryDirectory

import pandas as pd

Expand All @@ -17,19 +19,30 @@
]


# TODO: think up a better name for this class
# TODO: remove the old self.bin stuff
class ProgramTestCase(TestCase):
def setUp(self):
"""
Create a per-test temporary copy of the package's test data.

The ``data`` directory that sits next to this module is copied into a
fresh ``TemporaryDirectory``; ``self.data_dir`` points at the copy.
``self.bin`` starts empty and collects paths that ``tearDown`` removes.
"""
# Keep a reference to the TemporaryDirectory object itself so that
# tearDown can call cleanup() on it.
self._data_dir = TemporaryDirectory() # store the object
self.data_dir = os.path.join(
self._data_dir.name, "data"
) # store the directory path
# Copy the data directory located beside this file into the temp dir.
shutil.copytree(
src=os.path.join(os.path.dirname(os.path.abspath(__file__)), "data"),
dst=self.data_dir,
)
# Paths appended here are deleted in tearDown.
self.bin = []

def mock_multi_sheet_excel_file(self, path, data):
writer = pd.ExcelWriter(path, engine="xlsxwriter")
for i, di in enumerate(data):
df = pd.DataFrame(di)
df.to_excel(writer, sheet_name="Sheet{}".format(i))
df.to_excel(writer, sheet_name="Sheet{}".format(i), index=False)
writer.save()
self.bin.append(path)

def tearDown(self):
self._data_dir.cleanup()
for path in self.bin:
if os.path.exists(path) and os.path.isfile(path):
os.remove(path)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
76 changes: 49 additions & 27 deletions mavedbconvert/tests/test_base.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
import os
import mock
import unittest
from unittest.mock import patch

from .. import base, exceptions
from mavedbconvert import base, exceptions

from . import ProgramTestCase
from mavedbconvert.tests import ProgramTestCase


BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.normpath(BASE_DIR + "/data/")


class TestBaseProgram(ProgramTestCase):
class TestPaths(ProgramTestCase):
"""
Test __init__ correctly sets up read and write directories,
sequence information etc.
etc.
"""

def setUp(self):
super().setUp()
self.src = os.path.join(DATA_DIR, "enrich1.tsv")
self.src_with_spaces = os.path.join(DATA_DIR, "enrich 1.tsv")
self.h5_src = os.path.join(DATA_DIR, "dummy.h5")
self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich .tsv")
self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5")

def tearDown(self):
for path in self.bin:
Expand All @@ -31,7 +28,7 @@ def tearDown(self):

def test_sets_directory_as_input_directory_if_dst_is_none(self):
p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
self.assertEqual(p.dst, DATA_DIR)
self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich"))

def test_error_file_not_readable(self):
with self.assertRaises(IOError):
Expand All @@ -43,45 +40,43 @@ def test_expands_user_and_norms_dst(self):

def test_dir_with_input_fname_appended_when_h5_and_dst_is_none(self):
p = base.BaseProgram(src=self.h5_src, dst=None, wt_sequence="AAA")
self.assertEqual(p.dst, os.path.join(DATA_DIR, "dummy"))
self.bin.append(os.path.join(DATA_DIR, "dummy"))
self.assertEqual(p.dst, os.path.join(self.data_dir, "enrich2", "dummy"))
self.bin.append(os.path.join(self.data_dir, "enrich2", "dummy"))

def test_creates_directory_tree_if_it_doesnt_exist(self):
output = os.path.join(DATA_DIR, "outer_dir/inner_dir/")
output = os.path.join(self.data_dir, "enrich2", "outer_dir", "inner_dir")
base.BaseProgram(src=self.h5_src, dst=output, wt_sequence="AAA")
self.assertTrue(os.path.isdir(output))
self.bin.append(output)

@mock.patch("os.access")
@patch("os.access")
def test_checks_read_permission(self, patch):
p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
self.assertEqual(patch.call_args_list[0][0], (p.src, os.R_OK))

@mock.patch("os.access")
@patch("os.access")
def test_checks_write_permission(self, patch):
p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
self.assertEqual(patch.call_args_list[1][0], (p.dst, os.W_OK))

def test_splits_src_into_filename_and_ext(self):
p = base.BaseProgram(src=self.src, dst=None, wt_sequence="AAA")
self.assertEqual(p.src_filename, "enrich1")
self.assertEqual(p.src_filename, "enrich")
self.assertEqual(p.ext, ".tsv")

def test_lower_cases_ext(self):
p = base.BaseProgram(src=self.src.replace("tsv", "TSV"), wt_sequence="AAA")
self.assertEqual(p.ext, ".tsv")

def test_value_error_coding_offset_not_multiple_of_three(self):
with self.assertRaises(ValueError):
base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1)

def test_dst_filename_replaces_whitespace_with_underscores(self):
p = base.BaseProgram(src=self.src_with_spaces, wt_sequence="AAA")
self.assertEqual(p.dst_filename, "mavedb_enrich_1.csv")
self.assertEqual(p.dst_filename, "mavedb_enrich_.csv")

def test_output_file_joins_dst_and_dst_filename(self):
p = base.BaseProgram(src=self.src, wt_sequence="AAA")
self.assertEqual(p.output_file, os.path.join(DATA_DIR, "mavedb_enrich1.csv"))
self.assertEqual(
p.output_file, os.path.join(self.data_dir, "enrich", "mavedb_enrich.csv")
)

def test_output_directory_expands_user_and_norms_path(self):
p = base.BaseProgram(src=self.src, wt_sequence="AAA")
Expand All @@ -90,6 +85,29 @@ def test_output_directory_expands_user_and_norms_path(self):
p.output_directory, os.path.join(os.path.expanduser("~"), "user")
)


class TestWtSequence(ProgramTestCase):
"""
Test __init__ correctly sets up sequence information etc.
"""

def setUp(self):
super().setUp()
self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
self.src_with_spaces = os.path.join(self.data_dir, "enrich", "enrich .tsv")
self.h5_src = os.path.join(self.data_dir, "enrich2", "dummy.h5")

def tearDown(self):
    """
    Remove files and directories registered in ``self.bin``, then run the
    base-class teardown.

    The base ``ProgramTestCase.tearDown`` releases the temporary data
    directory created in ``setUp``; it previously was never called from
    this override, so the temporary directory was not cleaned up
    explicitly. The ``finally`` clause guarantees the base teardown runs
    even if removing one of the registered paths raises.
    """
    try:
        for path in self.bin:
            if os.path.isfile(path):
                os.remove(path)
            elif os.path.isdir(path):
                # removedirs also prunes parents that become empty.
                os.removedirs(path)
    finally:
        super().tearDown()

def test_value_error_coding_offset_not_multiple_of_three(self):
with self.assertRaises(ValueError):
base.BaseProgram(src=self.src, wt_sequence="ATCA", offset=-1)

# --- Test property setters --- #
def test_wt_setter_upper_cases_wt_sequence(self):
p = base.BaseProgram(src=self.src, wt_sequence="AAA")
Expand Down Expand Up @@ -126,7 +144,7 @@ def test_wt_setter_value_error_not_valid_wt_sequence(self):
class TestBaseProgramValidateAgainstWTSeq(ProgramTestCase):
def setUp(self):
super().setUp()
self.src = os.path.join(DATA_DIR, "enrich1.tsv")
self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
self.base = base.BaseProgram(src=self.src, wt_sequence="ATG", one_based=True)

def test_error_not_a_dna_sub(self):
Expand Down Expand Up @@ -177,7 +195,7 @@ def test_index_error_index_extends_beyond_indexable_wt_seq(self):
class TestBaseProgramValidateAgainstProteinSeq(ProgramTestCase):
def setUp(self):
super().setUp()
self.src = os.path.join(DATA_DIR, "enrich1.tsv")
self.src = os.path.join(self.data_dir, "enrich", "enrich.tsv")
self.base = base.BaseProgram(src=self.src, wt_sequence="ATGAAA", one_based=True)

def test_error_not_a_protein_sub(self):
Expand Down Expand Up @@ -224,3 +242,7 @@ def test_index_error_index_extends_beyond_indexable_pro_seq(self):
with self.assertRaises(IndexError):
self.base.one_based = False
self.base.validate_against_protein_sequence("p.Met2Lys")


if __name__ == "__main__":
unittest.main()

0 comments on commit d7ae1b1

Please sign in to comment.