Skip to content
This repository has been archived by the owner on Apr 16, 2024. It is now read-only.

Commit

Permalink
Merge 73ba5d1 into 426b965
Browse files Browse the repository at this point in the history
  • Loading branch information
cjgrady committed Apr 19, 2019
2 parents 426b965 + 73ba5d1 commit 1b368e5
Show file tree
Hide file tree
Showing 6 changed files with 259 additions and 5 deletions.
73 changes: 68 additions & 5 deletions analyses/lm_objects/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,60 @@ def load(cls, flo):
'Cannot load matrix data from file-like object provided',
str(e)))

# ...........................
@classmethod
def load_csv(cls, flo, dtype=float, num_header_rows=0, num_header_cols=0):
    """Attempts to load a Matrix object from a CSV file-like object.

    Args:
        flo (file-like): A file-like object yielding text lines of
            comma-separated matrix data.
        dtype (:obj:`method`, optional): Callable used to cast every data
            cell when adding it to the matrix.  Defaults to the builtin
            ``float``, which is what the ``np.float`` alias (removed from
            recent NumPy releases) referred to.
        num_header_rows (:obj:`int`, optional): The number of header rows
            in the CSV file.
        num_header_cols (:obj:`int`, optional): The number of header
            columns in the CSV file.

    Returns:
        Matrix: The newly loaded Matrix object.  Row headers are stored
            under headers key '0' and column headers under key '1'.  With
            a single header row / column the headers are flat lists; with
            more than one, each header is itself a list of its levels.
    """
    row_headers = []
    header_lines = []  # Leading rows that are headers
    data = []

    for line in flo:
        line = line.strip()
        if len(header_lines) < num_header_rows:
            # Drop the cells that sit above the row header columns
            header_lines.append(line.split(',')[num_header_cols:])
            continue
        if not line:
            # Blank (typically trailing) lines hold no cells; trying to
            # cast the empty string with the default dtype would fail
            continue
        items = line.split(',')
        if num_header_cols == 1:
            row_headers.append(items[0])
        elif num_header_cols > 1:
            row_headers.append(items[:num_header_cols])
        data.append([dtype(x) for x in items[num_header_cols:]])

    # Process header columns from header rows
    if not header_lines:
        # No header rows requested, or the input was empty
        col_headers = []
    elif num_header_rows == 1:
        col_headers = header_lines[0]
    else:
        # Transpose so that each column header lists its value from every
        # header row, top to bottom
        col_headers = [list(col) for col in zip(*header_lines)]

    return cls(
        np.array(data), headers={'0': row_headers, '1': col_headers})

# ...........................
@classmethod
def load_new(cls, flo):
Expand Down Expand Up @@ -529,11 +583,20 @@ def csv_generator():

# Start with the header row, if we have one
if '1' in mtx.headers and mtx.headers['1']:
# Add a blank entry if we have row headers
header_row = ['']*len(listify(
row_headers[0]) if row_headers else [])
header_row.extend(mtx.headers['1'])
yield header_row
# Make column headers lists of lists
if not isinstance(mtx.headers['1'][0], (tuple, list)):
header_row = ['']*len(
listify(row_headers[0]) if row_headers else [])
header_row.extend(mtx.headers['1'])
yield header_row
else:
for i in range(len(mtx.headers['1'][0])):
header_row = ['']*len(
listify(row_headers[0]) if row_headers else [])
header_row.extend(
[mtx.headers['1'][j][i] for j in range(
len(mtx.headers['1']))])
yield header_row
# For each row in the data set
for i in range(mtx.data.shape[0]):
# Add the row headers if exists
Expand Down
29 changes: 29 additions & 0 deletions analyses/lm_objects/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* Add method to remove annotations.
* Move label method out of internal functions.
"""
import os

import dendropy
import numpy as np

Expand Down Expand Up @@ -59,6 +61,33 @@ def from_base_tree(cls, tree):
"""
return cls.get(data=tree.as_string('nexus'), schema='nexus')

# ..............................
@classmethod
def from_filename(cls, filename):
    """Creates a TreeWrapper object by loading a file.

    The tree schema is chosen from the file extension, compared without
    regard to case: '.nex' is read as nexus, '.xml' as nexml and '.tre' as
    newick.

    Args:
        filename (str): A file path to a tree file that should be loaded.

    Returns:
        TreeWrapper: The newly loaded tree.

    Raises:
        IOError: Raised if the tree file cannot be loaded based on the file
            extension.
    """
    schemas_by_extension = {
        '.nex': 'nexus',
        '.tre': 'newick',
        '.xml': 'nexml',
    }
    _, tree_ext = os.path.splitext(filename)
    # Lower-case only for the lookup so the error message still reports
    # the extension exactly as it was provided
    tree_schema = schemas_by_extension.get(tree_ext.lower())
    if tree_schema is None:
        raise IOError(
            'Cannot handle tree with extension: {}'.format(tree_ext))
    return cls.get(path=filename, schema=tree_schema)

# ..............................
def add_node_labels(self, prefix=None, overwrite=False):
"""Add labels to the nodes in the tree.
Expand Down
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def _get_format_extension(self, fmt):
return '.tre'
elif fmt.lower() == 'nexus':
return '.nex'
elif fmt.lower() == 'nexml':
return '.xml'
elif fmt.lower() == 'phylip':
return '.phylip'
elif fmt.lower() == 'table':
Expand Down Expand Up @@ -212,6 +214,7 @@ def pytest_generate_tests(metafunc):
('valid_csv_alignment', df.get_alignments('csv', True)),
('valid_json_alignment', df.get_alignments('json', True)),
('valid_newick_tree', df.get_trees('newick', True)),
('valid_nexml_tree', df.get_trees('nexml', True)),
('valid_nexus_tree', df.get_trees('nexus', True)),
('valid_phylip_alignment', df.get_alignments('phylip', True)),
('valid_table_alignment', df.get_alignments('table', True))
Expand Down
50 changes: 50 additions & 0 deletions tests/data_dir/trees/valid_tree.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<nex:nexml
version="0.9"
xsi:schemaLocation="http://www.nexml.org/2009 ../xsd/nexml.xsd"
xmlns="http://www.nexml.org/2009"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xml="http://www.w3.org/XML/1998/namespace"
xmlns:nex="http://www.nexml.org/2009"
xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
>
<otus id="d0">
<otu id="d1" label="A" />
<otu id="d2" label="B" />
<otu id="d3" label="C" />
<otu id="d4" label="G" />
<otu id="d5" label="D" />
<otu id="d6" label="E" />
<otu id="d7" label="F" />
</otus>
<trees id="d8" otus="d0">
<tree id="d9" xsi:type="nex:FloatTree">
<node id="d10" />
<node id="d11" otu="d1" />
<node id="d12" />
<node id="d13" />
<node id="d14" otu="d2" />
<node id="d15" otu="d3" />
<node id="d16" />
<node id="d17" otu="d4" />
<node id="d18" />
<node id="d19" otu="d5" />
<node id="d20" />
<node id="d21" otu="d6" />
<node id="d22" otu="d7" />
<rootedge id="d23" target="d10" />
<edge id="d24" source="d10" target="d11" length="2.9999" />
<edge id="d25" source="d10" target="d12" length="0.1" />
<edge id="d26" source="d12" target="d13" length="0.1" />
<edge id="d27" source="d13" target="d14" length="0.1" />
<edge id="d28" source="d13" target="d15" length="0.1" />
<edge id="d29" source="d12" target="d16" length="0.1" />
<edge id="d30" source="d16" target="d17" length="0.2" />
<edge id="d31" source="d16" target="d18" length="0.1" />
<edge id="d32" source="d18" target="d19" length="0.1" />
<edge id="d33" source="d18" target="d20" length="0.1" />
<edge id="d34" source="d20" target="d21" length="0.1" />
<edge id="d35" source="d20" target="d22" length="0.1" />
</tree>
</trees>
</nex:nexml>
57 changes: 57 additions & 0 deletions tests/test_lm_objects/test_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,63 @@ def test_load(self):
with pytest.raises(IOError):
mtx = matrix.Matrix.load(io.BytesIO())

# .....................................
def test_load_csv(self):
    """Test that load_csv restores a matrix written by write_csv.

    A matrix from get_random_matrix(5, 5) is written to an in-memory CSV
    and read back with one header row and one header column; the loaded
    data and headers must match the original.
    """
    orig_mtx = get_random_matrix(5, 5)

    with io.StringIO() as out_str:
        orig_mtx.write_csv(out_str)
        # Rewind so that the loader reads from the start of the buffer
        out_str.seek(0)

        # Attempt to load matrix
        loaded_mtx = matrix.Matrix.load_csv(
            out_str, num_header_rows=1, num_header_cols=1)

    # Verify data and headers are the same
    assert np.allclose(loaded_mtx.data, orig_mtx.data)
    assert loaded_mtx.get_headers() == orig_mtx.get_headers()

# .....................................
def test_load_csv_multi_headers(self):
    """Test the load_csv method with multi-level headers.

    Every row and column header of a matrix from get_random_matrix(5, 5)
    is replaced by a two item list, the matrix is written to an in-memory
    CSV and read back with two header rows and two header columns; the
    loaded data and headers must match the original.
    """
    orig_mtx = get_random_matrix(5, 5)
    orig_row_headers = orig_mtx.get_row_headers()
    orig_col_headers = orig_mtx.get_column_headers()

    # Give each header a second level derived from the first
    orig_mtx.set_row_headers(
        [[h, '{}-2'.format(h)] for h in orig_row_headers])
    orig_mtx.set_column_headers(
        [[h, '{}-2'.format(h)] for h in orig_col_headers])

    with io.StringIO() as out_str:
        orig_mtx.write_csv(out_str)
        # Rewind so that the loader reads from the start of the buffer
        out_str.seek(0)

        # Attempt to load matrix
        loaded_mtx = matrix.Matrix.load_csv(
            out_str, num_header_rows=2, num_header_cols=2)

    # Verify data and headers are the same
    assert np.allclose(loaded_mtx.data, orig_mtx.data)
    assert loaded_mtx.get_headers() == orig_mtx.get_headers()

# .....................................
def test_load_new(self):
"""Test the load_new method.
Expand Down
52 changes: 52 additions & 0 deletions tests/test_lm_objects/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,58 @@ def test_from_base_nexus_tree(self, valid_nexus_tree):
wrapped_tree = tree.TreeWrapper.from_base_tree(dendropy_tree)
assert isinstance(wrapped_tree, tree.TreeWrapper)

# .....................................
def test_from_filename_newick_tree(self, valid_newick_tree):
    """Check that from_filename builds a TreeWrapper from a newick file.

    Args:
        valid_newick_tree (pytest.fixture): A parameterized pytest fixture
            supplying valid newick tree filenames, one per invocation of
            this test.
    """
    assert isinstance(
        tree.TreeWrapper.from_filename(valid_newick_tree),
        tree.TreeWrapper)

# .....................................
def test_from_filename_nexml_tree(self, valid_nexml_tree):
    """Check that from_filename builds a TreeWrapper from a nexml file.

    Args:
        valid_nexml_tree (pytest.fixture): A parameterized pytest fixture
            supplying valid nexml tree filenames, one per invocation of
            this test.
    """
    assert isinstance(
        tree.TreeWrapper.from_filename(valid_nexml_tree),
        tree.TreeWrapper)

# .....................................
def test_from_filename_nexus_tree(self, valid_nexus_tree):
    """Check that from_filename builds a TreeWrapper from a nexus file.

    Args:
        valid_nexus_tree (pytest.fixture): A parameterized pytest fixture
            supplying valid nexus tree filenames, one per invocation of
            this test.
    """
    assert isinstance(
        tree.TreeWrapper.from_filename(valid_nexus_tree),
        tree.TreeWrapper)

# .....................................
def test_from_filename_invalid_file(self, invalid_csv_alignment):
    """Check that from_filename rejects a file that is not a tree.

    Args:
        invalid_csv_alignment (pytest.fixture): A parameterized pytest
            fixture that provides a CSV filename, one at a time, to test
            this function.  Trees cannot be loaded from CSV, so this should
            raise an IOError.
    """
    load_tree = tree.TreeWrapper.from_filename
    with pytest.raises(IOError):
        load_tree(invalid_csv_alignment)

# .....................................
def test_add_node_labels_no_prefix_no_overwrite(self):
"""Test that node labels are added correctly to a tree.
Expand Down

0 comments on commit 1b368e5

Please sign in to comment.