Merge pull request #27 from carlosp420/nexus_data_type_protein

fixed DATATYPE=PROTEIN bug
carlosp420 · Oct 2, 2015 · c940358 · c940358
2 parents 03899e4 + 46052c1
commit c940358
Show file tree

Hide file tree

Showing 8 changed files with 28 additions and 13 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.3
+current_version = 0.3.4
 commit = True
 tag = True
 

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+0.3.4 (2015-10-02)
+------------------
+* Fixed bug that did not show DATATYPE=PROTEIN in Nexus files when aminoacid
+  sequences were requested by user.
+
 0.3.3 (2015-10-02)
 ------------------
 * Fixed bug that raised an exception when SeqExpandedRecords did not have data

diff --git a/dataset_creator/__init__.py b/dataset_creator/__init__.py
@@ -1,4 +1,4 @@
 from .dataset import Dataset
 
-__version__ = "0.3.3"
+__version__ = "0.3.4"
 __all__ = ['Dataset']
diff --git a/dataset_creator/creator.py b/dataset_creator/creator.py
@@ -59,7 +59,8 @@ def __init__(self, data, format=None, codon_positions=None, partitioning=None,
         self.extra_dataset_str = self.create_extra_dataset_file()
 
     def create_dataset_header(self):
-        return make_dataset_header(self.data, file_format=self.format)
+        return make_dataset_header(self.data, file_format=self.format,
+                                   aminoacids=self.aminoacids)
 
     def create_dataset_block(self):
         if self.format in ['NEXUS', 'PHYLIP', 'FASTA']:

diff --git a/dataset_creator/utils.py b/dataset_creator/utils.py
@@ -65,20 +65,29 @@ def read_and_delete_tmp_file(filename):
     return contents
 
 
-def make_dataset_header(data, file_format):
-    """
-    :param data: named tuple with necessary info for dataset creation.
-    :param file_format: TNT, PHYLIP, NEXUS, FASTA
+def make_dataset_header(data, file_format, aminoacids):
+    """Creates the dataset header for NEXUS files from ``#NEXUS`` to ``MATRIX``.
+
+    Parameters:
+        data (namedtuple):    with necessary info for dataset creation.
+        file_format (str):    TNT, PHYLIP, NEXUS, FASTA
+        aminoacids (boolean): If ``aminoacids is True`` the header will show
+                              ``DATATYPE=PROTEIN`` otherwise it will be ``DNA``.
     """
+    if aminoacids is True:
+        datatype = 'PROTEIN'
+    else:
+        datatype = 'DNA'
+
     if file_format in ['NEXUS', 'PHYLIP', 'FASTA']:
         header = """
 #NEXUS
 
 BEGIN DATA;
 DIMENSIONS NTAX={0} NCHAR={1};
-FORMAT INTERLEAVE DATATYPE=DNA MISSING=? GAP=-;
+FORMAT INTERLEAVE DATATYPE={2} MISSING=? GAP=-;
 MATRIX
-""".format(data.number_taxa, data.number_chars)
+""".format(data.number_taxa, data.number_chars, datatype)
 
     elif file_format == 'MEGA':
         return "#MEGA\n!TITLE title;"

diff --git a/docs/conf.py b/docs/conf.py
@@ -61,9 +61,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.3.3'
+version = '0.3.4'
 # The full version, including alpha/beta/rc tags.
-release = '0.3.3'
+release = '0.3.4'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/setup.py b/setup.py
@@ -24,7 +24,7 @@ def read(*names, **kwargs):
 
 setup(
     name='dataset-creator',
-    version='0.3.3',
+    version='0.3.4',
     license='BSD',
     description='Takes SeqRecordExpanded objects and creates datasets for phylogenetic software',
     long_description='%s\n%s' % (read('README.rst'), re.sub(':[a-z]+:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst'))),

diff --git a/tests/Nexus/dataset_aa.nex b/tests/Nexus/dataset_aa.nex
@@ -2,7 +2,7 @@
 
 BEGIN DATA;
 DIMENSIONS NTAX=10 NCHAR=1575;
-FORMAT INTERLEAVE DATATYPE=DNA MISSING=? GAP=-;
+FORMAT INTERLEAVE DATATYPE=PROTEIN MISSING=? GAP=-;
 MATRIX
 
 [ArgKin]