Skip to content

Commit

Permalink
Add ptkscmp datatype for PlantTribes significant components in the Ks…
Browse files Browse the repository at this point in the history
… distribution
  • Loading branch information
gregvonkuster committed Jun 28, 2017
1 parent dd0336e commit 66dabf6
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
2 changes: 2 additions & 0 deletions config/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@
<datatype extension="ptaligntrimmedca" type="galaxy.datatypes.plant_tribes:PlantTribesMultipleSequenceAlignmentTrimmedCodonAlignment" />
<datatype extension="ptalignfiltered" type="galaxy.datatypes.plant_tribes:PlantTribesMultipleSequenceAlignmentFiltered" />
<datatype extension="ptalignfilteredca" type="galaxy.datatypes.plant_tribes:PlantTribesMultipleSequenceAlignmentFilteredCodonAlignment" />
<datatype extension="ptkscmp" type="galaxy.datatypes.plant_tribes:PlantTribesKsComponents" display_in_upload="true"/>
<datatype extension="ptortho" type="galaxy.datatypes.plant_tribes:PlantTribesOrtho" />
<datatype extension="ptorthocs" type="galaxy.datatypes.plant_tribes:PlantTribesOrthoCodingSequence" />
<datatype extension="ptphylip" type="galaxy.datatypes.plant_tribes:PlantTribesPhylip" />
Expand All @@ -623,6 +624,7 @@
defined format first, followed by next-most rigidly defined,
and so on.
-->
<sniffer type="galaxy.datatypes.plant_tribes:PlantTribesKsComponents"/>
<sniffer type="galaxy.datatypes.plant_tribes:Smat"/>
<sniffer type="galaxy.datatypes.mothur:Sabund"/>
<sniffer type="galaxy.datatypes.mothur:Otu"/>
Expand Down
62 changes: 62 additions & 0 deletions lib/galaxy/datatypes/plant_tribes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from galaxy.datatypes.data import get_file_peek, Text
from galaxy.datatypes.metadata import MetadataElement, MetadataParameter
from galaxy.datatypes.sniff import get_headers
from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.text import Html
from galaxy.util import nice_size

Expand Down Expand Up @@ -89,6 +91,66 @@ def set_meta(self, dataset, overwrite=True, **kwd):
log.warning("set_meta fname: %s %s" % (dataset.file_name if dataset and dataset.file_name else 'Unkwown', str(e)))


class PlantTribesKsComponents(Tabular):
    """
    Tabular dataset describing the significant components in a Ks
    distribution.

    The first line of the file is a header row.  The ``number_comp``
    metadata element records the largest integer found in the third
    whitespace-separated column of the remaining lines.
    """
    file_ext = "ptkscmp"
    MetadataElement(name="number_comp", default=0, desc="Number of significant components in the Ks distribution", readonly=True, visible=True, no_value=0)

    def display_peek(self, dataset):
        """
        Return the dataset's stored peek, falling back to a short
        description that includes the dataset size if the peek cannot be
        read.
        """
        try:
            return dataset.peek
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so
            # SystemExit and KeyboardInterrupt are not swallowed.
            return "Significant components in the Ks distribution (%s)" % (nice_size(dataset.get_size()))

    def set_meta(self, dataset, **kwd):
        """
        Set the number of significant components in the Ks distribution.

        Every line after the header is split on whitespace and the third
        item is parsed as an integer; the maximum of the values that parse
        is stored in ``dataset.metadata.number_comp``.  Lines with fewer
        than three items, or whose third item is not an integer, are
        ignored.  The metadata value is left untouched when no line yields
        a value.

        The dataset is expected to be small (on the order of 10 lines), so
        it is read in full.
        """
        super(PlantTribesKsComponents, self).set_meta(dataset, **kwd)
        significant_components = []
        with open(dataset.file_name) as fh:
            # Skip the header line; the default keeps an empty file from
            # raising StopIteration.
            next(fh, None)
            for line in fh:
                # split() with no argument handles tabs as well as spaces.
                items = line.split()
                try:
                    significant_components.append(int(items[2]))
                except (IndexError, ValueError):
                    # Too few columns, or the third column is not an
                    # integer (e.g. a row that only carries float values).
                    continue
        if significant_components:
            dataset.metadata.number_comp = max(significant_components)

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set the dataset's peek from the file contents and its blurb from
        the ``number_comp`` metadata value.  Purged datasets get fixed
        placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.number_comp == 1:
                dataset.blurb = "1 significant component"
            else:
                dataset.blurb = "%s significant components" % dataset.metadata.number_comp
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Return True if the first line of the file is the expected
        tab-separated header row, False otherwise (including on any
        error while reading the file).

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test_tab.bed')
        >>> PlantTribesKsComponents().sniff(fname)
        False
        >>> fname = get_test_fname('1.ptkscmp')
        >>> PlantTribesKsComponents().sniff(fname)
        True
        """
        try:
            # NOTE(review): the separator passed here is a literal
            # backslash followed by "t", not a tab character, so the
            # header line presumably comes back unsplit as a single item.
            # That is why it is compared against the whole tab-joined
            # header below -- confirm against get_headers before changing.
            line_item_str = get_headers(filename, '\\t', 1)[0][0]
            # "porportion" is spelled this way in the files being sniffed;
            # do not correct it here.
            return line_item_str == 'species\tn\tnumber_comp\tlnL\tAIC\tBIC\tmean\tvariance\tporportion'
        except Exception:
            # A sniffer reports "not this type" rather than raising.
            return False


class PlantTribesOrtho(PlantTribes):
"""
PlantTribes sequences classified into precomputed, orthologous gene family
Expand Down
11 changes: 11 additions & 0 deletions lib/galaxy/datatypes/test/1.ptkscmp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
species n number_comp lnL AIC BIC mean variance porportion
species1 1184 1 -1527.4110 3058.82 3068.98 0.4656 0.1675 1.00
species1 1184 2 -1490.7280 2991.46 3016.84 0.1718 0.0035 0.22
0.6173 0.2275 0.78
species1 1184 3 -1471.6980 2959.40 3000.01 0.6921 0.1806 0.65
5.9251 3.0703 0.02
0.1887 0.0054 0.33
species1 1184 4 -1443.1050 2908.21 2964.05 0.1219 0.0002 0.10
0.2589 0.0093 0.38
0.7900 0.1030 0.42
1.5944 1.7482 0.10

0 comments on commit 66dabf6

Please sign in to comment.