Permalink
Browse files

Adding Bio.Wise to the distribution

  • Loading branch information...
grouse
grouse committed Jan 15, 2004
1 parent 7ace0b4 commit 3a5d5506bb16a79d1512ae5d45ba83b3e4be0a33
View
@@ -0,0 +1,91 @@
+#!/usr/bin/env python2.3
+
+__version__ = "$Revision: 1.1 $"
+
+import os
+
+try:
+ import lsf
+
+ _NamedTemporaryFile = lsf.NamedTemporaryFile
+ _localfilename = lsf.localfilename
+except ImportError:
+ import tempfile
+
+ _NamedTemporaryFile = tempfile.NamedTemporaryFile
+ _localfilename = lambda *args: args[0]
+
+def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
+ """
+ >>> os.environ["WISE_KBYTE"]="300000"
+ >>> _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), "/tmp/output", kbyte=100000)
+ 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output'
+ >>> _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), "/tmp/output_aa")
+ 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa'
+
+ """
+ cmdline = cmdline[:]
+
+ if force_type:
+ cmdline.insert(0, "WISE_FORCE_TYPE=%s" % force_type)
+
+ if kbyte is None:
+ try:
+ cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"]))
+ except KeyError:
+ pass
+ else:
+ cmdline.extend(("-kbyte", str(kbyte)))
+
+ cmdline.extend(pair)
+ cmdline.extend((">", output_filename))
+ if quiet:
+ cmdline.extend(("2>", "/dev/null"))
+ cmdline_str = ' '.join(cmdline)
+
+ return cmdline_str
+
+def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False):
+ """
+ Returns a filehandle
+ """
+ temp_file = _NamedTemporaryFile(mode='r')
+ cmdline_str = _build_align_cmdline(cmdline,
+ [_localfilename(filename)
+ for filename in pair],
+ temp_file.name, kbyte, force_type, quiet)
+
+ if dry_run:
+ print cmdline_str
+ return
+
+ os.system(cmdline_str)
+ return temp_file
+
+def all_pairs(singles):
+ """
+ Generate pairs list for all-against-all alignments
+
+ >>> all_pairs(range(4))
+ [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
+ """
+ pairs = []
+
+ singles = list(singles)
+ while singles:
+ suitor = singles.pop(0) # if sorted, stay sorted
+ pairs.extend([(suitor, single) for single in singles])
+
+ return pairs
+
+def main():
+ pass
+
+def _test(*args, **keywds):
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+if __name__ == "__main__":
+ if __debug__:
+ _test()
+ main()
View
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+__version__ = "$Revision: 1.1 $"
+
+from __future__ import division
+
+import commands
+import os
+
+from Bio import Wise
+
+_SCORE_MATCH = "4"
+_SCORE_MISMATCH = "-1"
+_PENALTY_GAP_START = "5"
+_PENALTY_GAP_EXTENSION = "1"
+
+_CMDLINE_DNAL = ["dnal", "-alb", "-nopretty",
+ "-match", _SCORE_MATCH, "-mis", _SCORE_MISMATCH,
+ "-gap", _PENALTY_GAP_START, "-ext", _PENALTY_GAP_EXTENSION]
+
+try:
+ import lsf
+ if lsf.jobid:
+ _CMDLINE_DNAL.extend("-silent", "-quiet")
+except ImportError:
+ pass
+
+_CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
+def _fgrep_count(pattern, file):
+ return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
+
+class Statistics(object):
+ """
+ Calculate statistics from an ALB report
+ """
+ def __init__(self, filename):
+ self.matches = _fgrep_count('"SEQUENCE" %s' % _SCORE_MATCH, filename)
+ self.mismatches = _fgrep_count('"SEQUENCE" %s' % _SCORE_MISMATCH, filename)
+ self.gaps = _fgrep_count('"INSERT" -%s' % _PENALTY_GAP_START, filename)
+ self.extensions = _fgrep_count('"INSERT" -%s' % _PENALTY_GAP_EXTENSION, filename)
+
+ def identity_fraction(self):
+ return self.matches/(self.matches+self.mismatches)
+
+ header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
+
+ def __str__(self):
+ return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
+
+def align(pair, *args, **keywds):
+ temp_file = Wise.align(_CMDLINE_DNAL, pair, *args, **keywds)
+ try:
+ return Statistics(temp_file.name)
+ except AttributeError:
+ try:
+ keywds['dry_run']
+ return None
+ except KeyError:
+ raise
+
+def main():
+ import sys
+ stats = align(sys.argv[1:3])
+ print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
+ for attr in
+ ("matches", "mismatches", "gaps", "extensions")])
+ print "identity_fraction: %s" % stats.identity_fraction()
+
+def _test(*args, **keywds):
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+if __name__ == "__main__":
+ if __debug__:
+ _test()
+ main()
View
@@ -0,0 +1,132 @@
+#!/usr/bin/env python2.3
+
+__version__ = "$Revision: 1.1 $"
+
+import exceptions
+import os
+import re
+import sys
+
+from Bio import Wise
+
+_CMDLINE_PSW = ["psw-force",
+ "-l"]
+_OPTION_GAP_START = "-g"
+_OPTION_GAP_EXTENSION = "-e"
+_OPTION_SCORES = "-m"
+
+class AlignmentColumnFullException(exceptions.Exception):
+ pass
+
+class Alignment(list):
+ def append(self, column_unit):
+ try:
+ self[-1].append(column_unit)
+ except AlignmentColumnFullException:
+ list.append(self, AlignmentColumn(column_unit))
+ except IndexError:
+ list.append(self, AlignmentColumn(column_unit))
+
+class AlignmentColumn(list):
+ def _set_kind(self, column_unit):
+ if self.kind == "SEQUENCE":
+ self.kind = column_unit.kind
+
+ def __init__(self, column_unit):
+ assert column_unit.unit == 0
+ self.kind = column_unit.kind
+ list.__init__(self, [column_unit.column, None])
+
+ def __repr__(self):
+ return "%s(%s, %s)" % (self.kind, self[0], self[1])
+
+ def append(self, column_unit):
+ if self[1] is not None:
+ raise AlignmentColumnFullException
+
+ assert column_unit.unit == 1
+
+ self._set_kind(column_unit)
+ self[1] = column_unit.column
+
+class ColumnUnit(object):
+ def __init__(self, unit, column, kind):
+ self.unit = unit
+ self.column = column
+ self.kind = kind
+
+ def __str__(self):
+ return "ColumnUnit(unit=%s, column=%s, %s)" % (self.unit, self.column, self.kind)
+
+ __repr__ = __str__
+
+_re_unit = re.compile(r"^Unit +([01])- \[ *(-?\d+)- *(-?\d+)\] \[(\w+)\]$")
+def parse_line(line):
+ """
+ >>> print parse_line("Column 0:")
+ None
+ >>> parse_line("Unit 0- [ -1- 0] [SEQUENCE]")
+ ColumnUnit(unit=0, column=0, SEQUENCE)
+ >>> parse_line("Unit 1- [ 85- 86] [SEQUENCE]")
+ ColumnUnit(unit=1, column=86, SEQUENCE)
+ """
+ match = _re_unit.match(line.rstrip())
+
+ if not match:
+ return
+
+ return ColumnUnit(int(match.group(1)), int(match.group(3)), match.group(4))
+
+def parse(iterable):
+ """
+ format
+
+ Column 0:
+ Unit 0- [ -1- 0] [SEQUENCE]
+ Unit 1- [ 85- 86] [SEQUENCE]
+
+ means that seq1[0] == seq2[86] (0-based)
+ """
+
+ alignment = Alignment()
+ for line in iterable:
+ try:
+ if os.environ["WISE_PY_DEBUG"]:
+ print line,
+ except KeyError:
+ pass
+
+ column_unit = parse_line(line)
+ if column_unit:
+ alignment.append(column_unit)
+
+ return alignment
+
+def align(pair,
+ scores=None,
+ gap_start=None,
+ gap_extension=None,
+ *args, **keywds):
+
+ os.environ["WISE_FORCE_TYPE"] = "PROTEIN"
+ cmdline = _CMDLINE_PSW[:]
+ if scores:
+ cmdline.extend((_OPTION_SCORES, scores))
+ if gap_start:
+ cmdline.extend((_OPTION_GAP_START, str(gap_start)))
+ if gap_extension:
+ cmdline.extend((_OPTION_GAP_EXTENSION, str(gap_extension)))
+ temp_file = Wise.align(cmdline, pair, *args, **keywds)
+ return parse(temp_file)
+
+def main():
+ print align(sys.argv[1:3])
+
+def _test(*args, **keywds):
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+if __name__ == "__main__":
+ if __debug__:
+ _test()
+ main()
@@ -0,0 +1,2 @@
+>ENSG00000172056|ENST00000321078|ENSE00001281503
+TCAGGTTCCAGATGCGACATCCAGATGACCCAGTCTCCATCTTCCGTGTCTGCATCTGTAGGAGACAGAGTCACCATCACTTGTCGGGCGAGTCAGGGTATTAGCAGCTGGTTAGCCTGGTATCAGCAGAAACCAGGGAAAGCCCCTAAGCTCCTGATCTATGCTGCATCCAGTTTGCAAAGTGGGGTCCCATCAAGGTTCAGCGGCAGTGGATCTGGGACAGATTTCACTCTCACCATCAGCAGCCTGCAGCCTGAAGATTTTGCAACTTACTATTGTCAACAGGCTAACA
@@ -0,0 +1,2 @@
+>ENSG00000163182|ENST00000295339|ENSE00001130648
+ATGGAAGCCCCAGCTCAGCTTCTCTTCCTCCTGCTACTCTGGCTCCCAG
View
@@ -0,0 +1,11 @@
+test_Wise
+test_dnal (test_Wise.TestWiseDryRun) ... ok
+test_psw (test_Wise.TestWiseDryRun) ... ok
+test_align (test_Wise.TestWise) ... ok
+doctest of Bio.Wise._build_align_cmdline ... ok
+doctest of Bio.Wise.all_pairs ... ok
+
+----------------------------------------------------------------------
+Ran 5 tests in 0.010s
+
+OK
View
@@ -0,0 +1,14 @@
+test_psw
+test_AlignmentColumn_assertions (test_psw.TestPSW) ... ok
+test_AlignmentColumn_full (test_psw.TestPSW) ... ok
+test_AlignmentColumn_kinds (test_psw.TestPSW) ... ok
+test_AlignmentColumn_repr (test_psw.TestPSW) ... ok
+test_Alignment_assertions (test_psw.TestPSW) ... ok
+test_Alignment_normal (test_psw.TestPSW) ... ok
+test_ColumnUnit (test_psw.TestPSW) ... ok
+doctest of Bio.Wise.psw.parse_line ... ok
+
+----------------------------------------------------------------------
+Ran 8 tests in 0.002s
+
+OK
View
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+# Copyright 2004 by Michael Hoffman. All rights reserved. This code is
+# part of the Biopython distribution and governed by its license.
+# Please see the LICENSE file that should have been included as part
+# of this package.
+
+import commands
+import re
+
+if commands.getoutput("dnal").find("command not found") != -1:
+ raise ImportError
Oops, something went wrong.

0 comments on commit 3a5d550

Please sign in to comment.