Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Adding Bio.Wise to the distribution

  • Loading branch information...
commit 3a5d5506bb16a79d1512ae5d45ba83b3e4be0a33 1 parent 7ace0b4
grouse authored
View
91 Bio/Wise/__init__.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python2.3
+
+__version__ = "$Revision: 1.1 $"
+
+import os
+
+try:
+ import lsf
+
+ _NamedTemporaryFile = lsf.NamedTemporaryFile
+ _localfilename = lsf.localfilename
+except ImportError:
+ import tempfile
+
+ _NamedTemporaryFile = tempfile.NamedTemporaryFile
+ _localfilename = lambda *args: args[0]
+
+def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
+ """
+ >>> os.environ["WISE_KBYTE"]="300000"
+ >>> _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), "/tmp/output", kbyte=100000)
+ 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output'
+ >>> _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), "/tmp/output_aa")
+ 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa'
+
+ """
+ cmdline = cmdline[:]
+
+ if force_type:
+ cmdline.insert(0, "WISE_FORCE_TYPE=%s" % force_type)
+
+ if kbyte is None:
+ try:
+ cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"]))
+ except KeyError:
+ pass
+ else:
+ cmdline.extend(("-kbyte", str(kbyte)))
+
+ cmdline.extend(pair)
+ cmdline.extend((">", output_filename))
+ if quiet:
+ cmdline.extend(("2>", "/dev/null"))
+ cmdline_str = ' '.join(cmdline)
+
+ return cmdline_str
+
+def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False):
+ """
+ Returns a filehandle
+ """
+ temp_file = _NamedTemporaryFile(mode='r')
+ cmdline_str = _build_align_cmdline(cmdline,
+ [_localfilename(filename)
+ for filename in pair],
+ temp_file.name, kbyte, force_type, quiet)
+
+ if dry_run:
+ print cmdline_str
+ return
+
+ os.system(cmdline_str)
+ return temp_file
+
+def all_pairs(singles):
+ """
+ Generate pairs list for all-against-all alignments
+
+ >>> all_pairs(range(4))
+ [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
+ """
+ pairs = []
+
+ singles = list(singles)
+ while singles:
+ suitor = singles.pop(0) # if sorted, stay sorted
+ pairs.extend([(suitor, single) for single in singles])
+
+ return pairs
+
+def main():
+ pass
+
+def _test(*args, **keywds):
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+if __name__ == "__main__":
+ if __debug__:
+ _test()
+ main()
View
76 Bio/Wise/dnal.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+__version__ = "$Revision: 1.1 $"
+
+from __future__ import division
+
+import commands
+import os
+
+from Bio import Wise
+
+_SCORE_MATCH = "4"
+_SCORE_MISMATCH = "-1"
+_PENALTY_GAP_START = "5"
+_PENALTY_GAP_EXTENSION = "1"
+
+_CMDLINE_DNAL = ["dnal", "-alb", "-nopretty",
+ "-match", _SCORE_MATCH, "-mis", _SCORE_MISMATCH,
+ "-gap", _PENALTY_GAP_START, "-ext", _PENALTY_GAP_EXTENSION]
+
+try:
+ import lsf
+ if lsf.jobid:
+ _CMDLINE_DNAL.extend("-silent", "-quiet")
+except ImportError:
+ pass
+
+_CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
+def _fgrep_count(pattern, file):
+ return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
+
+class Statistics(object):
+ """
+ Calculate statistics from an ALB report
+ """
+ def __init__(self, filename):
+ self.matches = _fgrep_count('"SEQUENCE" %s' % _SCORE_MATCH, filename)
+ self.mismatches = _fgrep_count('"SEQUENCE" %s' % _SCORE_MISMATCH, filename)
+ self.gaps = _fgrep_count('"INSERT" -%s' % _PENALTY_GAP_START, filename)
+ self.extensions = _fgrep_count('"INSERT" -%s' % _PENALTY_GAP_EXTENSION, filename)
+
+ def identity_fraction(self):
+ return self.matches/(self.matches+self.mismatches)
+
+ header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
+
+ def __str__(self):
+ return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
+
+def align(pair, *args, **keywds):
+ temp_file = Wise.align(_CMDLINE_DNAL, pair, *args, **keywds)
+ try:
+ return Statistics(temp_file.name)
+ except AttributeError:
+ try:
+ keywds['dry_run']
+ return None
+ except KeyError:
+ raise
+
+def main():
+ import sys
+ stats = align(sys.argv[1:3])
+ print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
+ for attr in
+ ("matches", "mismatches", "gaps", "extensions")])
+ print "identity_fraction: %s" % stats.identity_fraction()
+
+def _test(*args, **keywds):
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+if __name__ == "__main__":
+ if __debug__:
+ _test()
+ main()
View
132 Bio/Wise/psw.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python2.3
+
+__version__ = "$Revision: 1.1 $"
+
+import exceptions
+import os
+import re
+import sys
+
+from Bio import Wise
+
+_CMDLINE_PSW = ["psw-force",
+ "-l"]
+_OPTION_GAP_START = "-g"
+_OPTION_GAP_EXTENSION = "-e"
+_OPTION_SCORES = "-m"
+
+class AlignmentColumnFullException(exceptions.Exception):
+ pass
+
+class Alignment(list):
+ def append(self, column_unit):
+ try:
+ self[-1].append(column_unit)
+ except AlignmentColumnFullException:
+ list.append(self, AlignmentColumn(column_unit))
+ except IndexError:
+ list.append(self, AlignmentColumn(column_unit))
+
+class AlignmentColumn(list):
+ def _set_kind(self, column_unit):
+ if self.kind == "SEQUENCE":
+ self.kind = column_unit.kind
+
+ def __init__(self, column_unit):
+ assert column_unit.unit == 0
+ self.kind = column_unit.kind
+ list.__init__(self, [column_unit.column, None])
+
+ def __repr__(self):
+ return "%s(%s, %s)" % (self.kind, self[0], self[1])
+
+ def append(self, column_unit):
+ if self[1] is not None:
+ raise AlignmentColumnFullException
+
+ assert column_unit.unit == 1
+
+ self._set_kind(column_unit)
+ self[1] = column_unit.column
+
+class ColumnUnit(object):
+ def __init__(self, unit, column, kind):
+ self.unit = unit
+ self.column = column
+ self.kind = kind
+
+ def __str__(self):
+ return "ColumnUnit(unit=%s, column=%s, %s)" % (self.unit, self.column, self.kind)
+
+ __repr__ = __str__
+
+_re_unit = re.compile(r"^Unit +([01])- \[ *(-?\d+)- *(-?\d+)\] \[(\w+)\]$")
+def parse_line(line):
+ """
+ >>> print parse_line("Column 0:")
+ None
+ >>> parse_line("Unit 0- [ -1- 0] [SEQUENCE]")
+ ColumnUnit(unit=0, column=0, SEQUENCE)
+ >>> parse_line("Unit 1- [ 85- 86] [SEQUENCE]")
+ ColumnUnit(unit=1, column=86, SEQUENCE)
+ """
+ match = _re_unit.match(line.rstrip())
+
+ if not match:
+ return
+
+ return ColumnUnit(int(match.group(1)), int(match.group(3)), match.group(4))
+
+def parse(iterable):
+ """
+ format
+
+ Column 0:
+ Unit 0- [ -1- 0] [SEQUENCE]
+ Unit 1- [ 85- 86] [SEQUENCE]
+
+ means that seq1[0] == seq2[86] (0-based)
+ """
+
+ alignment = Alignment()
+ for line in iterable:
+ try:
+ if os.environ["WISE_PY_DEBUG"]:
+ print line,
+ except KeyError:
+ pass
+
+ column_unit = parse_line(line)
+ if column_unit:
+ alignment.append(column_unit)
+
+ return alignment
+
+def align(pair,
+ scores=None,
+ gap_start=None,
+ gap_extension=None,
+ *args, **keywds):
+
+ os.environ["WISE_FORCE_TYPE"] = "PROTEIN"
+ cmdline = _CMDLINE_PSW[:]
+ if scores:
+ cmdline.extend((_OPTION_SCORES, scores))
+ if gap_start:
+ cmdline.extend((_OPTION_GAP_START, str(gap_start)))
+ if gap_extension:
+ cmdline.extend((_OPTION_GAP_EXTENSION, str(gap_extension)))
+ temp_file = Wise.align(cmdline, pair, *args, **keywds)
+ return parse(temp_file)
+
+def main():
+ print align(sys.argv[1:3])
+
+def _test(*args, **keywds):
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__], *args, **keywds)
+
+if __name__ == "__main__":
+ if __debug__:
+ _test()
+ main()
View
2  Tests/Wise/human_114_g01_exons.fna_01
@@ -0,0 +1,2 @@
+>ENSG00000172056|ENST00000321078|ENSE00001281503
+TCAGGTTCCAGATGCGACATCCAGATGACCCAGTCTCCATCTTCCGTGTCTGCATCTGTAGGAGACAGAGTCACCATCACTTGTCGGGCGAGTCAGGGTATTAGCAGCTGGTTAGCCTGGTATCAGCAGAAACCAGGGAAAGCCCCTAAGCTCCTGATCTATGCTGCATCCAGTTTGCAAAGTGGGGTCCCATCAAGGTTCAGCGGCAGTGGATCTGGGACAGATTTCACTCTCACCATCAGCAGCCTGCAGCCTGAAGATTTTGCAACTTACTATTGTCAACAGGCTAACA
View
2  Tests/Wise/human_114_g02_exons.fna_01
@@ -0,0 +1,2 @@
+>ENSG00000163182|ENST00000295339|ENSE00001130648
+ATGGAAGCCCCAGCTCAGCTTCTCTTCCTCCTGCTACTCTGGCTCCCAG
View
11 Tests/output/test_Wise
@@ -0,0 +1,11 @@
+test_Wise
+test_dnal (test_Wise.TestWiseDryRun) ... ok
+test_psw (test_Wise.TestWiseDryRun) ... ok
+test_align (test_Wise.TestWise) ... ok
+doctest of Bio.Wise._build_align_cmdline ... ok
+doctest of Bio.Wise.all_pairs ... ok
+
+----------------------------------------------------------------------
+Ran 5 tests in 0.010s
+
+OK
View
14 Tests/output/test_psw
@@ -0,0 +1,14 @@
+test_psw
+test_AlignmentColumn_assertions (test_psw.TestPSW) ... ok
+test_AlignmentColumn_full (test_psw.TestPSW) ... ok
+test_AlignmentColumn_kinds (test_psw.TestPSW) ... ok
+test_AlignmentColumn_repr (test_psw.TestPSW) ... ok
+test_Alignment_assertions (test_psw.TestPSW) ... ok
+test_Alignment_normal (test_psw.TestPSW) ... ok
+test_ColumnUnit (test_psw.TestPSW) ... ok
+doctest of Bio.Wise.psw.parse_line ... ok
+
+----------------------------------------------------------------------
+Ran 8 tests in 0.002s
+
+OK
View
12 Tests/requires_wise.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+# Copyright 2004 by Michael Hoffman. All rights reserved. This code is
+# part of the Biopython distribution and governed by its license.
+# Please see the LICENSE file that should have been included as part
+# of this package.
+
+import commands
+import re
+
+if commands.getoutput("dnal").find("command not found") != -1:
+ raise ImportError
View
59 Tests/test_Wise.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python2.3
+
+__version__ = "$Revision: 1.1 $"
+
+import cStringIO
+import doctest, unittest
+import sys
+
+import requires_wise
+
+from Bio import Wise
+
+class TestWiseDryRun(unittest.TestCase):
+ def setUp(self):
+ self.old_stdout = sys.stdout
+ sys.stdout = cStringIO.StringIO()
+
+ def test_dnal(self):
+ Wise.align(["dnal"], ("seq1.fna", "seq2.fna"), kbyte=100000, dry_run=True)
+ self.assert_(sys.stdout.getvalue().startswith("dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/tmp"))
+
+ def test_psw(self):
+ Wise.align(["psw"], ("seq1.faa", "seq2.faa"), dry_run=True)
+ self.assert_(sys.stdout.getvalue().startswith("psw -kbyte 300000 seq1.faa seq2.faa > /tmp/tmp"))
+
+ def tearDown(self):
+ sys.stdout = self.old_stdout
+
+class TestWise(unittest.TestCase):
+ def test_align(self):
+ temp_file = Wise.align(["dnal"], ("Wise/human_114_g01_exons.fna_01", "Wise/human_114_g02_exons.fna_01"), kbyte=100000, force_type="DNA", quiet=True)
+ self.assertEqual(temp_file.readline().rstrip(), "ENSG00000172135 AGGGAAAGCCCCTAAGCTC--CTGATCTATGCTGCATCCAGTTTGCAAAGTGGGGTCCC")
+
+def run_tests(argv):
+ test_suite = testing_suite()
+ runner = unittest.TextTestRunner(sys.stdout, verbosity = 2)
+ runner.run(test_suite)
+
+def testing_suite():
+ """Generate the suite of tests.
+ """
+ unittest_suite = unittest.TestSuite()
+
+ test_loader = unittest.TestLoader()
+ test_loader.testMethodPrefix = 'test_'
+ tests = [TestWiseDryRun, TestWise]
+
+ for test in tests:
+ cur_suite = test_loader.loadTestsFromTestCase(test)
+ unittest_suite.addTest(cur_suite)
+
+ doctest_suite = doctest.DocTestSuite(Wise)
+
+ big_suite = unittest.TestSuite((unittest_suite, doctest_suite))
+
+ return big_suite
+
+if __name__ == "__main__":
+ sys.exit(run_tests(sys.argv))
View
109 Tests/test_psw.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python2.3
+
+__version__ = "$Revision: 1.1 $"
+
+import doctest, unittest
+import random
+import sys
+
+import requires_wise
+
+from Bio.Wise import psw
+
+class TestPSW(unittest.TestCase):
+ def test_Alignment_normal(self):
+ a = psw.Alignment()
+
+ a.append(psw.ColumnUnit(0, 98, "SEQUENCE"))
+ a.append(psw.ColumnUnit(1, 200, "SEQUENCE"))
+ a.append(psw.ColumnUnit(0, 98, "INSERT"))
+ a.append(psw.ColumnUnit(1, 201, "SEQUENCE"))
+ a.append(psw.ColumnUnit(0, 98, "END"))
+ a.append(psw.ColumnUnit(1, 201, "END"))
+
+ self.assertEqual(str(a), "[SEQUENCE(98, 200), INSERT(98, 201), END(98, 201)]")
+
+ def test_Alignment_assertions(self):
+ a = psw.Alignment()
+
+ self.assertRaises(AssertionError, a.append, psw.ColumnUnit(1, 200, "SEQUENCE"))
+ a.append(psw.ColumnUnit(0, 98, "SEQUENCE"))
+ self.assertRaises(AssertionError, a.append, psw.ColumnUnit(0, 200, "SEQUENCE"))
+ a.append(psw.ColumnUnit(1, 200, "SEQUENCE"))
+ self.assertRaises(AssertionError, a.append, psw.ColumnUnit(1, 200, "SEQUENCE"))
+
+ def test_AlignmentColumn_kinds(self):
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, random.randint(0, 9999), "SEQUENCE"))
+ ac.append(psw.ColumnUnit(1, random.randint(0, 9999), "INSERT"))
+ self.assertEqual(ac.kind, "INSERT")
+
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, random.randint(0, 9999), "INSERT"))
+ ac.append(psw.ColumnUnit(1, random.randint(0, 9999), "SEQUENCE"))
+ self.assertEqual(ac.kind, "INSERT")
+
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, random.randint(0, 9999), "SEQUENCE"))
+ ac.append(psw.ColumnUnit(1, random.randint(0, 9999), "SEQUENCE"))
+ self.assertEqual(ac.kind, "SEQUENCE")
+
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, random.randint(0, 9999), "SEQUENCE"))
+ ac.append(psw.ColumnUnit(1, random.randint(0, 9999), "END"))
+ self.assertEqual(ac.kind, "END")
+
+ def test_AlignmentColumn_repr(self):
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, 34, "SEQUENCE"))
+ ac.append(psw.ColumnUnit(1, 55, "END"))
+ self.assertEqual(repr(ac), "END(34, 55)")
+
+ def test_AlignmentColumn_full(self):
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, random.randint(0, 9999), "SEQUENCE"))
+ ac.append(psw.ColumnUnit(1, random.randint(0, 9999), "END"))
+ self.assertRaises(psw.AlignmentColumnFullException, ac.append, psw.ColumnUnit(1, random.randint(0, 9999), "END"))
+
+ def test_AlignmentColumn_assertions(self):
+ self.assertRaises(AssertionError, psw.AlignmentColumn, psw.ColumnUnit(1, random.randint(0, 9999), "SEQUENCE"))
+
+ ac = psw.AlignmentColumn(psw.ColumnUnit(0, random.randint(0, 9999), "SEQUENCE"))
+ self.assertRaises(AssertionError, ac.append, psw.ColumnUnit(0, random.randint(0, 9999), "SEQUENCE"))
+
+
+ def test_ColumnUnit(self):
+ self.assertEqual(repr(psw.ColumnUnit(0, 33, "SEQUENCE")),
+ "ColumnUnit(unit=0, column=33, SEQUENCE)")
+
+ self.assertEqual(repr(psw.ColumnUnit(1, 33, "INSERT")),
+ "ColumnUnit(unit=1, column=33, INSERT)")
+
+ self.assertEqual(repr(psw.ColumnUnit(1, 33, "END")),
+ "ColumnUnit(unit=1, column=33, END)")
+
+ PARSED = "[SEQUENCE(39, 22), SEQUENCE(40, 23), SEQUENCE(41, 24), SEQUENCE(42, 25), SEQUENCE(43, 26), SEQUENCE(44, 27), END(0, 27)]"
+
+# def test_align(self):
+# self.assertEqual(repr(psw.align(("Wise/human_114_g01_exons.fna_01", "Wise/human_114_g02_exons.fna_01"), "introns.bla", 23, 5, quiet=True)), self.PARSED)
+
+def run_tests(argv):
+ test_suite = testing_suite()
+ runner = unittest.TextTestRunner(sys.stdout, verbosity = 2)
+ runner.run(test_suite)
+
+def testing_suite():
+ """Generate the suite of tests.
+ """
+ unittest_suite = unittest.TestSuite()
+
+ test_loader = unittest.TestLoader()
+ test_loader.testMethodPrefix = 'test_'
+ tests = [TestPSW]
+
+ for test in tests:
+ cur_suite = test_loader.loadTestsFromTestCase(test)
+ unittest_suite.addTest(cur_suite)
+
+ doctest_suite = doctest.DocTestSuite(psw)
+
+ big_suite = unittest.TestSuite((unittest_suite, doctest_suite))
+
+ return big_suite
+
+if __name__ == "__main__":
+ sys.exit(run_tests(sys.argv))
View
1  setup.py
@@ -317,6 +317,7 @@ def is_reportlab_installed():
'Bio.UniGene',
'Bio.writers',
'Bio.writers.SeqRecord',
+ 'Bio.Wise',
'Bio.WWW',
]
Please sign in to comment.
Something went wrong with that request. Please try again.