Skip to content
This repository
Browse code

Conversion of test_Clustalw_tool.py to unittest

Peter: Based on Christian's branch, collapsing several commits into one
  • Loading branch information...
commit fbd59c6169b45460aa25e208c6a2d02d5f8c6e61 1 parent a42db54
Christian Brueffer authored November 20, 2012 peterjc committed November 20, 2012

Showing 1 changed file with 213 additions and 157 deletions. Show diff stats Hide diff stats

  1. 370  Tests/test_Clustalw_tool.py
370  Tests/test_Clustalw_tool.py
@@ -9,6 +9,7 @@
9 9
 
10 10
 import sys
11 11
 import os
  12
+import unittest
12 13
 from Bio import SeqIO
13 14
 from Bio import AlignIO
14 15
 from Bio.Align.Applications import ClustalwCommandline
@@ -56,7 +57,7 @@
56 57
                 break
57 58
 else:
58 59
     import commands
59  
-    #Note that clustalw 1.83 and clustalw 2.0.10 don't obey the --version
  60
+    #Note that clustalw 1.83 and clustalw 2.1 don't obey the --version
60 61
     #command, but this does cause them to quit cleanly.  Otherwise they prompt
61 62
     #the user for input (causing a lock up).
62 63
     output = commands.getoutput("clustalw2 --version")
@@ -75,167 +76,222 @@
75 76
     raise MissingExternalDependencyError(\
76 77
         "Install clustalw or clustalw2 if you want to use it from Biopython.")
77 78
 
78  
-#################################################################
  79
+class ClustalWTestCase(unittest.TestCase):
  80
+    """Class implementing common functions for ClustalW tests."""
79 81
 
80  
-print "Checking error conditions"
81  
-print "========================="
82  
-
83  
-print "Empty file"
84  
-input_file = "does_not_exist.fasta"
85  
-assert not os.path.isfile(input_file)
86  
-cline = ClustalwCommandline(clustalw_exe, infile=input_file)
87  
-try:
88  
-    stdout, stderr = cline()
89  
-    assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)
90  
-except ApplicationError, err:
91  
-    print "Failed (good)"
92  
-    #Python 2.3 on Windows gave (0, 'Error')
93  
-    #Python 2.5 on Windows gives [Errno 0] Error
94  
-    assert "Cannot open sequence file" in str(err) or \
95  
-           "Cannot open input file" in str(err) or \
96  
-           "non-zero exit status" in str(err), str(err)
97  
-
98  
-print
99  
-print "Single sequence"
100  
-input_file = "Fasta/f001"
101  
-assert os.path.isfile(input_file)
102  
-assert len(list(SeqIO.parse(input_file,"fasta")))==1
103  
-cline = ClustalwCommandline(clustalw_exe, infile=input_file)
104  
-try:
105  
-    stdout, stderr = cline()
106  
-    if "cannot do multiple alignment" in (stdout + stderr):
107  
-        #Zero return code is a possible bug in clustal?
108  
-        print "Failed (good)"
109  
-    else:
110  
-        assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)
111  
-except ApplicationError, err:
112  
-    print "Failed (good)"
113  
-    assert str(err) == "No records found in handle", str(err)
114  
-
115  
-print
116  
-print "Invalid sequence"
117  
-input_file = "Medline/pubmed_result1.txt"
118  
-assert os.path.isfile(input_file)
119  
-cline = ClustalwCommandline(clustalw_exe, infile=input_file)
120  
-try:
121  
-    stdout, stderr = cline()
122  
-    assert False, "Should have failed, returned:\n%s\n%s" % (stdout, stderr)
123  
-except ApplicationError, err:
124  
-    print "Failed (good)"
125  
-    #Ideally we'd catch the return code and raise the specific
126  
-    #error for "invalid format", rather than just notice there
127  
-    #is no output file.
128  
-    #Note:
129  
-    #Python 2.3 on Windows gave (0, 'Error')
130  
-    #Python 2.5 on Windows gives [Errno 0] Error
131  
-    assert "invalid format" in str(err) \
132  
-           or "not produced" in str(err) \
133  
-           or "No sequences in file" in str(err) \
134  
-           or "non-zero exit status " in str(err), str(err)
  82
+    def setUp(self):
  83
+        self.files_to_clean = set()
135 84
 
136  
-#################################################################
137  
-print
138  
-print "Checking normal situations"
139  
-print "=========================="
140  
-
141  
-#Create a temp fasta file with a space in the name
142  
-temp_filename_with_spaces = "Clustalw/temp horses.fasta"
143  
-handle = open(temp_filename_with_spaces, "w")
144  
-SeqIO.write(SeqIO.parse("Phylip/hennigian.phy","phylip"),handle, "fasta")
145  
-handle.close()
146  
-
147  
-#Create a large input file by converting another example file
148  
-#(See Bug 2804, this will produce so much output on stdout that
149  
-#subprocess could suffer a deadlock and hang).  Using all the
150  
-#records should show the deadlock but is very slow - just thirty
151  
-#seems to lockup on Mac OS X, even 20 on Linux (without the fix).
152  
-temp_large_fasta_file = "temp_cw_prot.fasta"
153  
-handle = open(temp_large_fasta_file, "w")
154  
-records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
155  
-SeqIO.write(records, handle, "fasta")
156  
-handle.close()
157  
-del handle, records
158  
-
159  
-for input_file, output_file, statistics_file, newtree_file in [
160  
-    ("Fasta/f002", "temp_test.aln", "temp_stats.txt", None),
161  
-    ("GFF/multi.fna", "temp with space.aln", "temp_stats.txt", None),
162  
-    ("Registry/seqs.fasta", "temp_test.aln", "temp_stats.txt", None),
163  
-    ("Registry/seqs.fasta", "temp_test.aln", "temp stats with space.txt", "temp_test.dnd"),
164  
-    ("Registry/seqs.fasta", "temp_test.aln", "temp_stats.txt", "temp with space.dnd"),
165  
-    (temp_filename_with_spaces, "temp_test.aln", "temp_stats.txt", None),
166  
-    (temp_filename_with_spaces, "temp with space.aln", "temp_stats", None),
167  
-    (temp_large_fasta_file, "temp_cw_prot.aln", "temp_stats.txt", None),
168  
-    ]:
169  
-    #Note that ClustalW will map ":" to "_" in its output file
170  
-    input_records = SeqIO.to_dict(SeqIO.parse(input_file,"fasta"),
171  
-                                  lambda rec : rec.id.replace(":","_"))
172  
-    if os.path.isfile(output_file):
173  
-        os.remove(output_file)
174  
-    print "Calling clustalw on %s (with %i records)" \
175  
-          % (repr(input_file), len(input_records))
176  
-    print "using output file %s" % repr(output_file)
177  
-    if newtree_file is not None:
178  
-        print "requesting output guide tree file %s" % repr(newtree_file)
179  
-
180  
-    #Any filenames with spaces should get escaped with quotes automatically.
181  
-    #Using keyword arguments here.
182  
-    if clustalw_exe == "clustalw2":
183  
-        # By using the stats keyword, we require ClustalW 2.0.10 or higher.
  85
+    def tearDown(self):
  86
+        for filename in self.files_to_clean:
  87
+            if os.path.isfile(filename):
  88
+                os.remove(filename)
  89
+
  90
+    def standard_test_procedure(self, cline):
  91
+        """Standard testing procedure used by all tests."""
  92
+        self.assertTrue(str(eval(repr(cline))) == str(cline))
  93
+        input_records = SeqIO.to_dict(SeqIO.parse(cline.infile, "fasta"),
  94
+                                      lambda rec : rec.id.replace(":", "_"))
  95
+
  96
+        #Determine name of tree file
  97
+        if cline.newtree:
  98
+            tree_file = cline.newtree
  99
+        else:
  100
+            #Clustalw will name it based on the input file
  101
+            tree_file = os.path.splitext(cline.infile)[0] + ".dnd"
  102
+
  103
+        # Mark generated files for later removal
  104
+        self.add_file_to_clean(cline.outfile)
  105
+        self.add_file_to_clean(tree_file)
  106
+
  107
+        output, error = cline()
  108
+        self.assertTrue(output.strip().startswith("CLUSTAL"))
  109
+        self.assertTrue(error.strip() == "")
  110
+
  111
+        #Check the output...
  112
+        align = AlignIO.read(cline.outfile, "clustal")
  113
+        #The length of the alignment will depend on the version of clustalw
  114
+        #(clustalw 2.1 and clustalw 1.83 are certainly different).
  115
+        output_records = SeqIO.to_dict(SeqIO.parse(cline.outfile,"clustal"))
  116
+        self.assertTrue(set(input_records.keys()) == set(output_records.keys()))
  117
+        for record in align:
  118
+            self.assertTrue(str(record.seq) == str(output_records[record.id].seq))
  119
+            self.assertTrue(str(record.seq).replace("-", "") == \
  120
+                   str(input_records[record.id].seq))
  121
+
  122
+        #Check the DND file was created.
  123
+        #TODO - Try and parse this with Bio.Nexus?
  124
+        self.assertTrue(os.path.isfile(tree_file))
  125
+
  126
+    def add_file_to_clean(self, filename):
  127
+        """Adds a file for deferred removal by the tearDown routine."""
  128
+        self.files_to_clean.add(filename)
  129
+
  130
+
  131
+class ClustalWTestErrorConditions(ClustalWTestCase):
  132
+    """Test general error conditions."""
  133
+
  134
+    def test_empty_file(self):
  135
+        """Test a non-existing input file."""
  136
+        input_file = "does_not_exist.fasta"
  137
+        self.assertFalse(os.path.isfile(input_file))
  138
+        cline = ClustalwCommandline(clustalw_exe, infile=input_file)
  139
+
  140
+        try:
  141
+            stdout, stderr = cline()
  142
+        except ApplicationError, err:
  143
+            self.assertTrue("Cannot open sequence file" in str(err) or \
  144
+                            "Cannot open input file" in str(err) or \
  145
+                            "non-zero exit status" in str(err))
  146
+        else:
  147
+            self.fail("expected an ApplicationError")
  148
+
  149
+    def test_single_sequence(self):
  150
+        """Test an input file containing a single sequence."""
  151
+        input_file = "Fasta/f001"
  152
+        self.assertTrue(os.path.isfile(input_file))
  153
+        self.assertTrue(len(list(SeqIO.parse(input_file, "fasta"))) == 1)
  154
+        cline = ClustalwCommandline(clustalw_exe, infile=input_file)
  155
+
  156
+        try:
  157
+            stdout, stderr = cline()
  158
+
  159
+            #Zero return code is a possible bug in clustal?
  160
+            self.add_file_to_clean(input_file + ".aln")
  161
+            self.assertTrue("cannot do multiple alignment" in (stdout + stderr))
  162
+        except ApplicationError, err:
  163
+            self.assertTrue(str(err) == "No records found in handle")
  164
+
  165
+    def test_invalid_sequence(self):
  166
+        """Test an input file containing an invalid sequence."""
  167
+        input_file = "Medline/pubmed_result1.txt"
  168
+        self.assertTrue(os.path.isfile(input_file))
  169
+        cline = ClustalwCommandline(clustalw_exe, infile=input_file)
  170
+
  171
+        try:
  172
+            stdout, stderr = cline()
  173
+        except ApplicationError, err:
  174
+            #Ideally we'd catch the return code and raise the specific
  175
+            #error for "invalid format", rather than just notice there
  176
+            #is no output file.
  177
+            #Note:
  178
+            #Python 2.3 on Windows gave (0, 'Error')
  179
+            #Python 2.5 on Windows gives [Errno 0] Error
  180
+            self.assertTrue("invalid format" in str(err) or \
  181
+                            "not produced" in str(err) or \
  182
+                            "No sequences in file" in str(err) or\
  183
+                            "non-zero exit status " in str(err))
  184
+        else:
  185
+            self.fail("expected an ApplicationError")
  186
+
  187
+
  188
+class ClustalWTestNormalConditions(ClustalWTestCase):
  189
+    """Tests for normal conditions."""
  190
+
  191
+    def test_properties(self):
  192
+        """Test passing options via properties."""
  193
+        cline = ClustalwCommandline(clustalw_exe)
  194
+        cline.infile = "Fasta/f002"
  195
+        cline.outfile = "temp_test.aln"
  196
+        cline.align = True
  197
+
  198
+        self.standard_test_procedure(cline)
  199
+
  200
+    def test_simple_fasta(self):
  201
+        """Test a simple fasta input file."""
  202
+        input_file = "Fasta/f002"
  203
+        output_file = "temp_test.aln"
  204
+        cline = ClustalwCommandline(clustalw_exe,
  205
+                                    infile=input_file,
  206
+                                    outfile=output_file)
  207
+
  208
+        self.standard_test_procedure(cline)
  209
+
  210
+    def test_newtree(self):
  211
+        """Test newtree files."""
  212
+        input_file = "Registry/seqs.fasta"
  213
+        output_file = "temp_test.aln"
  214
+        newtree_file = "temp_test.dnd"
184 215
         cline = ClustalwCommandline(clustalw_exe,
185 216
                                     infile=input_file,
186 217
                                     outfile=output_file,
187  
-                                    stats=statistics_file)
188  
-    else:
  218
+                                    newtree=newtree_file,
  219
+                                    align=True)
  220
+
  221
+        self.standard_test_procedure(cline)
  222
+        cline.newtree = "temp with space.dnd"
  223
+        self.standard_test_procedure(cline)
  224
+
  225
+    def test_large_input_file(self):
  226
+        """Test a large input file."""
  227
+
  228
+        #Create a large input file by converting another example file
  229
+        #(See Bug 2804, this will produce so much output on stdout that
  230
+        #subprocess could suffer a deadlock and hang).  Using all the
  231
+        #records should show the deadlock but is very slow - just thirty
  232
+        #seems to lockup on Mac OS X, even 20 on Linux (without the fix).
  233
+        input_file = "temp_cw_prot.fasta"
  234
+        handle = open(input_file, "w")
  235
+        records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
  236
+        SeqIO.write(records, handle, "fasta")
  237
+        handle.close()
  238
+        del handle, records
  239
+        output_file = "temp_cw_prot.aln"
  240
+
189 241
         cline = ClustalwCommandline(clustalw_exe,
190 242
                                     infile=input_file,
191 243
                                     outfile=output_file)
192  
-    assert str(eval(repr(cline)))==str(cline)
193  
-    if newtree_file is not None:
194  
-        #Test using a property:
195  
-        cline.newtree = newtree_file
196  
-        #I don't just want the tree, also want the alignment:
197  
-        cline.align = True
198  
-        assert str(eval(repr(cline)))==str(cline)
199  
-    output, error = cline()
200  
-    assert output.strip().startswith("CLUSTAL")
201  
-    assert error.strip() == ""
202  
-    #Check the output...
203  
-    align = AlignIO.read(output_file, "clustal")
204  
-    #The length of the alignment will depend on the version of clustalw
205  
-    #(clustalw 2.0.10 and clustalw 1.83 are certainly different).
206  
-    print "Got an alignment, %i sequences" % (len(align))
207  
-    if clustalw_exe == "clustalw2":
208  
-        assert os.path.isfile(statistics_file)
209  
-        os.remove(statistics_file)
210  
-    output_records = SeqIO.to_dict(SeqIO.parse(output_file,"clustal"))
211  
-    assert set(input_records.keys()) == set(output_records.keys())
212  
-    for record in align:
213  
-        assert str(record.seq) == str(output_records[record.id].seq)
214  
-        assert str(record.seq).replace("-","") == \
215  
-               str(input_records[record.id].seq)
216  
-
217  
-    #Clean up...
218  
-    os.remove(output_file)
219  
-
220  
-    #Check the DND file was created.
221  
-    #TODO - Try and parse this with Bio.Nexus?
222  
-    if newtree_file is not None:
223  
-        tree_file = newtree_file
224  
-    else:
225  
-        #Clustalw will name it based on the input file
226  
-        tree_file = os.path.splitext(input_file)[0] + ".dnd"
227  
-    assert os.path.isfile(tree_file), \
228  
-           "Did not find tree file %s" % tree_file
229  
-    os.remove(tree_file)
230  
-
231  
-#Clean up any stray temp files..
232  
-if os.path.isfile("Fasta/f001.aln"):
233  
-    os.remove("Fasta/f001.aln")
234  
-if os.path.isfile("Medline/pubmed_result1.aln"):
235  
-    os.remove("Medline/pubmed_result1.aln")
236  
-if os.path.isfile(temp_filename_with_spaces):
237  
-    os.remove(temp_filename_with_spaces)
238  
-if os.path.isfile(temp_large_fasta_file):
239  
-    os.remove(temp_large_fasta_file)
240  
-
241  
-print "Done"
  244
+
  245
+        self.add_file_to_clean(input_file)
  246
+        self.standard_test_procedure(cline)
  247
+
  248
+    def test_input_filename_with_space(self):
  249
+        """Test an input filename containing a space."""
  250
+        input_file = "Clustalw/temp horses.fasta"
  251
+        handle = open(input_file, "w")
  252
+        SeqIO.write(SeqIO.parse("Phylip/hennigian.phy", "phylip"), handle, "fasta")
  253
+        handle.close()
  254
+        output_file = "temp with space.aln"
  255
+
  256
+        cline = ClustalwCommandline(clustalw_exe,
  257
+                                    infile=input_file,
  258
+                                    outfile=output_file)
  259
+
  260
+        self.add_file_to_clean(input_file)
  261
+        self.standard_test_procedure(cline)
  262
+
  263
+    def test_output_filename_with_spaces(self):
  264
+        """Test an output filename containing spaces."""
  265
+        input_file = "GFF/multi.fna"
  266
+        output_file = "temp with space.aln"
  267
+        cline = ClustalwCommandline(clustalw_exe,
  268
+                                    infile=input_file,
  269
+                                    outfile=output_file)
  270
+
  271
+        self.standard_test_procedure(cline)
  272
+
  273
+
  274
+class ClustalWTestVersionTwoSpecific(ClustalWTestCase):
  275
+    """Tests specific to ClustalW2."""
  276
+
  277
+    def test_statistics(self):
  278
+        """Test a statistics file."""
  279
+        if clustalw_exe == "clustalw2":
  280
+            input_file = "Fasta/f002"
  281
+            output_file = "temp_test.aln"
  282
+            statistics_file = "temp_stats.txt"
  283
+            cline = ClustalwCommandline(clustalw_exe,
  284
+                                        infile=input_file,
  285
+                                        outfile=output_file,
  286
+                                        stats=statistics_file)
  287
+
  288
+            self.add_file_to_clean(statistics_file)
  289
+            self.standard_test_procedure(cline)
  290
+            self.assertTrue(os.path.isfile(statistics_file))
  291
+        else:
  292
+            print "Skipping ClustalW2 specific test."
  293
+
  294
+
  295
+if __name__ == "__main__":
  296
+    runner = unittest.TextTestRunner(verbosity = 2)
  297
+    unittest.main(testRunner=runner)

0 notes on commit fbd59c6

Please sign in to comment.
Something went wrong with that request. Please try again.