Updating tests for more modules

jaredgk · Aug 4, 2017 · 2884f74 · 2884f74
1 parent 8d37cb5
commit 2884f74
Show file tree

Hide file tree

Showing 2 changed files with 136 additions and 8 deletions.
diff --git a/andrew/andrew_tests.py b/andrew/andrew_tests.py
@@ -1,58 +1,180 @@
-import unittest, filecmp, sys, os
+import unittest
+import filecmp
+import sys
+import os
+import logging
+import random
+
+# Import scripts to test
 import vcftools
 import vcf_calc
-import logging
+import vcf_sampler
 
+# Used to compare two files, returns bool
 def compare_to_expected(test_output, expected_output):
     return filecmp.cmp(test_output, expected_output)
 
-class vcftools_tests (unittest.TestCase):
-
+# Run tests for the functions within the vcftools module
+class vcftools_module_tests (unittest.TestCase):
+    # Check vcftools input argument assignment function
     def test_vcf_argument_parser (self):
+        # Assign the input argument from the function
         input_arg = vcftools.assign_vcftools_input_arg('example/locus8.vcf.gz')
+        # Confirm the correct argument has been assigned
         self.assertEqual(input_arg, ['--gzvcf', 'example/locus8.vcf.gz'])
 
+    # Check vcftools log output creation
     def test_produce_vcftools_log (self):
+        # Create the log output using the function
         vcftools.produce_vcftools_log('Log Test:\n1\n2\n3\n', 'out', 'logTest')
+        # Confirm the correct log output has been created
         self.assertTrue(compare_to_expected('out.logTest.log', 'example/locus8.logTest.log'))
+        # Remove log output file
         self.addCleanup(os.remove, 'out.logTest.log')
 
+    # Check vcftools log for errors
     def test_check_vcftools_for_errors (self):
+        # Check the outcome of an output file without errors
         self.assertTrue(vcftools.check_vcftools_for_errors('Log Test:\n1\n2\n3\nRun Time'))
+        # Disable logging module for the following test
         logging.disable(logging.CRITICAL)
+        # Check the outcome of an output file with errors
         with self.assertRaises(Exception) as cm:
             vcftools.check_vcftools_for_errors('Log Test:\n1\n2\n3\nError: No Input')
         self.assertEqual(str(cm.exception), 'Error: No Input')
 
+# Run tests for the vcf calc function
+class vcf_calc_tests (unittest.TestCase):
+
+    # Check that Fst window function is operating correctly
     def test_Fst_window (self):
+        # Run the function with the following arguments
         vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'windowed-weir-fst', '--pop-file', 'example/Paniscus.txt', '--pop-file', 'example/Troglodytes.txt', '--out', 'out'])
+
+        # Confirm that the output is what is expected
         self.assertTrue(compare_to_expected('out.windowed.weir.fst', 'example/locus8.windowed.weir.fst'))
+
+        # Remove the output and log files created by the function
         self.addCleanup(os.remove, 'out.windowed.weir.fst')
         self.addCleanup(os.remove, 'out.windowed.weir.fst.log')
 
+    # Check that the Tajima's D function is operating correctly
     def test_tajimasD (self):
+        # Run the function with the following arguments
         vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'TajimaD', '--out', 'out'])
+
+        # Confirm that the output is what is expected
         self.assertTrue(compare_to_expected('out.Tajima.D', 'example/locus8.Tajima.D'))
+
+        # Remove the output and log files created by the function
         self.addCleanup(os.remove, 'out.Tajima.D')
         self.addCleanup(os.remove, 'out.Tajima.D.log')
 
+    # Check that the window nucleotide diversity function is operating correctly
     def test_window_pi (self):
+        # Run the function with the following arguments
         vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'pi', '--out', 'out'])
+
+        # Confirm that the output is what is expected
         self.assertTrue(compare_to_expected('out.windowed.pi', 'example/locus8.windowed.pi'))
+
+        # Remove the output and log files created by the function
         self.addCleanup(os.remove, 'out.windowed.pi')
         self.addCleanup(os.remove, 'out.windowed.pi.log')
 
+    # Check that the allele frequency function is operating correctly
     def test_freq (self):
+        # Run the function with the following arguments
         vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'freq', '--out', 'out'])
+
+        # Confirm that the output is what is expected
         self.assertTrue(compare_to_expected('out.frq', 'example/locus8.frq'))
+
+        # Remove the output and log files created by the function
         self.addCleanup(os.remove, 'out.frq')
         self.addCleanup(os.remove, 'out.frq.log')
 
+    # Check that the heterozygosity function is operating correctly
     def test_het (self):
+        # Run the function with the following arguments
         vcf_calc.run(['example/locus8.vcf.gz', '--calc-statistic', 'het', '--out', 'out'])
+
+        # Confirm that the output is what is expected
+        self.assertTrue(compare_to_expected('out.het', 'example/locus8.het'))
+
+        # Remove the output and log files created by the function
+        self.addCleanup(os.remove, 'out.het')
+        self.addCleanup(os.remove, 'out.het.log')
+
+# Run tests for the vcf sampler function
+class vcf_sampler_tests (unittest.TestCase):
+
+    # Confirm the random sampler is operating correctly
+    def test_random_sampler (self):
+        # Expected output (based on seed value)
+        expected_sample = [777, 669,  99, 352, 467,
+                           534, 978, 130, 671, 364,
+                           488, 203, 666, 227, 458,
+                            40, 974, 487, 461, 714,
+                           415, 888,  23, 833, 468]
+
+        # Assign random seed
+        random.seed(1000)
+
+        # Confirm that the output is what is expected
+        self.assertEqual(vcf_sampler.random_vcftools_sampler(range(0, 1000), 25), expected_sample)
+
+    # Confirm the uniform sampler is operating correctly
+    def test_uniform_sampler (self):
+        # Expected output (based on seed value)
+        expected_sample = [155, 133,  19,  70,  93,
+                           306, 395, 226, 334, 272,
+                           497, 440, 533, 445, 491,
+                           608, 794, 697, 692, 742,
+                           883, 977, 804, 966, 893]
+
+        # Assign random seed
+        random.seed(1000)
+
+        # Confirm that the output is what is expected
+        self.assertEqual(vcf_sampler.uniform_vcftools_sampler(range(0, 1000), 5, 25), expected_sample)
+
+    # Confirm column assignment is operating correctly
+    def test_column_assignment (self):
+        # Confirm that the output is what is expected (without errors)
+        self.assertEqual(vcf_sampler.assign_position_columns(['CHROM', 'NULL', 'BIN_START', 'BIN_END']), (0, 2, 3))
+        self.assertEqual(vcf_sampler.assign_position_columns(['CHROM', 'NULL', 'BIN_START', 'NULL']), (0, 2, None))
+
+        # Disable logging module for the following test
+        logging.disable(logging.CRITICAL)
+
+        # Confirm that the output is what is expected (with errors)
+        with self.assertRaises(ValueError) as cm:
+            vcf_sampler.assign_position_columns(['CHROM', 'NULL', 'NULL', 'BIN_END'])
+        self.assertEqual(str(cm.exception), 'Cannot find BIN_START column in file specified by --statistic-file.')
+        # Confirm that the output is what is expected (with errors)
+        with self.assertRaises(ValueError) as cm:
+            vcf_sampler.assign_position_columns(['NULL', 'NULL', 'BIN_START', 'BIN_END'])
+        self.assertEqual(str(cm.exception), 'Cannot find CHROM column in file specified by --statistic-file.')
+        # Confirm that the output is what is expected (with errors)
+        with self.assertRaises(ValueError) as cm:
+            vcf_sampler.assign_position_columns(['NULL', 'NULL', 'NULL', 'NULL'])
+        self.assertEqual(str(cm.exception), 'Cannot find CHROM and BIN_START columns in file specified by --statistic-file.')
+    # Need file that is big enough to test (but not too large)
+    '''
+    # Check that the heterozygosity function is operating correctly
+    def test_sampler (self):
+        # Run the function with the following arguments
+        vcf_sampler.run(['example/locus8.vcf.gz', '--calc-statistic', 'het', '--out', 'out'])
+
+        # Confirm that the output is what is expected
         self.assertTrue(compare_to_expected('out.het', 'example/locus8.het'))
+
+        # Remove the output and log files created by the function
         self.addCleanup(os.remove, 'out.het')
         self.addCleanup(os.remove, 'out.het.log')
+    '''
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/andrew/vcf_sampler.py b/andrew/vcf_sampler.py
@@ -109,21 +109,27 @@ def assign_position_columns (sample_headers):
             return sample_headers.index('CHROM'), sample_headers.index('BIN_START'), None
         else:
             if 'CHROM' in sample_headers:
-                sys.exit('Cannot find BIN_START in file')
+                logging.error('Cannot find BIN_START column in file specified by --statistic-file.')
+                raise ValueError('Cannot find BIN_START column in file specified by --statistic-file.')
+            elif 'BIN_START' in sample_headers:
+                logging.error('Cannot find CHROM column in file specified by --statistic-file.')
+                raise ValueError('Cannot find CHROM column in file specified by --statistic-file.')
             else:
-                sys.exit('Cannot find CHROM and BIN_START in file')
+                logging.error('Cannot find CHROM and BIN_START columns in file specified by --statistic-file.')
+                raise ValueError('Cannot find CHROM and BIN_START columns in file specified by --statistic-file.')
+
     return sample_headers.index('CHROM'), sample_headers.index('BIN_START'), sample_headers.index('BIN_END')
 
 def assign_statistic_column (sample_headers, statistic):
     statistic_converter = {'windowed-weir-fst':'MEAN_FST', 'TajimaD':'TajimaD'}
 
     if not statistic_converter.has_key(statistic):
         logging.critical('Statistic not found. Statistic list needs to be updated. Please contact the PPP Team.')
-        sys.exit('Statistic not found. Statistic list needs to be updated. Please contact the PPP Team.')
+        raise Exception('Statistic not found. Statistic list needs to be updated. Please contact the PPP Team.')
 
     if statistic_converter[statistic] not in sample_headers:
         logging.error('Statistic selected not found in file specified by --statistic-file.')
-        sys.exit('Statistic selected not found in file specified by --statistic-file.')
+        raise ValueError('Statistic selected not found in file specified by --statistic-file.')
 
     return statistic_converter[statistic]