Skip to content

Commit 787a01a

Browse files
author
Johannes Groos
committed
Set default mapping quality filter to 30 and bowtie2 option to fast. Reworked tests for new defaults.
1 parent 8f32d1b commit 787a01a

13 files changed

+3560
-16011
lines changed

vxdetector/VXdetector.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,11 @@
44
import glob
55
import os
66
import sys
7-
import time
8-
9-
# SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
10-
# sys.path.append(os.path.dirname(SCRIPT_DIR))
11-
# sys.path.append('$CONDA/lib/python3.9/site-packages')
12-
13-
import pandas as pd # noqa: E402
14-
from vxdetector.interact_bowtie2 import mapbowtie2, buildbowtie2 # noqa: E402
15-
from vxdetector.interact_bedtools import overlap # noqa: E402
16-
import vxdetector.Output_counter as Output_counter # noqa: E402
17-
import vxdetector.files_manager as files_manager # noqa: E402
7+
import pandas as pd
8+
import vxdetector.Output_counter as Output_counter
9+
import vxdetector.files_manager as files_manager
10+
from vxdetector.interact_bowtie2 import mapbowtie2, buildbowtie2
11+
from vxdetector.interact_bedtools import overlap
1812

1913

2014
def do_statistic(result):
@@ -252,8 +246,4 @@ def main():
252246

253247

254248
if __name__ == '__main__':
255-
start_time = time.time()
256249
main()
257-
print("--- %s seconds ---" % (time.time() - start_time))
258-
with open('/homes/jgroos/Desktop/Output/time.txt', 'a') as f:
259-
f.write("\n --- %s seconds ---" % (time.time() - start_time))

vxdetector/tests/test_Output_counter.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,7 @@
22

33
import unittest
44
import os
5-
import sys
6-
7-
import_path = f'{__file__.rsplit("/", 2)[0]}/'
8-
sys.path.append(import_path)
9-
10-
import Output_counter as oc # noqa: E402
5+
import vxdetector.Output_counter as oc
116

127

138
data_path = f'{os.path.dirname(__file__)}/test_data/'
@@ -79,7 +74,7 @@ def test_no_aligned(self):
7974
row = {'Number of Reads': 64421,
8075
'Unaligned Reads [%]': 100,
8176
'Not properly paired': 'not paired',
82-
'Sequenced variable region': 'No variable Region',
77+
'Sequenced variable region': 'Not 16S',
8378
'V1': 0.0, 'V2': 0.0,
8479
'V3': 0.0, 'V4': 0.0, 'V5': 0.0,
8580
'V6': 0.0, 'V7': 0.0, 'V8': 0.0, 'V9': 0.0,
@@ -106,13 +101,15 @@ def test_return_value(self):
106101
new_row = oc.create_row(test_data, paired=False)
107102
self.assertEqual(new_row, row)
108103
test_data = f'{data_path}paired/'
109-
row = {'Not aligned to a variable region': 0.014914649681528664,
110-
'Not properly paired': 'not paired', 'Number of Reads': 64421,
111-
'Sequenced variable region': 'V45',
112-
'Unaligned Reads [%]': 12.189999999999998, 'V1': 0.0,
113-
'V2': 0.0, 'V3': 0.0, 'V4': 48.9871668789809,
114-
'V5': 38.800461146496815, 'V6': 0.007457324840764332,
115-
'V7': 0.0, 'V8': 0.0, 'V9': 0.0}
104+
row = {'Number of Reads': 64421,
105+
'Unaligned Reads [%]': 12.189999999999998,
106+
'Not properly paired': 'not paired',
107+
'Sequenced variable region': 'V45', 'V1': 0.0,
108+
'V2': 0.0, 'V3': 0.0, 'V4': 45.375854271356786,
109+
'V5': 42.3973743718593, 'V6': 0.0122571189279732,
110+
'V7': 0.0, 'V8': 0.0, 'V9': 0.0,
111+
'Not aligned to a variable region': 0.0245142378559464}
112+
116113
new_row = oc.create_row(test_data, paired=False)
117114
self.assertEqual(new_row, row)
118115

vxdetector/tests/test_VXdetector.py

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,22 @@
66
import os
77
import sys
88
from glob import glob
9-
10-
11-
sys.path.append('$CONDA/lib/python3.9/site-packages')
12-
13-
import pandas as pd # noqa: E402
14-
15-
sys.path.append(f'{__file__.rsplit("/", 2)[0]}/')
16-
17-
import VXdetector as vx # noqa: E402
18-
import shutil # noqa: E402
9+
import pandas as pd
10+
import vxdetector.VXdetector as vx
11+
import shutil
1912

2013
path = f'{os.path.dirname(__file__)}/'
2114
output_test = f'{path}test_data/Output_test.csv'
2215
result = {'5011_S225_L001': {'Number of Reads': 64421,
2316
'Unaligned Reads [%]': 12.189999999999998,
24-
'Not properly paired': 0.005588239859673088,
17+
'Not properly paired': 0.004843141211716676,
2518
'Sequenced variable region': 'V45', 'V1': 0.0,
26-
'V2': 0.0, 'V3': 0.0, 'V4': 48.9871668789809,
27-
'V5': 38.800461146496815,
28-
'V6': 0.007457324840764332,
19+
'V2': 0.0, 'V3': 0.0, 'V4': 45.375854271356786,
20+
'V5': 42.3973743718593,
21+
'V6': 0.0122571189279732,
2922
'V7': 0.0, 'V8': 0.0, 'V9': 0.0,
3023
'Not aligned to a variable region':
31-
0.014914649681528664}}
24+
0.0245142378559464}}
3225

3326

3427
class test_do_output(unittest.TestCase):
@@ -53,19 +46,20 @@ def test_csv_output(self):
5346
self.assertEqual(output, content)
5447

5548
def test_numeric_conversion(self):
56-
mixed_result = {'5011_S225_L001': {'Number of Reads': '64421',
49+
mixed_result = {'5011_S225_L001': {'Not aligned to a variable region':
50+
0.0245142378559464,
51+
'Number of Reads': '64421',
5752
'Unaligned Reads [%]':
5853
'12.189999999999998',
54+
'V7': 0.0, 'V8': 0.0, 'V9': 0.0,
5955
'Not properly paired':
60-
0.005588239859673088,
56+
0.004843141211716676,
6157
'Sequenced variable region':
6258
'V45', 'V1': 0.0, 'V2': 0.0,
63-
'V3': 0.0, 'V4': '48.9871668789809',
64-
'V5': 38.800461146496815,
65-
'V6': 0.007457324840764332,
66-
'V7': 0.0, 'V8': 0.0, 'V9': 0.0,
67-
'Not aligned to a variable region':
68-
0.014914649681528664}}
59+
'V3': 0.0, 'V4': 45.375854271356786,
60+
'V5': '42.3973743718593',
61+
'V6': 0.0122571189279732,
62+
}}
6963
single_file = True
7064
new_file = f'{self.fp_tmpdir}test3.csv'
7165
vx.do_output(mixed_result, new_file, single_file)
@@ -84,9 +78,9 @@ def test_output_options(self):
8478
'paired,Sequenced variable region,V1,V2,V3,V4,V5,V6,'
8579
'V7,V8,V9,Not aligned to a variable region\n'
8680
'5011_S225_L001,64421,12.189999999999998,'
87-
'0.005588239859673088,V45,0.0,0.0,0.0,48.9871668789809,'
88-
'38.800461146496815,0.007457324840764332,0.0,0.0,0.0'
89-
',0.014914649681528664\n')
81+
'0.004843141211716676,V45,0.0,0.0,0.0,45.375854271356786,'
82+
'42.3973743718593,0.0122571189279732,0.0,0.0,0.0'
83+
',0.0245142378559464\n')
9084
capturedOutput = io.StringIO()
9185
sys.stdout = capturedOutput
9286
new_file = sys.stdout
@@ -100,6 +94,12 @@ def test_output_options(self):
10094

10195

10296
class test_do_statistic(unittest.TestCase):
97+
def setUp(self):
98+
self.fp_tmpdir = tempfile.mkdtemp()
99+
100+
def tearDown(self):
101+
shutil.rmtree(self.fp_tmpdir)
102+
103103
def test_order(self):
104104
result = pd.read_csv(f'{path}test_data/mixed.csv', index_col=0)
105105
columns = ''.join(['Number of Reads',
@@ -120,7 +120,7 @@ def test_basic_function(self):
120120
result = pd.read_csv(f'{path}test_data/result_unpaired.csv',
121121
index_col=0)
122122
statistic = vx.do_statistic(result).round(5)
123-
statistic.to_csv(f'{path}test_data/statistic1.csv')
123+
statistic.to_csv(f'{self.fp_tmpdir}/statistic1.csv')
124124
self.assertTrue(statistic.equals(expected))
125125

126126

@@ -133,9 +133,14 @@ def tearDown(self):
133133
'Output/test_data.csv'):
134134
os.remove(f'{__file__.rsplit("/", 3)[0]}/Output/test_data.csv')
135135
file_list = glob(f'{__file__.rsplit("/", 3)[0]}/Indexed_bt2/*.bt2')
136+
directory_list = glob(f'{__file__.rsplit("/", 3)[0]}/tmp_files_*')
137+
if os.path.exists(f'{path}test_data/dir_test_actual.csv'):
138+
file_list.append(f'{path}test_data/dir_test_actual.csv')
136139
for file in file_list:
137140
os.remove(file)
138141
shutil.rmtree(self.fp_tmpdir)
142+
for directory in directory_list:
143+
shutil.rmtree(directory)
139144

140145
def test_singleFile(self):
141146
expected = f'{path}test_data/Output_test.csv'
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
,Number of Reads,Unaligned Reads [%],Not properly paired,Sequenced variable region,V1,V2,V3,V4,V5,V6,V7,V8,V9,Not aligned to a variable region
2-
5011_S225_L001,64421,12.189999999999998,0.005588239859673088,V45,0.0,0.0,0.0,48.9871668789809,38.800461146496815,0.007457324840764332,0.0,0.0,0.0,0.014914649681528664
2+
5011_S225_L001,64421,12.189999999999998,0.004843141211716676,V45,0.0,0.0,0.0,45.375854271356786,42.3973743718593,0.0122571189279732,0.0,0.0,0.0,0.0245142378559464
Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
,Number of Reads,Unaligned Reads [%],Not properly paired,Sequenced variable region,V1,V2,V3,V4,V5,V6,V7,V8,V9,Not aligned to a variable region
2-
Average,43829.8,11.374,0.004813250141643036,V45,0.0,0.0,0.0,62.51489118968541,25.916430839388727,0.0,0.0,0.0,0.0,0.19467797092586253
3-
Standard deviation,7958.908825963519,2.140789573965645,0.0010836849387105916,,0.0,0.0,0.0,16.35871651979062,14.321883965171105,0.0,0.0,0.0,0.0,0.15121699956111073
4-
5001_S229_L001,48981.0,8.64,0.0048181948102325395,V4,0.0,0.0,0.0,76.6020787818603,14.697437934458788,0.0,0.0,0.0,0.0,0.06048328368090036
5-
5002_S245_L001,52063.0,9.659999999999997,0.00526285461844304,V4,0.0,0.0,0.0,78.8072340425532,11.258176291793314,0.0,0.0,0.0,0.0,0.27458966565349546
6-
5004_S20_L001,31352.0,11.920000000000002,0.006379178361826996,V45,0.0,0.0,0.0,66.46267564966314,21.53255052935515,0.0,0.0,0.0,0.0,0.08477382098171318
7-
5005_S265_L001,42162.0,13.260000000000005,0.0037711683506475024,V45,0.0,0.0,0.0,44.40946455889852,42.19783783783784,0.0,0.0,0.0,0.0,0.132697603263641
8-
5006_S277_L001,44591.0,13.39,0.0038348545670651027,V45,0.0,0.0,0.0,46.2930029154519,39.896151603498545,0.0,0.0,0.0,0.0,0.4208454810495627
2+
Average,31314.428571428572,8.124285714285715,not paired,V45,0.0,0.0,0.0,39.54846684877456,23.396177364786663,0.0,0.0,0.0,0.0,0.35964150072449014
3+
Standard deviation,22340.102408427934,5.818693602355708,,,0.0,0.0,0.0,29.026142068327204,18.3249976982372,0.0,0.0,0.0,0.0,0.5215716865831309
4+
5001_S229_L001,48981.0,8.64,0.0038790551438312817,V4,0.0,0.0,0.0,75.65584204413473,15.598048780487806,0.0,0.0,0.0,0.0,0.10610917537746806
5+
5002_S245_L001,52063.0,9.659999999999997,0.00447534717553733,V45,0.0,0.0,0.0,56.54221176470588,32.30983529411765,0.0,0.0,0.0,0.0,1.4879529411764707
6+
5004_S20_L001,31352.0,11.920000000000002,0.004880071446797652,V45,0.0,0.0,0.0,56.73990697674419,31.13525581395349,0.0,0.0,0.0,0.0,0.20483720930232557
7+
5005_S265_L001,42162.0,13.260000000000005,0.0020160333950002374,V45,0.0,0.0,0.0,43.72145867098865,42.73737439222042,0.0,0.0,0.0,0.0,0.2811669367909238
8+
5006_S277_L001,44591.0,13.39,0.0027359781121751026,V45,0.0,0.0,0.0,44.17984848484848,41.99272727272727,0.0,0.0,0.0,0.0,0.43742424242424244
9+
no_qual_paired,26.0,0.0,0.0,Not 16S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10+
no_qual_test,26.0,0.0,not paired,Not 16S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0

vxdetector/tests/test_data/dir_test_actual.csv

Lines changed: 0 additions & 8 deletions
This file was deleted.

0 commit comments

Comments
 (0)