jlab · sjanssen2 · Jul 6, 2022 · Jul 5, 2022 · Jul 5, 2022 · Jul 5, 2022
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,2 @@
+[run]
+relative_files = True
diff --git a/.github/workflows/github_tests.yml b/.github/workflows/github_tests.yml
@@ -9,7 +9,7 @@ on:
 jobs:
   build-linux:
     runs-on: ubuntu-latest
-    
+
     steps:
     - name: Checkout Repo
       uses: actions/checkout@v2
@@ -23,8 +23,13 @@ jobs:
       run: |
         $CONDA/bin/flake8 vxdetector/
     - name: run python tests
-      run: nosetests vxdetector --with-coverage --with-doctest
-    - name: Coveralls
+      run: |
+        $CONDA/bin/nosetests vxdetector --with-doctest --with-coverage
+    - name: convert coverage
+      run: |
+        $CONDA/bin/coverage lcov
+    - name: send coverage report
       uses: coverallsapp/github-action@master
       with:
         github-token: ${{ secrets.GITHUB_TOKEN }}
+        path-to-lcov: "coverage.lcov"
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+.coverage
+coverage.lcov
diff --git a/environment.yml b/environment.yml
@@ -6,4 +6,4 @@ channels:
 dependencies:
   - flake8
   - nose
-  - coverage
+  - coverage >= 6  # to ensure lcov option is available
diff --git a/vxdetector/Output_counter.py b/vxdetector/Output_counter.py
@@ -18,24 +18,28 @@ def directory_navi(file_name, path, dir_name, dir_path):
     return file_name, dir_name, dir_path
 
 
-
-def create_output(path, file_name, unaligned_count, most_probable_V, probability, dir_name, dir_path):
-    file_name, dir_name, dir_path = directory_navi(file_name, path, dir_name, dir_path)
+def create_output(path, file_name, unaligned_count, most_probable_V,
+                  probability, dir_name, dir_path):
+    file_name, dir_name, dir_path = directory_navi(file_name, path,
+                                                   dir_name, dir_path)
     unaligned_count = unaligned_count.split(' ')[1]
     unaligned_count = float(unaligned_count.strip('()%'))
-    new_file = dir_path + dir_name +'.csv'
+    new_file = dir_path + dir_name + '.csv'
     if os.path.exists(new_file):
         header = True
     else:
         header = False
     with open(new_file, 'a', newline='') as o:
-        fieldnames = ['Read-file', 'Unaligned Reads [%]', 'Sequenced variable region', 'Probability [%]']
+        fieldnames = ['Read-file', 'Unaligned Reads [%]',
+                      'Sequenced variable region', 'Probability [%]']
         writer = csv.DictWriter(o, fieldnames=fieldnames)
-        if header == False:
+        if header is False:
             writer.writeheader()
-        writer.writerow({'Read-file' : file_name, 'Unaligned Reads [%]' : unaligned_count, 'Sequenced variable region' : most_probable_V, 'Probability [%]' : probability})
-
-
+        writer.writerow({
+            'Read-file': file_name,
+            'Unaligned Reads [%]': unaligned_count,
+            'Sequenced variable region': most_probable_V,
+            'Probability [%]': probability})
 
 
 def count(count, temp_path, file_name, file_type, path, dir_name, dir_path):
@@ -49,22 +53,23 @@ def count(count, temp_path, file_name, file_type, path, dir_name, dir_path):
                 dictionary[line_list[-1]] = 1
             else:
                 dictionary[line_list[-1]] += 1
-                #counts all appearing variable Regions
-    dictionary = sorted(dictionary.items(), key=lambda x:x[1], reverse=True)	#sorts the Dictionary with all variable regions for extracting the most probable one
+                # counts all appearing variable Regions
+    # sorts the Dictionary with all variable regions for extracting the most
+    # probable one
+    dictionary = sorted(dictionary.items(), key=lambda x: x[1], reverse=True)
     most_probable_V = dictionary[0]
     most_probable_V_count = most_probable_V[1]
     most_probable_V = most_probable_V[0].strip('\n')
     probability = round((most_probable_V_count/count)*100, 2)
     with open(Log_path, 'r') as log:
         lines = log.readlines()
         unaligned_count = lines[2].strip('\n\t ')
-    if file_type == None:
-        print( file_name + ': ')
+    if file_type is None:
+        print(file_name + ': ')
         print(unaligned_count)
-        print('The sequenced variable Region in this file is ' + most_probable_V + ' with a probability of ' + str(probability) + '%.')
+        print('The sequenced variable Region in this file is '
+              + most_probable_V + ' with a probability of '
+              + str(probability) + '%.')
     else:
-        create_output(path, file_name, unaligned_count, most_probable_V, probability, dir_name, dir_path)
-
-
-
-
+        create_output(path, file_name, unaligned_count, most_probable_V,
+                      probability, dir_name, dir_path)
diff --git a/vxdetector/VXdetector.py b/vxdetector/VXdetector.py
@@ -8,29 +8,47 @@
 import Output_counter
 import files_manager
 
-def workflow(path, temp_path, file_path, file_type, file_name, dir_name, dir_path):
+
+def workflow(path, temp_path, file_path, file_type, file_name, dir_name,
+             dir_path):
     interact_bowtie2.buildbowtie2(path)
-    if file_type != None:        
-        interact_bowtie2.mapbowtie2(file_path, path, temp_path, file_type)	#The Programm bowtie2 is used to align the Reads to a reference 16S database.
+    if file_type is not None:
+        # The Programm bowtie2 is used to align the Reads to a reference
+        # 16S database.
+        interact_bowtie2.mapbowtie2(file_path, path, temp_path, file_type)
     else:
-        interact_bowtie2.mapbowtie2(file_path, path, temp_path, file_type=' -q')
-    count = interact_samtools.SAMtoBAM(path, temp_path)	#convert the .sam file from bowtie2 to .bam for compability
-    interact_bedtools.overlap(path, temp_path)	#look which reads intersect with which variable Region
-    Output_counter.count(count, temp_path, file_name, file_type, path, dir_name, dir_path)	#counts the Variable Regions that are found with bedtools and prints the highest probable variable Region
+        interact_bowtie2.mapbowtie2(file_path, path, temp_path,
+                                    file_type=' -q')
+    # convert the .sam file from bowtie2 to .bam for compability
+    count = interact_samtools.SAMtoBAM(path, temp_path)
+    # look which reads intersect with which variable Region
+    interact_bedtools.overlap(path, temp_path)
+    # counts the Variable Regions that are found with bedtools and prints the
+    # highest probable variable Region
+    Output_counter.count(count, temp_path, file_name, file_type, path,
+                         dir_name, dir_path)
 
 
 def main():
-    parser = argparse.ArgumentParser(prog= 'VX detector', description= 'This programm tries to find which variable region of the 16S sequence was sequencend')
-    parser.add_argument('-d', '--directory', dest='dir_path', help='Directory path of the directory containing multiple fastq or fasta files.' )
-    parser.add_argument('-sf', '--single-file', dest='fasta_file', default=None,help= 'Filepath of the fastq file containing the sequencences of which the variable regions are to be verified.')
-	#/homes/jgroos/Downloads/study_raw_data_14513_062122-034504/per_sample_FASTQ/147774/5001_S229_L001_R1_001.fastq
+    parser = argparse.ArgumentParser(prog='VX detector', description=(
+        'This programm tries to find which variable region of the 16S '
+        'sequence was sequencend'))
+    parser.add_argument('-d', '--directory', dest='dir_path',
+                        help=('Directory path of the directory containing '
+                              'multiple fastq or fasta files.'))
+    parser.add_argument('-sf', '--single-file', dest='fasta_file',
+                        default=None,
+                        help=('Filepath of the fastq file containing the '
+                              'sequencences of which the variable regions '
+                              'are to be verified.'))
+    # /homes/jgroos/Downloads/study_raw_data_14513_062122-034504/per_sample_FASTQ/147774/5001_S229_L001_R1_001.fastq
     args = parser.parse_args()
     path = files_manager.get_lib()
     temp_path = files_manager.tmp_dir(path, temp_path='')
     file_type = None
     fasta_ext = ('.fasta', '.fa', '.ffa', '.ffn', '.faa', '.frn')
     fastq_ext = ('.fq', '.fastq',)
-    if args.fasta_file == None:
+    if args.fasta_file is None:
         for root, dirs, files in os.walk(args.dir_path, topdown=True):
             for file in files:
                 if '_R2_' in file:
@@ -47,14 +65,17 @@ def main():
                     file_type = ' -f'
                 else:
                     continue
-                workflow(path, temp_path, file_path, file_type, file_name, dir_name, args.dir_path)
-        if file_type == None:
-            print('There were no FASTA or FASTQ files with 16S sequences in this directory')
+                workflow(path, temp_path, file_path, file_type, file_name,
+                         dir_name, args.dir_path)
+        if file_type is None:
+            print('There were no FASTA or FASTQ files with 16S sequences '
+                  'in this directory')
     else:
-        workflow(path, temp_path, args.fasta_file, file_type, file_name='Your file', dir_name='', dir_path='')
-    #quit() #keeps the tmp folder for troubleshooting
+        workflow(path, temp_path, args.fasta_file, file_type,
+                 file_name='Your file', dir_name='', dir_path='')
+    # quit() #keeps the tmp folder for troubleshooting
     files_manager.tmp_dir(path, temp_path)
 
-if __name__ == '__main__':
-	main()
 
+if __name__ == '__main__':
+    main()
diff --git a/vxdetector/files_manager.py b/vxdetector/files_manager.py
@@ -4,16 +4,19 @@
 import tempfile
 import shutil
 
-def tmp_dir(path, temp_path): #erstellt tmp verzeichnis; sollte es auch deleten 
+
+def tmp_dir(path, temp_path):
+    """erstellt tmp verzeichnis; sollte es auch deleten"""
     if os.path.exists(temp_path):
         shutil.rmtree(temp_path)
     else:
-        temp_path = tempfile.mkdtemp(suffix=None, prefix='tmp_files_', dir=path) + '/'
-
-
+        temp_path = tempfile.mkdtemp(suffix=None, prefix='tmp_files_',
+                                     dir=path) + '/'
     return temp_path
 
-def get_lib(): #findet die richtigen ordner/dateien, damit das programm funktioniert
+
+def get_lib():
+    """findet die richtigen ordner/dateien, damit das programm funktioniert"""
     programm_path = os.path.dirname(__file__) + '/'
     if os.path.exists(programm_path+'Output/'):
         pass
@@ -23,17 +26,19 @@ def get_lib(): #findet die richtigen ordner/dateien, damit das programm funktion
         if os.path.exists(programm_path+'Indexed_bt2/annoted_ref.bed'):
             pass
         else:
-            print('ERROR: It seems the 16S variable region boundary reference file is missing.')
+            print('ERROR: It seems the 16S variable region boundary reference '
+                  'file is missing.')
         if os.path.exists(programm_path+'Indexed_bt2/85_otus.fasta'):
             pass
         else:
-            print('ERROR: It seems the Greengenes "85_otus.fasta" file is missing.')
+            print('ERROR: It seems the Greengenes "85_otus.fasta" file is'
+                  ' missing.')
         if os.path.exists(programm_path+'Indexed_bt2/85_otus_aligned.fasta'):
             pass
         else:
-            print('ERROR: It seems the Greengenes "85_otus_aligned.fasta" file is missing.')
+            print('ERROR: It seems the Greengenes "85_otus_aligned.fasta" '
+                  'file is missing.')
     else:
         print('ERROR: It seems the "Indexed_bt2" directory is missing.')
-        
+
     return programm_path
-
diff --git a/vxdetector/interact_bedtools.py b/vxdetector/interact_bedtools.py
@@ -2,11 +2,15 @@
 
 import os
 
+
 def overlap(path, temp_path):
-	bedtools_filepath = '/usr/bin/bedtools'
-	BAM_filepath = temp_path + 'bam_sorted.bam'
-	S_ref = path + 'Indexed_bt2/annoted_ref.bed' 
-	#reference "genome" Created by using a aligned greengenes databank and deleting all "-" while keeping track where the variable Regions are 
-	BED_filepath = temp_path + 'BED.bed'
-	cmd = bedtools_filepath + ' intersect -f 0.5 -wb -a ' + BAM_filepath + ' -b ' + S_ref + ' -bed > ' + BED_filepath #Intersects the .bam file with the reference
-	os.system(cmd)
+    bedtools_filepath = '/usr/bin/bedtools'
+    BAM_filepath = temp_path + 'bam_sorted.bam'
+    S_ref = path + 'Indexed_bt2/annoted_ref.bed'
+    # reference "genome" Created by using a aligned greengenes databank and
+    # deleting all "-" while keeping track where the variable Regions are
+    BED_filepath = temp_path + 'BED.bed'
+    # Intersects the .bam file with the reference
+    cmd = bedtools_filepath + ' intersect -f 0.5 -wb -a ' + BAM_filepath + \
+        ' -b ' + S_ref + ' -bed > ' + BED_filepath
+    os.system(cmd)
diff --git a/vxdetector/interact_bowtie2.py b/vxdetector/interact_bowtie2.py
@@ -3,22 +3,28 @@
 import os
 from os.path import exists
 
+
 def buildbowtie2(path):
-	bowtie2_path = '/vol/software/bin/bowtie2-build'
-	input_ref = path + 'Indexed_bt2/85_otus.fasta'	#reference genome greengenes is used
-	output_path = path + 'Indexed_bt2/bowtie2'
-	if exists(output_path + '.1.bt2'):
-		pass	#only builds an index for the alignment if there isn't already one
-	else:
-		cmd = bowtie2_path + ' -f ' + input_ref + ' ' + output_path
-		os.system(cmd)	#if a indexfile is missing a new index is build
+    bowtie2_path = '/vol/software/bin/bowtie2-build'
+    # reference genome greengenes is used
+    input_ref = path + 'Indexed_bt2/85_otus.fasta'
+    output_path = path + 'Indexed_bt2/bowtie2'
+    if exists(output_path + '.1.bt2'):
+        # only builds an index for the alignment if there isn't already one
+        pass
+    else:
+        cmd = bowtie2_path + ' -f ' + input_ref + ' ' + output_path
+        # if a indexfile is missing a new index is build
+        os.system(cmd)
+
 
 def mapbowtie2(fasta_file, path, temp_path, file_type):
     bowtie2_path = '/vol/software/bin/bowtie2'
     samtools_path = '/usr/bin/samtools'
     index_path = path + 'Indexed_bt2/bowtie2'
     log_path = temp_path + 'bowtie2.log'
     BAM_path = temp_path + 'BAM.bam'
-    cmd = bowtie2_path + ' -x ' + index_path + file_type + ' -U ' + fasta_file  + ' --fast 2> ' + log_path + ' | ' + samtools_path + ' view -b -S -F 4 -o ' + BAM_path
+    cmd = bowtie2_path + ' -x ' + index_path + file_type + ' -U ' \
+        + fasta_file + ' --fast 2> ' + log_path + ' | ' + samtools_path \
+        + ' view -b -S -F 4 -o ' + BAM_path
     os.system(cmd)
-
diff --git a/vxdetector/interact_samtools.py b/vxdetector/interact_samtools.py
@@ -3,34 +3,41 @@
 import os
 import subprocess
 
+
 def SAMtoBAM(path, temp_path):
     samtools_filepath = '/usr/bin/samtools'
     BAM_filepath = temp_path + 'BAM.bam'
     BAM_sorted_filepath = temp_path + 'bam_sorted.bam'
-    cmd = samtools_filepath + ' sort ' + BAM_filepath + ' -o ' + BAM_sorted_filepath #sorts the .bam file
+    # sorts the .bam file
+    cmd = samtools_filepath + ' sort ' + BAM_filepath + ' -o ' \
+        + BAM_sorted_filepath
     os.system(cmd)
-    cmd = [samtools_filepath,'view','-c',BAM_sorted_filepath]
+    cmd = [samtools_filepath, 'view', '-c', BAM_sorted_filepath]
     count = subprocess.run(cmd, stdout=subprocess.PIPE)
     count = int(count.stdout.decode('utf-8'))
-    cmd = samtools_filepath + ' index ' + BAM_sorted_filepath	#indexes the .bam file.
+    # indexes the .bam file.
+    cmd = samtools_filepath + ' index ' + BAM_sorted_filepath
     os.system(cmd)
-    
+
     return count
 
-"""   
+
+"""
 def count(path, temp_path):
     samtools_filepath = '/usr/bin/samtools'
     BED_filepath = temp_path + 'BED.bed'
     S_ref = path + 'Indexed_bt2/annoted_ref.bed'
     BAM_filepath = temp_path + 'bam_sorted.bam'
     pileup_path = temp_path + 'pileup.txt'
     pileup_test = temp_path + 'test.txt'
-    cmd = samtools_filepath + ' mpileup ' + BAM_filepath + ' -l ' + S_ref + ' -o ' + pileup_path + ' 2> /dev/null'
+    cmd = samtools_filepath + ' mpileup ' + BAM_filepath + ' -l ' + S_ref + \
+        ' -o ' + pileup_path + ' 2> /dev/null'
     os.system(cmd)
     with open(pileup_path, 'r') as pu:
         for line in pu:
             line_list = line.split('\t')
             with open(pileup_test, 'a') as t:
-                words = line_list[0] + '\t' + line_list[1] + '\t' + line_list[3] + '\n'
+                words = line_list[0] + '\t' + line_list[1] + '\t' + \
+                    line_list[3] + '\n'
                 t.write(words)
 """