From a7a918ea3053ccd08753ff4b36e146edd3b076c5 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 6 Jan 2015 02:07:48 +0200 Subject: [PATCH] test parsing blast results --- voseq/blast_local/tests.py | 8 ++++++++ voseq/blast_local/utils.py | 21 ++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/voseq/blast_local/tests.py b/voseq/blast_local/tests.py index 97a1d6fe..5ff6d2ee 100644 --- a/voseq/blast_local/tests.py +++ b/voseq/blast_local/tests.py @@ -124,3 +124,11 @@ def test_do_blast(self): self.blast.save_query_to_file() result = self.blast.do_blast() self.assertTrue(os.path.isfile(result)) + + def test_parse_blast_output(self): + self.blast.save_seqs_to_file() + self.blast.create_blast_db() + self.blast.save_query_to_file() + self.blast.do_blast() + result = self.blast.parse_blast_output() + self.assertTrue(1057 in [i['length'] for i in result]) diff --git a/voseq/blast_local/utils.py b/voseq/blast_local/utils.py index 4be63d50..512d2a8e 100644 --- a/voseq/blast_local/utils.py +++ b/voseq/blast_local/utils.py @@ -5,9 +5,10 @@ import subprocess import uuid +from Bio import SeqIO from Bio.Blast.Applications import NcbiblastnCommandline +from Bio.Blast import NCBIXML from Bio.Seq import Seq -from Bio import SeqIO from Bio.SeqRecord import SeqRecord import pytz @@ -32,6 +33,7 @@ def __init__(self, blast_type, voucher_code, gene_code, mask=None, test=None): :param voucher_code: :param gene_code: """ + self.e_value = 0.001 self.blast_type = blast_type self.voucher_code = voucher_code self.gene_code = gene_code @@ -184,7 +186,7 @@ def save_query_to_file(self): def do_blast(self): blastn_cline = NcbiblastnCommandline(query=self.query_file, db=self.db, - evalue=0.001, outfmt=5, out=self.output_file) + evalue=self.e_value, outfmt=5, out=self.output_file) blastn_cline() return self.output_file @@ -194,7 +196,20 @@ def parse_blast_output(self): match_description, max_score, total_score, query_cover, e_value, % ident, accession number """ - pass + handle = open(self.output_file, 'r') + blast_record = NCBIXML.read(handle) + hits = [] + append = hits.append + + for alignment in blast_record.alignments: + for hsp in alignment.hsps: + if hsp.expect < self.e_value: + obj = {} + obj['sequence'] = alignment.title + obj['length'] = alignment.length + obj['e_value'] = hsp.expect + append(obj) + return hits def strip_question_marks(self, seq): seq = re.sub('^\?+', '', seq)