Skip to content

Commit

Permalink
fetchfa: Avoid HTTP 414 error; blast2accmap: Print gi if exists
Browse files: browse the repository at this point in the history
  • Loading branch information
Jian-Long Huang committed Jan 30, 2013
1 parent a1b4c0c commit 24e189a
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 8 deletions.
15 changes: 12 additions & 3 deletions bdorpy/blast2accmap.py
Expand Up @@ -7,7 +7,7 @@
# http://opensource.org/licenses/MIT
#
# Author: Jian-Long Huang (jianlong@ntu.edu.tw)
# Version: 0.4
# Version: 0.5
# Created: 2013.1.25
#
# Required:
Expand All @@ -29,6 +29,7 @@

import sys
import argparse
import re
from Bio.Blast import NCBIXML
from fhandle import name, logmsg

Expand Down Expand Up @@ -63,9 +64,11 @@ def main():
fw.write('# E-value threshold: ' + str(args.ev_thresh) + '\n')
fw.write('# min hit number: ' + str(args.min_hit_num) + '\n')
fw.write('#\n')
fw.write('# query_accession hit_accession_1,hit_accession_2, ...\n\n')
fw.write('# filename query_accession,hit_accession_1,hit_accession_2, ...\n\n')
fw.flush()

gi = re.compile('gi\|(\d+)\|')

for blast_record in blast_records:
total_query_num += 1

Expand All @@ -81,7 +84,13 @@ def main():
continue

if hsp.expect <= args.ev_thresh:
hit_accs.append(alignment.accession)
match = gi.match(alignment.hit_id).group(1)

if match is None:
print(alignment.accession + ' does not have gi.')
hit_accs.append(alignment.accession)
else:
hit_accs.append(match)
break

if len(hit_accs) >= args.min_hit_num:
Expand Down
23 changes: 20 additions & 3 deletions bdorpy/fetchfa.py
Expand Up @@ -7,7 +7,7 @@
# http://opensource.org/licenses/MIT
#
# Author: Jian-Long Huang (jianlong@ntu.edu.tw)
# Version: 0.3
# Version: 0.4
# Created: 2013.1.24
#
# Required:
Expand Down Expand Up @@ -59,7 +59,7 @@ def main():
fwlog.write(i)
fwlog.flush()

Entrez.email = 'fetchfa@example.com'
Entrez.email = name.genid() + '@example.com'

if args.query_id is not None:
with open(args.output + '.fa', 'w') as fw, open(args.output + '.log', 'w') as fwlog:
Expand Down Expand Up @@ -90,11 +90,28 @@ def main():
query_num += 1

with open(os.path.abspath(args.output) + '/' + line.split('\t')[0] + '.fa', 'w') as fw:
alist = line.rstrip().split('\t')[1].split(',')

while len(alist) > 30:
alist_part = alist[0:30]
alist = alist[30:len(alist)]

handle = Entrez.efetch(db=args.database,
id=','.join(alist_part),
rettype='fasta',
retmode='text')
fw.write(handle.read())
fw.flush()
handle.close()

handle = Entrez.efetch(db=args.database,
id=line.split('\t')[0] + ',' + line.split('\t')[1].rstrip(),
id=','.join(alist),
rettype='fasta',
retmode='text')
fw.write(handle.read())
fw.flush()
handle.close()

fwlog.write('# Fetched queries: ' + str(query_num) + '\n')
fwlog.write('#\n')

Expand Down
4 changes: 2 additions & 2 deletions docs/format/blastaccmap.txt
Expand Up @@ -3,5 +3,5 @@
# Updated: 2013.1.25

col_index title
0 query_accession
1 hit_accession_1,hit_accession_2, ...
0 name of the output file to be created
1 accession_1,accession_2, ...

0 comments on commit 24e189a

Please sign in to comment.