# BLAST


## Remote BLAST

In [1]:
from Bio.Blast import NCBIWWW

# Submit a "blastn" search against the "nt" database to NCBI over the
# network; the query is passed as the identifier string "8332116".
result_handle = NCBIWWW.qblast(program="blastn", database="nt", sequence="8332116")

In [2]:
# Read the entire response into a list of text lines. This consumes
# result_handle, so the same handle cannot be read a second time.
remote_result = result_handle.readlines()

In [3]:
# Number of lines in the raw response returned by the remote search.
len(remote_result)

1442

In [17]:
from Bio.Blast import NCBIXML

# Run the same remote query again to obtain a fresh handle: the earlier one
# was already read to the end with readlines().
result_handle = NCBIWWW.qblast(program="blastn", database="nt", sequence="8332116")

# Hand the response to the XML parser; the records are collected later.
blast_records = NCBIXML.parse(result_handle)

In [18]:
# Collect every parsed record into a list so it can be indexed
# (e.g. blast_rec_list[0]) and revisited.
blast_rec_list = list(blast_records)

In [29]:
# Print every HSP belonging to the first five alignments of the first record.
top_alignments = blast_rec_list[0].alignments[:5]
for alignment in top_alignments:
    for hsp in alignment.hsps:
        print(hsp)

Score 482 (435 bits), expectation 1.2e-117, alignment length 624
Query:      59 ACAGAAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGA...GTA 678
               || ||||||||| |||| | |||| ||  |||| |||| | ||||... ||
Sbjct:     278 ACCGAAAATGGGCAGAGGAGTGAATTATATGGCAATGACACCTGA...TTA 901
Score 468 (423 bits), expectation 7.5e-114, alignment length 590
Query:      63 AAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGATCAA...CCC 649
               |||||||| |||  |||| | || ||||| |||||||| || |||...|||
Sbjct:      11 AAAATGGGTAGACGAATGGATTATTTGGCGATGAAAACCGAGCAA...CCC 600
Score 448 (405 bits), expectation 2.0e-108, alignment length 597
Query:      87 TTGGCCATGAAAACTGATCAATTGGCCGTGGCTAATATGATCGAT...TAG 679
               ||||||||||||||||| ||| ||||  |||||||| |||| |||...|||
Sbjct:      81 TTGGCCATGAAAACTGAGCAAATGGCGTTGGCTAATTTGATAGAT...TAG 677
Score 439 (397 bits), expectation 1.0e-105, alignment length 596
Query:      63 AAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGATCAA...CTG 655
               |||||||||||    ||| |||  |||

## Parsing a BLAST XML file

In [44]:
from Bio.Blast import NCBIXML

# Read the single BLAST record stored in the XML file. The context manager
# closes the file as soon as the record has been read, instead of leaving
# the handle open for the lifetime of the kernel.
with open("files/my_blast.xml") as result_handle:
    blast_record = NCBIXML.read(result_handle)

# Display the database_sequences attribute of the parsed record.
blast_record.database_sequences

3056429

In [42]:
from Bio.Blast import NCBIXML

# Iterate over the records in the XML file and print each record's database.
# The loop stays inside the `with` block because parse() reads from the
# handle while it is being iterated; the file is closed once the loop ends.
with open("files/my_blast.xml") as result_handle:
    blast_records = NCBIXML.parse(result_handle)

    for rec in blast_records:
        print(rec.database)

refseq_rna


### BLAST record class

In [45]:
from Bio.Blast import NCBIXML

# Load the record inside a context manager so the XML file is closed as soon
# as parsing is done rather than being left open.
with open("files/my_blast.xml") as result_handle:
    blast_record = NCBIXML.read(result_handle)

# Only HSPs with an expect value strictly below this threshold are printed.
E_VALUE_THRESH = 0.04

# NOTE(review): the slice starts at index 1, so the first alignment of the
# record is skipped — confirm this is intended.
for alignment in blast_record.alignments[1:5]:
    for hsp in alignment.hsps:
        if hsp.expect < E_VALUE_THRESH:
            print("****Alignment****")
            print("sequence:", alignment.title)
            print("length:", alignment.length)
            print("e value:", hsp.expect)
            # Show at most the first 75 characters of each alignment row;
            # "..." is appended unconditionally, even for shorter rows.
            print(hsp.query[0:75] + "...")
            print(hsp.match[0:75] + "...")
            print(hsp.sbjct[0:75] + "...")

****Alignment****
sequence: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b (MIR520B), microRNA
length: 60
e value: 1.71483e-22
CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG...
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||...
CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG...
****Alignment****
sequence: gi|270133242|ref|NR_032573.1| Macaca mulatta microRNA mir-519a (MIR519A), microRNA
length: 85
e value: 2.54503e-20
CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG...
||||||| ||||||||||||||||| |||||||||||||||||||||||||||||||||||...
CCCTCTAGAGGGAAGCGCTTTCTGTGGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG...
****Alignment****
sequence: gi|301171322|ref|NR_035857.1| Pan troglodytes microRNA mir-520c (MIR520C), microRNA
length: 86
e value: 8.88303e-20
CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG...
|||||| |||||||| ||||||||||||||||||||||||||||||||||||||||||||...
CCTCTAGAGGGAAGCACTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTT

## Local BLAST

In [49]:
# Shell escape: build a nucleotide (-dbtype nucl) BLAST database from
# files/ls_orchid.fasta, written under the name files/orchid_blastdb.
! makeblastdb -in files/ls_orchid.fasta -dbtype nucl -out files/orchid_blastdb



Building a new DB, current time: 12/21/2018 14:05:06
New DB name:   /home/alper/biopython/files/orchid_blastdb
New DB title:  files/ls_orchid.fasta
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 94 sequences in 0.00462008 seconds.


In [59]:
# Shell escape: search files/opuntia.fasta against the local orchid database.
# With -outfmt 7 the result is printed as tab-separated rows preceded by
# "#" comment lines, as seen in the output below.
! blastn -db files/orchid_blastdb -query files/opuntia.fasta -task blastn -outfmt 7   

# BLASTN 2.6.0+
# Query: gi|6273291|gb|AF191665.1|AF191665 Opuntia marenae rpl16 gene; chloroplast gene for chloroplast product, partial intron sequence
# Database: files/orchid_blastdb
# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score
# 2 hits found
gi|6273291|gb|AF191665.1|AF191665	gi|2765597|emb|Z78472.1|PLZ78472	100.000	13	0	0	139	151	208	220	2.1	24.7
gi|6273291|gb|AF191665.1|AF191665	gi|2765594|emb|Z78469.1|PHZ78469	100.000	12	0	0	140	151	210	221	7.3	22.9
# BLASTN 2.6.0+
# Query: gi|6273290|gb|AF191664.1|AF191664 Opuntia clavata rpl16 gene; chloroplast gene for chloroplast product, partial intron sequence
# Database: files/orchid_blastdb
# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score
# 2 hits found
gi|6273290|gb|AF191664.1|AF191664	gi|2765597|emb|Z78472.1|PLZ78472	100.000	13	0	0	139	

In [66]:
from Bio.Blast.Applications import NcbiblastnCommandline

# Describe a local blastn run whose results go to files/opuntia.xml
# (outfmt=5). Nothing is executed until the object is called.
# NOTE(review): the Bio.Blast.Applications wrappers are reportedly deprecated
# in recent Biopython releases — confirm the installed version provides them.
blastn_cline = NcbiblastnCommandline(
    query="files/opuntia.fasta",
    db="files/orchid_blastdb",
    evalue=10,
    outfmt=5,
    task="blastn",
    out="files/opuntia.xml",
)

In [67]:
# Display the repr of the command-line object (shows the configured options).
blastn_cline

NcbiblastnCommandline(cmd='blastn', out='files/opuntia.xml', outfmt=5, query='files/opuntia.fasta', db='files/orchid_blastdb', evalue=10, task='blastn')

In [68]:
# Printing the object shows the equivalent shell command line.
print(blastn_cline)

blastn -out files/opuntia.xml -outfmt 5 -query files/opuntia.fasta -db files/orchid_blastdb -evalue 10 -task blastn


In [69]:
# Calling the object runs the command and returns the captured stdout and
# stderr. The command was configured with out="files/opuntia.xml", so the
# results are presumably written to that file — verify.
stdout, stderr = blastn_cline()

In [72]:
# Inspect what the run wrote to stderr.
stderr

''

In [73]:
from Bio.Blast.Applications import NcbiblastnCommandline

# Same search as the XML run, but with outfmt=7 and a different output file,
# files/opuntia_blast.out. Nothing is executed until the object is called.
blastn_cline = NcbiblastnCommandline(
    query="files/opuntia.fasta",
    db="files/orchid_blastdb",
    evalue=10,
    outfmt=7,
    task="blastn",
    out="files/opuntia_blast.out",
)

In [74]:
# Run the command object defined in the previous cell and capture the
# stdout and stderr it returns.
stdout, stderr = blastn_cline()