# Blast Application - Ethan Payne

In [1]:
# Imports
from Bio.Blast import NCBIWWW, NCBIXML
from Bio import SeqIO
import pandas as pd

Using the previously made command-line script, we can write a function that takes a file name (including the file extension) and runs a BLAST search.

In [2]:
def blast(file_name: str, blast_type: str, megablast: bool):
    """
    Run an NCBI BLAST search (via NCBIWWW.qblast) for the first record of a FASTA
    file, save the XML response to disk and print the strong matches.

    :inputs:
        file_name: path to a FASTA file (including file extension); only the
            first record in the file is used as the query.
        blast_type: one of blastn, blastp, blastx, tblastn or tblastx
            (case-insensitive).
        megablast: if True and blast_type is blastn, the search is submitted
            with megablast enabled; ignored for every other blast_type.
    :returns: None. The response is written to
        "<file_name without its extension>_results.xml" and one "match" line is
        printed for every HSP whose e-value is below 1e-20.
    :raises: ValueError if the FASTA file contains no records.
    """
    import os  # local import: only needed to derive the results file name

    # Error check. Normalise once so the spelling that is validated is also the
    # spelling that is compared and submitted (e.g. "BLASTN" behaves like "blastn").
    program = blast_type.lower()
    if program not in ("blastn", "blastp", "blastx", "tblastn", "tblastx"):
        print("blast_type not one of blastn, blastp, blastx, tblastn or tblastx")
        return

    # Open sequence file; the context manager closes the handle once the first
    # record has been read.
    with open(file_name) as fasta_handle:
        seq_record = next(SeqIO.parse(fasta_handle, "fasta"), None)
    if seq_record is None:
        raise ValueError("No FASTA records found in " + str(file_name))

    # blastp and blastx search a protein database; the remaining programs search
    # a nucleotide database.
    database = "nr" if program in ("blastp", "blastx") else "nt"

    # Megablast only applies to blastn. The option is passed only when requested
    # so that a plain search is submitted exactly as before.
    extra_options = {}
    if program == "blastn" and megablast:
        extra_options["megablast"] = True

    # Start query (a single request; previously a megablast request was sent and
    # then discarded in favour of a second, plain request).
    print("Starting query...")
    query = NCBIWWW.qblast(program, database, seq_record.seq, **extra_options)
    print(query)

    # Store results as XML. splitext strips only the final extension, so paths
    # such as "./seq.fna" or "sample.v2.fna" keep the rest of their name.
    results_path = os.path.splitext(str(file_name))[0] + "_results.xml"
    print("Storing results as an XML file...")
    with open(results_path, "w") as save_file:
        save_file.write(query.read())

    print("Success! " + results_path + " has been saved!")

    print("Parsing XML file...")
    # Parse XML results and report every HSP below the e-value threshold.
    E_VALUE_THRESH = 1e-20
    with open(results_path) as results_handle:
        for record in NCBIXML.parse(results_handle):
            if record.alignments:
                print("\n")
                print("query: %s" % record.query[:100])
                for align in record.alignments:
                    for hsp in align.hsps:
                        if hsp.expect < E_VALUE_THRESH:
                            print("match: %s " % align.title[:100])

In [4]:
# Run a standard (non-megablast) blastn search for the sequence in this FASTA file.
blast(
    file_name="aporrectodea_rosea.fna",
    blast_type="blastn",
    megablast=False,
)

Starting query...
<_io.StringIO object at 0x000001F6009C81F0>
Storing results as an XML file...
Success! aporrectodea_rosea_results.xml has been saved!
Parsing XML file...


query: No definition line
match: gi|1829765830|ref|NC_046733.1| Aporrectodea rosea haplogroup L4 mitochondrion, complete genome >gi|1 
match: gi|1786486037|gb|MK618512.1| Eisenia nordenskioldi pallida from Russia mitochondrion, partial genome 
match: gi|1786486121|gb|MK642871.1| Eisenia tracta from Kazakhstan mitochondrion, partial genome 
match: gi|1786486023|gb|MK618511.1| Eisenia nana from Kazakhstan mitochondrion, partial genome 
match: gi|2281547199|ref|NC_065213.1| Eisenia fetida mitochondrion, complete genome >gi|2264337310|gb|OK513 
match: gi|1786486065|gb|MK642867.1| Eisenia nordenskioldi nordenskioldi from Russia mitochondrion, partial  
match: gi|1786486009|gb|MK618510.1| Eisenia nordenskioldi nordenskioldi from Russia mitochondrion, partial  
match: gi|1786486093|gb|MK642869.1| Eisenia nordenskioldi pal

Now we can turn the generated XML file into a pandas DataFrame!

In [5]:
# Load every <Hit> element of the results XML as one DataFrame row.
df = pd.read_xml(
    path_or_buffer="aporrectodea_rosea_results.xml",
    xpath=".//Hit",
)
df

Unnamed: 0,Hit_num,Hit_id,Hit_def,Hit_accession,Hit_len,Hit_hsps
0,1,gi|1829765830|ref|NC_046733.1|,Aporrectodea rosea haplogroup L4 mitochondrion...,NC_046733,15086,
1,2,gi|1786486037|gb|MK618512.1|,Eisenia nordenskioldi pallida from Russia mito...,MK618512,14567,
2,3,gi|1786486121|gb|MK642871.1|,"Eisenia tracta from Kazakhstan mitochondrion, ...",MK642871,14589,
3,4,gi|1786486023|gb|MK618511.1|,"Eisenia nana from Kazakhstan mitochondrion, pa...",MK618511,14599,
4,5,gi|2281547199|ref|NC_065213.1|,"Eisenia fetida mitochondrion, complete genome ...",NC_065213,16560,
5,6,gi|1786486065|gb|MK642867.1|,Eisenia nordenskioldi nordenskioldi from Russi...,MK642867,14576,
6,7,gi|1786486009|gb|MK618510.1|,Eisenia nordenskioldi nordenskioldi from Russi...,MK618510,14592,
7,8,gi|1786486093|gb|MK642869.1|,Eisenia nordenskioldi pallida from Russia mito...,MK642869,14553,
8,9,gi|1786486135|gb|MK642872.1|,Eisenia balatonica from Kazakhstan mitochondri...,MK642872,14589,
9,10,gi|1786486051|gb|MK618513.1|,Eisenia nordenskioldi nordenskioldi from Russi...,MK618513,14567,
