# BLAST Code

In [35]:
from Bio import SeqIO
from Bio.Blast import NCBIWWW 


x = input("Which protein are you looking for?")

f = open(f"{x}.fasta")
seq = SeqIO.read(f, "fasta")

result_handle = NCBIWWW.qblast("blastp", "nr", seq.format("fasta"))
save_file = open(f"{x}_blast.xml", "w")
save_file.write(result_handle.read())
save_file.close()
result_handle.close()

Which protein are you looking for? Q15399


In [36]:
from Bio.Blast import NCBIXML
import json

result_handle = open(f"{x}_blast.xml")
blast_records = NCBIXML.parse(result_handle)

allhits = []
hits = []
ids = []
for blast_record in blast_records:
    for alignment in blast_record.alignments: 
        for hsp in alignment.hsps:
            dict_all = {}
            info_hit = []
            if alignment.hit_id.split("|")[2] not in ids:
                info_hit.extend((alignment.hit_def, alignment.hit_id.split("|")[2], alignment.length, hsp.expect, hsp.score, alignment.accession))
                dict_all["Info"] = alignment.hit_def
                dict_all["Hit"] = alignment.hit_id.split("|")[2]
                ids.append(alignment.hit_id.split("|")[2])
                dict_all["Lenght"] = alignment.length
                dict_all["E-value"] = hsp.expect
                dict_all["Score"] = hsp.score
                dict_all["Accession Number"] = alignment.accession
                allhits.append(dict_all)
                hits.append(info_hit)
            
print(f"Number of hits: {len(allhits)}")
print(json.dumps(allhits, indent = 3))

Number of hits: 1
[
   {
      "Info": "toll-like receptor 1 precursor [Homo sapiens] >ref|XP_005262719.1| toll-like receptor 1 isoform X1 [Homo sapiens] >ref|XP_011512044.1| toll-like receptor 1 isoform X1 [Homo sapiens] >ref|XP_011512047.1| toll-like receptor 1 isoform X1 [Homo sapiens] >ref|XP_016864060.1| toll-like receptor 1 isoform X1 [Homo sapiens] >ref|XP_016864061.1| toll-like receptor 1 isoform X1 [Homo sapiens] >ref|XP_024309964.1| toll-like receptor 1 isoform X1 [Homo sapiens] >sp|Q15399.3| RecName: Full=Toll-like receptor 1; AltName: Full=Toll/interleukin-1 receptor-like protein; Short=TIL; AltName: CD_antigen=CD281; Flags: Precursor [Homo sapiens] >gb|AIC49870.1| TLR1, partial [synthetic construct] >emb|SJX28382.1| unnamed protein product, partial [Human ORFeome Gateway entry vector] >gb|AAI09094.1| Toll-like receptor 1 [Homo sapiens] >gb|AAY85639.1| toll-like receptor 1 [Homo sapiens] >dbj|BAG55014.1| toll-like receptor 1 [Homo sapiens]",
      "Hit": "",
      "Lenght":

In [15]:
import numpy as np
import pandas as pd
from tabulate import tabulate

info_firstline = ["Organism", "Identifier", "Lenght", "E-Value", "Score", "Accession Number"]

numpy_data = np.array(hits)
numpy_data_frame = numpy_data.reshape((len(numpy_data), 6))

info_genes = pd.DataFrame(data = numpy_data_frame, columns = info_firstline)
print(tabulate(info_genes, headers='keys', tablefmt='psql'))

+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+----------+-----------+---------+-----------

## Multiple BLAST

In [None]:
import requests

file_path = 'ids.txt'  # Replace with the file path of your text file

# Read in the UniProt IDs from the text file
with open(file_path, 'r') as id_file:
    ids = [line.strip() for line in id_file]

# Check if each ID can be retrieved by UniProt
updated_ids = []
for id in ids:
    url = f'https://www.uniprot.org/uniprot/{id}.fasta'
    response = requests.get(url)
    if response.ok:
        updated_ids.append(id)
    else:
        print(f'Unable to retrieve record for ID {id}, removing from file...')

# Write the updated IDs back to the file
with open(file_path, 'w') as id_file:
    for id in updated_ids:
        id_file.write(id + '\n')