In [2]:
import subprocess
from Bio import Entrez
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

def write_fasta(sequence, filename):
    """Write *sequence* to *filename* as a single-record FASTA file.

    The record is always written with the fixed id ``"gene"`` and an empty
    description, so the FASTA header line is just ``>gene``.

    Args:
        sequence: Sequence data accepted by ``Bio.Seq.Seq``.
        filename: Path of the FASTA file to create; an existing file is
            overwritten.
    """
    fasta_record = SeqRecord(
        Seq(sequence),
        id="gene",
        description="",
    )
    with open(filename, "w") as fasta_out:
        SeqIO.write(fasta_record, fasta_out, "fasta")

def run_bioblast(query_file, subject_file, outfile, gapopen=10.0, gapextend=0.5):
    """Align two sequence files with the external ``water`` program.

    Args:
        query_file: Path passed to ``water`` as ``-asequence``.
        subject_file: Path passed to ``water`` as ``-bsequence``.
        outfile: Path passed to ``water`` as ``-outfile``; the alignment
            report is written there.
        gapopen: Gap-open penalty (``-gapopen``). Defaults to 10.0.
        gapextend: Gap-extension penalty (``-gapextend``). Defaults to 0.5.

    Raises:
        FileNotFoundError: If the ``water`` executable cannot be found.
        subprocess.CalledProcessError: If ``water`` exits with a non-zero
            status.
    """
    bioblast_cmd = "water"
    command = [
        bioblast_cmd,
        f"-asequence={query_file}",
        f"-bsequence={subject_file}",
        f"-outfile={outfile}",
        f"-gapopen={gapopen}",
        f"-gapextend={gapextend}",
    ]
    # check=True: without it a failed alignment goes unnoticed and a later
    # parsing step would read a missing or stale report file.
    subprocess.run(command, check=True)

def parse_bioblast_output(outfile):
    """Extract the similarity field from an alignment report file.

    The report is scanned for the first line of the form
    ``# Similarity: <value>`` instead of relying on a fixed line number, so
    the result does not depend on how many header lines precede it.

    Args:
        outfile: Path of the report file to read.

    Returns:
        The text after ``Similarity:`` with surrounding whitespace removed,
        e.g. ``"1421/2760 (51.5%)"``.

    Raises:
        ValueError: If the file contains no ``Similarity:`` line.
    """
    marker = "Similarity:"
    with open(outfile, "r") as result_file:
        for line in result_file:
            # Report lines are prefixed with '#'; drop it and the padding.
            field = line.lstrip("#").strip()
            if field.startswith(marker):
                return field[len(marker):].strip()

    raise ValueError(f"no '{marker}' line found in {outfile!r}")

#fetch PSEN1 sequence
def fetch_PSEN1(id="NM_000021.4"):
    """Fetch the PSEN1 nucleotide record from NCBI in GenBank format.

    Args:
        id: Accession to fetch from the ``nucleotide`` database. Any
            non-string value (for example the builtin ``id`` function) is
            ignored and the default accession ``NM_000021.4`` is used, which
            keeps calls that passed a meaningless argument working.

    Returns:
        The single record parsed by ``SeqIO.read`` from the GenBank text.
    """
    accession = id if isinstance(id, str) else "NM_000021.4"
    # Entrez requests are sent with a contact e-mail address.
    Entrez.email = "aanya.kamath@gmail.com"
    # rettype="gb" with retmode="text" requests a GenBank flat file.
    handle = Entrez.efetch(db="nucleotide", id=accession, rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # Close the handle even if parsing fails.
        handle.close()
    return record

#fetch PSEN2 sequence
def fetch_PSEN2(id="NM_000447.3"):
    """Fetch the PSEN2 nucleotide record from NCBI in GenBank format.

    Args:
        id: Accession to fetch from the ``nucleotide`` database. Any
            non-string value (for example the builtin ``id`` function) is
            ignored and the default accession ``NM_000447.3`` is used, which
            keeps calls that passed a meaningless argument working.

    Returns:
        The single record parsed by ``SeqIO.read`` from the GenBank text.
    """
    accession = id if isinstance(id, str) else "NM_000447.3"
    # Entrez requests are sent with a contact e-mail address.
    Entrez.email = "aanya.kamath@gmail.com"
    # rettype="gb" with retmode="text" requests a GenBank flat file.
    handle = Entrez.efetch(db="nucleotide", id=accession, rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # Close the handle even if parsing fails.
        handle.close()
    return record


if __name__ == "__main__":
    # Fetch the two gene records. The RefSeq accessions are passed
    # explicitly; the previous code handed over the builtin `id` function
    # object here by accident.
    sequence_record1 = fetch_PSEN1("NM_000021.4")
    gene1_sequence = sequence_record1.seq
    print(gene1_sequence)
    sequence_record2 = fetch_PSEN2("NM_000447.3")
    gene2_sequence = sequence_record2.seq

    query_file = "gene1.fasta"
    subject_file = "gene2.fasta"
    output_file = "bioblast_result.txt"

    # Write gene sequences to FASTA files
    write_fasta(gene1_sequence, query_file)
    write_fasta(gene2_sequence, subject_file)

    # Run the pairwise alignment
    run_bioblast(query_file, subject_file, output_file)

    # Parse the alignment report
    similarity_percentage = parse_bioblast_output(output_file)

    # Display the result. The parsed value already has the form
    # "matches/length (NN.N%)", so no extra "%" is appended.
    print(f"Similarity Percentage: {similarity_percentage}")

GGAAACAAAACAGCGGCTGGTCTGGAAGGAACCTGAGCTACGAGCCGCGGCGGCAGCGGGGCGGCGGGGAAGCGTATACCTAATCTGGGAGCCTGCAAGTGACAACAGCCTTTGCGGTCCTTAGACAGCTTGGCCTGGAGGAGAACACATGAAAGAAAGAACCTCAAGAGGCTTTGTTTTCTGTGAAACAGTATTTCTATACAGTTGCTCCAATGACAGAGTTACCTGCACCGTTGTCCTACTTCCAGAATGCACAGATGTCTGAGGACAACCACCTGAGCAATACTGTACGTAGCCAGAATGACAATAGAGAACGGCAGGAGCACAACGACAGACGGAGCCTTGGCCACCCTGAGCCATTATCTAATGGACGACCCCAGGGTAACTCCCGGCAGGTGGTGGAGCAAGATGAGGAAGAAGATGAGGAGCTGACATTGAAATATGGCGCCAAGCATGTGATCATGCTCTTTGTCCCTGTGACTCTCTGCATGGTGGTGGTCGTGGCTACCATTAAGTCAGTCAGCTTTTATACCCGGAAGGATGGGCAGCTAATCTATACCCCATTCACAGAAGATACCGAGACTGTGGGCCAGAGAGCCCTGCACTCAATTCTGAATGCTGCCATCATGATCAGTGTCATTGTTGTCATGACTATCCTCCTGGTGGTTCTGTATAAATACAGGTGCTATAAGGTCATCCATGCCTGGCTTATTATATCATCTCTATTGTTGCTGTTCTTTTTTTCATTCATTTACTTGGGGGAAGTGTTTAAAACCTATAACGTTGCTGTGGACTACATTACTGTTGCACTCCTGATCTGGAATTTTGGTGTGGTGGGAATGATTTCCATTCACTGGAAAGGTCCACTTCGACTCCAGCAGGCATATCTCATTATGATTAGTGCCCTCATGGCCCTGGTGTTTATCAAGTACCTCCCTGAATGGACTGCGTGGCTCATCTTGGCTGTGATTTCAGTATATGATTTAGTGGCTGTTTTGTG

Smith-Waterman local alignment of sequences


Similarity Percentage: 1421/2760 (51.5%)%
