**Análise de Homologias por BLAST**

In [1]:
import re

from Bio import SeqIO
from Bio.Blast import NCBIWWW, NCBIXML
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

def _hsp_metrics(hsp, query_length):
    """Return ``(query_cover, percent_identity)`` for one HSP, both in percent."""
    # align_length also counts gap columns, so dividing it by the query length
    # overstates coverage (and can exceed 100%). Use the span of the query that
    # the HSP actually covers instead (BLAST coordinates are 1-based, inclusive).
    query_span = abs(hsp.query_end - hsp.query_start) + 1
    query_cover = (query_span / query_length) * 100
    percent_identity = (hsp.identities / hsp.align_length) * 100
    return query_cover, percent_identity


def _species_from_title(title):
    """Return the text inside the first ``[...]`` of a hit title, or a placeholder."""
    species_match = re.search(r"\[(.*?)\]", title)
    return species_match.group(1) if species_match else "Unknown species"


def blast_and_filter(gene_names, e_value_threshold=1e-5, percent_identity_threshold=50, coverage_threshold=50,
                     program="blastp", database="swissprot", genes_dir="genes"):
    """Run a remote NCBI BLAST search for each gene and save the hits that pass the filters.

    For every name in ``gene_names`` the single-record FASTA file
    ``{genes_dir}/{name}.fasta`` is read and submitted through
    ``NCBIWWW.qblast``. For each alignment returned, the first HSP that
    satisfies all three thresholds is written as one FASTA record to
    ``{genes_dir}/{name}_blast.fasta``. Progress and per-HSP statistics are
    printed along the way.

    Args:
        gene_names: Iterable of gene names (file stems inside ``genes_dir``).
        e_value_threshold: Maximum accepted HSP E-value (inclusive).
        percent_identity_threshold: Minimum accepted identity, in percent of
            the alignment length.
        coverage_threshold: Minimum accepted query coverage, in percent of
            the query length.
        program: BLAST program passed to ``NCBIWWW.qblast``.
        database: BLAST database passed to ``NCBIWWW.qblast``.
        genes_dir: Directory holding the input FASTA files; outputs are
            written to the same directory.

    Returns:
        None. Results are written to disk and reported via ``print``.

    Raises:
        Only a missing input file is handled (the gene is skipped). Any other
        error raised while reading the query, contacting NCBI, parsing the
        result or writing the output propagates to the caller.
    """
    for name_gene in gene_names:
        # Read the query sequence; a missing file skips this gene only.
        try:
            query_seq = SeqIO.read(f"{genes_dir}/{name_gene}.fasta", "fasta")
        except FileNotFoundError:
            print(f"Arquivo não encontrado para {name_gene}. Pulando...")
            continue

        print(f"Iniciando busca BLAST para {name_gene}...")
        result_handle = NCBIWWW.qblast(program, database, query_seq.seq)
        print(f"Busca BLAST concluída para {name_gene}.")

        output_path = f"{genes_dir}/{name_gene}_blast.fasta"

        # Parse and filter the results; always release the BLAST result handle.
        try:
            with open(output_path, "w") as output_handle:
                for blast_record in NCBIXML.parse(result_handle):
                    print(f"Número de alinhamentos encontrados para {name_gene}:", len(blast_record.alignments))
                    for alignment in blast_record.alignments:
                        print("Título do alinhamento:", alignment.title)
                        for hsp in alignment.hsps:
                            query_cover, percent_identity = _hsp_metrics(hsp, blast_record.query_letters)
                            print(f"HSP: E-value: {hsp.expect}, Identities: {hsp.identities}, "
                                  f"Align length: {hsp.align_length}, Query Cover: {query_cover:.2f}%")

                            passes_filters = (
                                hsp.expect <= e_value_threshold
                                and percent_identity >= percent_identity_threshold
                                and query_cover >= coverage_threshold
                            )
                            if not passes_filters:
                                continue

                            species = _species_from_title(alignment.title)

                            # NOTE(review): hsp.sbjct is the aligned hit segment (it can
                            # include '-' gap characters), not the full-length hit
                            # sequence -- confirm this is what downstream steps expect.
                            # SeqRecord expects a Seq object, so wrap the plain string.
                            record = SeqRecord(
                                Seq(hsp.sbjct),
                                id=alignment.accession,
                                description=f"E-value: {hsp.expect:.2e}, Identities: {hsp.identities}/{hsp.align_length}, "
                                            f"Query Cover: {query_cover:.2f}%, Percent Identity: {percent_identity:.2f}%, "
                                            f"Species: {species}",
                            )
                            SeqIO.write(record, output_handle, "fasta")
                            break  # Keep only the first HSP that passes, one per alignment
        finally:
            result_handle.close()

        print(f"Resultados filtrados do BLAST para {name_gene} foram salvos em '{output_path}'")



#### **1. Gene ptsP**

In [None]:
# Run the BLAST homology search and hit filtering for the ptsP gene.
# blast_and_filter reads genes/ptsP.fasta, submits a remote NCBI query and
# writes the hits that pass the default thresholds to genes/ptsP_blast.fasta.
gene_names = ["ptsP"]
blast_and_filter(gene_names)

Iniciando busca BLAST para ptsP...




#### **2. Gene ButyrylCoA**

In [4]:
# Run the BLAST homology search and hit filtering for the butyrylCoA gene.
# blast_and_filter reads genes/butyrylCoA.fasta, submits a remote NCBI query and
# writes the hits that pass the default thresholds to genes/butyrylCoA_blast.fasta.
gene_names = ["butyrylCoA"]
blast_and_filter(gene_names)

Iniciando busca BLAST para butyrylCoA...
Busca BLAST concluída para butyrylCoA.
Número de alinhamentos encontrados para butyrylCoA: 8
Título do alinhamento: sp|G2SYC0.1| RecName: Full=Butyryl-CoA:acetate CoA-transferase; Short=Butyryl-CoA CoA-transferase [Roseburia hominis A2-183]
HSP: E-value: 0.0, Identities: 332, Align length: 447, Query Cover: 99.78%
Título do alinhamento: sp|B0MC58.1| RecName: Full=Butyryl-CoA:acetate CoA-transferase; AltName: Full=Butyryl-CoA CoA-transferase [Anaerostipes caccae L1-92]
HSP: E-value: 0.0, Identities: 318, Align length: 447, Query Cover: 99.78%
Título do alinhamento: sp|Q0AVM5.1| RecName: Full=Probable butyrate:acetyl-CoA coenzyme A-transferase; Short=Butyrate CoA-transferase [Syntrophomonas wolfei subsp. wolfei str. Goettingen G311]
HSP: E-value: 2.47933e-164, Identities: 229, Align length: 444, Query Cover: 99.11%
Título do alinhamento: sp|P38942.3| RecName: Full=4-hydroxybutyrate coenzyme A transferase [Clostridium kluyveri DSM 555]
HSP: E-value

