## Análise da sequência e das features presentes no NCBI

In [8]:
from Bio import SeqIO
from Bio import Entrez

In [18]:
# Regions of interest, one tuple per gene:
#   (gene name, GenBank accession, region start, region stop)
# The fetch loop passes fields 1-3 to Entrez.efetch as id / seq_start / seq_stop
# and uses field 0 for labels and output file names.
# NOTE(review): rrnB uses the unversioned accession while ampC pins ".3";
# presumably both refer to the same sequence version -- confirm, and consider
# pinning both so the coordinates stay valid.
genes = [
    ("rrnB", "U00096", 4166659, 4168200),
    ("ampC", "U00096.3", 4377811, 4378944),
]

In [19]:
# For every (name, accession, start, stop) entry in `genes`: download the
# GenBank record of that region from NCBI, print selected annotations and the
# feature types, save the DNA as FASTA and, when CDS features exist, print
# their main qualifiers and save the protein translation(s) as FASTA.

# The contact address sent with E-utilities requests is the same for every
# gene, so it is configured once instead of on every iteration.
Entrez.email = "pg45464@alunos.uminho.pt"

for i, gene in enumerate(genes, start=1):
    print(f"----> Gene {i}: Escherichia coli's {gene[0]} <----\n")

    # --> fetch the region from db="nucleotide" <--
    handle = Entrez.efetch(db="nucleotide", id=gene[1], seq_start=gene[2], seq_stop=gene[3],
                           rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # The record is fully parsed at this point; release the connection
        # even when parsing fails.
        handle.close()

    # --> annotations of the gene region <--
    print("ANNOTATIONS")
    # For a region fetch, index 0 is the accession and index 2 the coordinate
    # range (index 1 is presumably the "REGION:" token -- confirm).
    accessions = record.annotations["accessions"]
    print(f"Accessions: {accessions[0]} (region {accessions[2]})")
    source = record.annotations["source"]
    print(f"Source: {source}")
    taxonomy = record.annotations["taxonomy"]
    print("Taxonomy: " + " -> ".join(taxonomy))
    mol_type = record.annotations["molecule_type"]
    print(f"Molecule type: {mol_type}")
    topology = record.annotations["topology"]
    print(f"Molecule topology: {topology}")

    # --> save the DNA sequence in FASTA format <--
    with open(f"{gene[0]}_dna.fasta", "w") as output:
        output.write(f">{gene[1]} | Escherichia coli {gene[0]} | {accessions[2]} | {source}\n"
                     f"{record.seq}")

    # --> feature types present in the region <--
    print("\nFEATURES (types)")
    for feature in record.features:
        print(f"- {feature.type}")

    # --> CDS: print and save some information about the gene and its protein(s) <--
    print("\nCDS qualifiers")
    cds_features = [feature for feature in record.features if feature.type == "CDS"]
    protein_entries = []
    for feature in cds_features:
        # Not every CDS carries every qualifier (e.g. no translation), so
        # fall back to a placeholder instead of raising KeyError.
        qualifiers = feature.qualifiers
        feat_gene = qualifiers.get("gene", ["N/A"])[0]
        feat_id = qualifiers.get("protein_id", ["N/A"])[0]
        feat_product = qualifiers.get("product", ["N/A"])[0]
        feat_translation = qualifiers.get("translation", [""])[0]
        print(f"Gene: {feat_gene}")
        print(f"Product: {feat_product}")
        print(f"Protein ID: {feat_id}")
        print(f"Translation: {feat_translation[0:60]}...")
        if feat_translation:
            protein_entries.append(
                f">{gene[1]} | Escherichia coli {gene[0]} | {accessions[2]} | {source}\n"
                f"{feat_translation}")

    if not cds_features:
        print("- None")

    # Write every translation in a single pass so that a region with several
    # CDS features does not overwrite earlier proteins; with one CDS the file
    # content is the same as before. No file is created without a translation.
    if protein_entries:
        with open(f"{gene[0]}_prot.fasta", "w") as output2:
            output2.write("\n".join(protein_entries))

    print("\n")

----> Gene 1: Escherichia coli's rrnB <----

ANNOTATIONS
Accessions: U00096 (region 4166659..4168200)
Source: Escherichia coli str. K-12 substr. MG1655
Taxonomy: Bacteria -> Proteobacteria -> Gammaproteobacteria -> Enterobacterales -> Enterobacteriaceae -> Escherichia
Molecule type: DNA
Molecule topology: linear

FEATURES (types)
- source
- gene
- rRNA

CDS qualifiers
- None


----> Gene 2: Escherichia coli's ampC <----

ANNOTATIONS
Accessions: U00096 (region 4377811..4378944)
Source: Escherichia coli str. K-12 substr. MG1655
Taxonomy: Bacteria -> Proteobacteria -> Gammaproteobacteria -> Enterobacterales -> Enterobacteriaceae -> Escherichia
Molecule type: DNA
Molecule topology: linear

FEATURES (types)
- source
- gene
- CDS

CDS qualifiers
Gene: ampC
Product: beta-lactamase
Protein ID: AAC77110.1
Translation: MFKTTLCALLITASCSTFAAPQQINDIVHRTITPLIEQQKIPGMAVAVIYQGKPYYFTWG...


