### **Análise de genes**

In [2]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqFeature

##### **1: Gene ptsP**

In [6]:
# Parse the GenBank file holding the ptsP gene into a single SeqRecord.
gene_ptsP = SeqIO.read("genes/ptsP.gb", format="genbank")
# Leave the record as the last expression so the notebook shows its repr.
gene_ptsP

SeqRecord(seq=Seq('TCAAGCCTTCTCAGAGGGCTTGCTCAGGTCTACGCTGCGGATGATCTTGCGCAC...CAT'), id='NZ_DS483503.1', name='NZ_DS483503', description='Faecalibacterium prausnitzii M21/2 Scfld_02_24, whole genome shotgun sequence', dbxrefs=['BioProject:PRJNA224116', 'BioSample:SAMN02299416', 'Assembly:GCF_000154385.1'])

In [7]:
# Summarise the record, one value per line:
# sequence length, accession id, description and record name.
print(
    len(gene_ptsP.seq),
    gene_ptsP.id,
    gene_ptsP.description,
    gene_ptsP.name,
    sep="\n",
)

1644
NZ_DS483503.1
Faecalibacterium prausnitzii M21/2 Scfld_02_24, whole genome shotgun sequence
NZ_DS483503


In [8]:
# Print the record-level annotations dictionary as a whole (molecule type,
# topology, taxonomy, references, comment, ...). The output is long.
print(gene_ptsP.annotations)

{'molecule_type': 'DNA', 'topology': 'linear', 'data_file_division': 'CON', 'date': '10-APR-2024', 'accessions': ['NZ_DS483503'], 'sequence_version': 1, 'keywords': ['WGS', 'RefSeq'], 'source': 'Faecalibacterium prausnitzii M21/2', 'organism': 'Faecalibacterium prausnitzii M21/2', 'taxonomy': ['Bacteria', 'Bacillati', 'Bacillota', 'Clostridia', 'Eubacteriales', 'Oscillospiraceae', 'Faecalibacterium'], 'references': [Reference(title='Draft genome sequence of Faecalibacterium prausnitzii M21/2', ...), Reference(title='Draft genome sequence of Faecalibacterium prausnitzii M21/2', ...), Reference(title='Direct Submission', ...), Reference(title='Direct Submission', ...), Reference(title='Direct Submission', ...)], 'comment': "REFSEQ INFORMATION: The reference sequence is identical to\nDS483503.1.\nFaecalibacterium prausnitzii M21/2 (GenBank Accession Number for\n16S rDNA gene: AY305307) is a member of the division Firmicutes. In\none comprehensive 16S rDNA sequence-based enumeration of the

In [9]:
# Show the database cross-references of the record, followed by the source
# organism and its taxonomic lineage taken from the record annotations.
print(gene_ptsP.dbxrefs)
for annotation_key in ("source", "taxonomy"):
    print(gene_ptsP.annotations[annotation_key])

['BioProject:PRJNA224116', 'BioSample:SAMN02299416', 'Assembly:GCF_000154385.1']
Faecalibacterium prausnitzii M21/2
['Bacteria', 'Bacillati', 'Bacillota', 'Clostridia', 'Eubacteriales', 'Oscillospiraceae', 'Faecalibacterium']


In [10]:
# Report how many features the record carries, then display the feature list
# itself as the cell's output.
print(f"Temos {len(gene_ptsP.features)} features")
gene_ptsP.features

Temos 3 features


[SeqFeature(SimpleLocation(ExactPosition(0), ExactPosition(1644), strand=1), type='source', qualifiers=...),
 SeqFeature(SimpleLocation(ExactPosition(0), ExactPosition(1644), strand=-1), type='gene', qualifiers=...),
 SeqFeature(SimpleLocation(ExactPosition(0), ExactPosition(1644), strand=-1), type='CDS', qualifiers=...)]

In [11]:
# List each feature's type next to its location on the record.
for feat in gene_ptsP.features:
    print(f"{feat.type} {feat.location!s}")

source [0:1644](+)
gene [0:1644](-)
CDS [0:1644](-)


In [12]:
# Collect the positions (within gene_ptsP.features) of every CDS feature.
feat_cds = [
    index
    for index, feat in enumerate(gene_ptsP.features)
    if feat.type == "CDS"
]
# Print the full description of each CDS feature that was found.
for index in feat_cds:
    print(gene_ptsP.features[index])
# Display the list of CDS indices as the cell's output.
feat_cds

type: CDS
location: [0:1644](-)
qualifiers:
    Key: EC_number, Value: ['2.7.3.9']
    Key: GO_function, Value: ['GO:0008965 - phosphoenolpyruvate-protein phosphotransferase activity [Evidence IEA]']
    Key: GO_process, Value: ['GO:0009401 - phosphoenolpyruvate-dependent sugar phosphotransferase system [Evidence IEA]; GO:0006808 - regulation of nitrogen utilization [Evidence IEA]']
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GeneID:75068783']
    Key: gene, Value: ['ptsP']
    Key: inference, Value: ['COORDINATES: similar to AA sequence:RefSeq:WP_015537632.1']
    Key: locus_tag, Value: ['FAEPRAM212_RS10210']
    Key: note, Value: ['Derived by automated computational analysis using gene prediction method: Protein Homology.']
    Key: old_locus_tag, Value: ['FAEPRAM212_02497']
    Key: product, Value: ['phosphoenolpyruvate--protein phosphotransferase']
    Key: protein_id, Value: ['WP_005925321.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MQ

[2]

In [13]:
# Print the total number of features annotated on the record.
print(len(gene_ptsP.features))


3


In [14]:
# Print every feature in full, prefixed by its position in the feature list.
for position, feature in enumerate(gene_ptsP.features):
    print(f"Feature {position}:", feature)


Feature 0: type: source
location: [0:1644](+)
qualifiers:
    Key: db_xref, Value: ['taxon:411485']
    Key: host, Value: ['Homo sapiens']
    Key: isolation_source, Value: ['biological product [ENVO:02000043]']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Faecalibacterium prausnitzii M21/2']
    Key: strain, Value: ['M21/2']

Feature 1: type: gene
location: [0:1644](-)
qualifiers:
    Key: db_xref, Value: ['GeneID:75068783']
    Key: gene, Value: ['ptsP']
    Key: locus_tag, Value: ['FAEPRAM212_RS10210']
    Key: old_locus_tag, Value: ['FAEPRAM212_02497']

Feature 2: type: CDS
location: [0:1644](-)
qualifiers:
    Key: EC_number, Value: ['2.7.3.9']
    Key: GO_function, Value: ['GO:0008965 - phosphoenolpyruvate-protein phosphotransferase activity [Evidence IEA]']
    Key: GO_process, Value: ['GO:0009401 - phosphoenolpyruvate-dependent sugar phosphotransferase system [Evidence IEA]; GO:0006808 - regulation of nitrogen utilization [Evidence IEA]']
    Key: codon

In [15]:
# For each feature, print its type and, on the next line, its qualifiers
# mapping.
for feature in gene_ptsP.features:
    print(feature.type, feature.qualifiers, sep="\n")


source
{'organism': ['Faecalibacterium prausnitzii M21/2'], 'mol_type': ['genomic DNA'], 'strain': ['M21/2'], 'isolation_source': ['biological product [ENVO:02000043]'], 'host': ['Homo sapiens'], 'db_xref': ['taxon:411485']}
gene
{'gene': ['ptsP'], 'locus_tag': ['FAEPRAM212_RS10210'], 'old_locus_tag': ['FAEPRAM212_02497'], 'db_xref': ['GeneID:75068783']}
CDS
{'gene': ['ptsP'], 'locus_tag': ['FAEPRAM212_RS10210'], 'old_locus_tag': ['FAEPRAM212_02497'], 'EC_number': ['2.7.3.9'], 'inference': ['COORDINATES: similar to AA sequence:RefSeq:WP_015537632.1'], 'GO_function': ['GO:0008965 - phosphoenolpyruvate-protein phosphotransferase activity [Evidence IEA]'], 'GO_process': ['GO:0009401 - phosphoenolpyruvate-dependent sugar phosphotransferase system [Evidence IEA]; GO:0006808 - regulation of nitrogen utilization [Evidence IEA]'], 'note': ['Derived by automated computational analysis using gene prediction method: Protein Homology.'], 'codon_start': ['1'], 'transl_table': ['11'], 'product': ['p

In [16]:
# Show the coding DNA and the encoded protein for every CDS feature.
#
# The protein is produced with SeqFeature.translate() rather than a bare
# Seq.translate(): the feature method reads the feature's own `transl_table`
# and `codon_start` qualifiers, so the genetic code and reading frame declared
# in the GenBank annotation are used instead of always assuming the standard
# table and frame 0.
for i in feat_cds:
    cds_feature = gene_ptsP.features[i]
    # extract() applies the feature location (including the strand) to the
    # parent sequence, yielding the coding strand.
    coding_dna = cds_feature.extract(gene_ptsP.seq)
    print("DNA: ", coding_dna)
    # cds=False keeps the terminal stop symbol in the output and avoids
    # raising on incomplete/partial coding sequences.
    print("Proteína: ", cds_feature.translate(gene_ptsP.seq, cds=False))

DNA:  ATGCAGGTAGGCACCGGCAAAAGCGTGCTCAATGGCATCGCCATTGGCAAGCTGAAGATCTACAAGAAAAAAGATACCGCGATTTCCACCGCCCCTGTGGCAGATACTGCCGCAGAGCTGGAGCGTTTTGAAGCTGCCCGCCAGAAGGCCATTGAGCAGCAGACCGCCCTGTATGAAAAGGCACTGGCAGAGGCCGGCGAGGACATCGCCGAGGTGTTCAACATCCACGCCATGATGCTGGATGATGACGACTTTGTGGACGCCATCAAGGAGATCATCAACGGACAGAAGATGTGCGCCGAGTACGCCGTCAAGACCGCCGGCAACAATCAGGCTGCGGTGTTCTCTGCGATGGACGACCCGTATCTGCAGGCCCGCAGCGCCGATGTGCTGGACATTGCGCAGGCCATGCTGGACATTCTGCAGGGGGTGGACAACGCTTCCCTGCAGGGCACCGAGCCCAGCATTCTGGTAGCCGAGGATCTGGCTCCCTCCGAGACCGTGCGCATGGATAAAAGCCTGCTGCTGGGCTTCATCACCCGCGAGGGCAGCTCCAACAGCCACACCGCCATTCTGGCACGCAGCATGAACATCCCCGCCCTGATCCAGTGCAAGGACATTCAGGACGACTGGGACGGCAAGATGGCTGTGGTGGACGGCTACAACGCCTGTGTGTATGTGGACCCCACCCCCGACCTGCTCAAGAGCCTGAAAAAGCGCCAGCAGGAGGACCAGAAAAAGCAGGCCCTGCTGCAGGAGCTGAAGGGCAAGCCCAACACCACGCTGGACGGCAAGACCATCAACGTGTTTGCCAACATCGGCGGCATGGGCGACGTGGGCGCTGTGCAGCAGAACGACGCCGGCGGCGTGGGTCTGTTCCGCACCGAGTTCGTTTACCTGAACTGCAAGGACTTCCCCACCGAGGACTACCAGTTCGAGGCTTACAAGCAGGTGTTGGAAAGCCTTGCACCCCGCAAGGTGGTGGTGCGCACCT

# Count the features of type "gene" by collecting their positions in
# gene_ptsP.features.
feat_gene = [
    index
    for index, feat in enumerate(gene_ptsP.features)
    if feat.type == 'gene'
]
print('Número de features tipo gene: ', len(feat_gene))