[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/alessandronascimento/BioMolComp/blob/main/P09/KEGG2.ipynb)

# Análise de Dados do KEGG através do Biopython #

O banco de dados KEGG é acessível através do Biopython, por meio do módulo `Bio.KEGG.REST`, que permite consultar o KEGG diretamente a partir do Python.

In [None]:
!pip3 install biopython
from Bio.KEGG import REST


In [17]:
# User-editable parameters for the rest of the notebook. The trailing
# `#@param` annotations look like Colab form-field markers, so they are kept
# on the same line as each assignment.

# Text searched for (case-sensitive substring) in the pathway descriptions.
path_name = "Clavulanic" #@param {type:"string"}
# KEGG organism code: used to list the pathways and to prefix gene ids.
organism = "sclf" #@param {type:"string"}


In [None]:
# Download the list of KEGG pathways for the selected organism. Each line of
# the response is expected to look like "<pathway id>\t<description>".
kegg_pathways = REST.kegg_list("pathway", organism).read()

# Keep only the pathways whose description contains `path_name`
# (case-sensitive substring match).
clav_pathways = []
clav_descriptions = []
for line in kegg_pathways.splitlines():
    # partition() never raises: a blank line (e.g. an empty response) or a
    # line without a tab is skipped, and any extra tab stays in the
    # description instead of breaking the two-value unpacking.
    entry, tab, description = line.partition("\t")
    if not tab:
        continue
    if path_name in description:
        clav_pathways.append(entry)
        clav_descriptions.append(description)

# Report how many pathways matched, then one "<id> <description>" per match.
print("%4d via(s) identificada(s) envolvendo %s:\n" % (len(clav_pathways), path_name))
for entry, description in zip(clav_pathways, clav_descriptions):
    print("%10s %30s\n" % (entry, description))

In [None]:
# Collect the genes and compounds listed in every selected pathway.
# The four lists are parallel: gene_descriptions[i] describes clav_genes[i],
# and cpd_descriptions[i] describes clav_compounds[i].
clav_genes = []
gene_descriptions = []
clav_compounds = []
cpd_descriptions = []


def _split_entry(line):
    """Return (identifier, description) from one line of a KEGG flat file.

    The first 12 columns hold the section keyword and are discarded. The
    remainder is split at the first double space only, so an entry without
    a description yields an empty description and a description that itself
    contains a double space is kept whole.
    """
    identifier, _, description = line[12:].strip().partition("  ")
    return identifier, description.strip()


for pathway in clav_pathways:
    pathway_file = REST.kegg_get(pathway).read()  # query and read each pathway

    # Walk through the flat file keeping track of the current section: the
    # section keyword appears only on the first line of a section, and the
    # continuation lines leave the first 12 columns blank.
    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        section = line[:12].strip()  # section names are within 12 columns
        if section:
            current_section = section

        if current_section == "GENE":
            gene_identifier, gene_description = _split_entry(line)
            # The same gene can appear in several pathways: keep it once.
            if gene_identifier and gene_identifier not in clav_genes:
                clav_genes.append(gene_identifier)
                gene_descriptions.append(gene_description)

        elif current_section == "COMPOUND":
            cpd_identifier, cpd_description = _split_entry(line)
            # De-duplicate compounds the same way as genes so the reported
            # count is not inflated when several pathways match.
            if cpd_identifier and cpd_identifier not in clav_compounds:
                clav_compounds.append(cpd_identifier)
                cpd_descriptions.append(cpd_description)

print("Foram identificados %d genes na via:" % (len(clav_genes)))
print("%-20s %-50s %-20s" % ("#Gene", "Anotacao", "EC/KO"))
for gene, annotation in zip(clav_genes, gene_descriptions):
    # The last whitespace-separated token of the annotation is shown in the
    # EC/KO column; an empty annotation gives an empty column.
    tokens = annotation.split()
    last_token = tokens[-1] if tokens else ""
    print("%-20s %-50s %-20s" % (gene, annotation, last_token))

print()
print()

print("Foram identificados %d compostos na via:" % (len(clav_compounds)))
print("%-20s %-50s" % ("#Composto", "Anotacao"))
for compound, annotation in zip(clav_compounds, cpd_descriptions):
    print("%-20s %-50s" % (compound, annotation))

In [None]:
# For every gene collected from the pathway(s), download its amino-acid and
# nucleotide sequences from KEGG and print them (nucleotide first).
print("Buscando informacoes sobre os genes da via....")
for gene in clav_genes:
    kegg_id = f"{organism}:{gene}"  # KEGG gene ids are "<organism>:<gene>"
    print(kegg_id)
    kegg_aaseq = REST.kegg_get(kegg_id, "aaseq").read()
    kegg_ntseq = REST.kegg_get(kegg_id, "ntseq").read()
    print(kegg_ntseq)
    print(kegg_aaseq)
    # Two blank lines between consecutive genes.
    print("\n")
