In [49]:
import cobra
import os
import pandas as pd
import numpy as np
import urllib
from cobrame.util import building, mu, me_model_interface

## Metabolites

# Output path for the metabolite table written by generate_metfile below.
met_output_file = 'metabolites.txt'
# JSON file of the metabolic model, resolved relative to the working directory.
m_model_file = './iYO844.json'

# Load the model once; every generate_* cell below reads from this object.
m_model = cobra.io.load_json_model(m_model_file)

In [50]:
def duplicates(lst, item, counter):
    """Return the positions in ``lst`` that hold ``item``, excluding ``counter``.

    ``counter`` is a position to leave out of the result (pass a value that
    is not a valid position, such as -1, to exclude nothing).
    """
    matches = []
    for position, value in enumerate(lst):
        if value == item and position != counter:
            matches.append(position)
    return matches

def duplicate_indices(lst, item):
    """Return every position in ``lst`` whose element equals ``item``."""
    positions = []
    for position, value in enumerate(lst):
        if value == item:
            positions.append(position)
    return positions

def convert_compartments(compartment_list):
    """Replace one-letter compartment codes with their full names, in place.

    Entries equal to 'c', 'e' or 'p' become 'Cytosol', 'Extra-organism' or
    'Periplasm'; any other entry is left as it is.  The list passed in is
    modified and that same list object is returned.
    """
    known_codes = ['c', 'e', 'p']
    full_names = ['Cytosol', 'Extra-organism', 'Periplasm']

    for position, code in enumerate(compartment_list):
        if code in known_codes:
            compartment_list[position] = full_names[known_codes.index(code)]

    return compartment_list

def convert_compartment(compartment_list):
    """Translate a single compartment code into its full name.

    Despite the parameter name, the argument is one value, not a list:
    'c', 'e' and 'p' map to 'Cytosol', 'Extra-organism' and 'Periplasm'.
    Any other value is returned unchanged.
    """
    known_codes = ['c', 'e', 'p']
    full_names = ['Cytosol', 'Extra-organism', 'Periplasm']

    if compartment_list not in known_codes:
        return compartment_list
    return full_names[known_codes.index(compartment_list)]

## metabolites.txt

In [51]:
def generate_metfile(m_model,filename):
    """Write one tab-separated line per metabolite base ID to ``filename``.

    Each line holds four fields: base ID, name, formula, and the
    compartment(s) the metabolite occurs in joined by ' AND '.

    The base ID is the metabolite ID without its last two characters and the
    compartment code is the last character, i.e. IDs are assumed to look like
    ``<base>_<code>`` with a one-letter code -- TODO confirm this holds for
    every model this is run on.

    Name and formula come from the first occurrence of a base ID.  A formula
    of None is written as 'no-formula'; a name of None is written as an
    empty field.

    Parameters
    ----------
    m_model : object whose ``metabolites`` attribute is iterable and yields
        objects with ``id``, ``name`` and ``formula`` attributes.
    filename : path of the file to create (overwritten if it exists).
    """
    metabolites = list(m_model.metabolites)
    base_ids = [met.id[:-2] for met in metabolites]
    compartment_names = convert_compartments([met.id[-1] for met in metabolites])

    # Group positions by base ID while remembering first-seen order, so each
    # base ID is written exactly once even when its compartment variants are
    # not next to each other in the model.
    first_seen = []
    positions_by_base = {}
    for position, base_id in enumerate(base_ids):
        if base_id not in positions_by_base:
            positions_by_base[base_id] = []
            first_seen.append(base_id)
        positions_by_base[base_id].append(position)

    # The context manager closes the file even if a write fails part-way.
    with open(filename, 'w') as outfile:
        for base_id in first_seen:
            positions = positions_by_base[base_id]
            first = metabolites[positions[0]]

            name = first.name if first.name is not None else ''
            formula = first.formula if first.formula is not None else 'no-formula'
            compartments = ' AND '.join(compartment_names[p] for p in positions)

            outfile.write('\t'.join([base_id, name, formula, compartments]) + '\n')

In [52]:
# Write the metabolite table for the loaded model to met_output_file.
generate_metfile(m_model,met_output_file)

## reaction_matrix.txt

In [53]:
def generate_reactionmatrix_file(m_model,filename):
    """Write the reaction/metabolite coefficients of ``m_model`` to ``filename``.

    One tab-separated line is written per (reaction, metabolite) pair:
    reaction ID, metabolite ID without its last two characters, compartment
    name (via ``convert_compartment``), and the coefficient as text.

    Parameters
    ----------
    m_model : object whose ``reactions`` attribute is iterable; each reaction
        needs ``id``, an iterable ``metabolites`` and ``get_coefficient``;
        each metabolite needs ``id`` and ``compartment``.
    filename : path of the file to create (overwritten if it exists).
    """
    # The context manager closes the file even if a reaction fails to serialise.
    with open(filename, 'w') as outfile:
        for rxn in m_model.reactions:
            for metabolite in rxn.metabolites:
                compartment = convert_compartment(metabolite.compartment)
                # Drop the two-character compartment suffix from the ID.
                base_id = metabolite.id[:-2]
                coefficient = rxn.get_coefficient(metabolite)

                fields = [rxn.id, base_id, compartment, str(coefficient)]
                outfile.write('\t'.join(fields) + '\n')
    
    
    

In [54]:
# Write the reaction/metabolite coefficient table for the loaded model.
generate_reactionmatrix_file(m_model,'reaction_matrix.txt')

## reactions.txt

In [55]:
def generate_reactions_file(m_model,filename):
    """Write a tab-separated reaction table for ``m_model`` to ``filename``.

    The first line is a '#'-prefixed header.  Each following line holds the
    reaction ID, its name, 1 or 0 for reversibility, and a literal '0'.
    A name of None is written as an empty field.

    Parameters
    ----------
    m_model : object whose ``reactions`` attribute is iterable and yields
        objects with ``id``, ``name`` and ``reversibility`` attributes.
    filename : path of the file to create (overwritten if it exists).
    """
    header = '#name \t description \t is_reversible \t data_source \t is_spontaneous \n'

    # The context manager closes the file even if a write fails part-way.
    with open(filename, 'w') as outfile:
        outfile.write(header)

        for rxn in m_model.reactions:
            reversibility = 1 if rxn.reversibility else 0
            name = rxn.name if rxn.name is not None else ''

            # NOTE(review): the header names five columns but only four fields
            # are written, so the trailing '0' lands under data_source and
            # is_spontaneous is missing -- confirm the intended layout.
            outfile.write('\t'.join([rxn.id, name, str(reversibility), '0']) + '\n')

In [56]:
# Write the reaction table for the loaded model.
generate_reactions_file(m_model,'reactions.txt')

## protein_complexes.txt

In [57]:
from lxml import html
import requests
from bs4 import BeautifulSoup
import re

# BioCyc URL prefixes; a reaction ID or gene ID is appended directly to
# these strings to build the page address that gets requested.
reaction_page = 'https://biocyc.org/BSUB/NEW-IMAGE?type=REACTION&object='
gene_page = 'https://biocyc.org/gene?orgid=BSUB&id='

In [58]:
def obtain_reaction_ids(soup):
    """Collect the reaction IDs linked from a parsed BioCyc page.

    Every anchor whose ``href`` contains '/BSUB/NEW-IMAGE?type=REACTION' is
    inspected and the value of its ``object`` query parameter is returned.
    The value is read from the ``href`` attribute itself rather than from the
    serialised tag, so it does not depend on which attributes follow ``href``
    and never includes later query parameters.

    Parameters
    ----------
    soup : parsed page exposing ``select`` (e.g. a BeautifulSoup object);
        the selected elements must expose ``get('href')``.

    Returns
    -------
    list of str
        Reaction IDs in document order, repeats included.  Anchors without
        an ``object`` parameter are skipped.
    """
    reaction_id = []
    for anchor in soup.select('a[href*="/BSUB/NEW-IMAGE?type=REACTION"]'):
        href = anchor.get('href') or ''
        if 'object=' not in href:
            # Nothing to extract from this link.
            continue
        # Keep the value exactly as written in the link (no URL-decoding):
        # this notebook appends it straight onto reaction_page to fetch it.
        reaction_id.append(href.split('object=', 1)[1].split('&', 1)[0])
    return reaction_id

def obtain_complex_info(soup):
    """Extract the enzyme IDs shown on a parsed BioCyc reaction page.

    Every anchor of class 'ENZYME-IN-RXN-DISPLAY' found in ``soup`` is
    inspected and the value of the ``id`` query parameter of its ``href`` is
    collected.  IDs that do not end in 'MONOMER' are reported on stdout so
    their stoichiometry can be checked by hand.

    Parameters
    ----------
    soup : parsed page exposing ``find_all`` (e.g. a BeautifulSoup object);
        the returned elements must expose ``get('href')``.

    Returns
    -------
    (cplx_id, stoich)
        ``cplx_id`` is the list of IDs in document order.  ``stoich`` is 1
        when at least one ID ends in 'MONOMER' and None otherwise.
        NOTE(review): a single value cannot describe several enzymes with
        different stoichiometries -- confirm what callers need.
    """
    cplx_id = []
    # Bound up front so the return below is valid even when no monomer
    # (or no enzyme at all) is found.
    stoich = None

    for anchor in soup.find_all('a', class_='ENZYME-IN-RXN-DISPLAY'):
        href = anchor.get('href') or ''
        # Split the query string into key=value pairs so that 'orgid=' is
        # never mistaken for 'id='.
        query = href.split('?', 1)[-1]
        id_values = [pair[len('id='):] for pair in query.split('&')
                     if pair.startswith('id=')]
        if not id_values:
            continue

        cplx_id_str = id_values[0]
        cplx_id.append(cplx_id_str)

        if cplx_id_str.endswith('MONOMER'):
            stoich = 1
        else:
            print('Check stoichiometry for ' + cplx_id_str)

    return cplx_id, stoich
    

In [64]:
# Work in progress: body of generate_proteincomplexes_file, run as a plain
# cell while the def line below stays commented out.  Because it is not inside
# a function, every name assigned here (gene, gene_soup, reaction_id,
# reaction_soup, ...) remains in the notebook namespace; later cells read
# `gene` and `reaction_soup` from it.
# NOTE(review): the body keeps its function-level indentation; presumably this
# relies on the notebook front end accepting an indented cell -- confirm.
#def generate_proteincomplexes_file(m_model,filname):
    # Not used yet in this cell.
    filename = 'protein_complexes_test.txt'
    
    # Not used yet in this cell.
    cplx_list = []
    
    # Restricted to a single hard-coded gene while the scraping is developed.
    # NOTE(review): m_model.genes presumably yields gene objects rather than
    # ID strings, so gene_page+gene would need the gene's ID -- confirm
    # before switching back to the full list.
    #gene_list = m_model.genes
    gene_list = ['BSU35940']
    
    for gene in gene_list:
        # Fetch and parse the gene page, then follow every reaction it links to.
        gene_soup = BeautifulSoup(requests.get(gene_page+gene).content,'html.parser')
        reaction_id = obtain_reaction_ids(gene_soup)
        # Debug output: dumps the whole page HTML into the cell output.
        print(gene_soup.prettify())
        for rxn in reaction_id:
            reaction_page_rxn = reaction_page+rxn
            # Overwritten on every pass, so only the last reaction page
            # remains available after the loops finish.
            reaction_soup = BeautifulSoup(requests.get(reaction_page_rxn).content,'html.parser')
            print(reaction_soup.prettify())

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
 <head>
  <title>
   Bacillus subtilis subtilis 168 rbsA
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <script src="https://code.jquery.com/jquery-1.11.3.min.js">
  </script>
  <script>
   var $j = jQuery.noConflict();
  </script>
  <!-- 
<link rel="stylesheet" type="text/css" href="/yui/build/assets/skins/sam/skin.css"> 
<link rel="stylesheet" type="text/css" href="/yui/build/assets/skins/sam/treeview.css">
<link rel="stylesheet" type="text/css" href="/style.css" >

User web site style sheet customization should use the following file:
<link rel="stylesheet" type="text/css" href="/userWebsiteCustomization.css">

The previous 4 .css files are combined at web start up time by the Lisp fn publish-combined-static-files generating the following combined.css file. For some
reason it doesn't work for the yui gallery file to be combined -- the yui

In [65]:
# `gene` is whatever value an earlier cell last left in that name, so this
# prints the address of the most recently processed gene page.
print(gene_page+gene)


https://biocyc.org/gene?orgid=BSUB&id=BSU35940


In [43]:
# Pull the enzyme (complex) IDs and gene IDs out of `reaction_soup`, which
# must already have been set by the cell that downloads a reaction page.
html_cplx_list = reaction_soup.find_all('a',class_='ENZYME-IN-RXN-DISPLAY')
html_gene_list = reaction_soup.find_all('a',class_='GENE')

cplx_id = []
gene_id = []

for cplx in html_cplx_list:
    # The serialised tag contains href="...&amp;id=BSU..." onmouseout=...;
    # keep what lies between ';id=' and the closing quote of href.
    string = str(cplx)
    cplx_id_str = string.split(';id=BSU',1)[1]
    cplx_id_str = cplx_id_str.split('" onmouseout=',1)[0]
    cplx_id_str = 'BSU'+cplx_id_str
    cplx_id.append(cplx_id_str)

# NOTE: this loop reuses the name `gene`, replacing the gene ID string that
# the scraping cell left in the notebook namespace.
for gene in html_gene_list:
    # Same layout as the enzyme anchors.  The previous split('') raised
    # "ValueError: empty separator" and gene_id was never filled.
    string = str(gene)
    gene_id_str = string.split(';id=',1)[1]
    gene_id_str = gene_id_str.split('" onmouseout=',1)[0]
    gene_id.append(gene_id_str)

[<a class="ENZYME-IN-RXN-DISPLAY" href="/gene?orgid=BSUB&amp;id=BSU30870-MONOMER" onmouseout="return nd();" onmouseover="return overlib(' &lt;b&gt;Enzyme:&lt;/b&gt; putative UDP-glucose epimerase&lt;br&gt;&lt;b&gt;ID:&lt;/b&gt; BSU30870-MONOMER&lt;br&gt; &lt;b&gt;Synonyms:&lt;/b&gt; YtcB', WIDTH, 500);">putative UDP-glucose epimerase</a>, <a class="ENZYME-IN-RXN-DISPLAY" href="/gene?orgid=BSUB&amp;id=BSU38860-MONOMER" onmouseout="return nd();" onmouseover="return overlib(' &lt;b&gt;Enzyme:&lt;/b&gt; UDP-glucose 4-epimerase&lt;br&gt;&lt;b&gt;ID:&lt;/b&gt; BSU38860-MONOMER&lt;br&gt; &lt;b&gt;Synonyms:&lt;/b&gt; GneA, Gne, GalE&lt;br&gt; &lt;b&gt;Evidence:&lt;/b&gt; Inferred from direct assay [Soldo03]', WIDTH, 500);">UDP-glucose 4-epimerase</a>]
['BSU30870-MONOMER', 'BSU38860-MONOMER']


In [46]:

# Inspect the raw GENE anchors collected from the reaction page.
print(html_gene_list)

[<a class="GENE" href="/gene?orgid=BSUB&amp;id=BSU30870" onmouseout="return nd();" onmouseover="return overlib(' &lt;b&gt;Gene:&lt;/b&gt;  ytcB    BSU30870    BSU30870&lt;br&gt; &lt;b&gt;Location:&lt;/b&gt; 3,157,008 -&gt; 3,157,958   &lt;br&gt; &lt;b&gt;Product:&lt;/b&gt; putative UDP-glucose epimerase', WIDTH, 500);">ytcB</a>, <a class="GENE" href="/gene?orgid=BSUB&amp;id=BSU38860" onmouseout="return nd();" onmouseover="return overlib(' &lt;b&gt;Gene:&lt;/b&gt;  galE    BSU38860    BSU38860&lt;br&gt; &lt;b&gt;Synonyms:&lt;/b&gt; gne, gneA&lt;br&gt; &lt;b&gt;Location:&lt;/b&gt; 3,989,948 &lt;- 3,990,967   &lt;br&gt; &lt;b&gt;Product:&lt;/b&gt; UDP-glucose 4-epimerase', WIDTH, 500);">galE</a>]
