## Generate MetaCyc-dependendt files

### protein_complexes.txt

In [1]:
import cobra
import os
import pandas as pd
import numpy as np
import urllib
import pythoncyc as pc
import re
from Bio.SeqUtils import seq3
from Bio import Seq
from os.path import join

## Metabolites

met_output_file = 'metabolites.txt'

# Define Models
directory = '/home/jt/UCSD/bacillusme-master/ecolime/building_data/'
eco_directory = join(directory, 'iJO1366.json')
ijo_directory = join(directory, 'iYO844.json')
uni_directory = join(directory, 'universal_model.json')

eco = cobra.io.load_json_model(eco_directory)
m_model = cobra.io.load_json_model(ijo_directory)
uni = cobra.io.load_json_model(uni_directory)

## Fix of gene reaction rules
m_model.reactions.ACCOAC.gene_reaction_rule = '(BSU29200 and BSU29210) or BSU24350 or BSU24340 or BSU22440'
m_model.reactions.PDH.gene_reaction_rule = '(BSU14580 and BSU14590) or BSU14600 or BSU14610'
m_model.reactions.ACTD2.gene_reaction_rule = 'BSU08060 and BSU08070'
m_model.reactions.get_by_id('AIRC1').gene_reaction_rule = 'BSU06420 or BSU06430'
m_model.reactions.get_by_id('ANS').gene_reaction_rule = 'BSU00750 or BSU22680'
m_model.reactions.get_by_id('PRFGS_1').gene_reaction_rule = 'BSU06480 or BSU06470'
m_model.reactions.get_by_id('RNDR1').gene_reaction_rule = 'BSU17380 and BSU17390'
m_model.reactions.get_by_id('RNDR2').gene_reaction_rule = 'BSU17380 and BSU17390'
m_model.reactions.get_by_id('RNDR3').gene_reaction_rule = 'BSU17380 and BSU17390'
m_model.reactions.get_by_id('RNDR4').gene_reaction_rule = 'BSU17380 and BSU17390'


In [14]:
def frameid_to_str(frameid):
    string = str(frameid)
    string = string[1:len(string)-1]
    return string

def frames_to_strlist(frames):
    strlist = []
    for instance in frames:
        strlist.append(frameid_to_str(instance.frameid))

In [3]:
bsub = pc.select_organism('bsub')
ecoli = pc.select_organism('ecoli')

In [4]:
PROTEINS = bsub.proteins

In [8]:
## Get stoichiometry from homology with ECOLI
ecoli_blast = pd.read_csv('blast_out_bsub_as_ref.txt', sep='\t')
bsub_to_ecoli_dict = dict()
for key, row in ecoli_blast.iterrows():
    if row['ident'] > 0.2:
        bsub_id = row['BSUB_gene']
        ecoli_id = row['ECOLI_gene']
        bsub_to_ecoli_dict[bsub_id] = ecoli_id
bsub_to_ecoli_dict

# BLAST might have found false positive gene hits.
# However, stoichiometry is only used if proteins in complex are the same as well.

In [5]:
filename = 'protein_complexes.txt'
file = open(filename,'w')
CPLX_list = []
genes_all_cplxs = []

protein_complexes_dict = dict()

for cplx in PROTEINS.instances:
    
    cplx_string = frameid_to_str(cplx.frameid)
    string = cplx_string + '\t' + 'default_name' + '\t'
    
    
    genes_of_cplx = pc.PGDB.genes_of_protein(bsub,cplx)
    gene_ids_of_cplx = frames_to_strlist(genes_of_cplx)
    
    genes_of_cplx_comp = []
    grRule_string = ''
    if genes_of_cplx:
        
        for gene in genes_of_cplx:
            gene_string = frameid_to_str(gene)
            if cplx_string not in protein_complexes_dict.keys():
                protein_complexes_dict[cplx_string] = []
            protein_complexes_dict[cplx_string].append(gene_string)
            
            stoich = ''
            
            genes_of_cplx_comp.append(gene_string)
            string = string + gene_string + '(' + stoich + ')' + ' AND '
            grRule_string = grRule_string + gene_string + '(' + stoich + ')' + ' AND '


        string = string[0:len(string)-5]
        grRule_string = grRule_string[0:len(grRule_string)-5]
        
        string = string + '\t' + 'M_protein_recon' + '\n'

        
        file.write(string)

        CPLX_list.append(cplx_string)
        genes_all_cplxs.append(genes_of_cplx_comp)
    
file.close()
protein_complexes_dict

{'CPLX8J2-24': ['BSU06020', 'BSU06030'],
 'BSU08300-MONOMER': ['BSU08300'],
 'BSU09120-MONOMER': ['BSU09120'],
 'CPLX8J2-27': ['BSU22740', 'BSU22760'],
 'CPLX8J2-20': ['BSU33430', 'BSU33440'],
 'CPLX8J2-21': ['BSU28250', 'BSU28260'],
 'CPLX8J2-22': ['BSU14890', 'BSU14900', 'BSU14910', 'BSU14920'],
 'CPLX8J2-25': ['BSU36850', 'BSU36860', 'BSU36870'],
 'BSU26410-MONOMER': ['BSU26410'],
 'BSU21329-MONOMER': ['BSU21329'],
 'CPLX8J2-28': ['BSU36640', 'BSU36650', 'BSU36660'],
 'BSU28720-MONOMER': ['BSU28720'],
 'BSU40800-MONOMER': ['BSU40800'],
 'BSU40870-MONOMER': ['BSU40870'],
 'BSU38250-MONOMER': ['BSU38250'],
 'BSU17780-MONOMER': ['BSU17780'],
 'BSU39970-MONOMER': ['BSU39970'],
 'BSU25960-MONOMER': ['BSU25960'],
 'BSU30170-MONOMER': ['BSU30170'],
 'BSU36360-MONOMER': ['BSU36360'],
 'BSU06440-MONOMER': ['BSU06440'],
 'BSU38430-MONOMER': ['BSU38430'],
 'BSU30660-MONOMER': ['BSU30660'],
 'BSU34370-MONOMER': ['BSU34370'],
 'BSU00920-MONOMER': ['BSU00920'],
 'CPLX8J2-23': ['BSU28300', 'BSU283

### enzyme_reaction_association.txt

In [6]:
filename = 'enzyme_reaction_association.txt'
prot_cplx_filename = 'protein_complexes.txt'

file = open(filename,'w')
prot_cplx_file = open(prot_cplx_filename,'a')

standard_gene_length = 8

enz_rxn_assoc_list = []
artificial_cplxs = []
artificial_id = 0;
for reaction in m_model.reactions:
    if not (reaction.id[0:3] == 'EX_') and not (reaction.id[0:3] == 'DM_'):
        # Skip empty rules, these reactions are assigned to CPLX_dummy
        if not reaction.gene_reaction_rule:
            print reaction.id, ' has no rule'
            continue
            
        string = str(reaction.id) + '\t' 

        rule_string = str(reaction.gene_reaction_rule)
            
        if rule_string:
            rule_string = rule_string.replace('(','')
            rule_string = rule_string.replace(')','')
            rule_list = rule_string.split(' or ')
            enz_rxn_assoc = []

            reaction_cplx_list = []
            for rule in rule_list:
                rule_gene_list = rule.split(' and ')

                for index in range(0,len(genes_all_cplxs)-1):
                    ref_rule = genes_all_cplxs[index]

                    if set(ref_rule) == set(rule_gene_list):
                        rule_cplx = CPLX_list[index]
                        reaction_cplx_list.append(rule_cplx)

            enz_rxn_assoc_list.append(reaction_cplx_list)
            string = str(reaction.id) + '\t'

            if reaction_cplx_list:
                for cplx in reaction_cplx_list:
                    string = string + cplx + ' OR '
            else:
                for rule in rule_list:
                    stoichiometry_string = ''
                    if len(rule) == standard_gene_length:
                        artificial_cplx = rule + '-MONOMER'
                        stoichiometry_string = rule + '(1)'
                    else:
                        artificial_id = artificial_id + 1
                        artificial_cplx = 'CPLX000-' + str(artificial_id)
                        cplx_gene_list = rule.split(' and ')

                        for gene in cplx_gene_list:
                            stoichiometry_string = stoichiometry_string + gene + '()' + ' AND '
                        stoichiometry_string = stoichiometry_string[0:len(stoichiometry_string)-5]
                    string = string + artificial_cplx + ' OR '
                    
                    if artificial_cplx not in artificial_cplxs:
                        artificial_cplxs.append(artificial_cplx)
                        prot_cplx_file.write(artificial_cplx + '\t' + 'default_name' + '\t' +
                                             stoichiometry_string + '\t' + 'M_protein_recon' + '\n')
            
        string = string[0:len(string)-4]
        string = string + '\n'
        file.write(string)
    
file.close() 
prot_cplx_file.close()
artificial_cplxs = list(set(artificial_cplxs))



2HXMPt6  has no rule
2PGLYCt6  has no rule
2PGt6  has no rule
3AMPt6  has no rule
3CMPt6  has no rule
3GMPt6  has no rule
3PGt6  has no rule
3UMPt6  has no rule
5MTRt2  has no rule
6PGCt6  has no rule
ABTt_1  has no rule
ACACt2  has no rule
ACMANAt2  has no rule
ACNAMt2  has no rule
ACNPLYS  has no rule
ACt2r  has no rule
ADEt2  has no rule
ALLTNt2r  has no rule
ALLTAHr  has no rule
AMPt6  has no rule
ARABDI  has no rule
ARAB_Dt  has no rule
ARGKr  has no rule
ARGPt6  has no rule
ASNt2r  has no rule
ASPt2r  has no rule
ATPM  has no rule
BIOMASS_BS_10  has no rule
BTDt6_RR  has no rule
CHORt2  has no rule
CITRt2r  has no rule
CMPt6  has no rule
CO2t  has no rule
CYSGLYt2ir  has no rule
CYSTabc  has no rule
CYSabc  has no rule
DEXTRINt2  has no rule
DHAt  has no rule
DJENKabc  has no rule
DTMPt6  has no rule
ETHAAL  has no rule
ETOHt3  has no rule
F6Pt6_2  has no rule
FOLt  has no rule
FORAMD  has no rule
FRTT  has no rule
G1Pt6_2  has no rule
G3POA_BS  has no rule
G5SADs  has no rule
G6

## TUs_from_bsubcyc.txt

In [7]:
## Get rho dependence
rho_dependent_TUs = []
for terminator in bsub.rho_independent_terminators.instances:
    terminator_data = pc.PToolsFrame.PFrame.get_frame_data(terminator)
    terminator_id = frameid_to_str(terminator.frameid)
    TU_list = terminator_data.component_of
    
    for tu in TU_list:
        tu_id = frameid_to_str(tu)
        tu_id = tu_id.replace('-','_') 
        if tu_id not in rho_dependent_TUs:
            rho_dependent_TUs.append(tu_id)

In [8]:
# Get sigma-promoterBOX dict
promoterBOX_to_sigma_dict = dict()
for sigma in bsub.sigma_factors.instances:
    sigma_data = pc.PToolsFrame.PFrame.get_frame_data(sigma)
    sigma_id = frameid_to_str(sigma_data.frameid)
    promoterBOXes = sigma_data.recognized_promoters
    if promoterBOXes:
        for promoterBOX in promoterBOXes:
            promoterBOX_id = frameid_to_str(promoterBOX)
            if promoterBOX_id in promoterBOX_to_sigma_dict.keys():
                promoterBOX_to_sigma_dict[promoterBOX_id].append(sigma_id)
            else:
                promoterBOX_to_sigma_dict[promoterBOX_id] = sigma_id

In [9]:
TU_to_sigma_dict = dict()

# Get promoter - promoterBOX dict
promoter_to_promoterBOX_dict = dict()

for promoter in bsub.promoters.instances:
    promoter_data = pc.PToolsFrame.PFrame.get_frame_data(promoter)
    promoter_id = frameid_to_str(promoter.frameid)
    promoterBOXes = promoter_data.promoter_boxes
    if promoterBOXes:
        promoterBOX = frameid_to_str(promoterBOXes[0])
        TUs = promoter_data.component_of
        if TUs:
            for TU in TUs:
                TU_id = frameid_to_str(TU)
                TU_id = tu_id = TU_id.replace('-','_') 
                if TU_id in TU_to_sigma_dict.keys():
                    continue
                else:
                    try:
                        TU_to_sigma_dict[TU_id] = promoterBOX_to_sigma_dict[promoterBOX]
                    except:
                        TU_to_sigma_dict[TU_id] = 'BSU25200-MONOMER' # No sigma information, assume RpoD
TU_to_sigma_dict

{'CHROM_1_0': 'BSU25200-MONOMER',
 'CHROM_1_1': 'BSU00980-MONOMER',
 'CHROM_1_10': 'BSU15320-MONOMER',
 'CHROM_1_11': 'BSU04730-MONOMER',
 'CHROM_1_12': 'BSU25200-MONOMER',
 'CHROM_1_13': 'BSU25200-MONOMER',
 'CHROM_1_14': 'BSU25200-MONOMER',
 'CHROM_1_15': 'BSU15320-MONOMER',
 'CHROM_1_16': 'BSU25200-MONOMER',
 'CHROM_1_17': 'BSU25200-MONOMER',
 'CHROM_1_18': 'BSU34200-MONOMER',
 'CHROM_1_19': 'BSU25200-MONOMER',
 'CHROM_1_2': 'BSU25200-MONOMER',
 'CHROM_1_20': 'MONOMER8J2-6',
 'CHROM_1_21': 'BSU00980-MONOMER',
 'CHROM_1_22': 'BSU25200-MONOMER',
 'CHROM_1_23': 'BSU25200-MONOMER',
 'CHROM_1_24': 'MONOMER8J2-6',
 'CHROM_1_25': 'BSU25200-MONOMER',
 'CHROM_1_26': 'BSU25200-MONOMER',
 'CHROM_1_27': 'BSU25200-MONOMER',
 'CHROM_1_28': 'BSU25200-MONOMER',
 'CHROM_1_29': 'BSU25200-MONOMER',
 'CHROM_1_3': 'BSU15320-MONOMER',
 'CHROM_1_30': 'BSU25200-MONOMER',
 'CHROM_1_31': 'BSU16470-MONOMER',
 'CHROM_1_32': 'BSU25200-MONOMER',
 'CHROM_1_33': 'BSU25200-MONOMER',
 'CHROM_1_34': 'BSU25200-MONOMER

In [10]:
from Bio import SeqIO

filename = 'TUs_from_bsubcyc.txt'
file = open(filename,'w')

gb_filename = 'NC_000964.gb'   
gb_file = SeqIO.read(gb_filename, 'gb')
full_seq = str(gb_file.seq)
element_types={'CDS', 'rRNA','tRNA', 'ncRNA'}

head_string = 'TU_id' + '\t' + 'start' + '\t' + 'stop' + '\t' + 'tss' + '\t' + 'strand' + '\t' + 'rho_dependent' + '\t' + 'sigma' + '\n'
file.write(head_string)
TUs = bsub.transcription_units
for tu_PFrame in TUs.instances:
    tu_data = pc.PToolsFrame.PFrame.get_frame_data(tu_PFrame)
    
    ## Start and stop
    positions = []
    for gene_fid in tu_data.components:
        if 'BSU' in gene_fid:
            gene_PFrame = pc.PToolsFrame.PFrame(gene_fid,bsub,getFrameData=False, isClass=False)
            gene_data = pc.PToolsFrame.PFrame.get_frame_data(gene_PFrame)
            positions.append(gene_data.left_end_position)
            positions.append(gene_data.right_end_position)
            gene_id = gene_fid.replace('BSU','BSU_')
    if positions:
        tu_start = min(positions)
        tu_stop = max(positions)
    else:
        tu_start = 0
        tu_stop = 0
    
    tu_tss = tu_stop

    ## ID
    tu_id = frameid_to_str(tu_data.frameid)
    tu_id = tu_id.replace('-','_')
    
    ## Sigma
    try:
        sigma = TU_to_sigma_dict[tu_id]
    except:
        sigma = 'BSU25200-MONOMER' # No sigma information, assume RpoD
    
    ## Rho
    rho_dependence = 'False' if tu_id in rho_dependent_TUs else 'True'
    
    ## Strand    
    gene_id = gene_id[1:len(gene_id)-1] ## Only use one gene. The others should have the same direction.
    for feature in gb_file.features:
        if feature.type not in element_types or 'pseudo' in feature.qualifiers:
            continue
        if feature.qualifiers["locus_tag"][0] == gene_id:
            strand = '+' if feature.location.strand == 1 else '-'
            
    ##
    tu_id = tu_id + '_from_' + sigma
    string = str(tu_id) + '\t' + str(tu_start) + '\t' + str(tu_stop) + '\t' + str(tu_tss) + '\t' + str(strand) + '\t' + rho_dependence + '\t' + sigma + '\n'
    
    file.write(string)
    
file.close()

In [11]:
tu_data

0,1
citations,"[u'12823818', u'17962296']"
comment,[u'Experimental information from DBTBS: Northern blotting (5.3 kb transcript)']
common_name,pucABCDE
components,"[u'|BS8J2-422|', u'|PM8J2-611|', u'|TERM8J2-736|', u'|BSU32470|', u'|BSU32480|', u'|BSU32490|', u'|BSU32500|', u'|BSU32510|']"
creation_date,3486920276
creator,|taltman|
frameid,|TU8J2-467|
key_slots,|COMMON-NAME|
names,[u'pucABCDE']
pgdb,


## trna_to_codon dictionary

In [12]:
DNA_to_codon_table = {'TTT': 'F',
 'TTC': 'F',
 'TTA': 'L',
 'TTG': 'L',
 'TCT': 'S',
 'TCC': 'S',
 'TCA': 'S',
 'TCG': 'S',
 'TAT': 'Y',
 'TAC': 'Y',
 'TAA': '*',
 'TAG': '*',
 'TGT': 'C',
 'TGC': 'C',
 'TGA': '*',
 'TGG': 'W',
 'CTT': 'L',
 'CTC': 'L',
 'CTA': 'L',
 'CTG': 'L',
 'CCT': 'P',
 'CCC': 'P',
 'CCA': 'P',
 'CCG': 'P',
 'CAT': 'H',
 'CAC': 'H',
 'CAA': 'Q',
 'CAG': 'Q',
 'CGT': 'R',
 'CGC': 'R',
 'CGA': 'R',
 'CGG': 'R',
 'ATT': 'I',
 'ATC': 'I',
 'ATA': 'I',
 'ATG': 'M',
 'ACT': 'T',
 'ACC': 'T',
 'ACA': 'T',
 'ACG': 'T',
 'AAT': 'N',
 'AAC': 'N',
 'AAA': 'K',
 'AAG': 'K',
 'AGT': 'S',
 'AGC': 'S',
 'AGA': 'R',
 'AGG': 'R',
 'GTT': 'V',
 'GTC': 'V',
 'GTA': 'V',
 'GTG': 'V',
 'GCT': 'A',
 'GCC': 'A',
 'GCA': 'A',
 'GCG': 'A',
 'GAT': 'D',
 'GAC': 'D',
 'GAA': 'E',
 'GAG': 'E',
 'GGT': 'G',
 'GGC': 'G',
 'GGA': 'G',
 'GGG': 'G'}

tRNA_to_codon_table = dict()
for key in DNA_to_codon_table:
    aa_id = DNA_to_codon_table[key]
    aa_id = seq3(aa_id)
    key_tRNA = Seq.transcribe(key)
    tRNA_to_codon_table[key_tRNA] = aa_id
    

tRNA_to_codon_table

{'AAA': 'Lys',
 'AAC': 'Asn',
 'AAG': 'Lys',
 'AAU': 'Asn',
 'ACA': 'Thr',
 'ACC': 'Thr',
 'ACG': 'Thr',
 'ACU': 'Thr',
 'AGA': 'Arg',
 'AGC': 'Ser',
 'AGG': 'Arg',
 'AGU': 'Ser',
 'AUA': 'Ile',
 'AUC': 'Ile',
 'AUG': 'Met',
 'AUU': 'Ile',
 'CAA': 'Gln',
 'CAC': 'His',
 'CAG': 'Gln',
 'CAU': 'His',
 'CCA': 'Pro',
 'CCC': 'Pro',
 'CCG': 'Pro',
 'CCU': 'Pro',
 'CGA': 'Arg',
 'CGC': 'Arg',
 'CGG': 'Arg',
 'CGU': 'Arg',
 'CUA': 'Leu',
 'CUC': 'Leu',
 'CUG': 'Leu',
 'CUU': 'Leu',
 'GAA': 'Glu',
 'GAC': 'Asp',
 'GAG': 'Glu',
 'GAU': 'Asp',
 'GCA': 'Ala',
 'GCC': 'Ala',
 'GCG': 'Ala',
 'GCU': 'Ala',
 'GGA': 'Gly',
 'GGC': 'Gly',
 'GGG': 'Gly',
 'GGU': 'Gly',
 'GUA': 'Val',
 'GUC': 'Val',
 'GUG': 'Val',
 'GUU': 'Val',
 'UAA': 'Ter',
 'UAC': 'Tyr',
 'UAG': 'Ter',
 'UAU': 'Tyr',
 'UCA': 'Ser',
 'UCC': 'Ser',
 'UCG': 'Ser',
 'UCU': 'Ser',
 'UGA': 'Ter',
 'UGC': 'Cys',
 'UGG': 'Trp',
 'UGU': 'Cys',
 'UUA': 'Leu',
 'UUC': 'Phe',
 'UUG': 'Leu',
 'UUU': 'Phe'}

In [13]:
def get_key(my_dict, val): 
    key_list = []
    for key, value in my_dict.items(): 
         if val == value: 
                key_list.append(key)
    return key_list



In [14]:
trna_to_codon = dict()
for tRNA_PFrame in bsub.tRNAs.instances:
    tRNA_data = pc.PToolsFrame.PFrame.get_frame_data(tRNA_PFrame)
    
    tRNA_id = str(tRNA_data.frameid)
    tRNA_id = tRNA_id.replace('|','')
    tRNA_id = tRNA_id.replace('-tRNA','')
    tRNA_id = tRNA_id.replace('TRNA','tRNA')
    
    aa_id = tRNA_data.common_name
    aa_id = aa_id[len(aa_id)-3:len(aa_id)]
    
    codon_list = get_key(tRNA_to_codon_table, aa_id)
    
    trna_to_codon[tRNA_id] = codon_list
    
trna_to_codon
    

{'BSU_tRNA_1': ['UUU', 'UUC'],
 'BSU_tRNA_10': ['AUG'],
 'BSU_tRNA_11': ['GAA', 'GAG'],
 'BSU_tRNA_12': ['GUU', 'GUC', 'GUG', 'GUA'],
 'BSU_tRNA_13': ['ACC', 'ACA', 'ACG', 'ACU'],
 'BSU_tRNA_14': ['AAG', 'AAA'],
 'BSU_tRNA_15': ['CUU', 'CUG', 'CUA', 'CUC', 'UUG', 'UUA'],
 'BSU_tRNA_16': ['GGU', 'GGG', 'GGA', 'GGC'],
 'BSU_tRNA_17': ['CUU', 'CUG', 'CUA', 'CUC', 'UUG', 'UUA'],
 'BSU_tRNA_18': ['AGG', 'AGA', 'CGA', 'CGG', 'CGC', 'CGU'],
 'BSU_tRNA_19': ['CCG', 'CCA', 'CCU', 'CCC'],
 'BSU_tRNA_2': ['GAU', 'GAC'],
 'BSU_tRNA_20': ['GCA', 'GCG', 'GCC', 'GCU'],
 'BSU_tRNA_21': ['AUG'],
 'BSU_tRNA_22': ['GAU', 'GAC'],
 'BSU_tRNA_23': ['AAC', 'AAU'],
 'BSU_tRNA_24': ['ACC', 'ACA', 'ACG', 'ACU'],
 'BSU_tRNA_25': ['GGU', 'GGG', 'GGA', 'GGC'],
 'BSU_tRNA_26': ['AGG', 'AGA', 'CGA', 'CGG', 'CGC', 'CGU'],
 'BSU_tRNA_27': ['CCG', 'CCA', 'CCU', 'CCC'],
 'BSU_tRNA_28': ['GCA', 'GCG', 'GCC', 'GCU'],
 'BSU_tRNA_29': ['AAC', 'AAU'],
 'BSU_tRNA_3': ['GAA', 'GAG'],
 'BSU_tRNA_30': ['AGC', 'AGU', 'UCU', 'UCG'

In [15]:
tRNA_data

0,1
common_name,tRNA-Leu
creation_date,3465237858
creator,|keseler|
frameid,|BSU_TRNA_57-tRNA|
gene,[u'|BSU_TRNA_57|']
modified_form,[u'|charged-BSU_TRNA_57-tRNA|']
names,[u'an uncharged tRNA']
overview_node_shape,|TEE|
pgdb,
schema_p,True


## protein_modification.txt

In [16]:
filename = 'protein_modification.txt'
file = open(filename,'w')
file.write('Modified_enzyme' + '\t' + 'Core_enzyme' + '\t' + 'Modifications' + '\t' + 'Source' + '\n')
RXNS = bsub.enzymatic_reactions

add_cofactors_list = []
cofactors = {'|ZN+2|':'zn2','|CU+2|':'cu2','|NAD|':'nad','|FAD|':'fad','|MG+2|':'mg2','|CA+2|':'ca2',
             '|Reduced-2Fe-2S-Ferredoxins|':'2fe2s','|Oxidized-ferredoxins|':'2fe2s',
             '|Reduced-ferredoxins|':'2fe2s','|NADP|':'nad','|PYRIDOXAL_PHOSPHATE|':'pydx5p',
             '|ADENOSYLCOBALAMIN|':'adocbl','|BIOTIN|':'btn','|CPD-7|':'4fe4s','|COB-I-ALAMIN|':'cbl1',
             '|Hemes|':'heme','|MN+2|':'mn2','|HCO3|':'hco3','|CPD-6|':'2fe2s','|FMN|':'fmn',
             '|TETRA-H-BIOPTERIN|':'tbpt'}

cplx_cofactor_dict = dict()
for rxn in RXNS.instances:
    rxn_data = pc.PToolsFrame.PFrame.get_frame_data(rxn)
    cplx = rxn_data.enzyme
    cofactors_of_rxn = pc.PGDB.cofactors_and_pgroups_of_enzrxn(bsub,rxn)
    if cofactors_of_rxn:
        cplx = str(cplx[1:len(cplx)-1])
        #print rxn.frameid, cplx, cofactors_of_rxn
        if (cplx not in cplx_cofactor_dict.keys()):
            cplx_cofactor_dict[cplx] = []
        for cofactor in cofactors_of_rxn:
            try:
                if (cofactors[cofactor] not in cplx_cofactor_dict[cplx]):
                    cplx_cofactor_dict[cplx].append(cofactors[cofactor])
            except:
                add_cofactors_list.append(cofactor)

for cplx in cplx_cofactor_dict.keys():
    cofactor_string = ''
    mod_string = ''
    for cofactor in cplx_cofactor_dict[cplx]:
        cofactor_string = cofactor_string + cofactor + '()' + ' AND '
        mod_string = mod_string + '_mod_' + cofactor
    cofactor_string = cofactor_string[0:len(cofactor_string)-5]
    
    string = cplx + mod_string +'\t' + cplx + '\t' + cofactor_string + '\t' + 'M_protein_recon' + '\n'
    
    file.write(string)
file.close()

print add_cofactors_list

[]


In [17]:
filename = 'protein_modification.txt'
file = open(filename,'a')

cplx_cofactor_dict = dict()
RXNS = bsub.reactions
for rxn in RXNS.instances:
    
    rxn_data = pc.PToolsFrame.PFrame.get_frame_data(rxn)
    
    substrates = rxn_data.substrates
    rxn_cofactors = list(set(cofactors) & set(substrates))
    
    # print rxn_data.frameid, substrates
    if any(rxn_cofactors):
        cplx_list = pc.PGDB.enzymes_of_reaction(bsub,rxn)
        for cplx in cplx_list:
            cplx = str(cplx[1:len(cplx)-1])
            
            if (cplx not in cplx_cofactor_dict.keys()):
                cplx_cofactor_dict[cplx] = []
                
            for cofactor in rxn_cofactors:
                
                if (cofactors[cofactor] not in cplx_cofactor_dict[cplx]):
                    cplx_cofactor_dict[cplx].append(cofactors[cofactor])
                    
for cplx in cplx_cofactor_dict.keys():
    cofactor_string = ''
    mod_string = ''
    for cofactor in cplx_cofactor_dict[cplx]:
        cofactor_string = cofactor_string + cofactor + '()' + ' AND '
        mod_string = mod_string + '_mod_' + cofactor
    cofactor_string = cofactor_string[0:len(cofactor_string)-5]
    
    string = cplx + mod_string +'\t' + cplx + '\t' + cofactor_string + '\t' + 'M_protein_recon' + '\n'
    
    file.write(string)
file.close()


## Cleaved methionine

In [18]:
## Methionine cleaved feature dictionary
met_cleaved_features = []
for instance in bsub.amino_acid_sites.instances:
    instance_data = pc.PToolsFrame.PFrame.get_frame_data(instance)
    try :
        if instance.comment[0] == 'UniProt: Removed.':
            met_cleaved_features.append(instance.frameid)
    except:
        continue

In [19]:
met_cleaved_features

[u'|FTR8J2-35248|',
 u'|FTR8J2-32249|',
 u'|FTR8J2-29216|',
 u'|FTR8J2-26701|',
 u'|FTR8J2-25481|',
 u'|FTR8J2-23046|',
 u'|FTR8J2-21256|',
 u'|FTR8J2-34650|',
 u'|FTR8J2-31524|',
 u'|FTR8J2-28683|',
 u'|FTR8J2-26548|',
 u'|FTR8J2-25263|',
 u'|FTR8J2-22388|',
 u'|FTR8J2-21156|',
 u'|FTR8J2-36869|',
 u'|FTR8J2-34419|',
 u'|FTR8J2-30976|',
 u'|FTR8J2-28388|',
 u'|FTR8J2-26141|',
 u'|FTR8J2-24642|',
 u'|FTR8J2-22216|',
 u'|FTR8J2-20863|',
 u'|FTR8J2-36398|',
 u'|FTR8J2-33449|',
 u'|FTR8J2-30558|',
 u'|FTR8J2-27783|',
 u'|FTR8J2-25789|',
 u'|FTR8J2-24103|',
 u'|FTR8J2-21736|',
 u'|FTR8J2-15191|',
 u'|FTR8J2-35988|',
 u'|FTR8J2-32892|',
 u'|FTR8J2-29972|',
 u'|FTR8J2-27491|',
 u'|FTR8J2-25525|',
 u'|FTR8J2-23417|',
 u'|FTR8J2-21553|',
 u'|FTR8J2-35529|',
 u'|FTR8J2-35509|',
 u'|FTR8J2-32260|',
 u'|FTR8J2-29286|',
 u'|FTR8J2-26795|',
 u'|FTR8J2-25506|',
 u'|FTR8J2-23238|',
 u'|FTR8J2-21381|',
 u'|FTR8J2-5067|',
 u'|FTR8J2-34972|',
 u'|FTR8J2-31704|',
 u'|FTR8J2-28931|',
 u'|FTR8J2-26674|',
 

In [20]:
met_cleaved_prots = []
for protein in bsub.proteins.instances:
    protein_id = frameid_to_str(protein.frameid)
    gene_id = protein_id.split('-MONOMER')[0]
    try:
        features = protein.features
        if list(set(features) & set(met_cleaved_features)):
            met_cleaved_prots.append(gene_id)
    except:
        continue

In [21]:
string = ''
i = 0
for prot in met_cleaved_prots:
    i = i + 1
    string = string + "'" + prot + "'" + ','
    if not i%5:
        string = string + '\n'
print string

'BSU17410','BSU10790','BSU32150','BSU17460','BSU06480',
'BSU39920','BSU06180','BSU16100','BSU01390','BSU28230',
'BSU16150','BSU31390','BSU15990','BSU01440','BSU30540',
'BSU01290','BSU18000','BSU06850','BSU33540','BSU13900',
'BSU03520','BSU28310','BSU02890','BSU25020','BSU06150',
'BSU01050','BSU00730','BSU19530','BSU01410','BSU23860',
'BSU34790','BSU33940','BSU01100','BSU04730','BSU30190',
'BSU23040','BSU03130','BSU25410','BSU32710','BSU32890',
'BSU28440','BSU29120','BSU28500','BSU01340','BSU01020',
'BSU16250','BSU23470','BSU30650','BSU38550','BSU33910',
'BSU29660','BSU28870','BSU07000','BSU06030','BSU16170',
'BSU16690','BSU01310','BSU01150','BSU01700','BSU00510',
'BSU01200','BSU01040','BSU30120','BSU14580','BSU33400',
'BSU04190','BSU19550','BSU12290','BSU19240','BSU01120',
'BSU38140','BSU21870','BSU37660','BSU00110','BSU18030',
'BSU25480','BSU40030','BSU13180','BSU01780','BSU35000',
'BSU28430','BSU08820','BSU31350','BSU01250','BSU16500',
'BSU04400','BSU07830','BSU36830','BSU27320','BSU

## peptide_compartment_and_pathways

In [22]:
membrane_monomers = []
for protein in bsub.proteins.instances:
    protein_id = frameid_to_str(protein.frameid)
    protein_data = pc.PToolsFrame.PFrame.get_frame_data(protein)
    
    locations = protein_data.locations
    if locations is not None:
        location = frameid_to_str(locations[0])
        if location == 'CCI-PM-BAC-POS-GP':
            protein_id = protein_id.split('-MONOMER')[0]
            membrane_monomers.append(protein_id)

In [23]:
filename = 'peptide_compartment_and_pathways.txt'
file = open(filename,'w')

file.write('Complex' + '\t' + 'Complex_compartment' + '\t' + 'Protein' + '\t' + 'Protein_compartment' +
           '\t' + 'translocase_pathway''\n')

for protein in bsub.proteins.instances:
    protein_id = frameid_to_str(protein.frameid)
    protein_data = pc.PToolsFrame.PFrame.get_frame_data(protein)
    
    genes_of_cplx = pc.PGDB.genes_of_protein(bsub,protein)
    if (genes_of_cplx):
        test_gene = frameid_to_str(genes_of_cplx[0])
        if (test_gene in membrane_monomers):
            for gene in protein_complexes_dict[protein_id]:
                file.write(protein_id + '\t' + 'Inner_Membrane' + '\t' + gene + '(1)' + '\t' +
                               'Inner_Membrane' + '\t' + 'y' + '\n')
file.close()

In [28]:
bsub.methyl_transferases

In [24]:
for protein in bsub.proteins.instances:
    protein_id = frameid_to_str(protein.frameid)
    protein_data = pc.PToolsFrame.PFrame.get_frame_data(protein)
    if protein_id == 'BSU06030-MONOMER':
        break
protein_data

0,1
citations,"[u'21856851', u'22268681']"
comment,"[u'16.6: Maintain\n\n16.13: Shape\n\n10/23/2009 (keseler) Information from the deleted enzymatic-reaction frame, upon complex formation:\n Basis for assignment: AUTOMATED-NAME-MATCH .']"
common_name,chaperonin large subunit
component_of,[u'|CPLX8J2-24|']
creation_date,3465237858
creator,|keseler|
dblinks,"{u'|PROSITE|': [u'PS00296', u'|IN-FAMILY|', u'|kothari|', 3698001710, None, None], u'|DIP|': [u'DIP-58540N', None, u'|kothari|', 3698001659, None, None], u'|PFAM|': [u'PF00118', u'|IN-FAMILY|', u'|kothari|', 3698001734, None, None], u'|PRIDE|': [u'P28598', None, u'|kothari|', 3698001704, None, None], u'|INTERPRO|': [u'IPR027413', u'|IN-FAMILY|', u'|kothari|', 3698001740, None, None], u'|PRINTS|': [u'PR00298', u'|IN-FAMILY|', u'|kothari|', 3698001682, None, None], u'|PID|': [u'2632916', None, u'|keseler|', 3465237843, None, None], u'|UNIPROT|': [u'P28598', None, u'|keseler|', 3465237843, None, None], u'|HSSP|': [u'1IOK', None, u'|keseler|', 3465237843, None, None], u'|PROTEINMODELPORTAL|': [u'P28598', None, u'|kothari|', 3698001748, None, None]}"
features,"[u'|FTR8J2-27788|', u'|FTR8J2-27787|', u'|FTR8J2-27786|', u'|FTR8J2-27785|', u'|FTR8J2-27784|', u'|FTR8J2-27783|', u'|FTR8J2-18331|']"
frameid,|BSU06030-MONOMER|
gene,[u'|BSU06030|']


In [25]:
for instance in ecoli.transcription_units.instances:
    instance_data = pc.PToolsFrame.PFrame.get_frame_data(instance)
    if instance_data.frameid == '|TU0_14388|':
        break

In [26]:
instance_data

0,1
citations,[u':EV-COMP-AINF:3371579817:kr']
components,"[u'|G6270|', u'|EG11657|']"
creation_date,3371579817
creator,|kr|
frameid,|TU0-12959|
key_slots,|COMMON-NAME|
pgdb,
schema_p,True
synonym_slots,"[u'|ABBREV-NAME|', u'|SYNONYMS|']"
