In [1]:
# Development convenience: load IPython's autoreload extension and switch it to
# mode 2, so that edited modules are re-imported before each cell is executed.
%load_ext autoreload
%autoreload 2

## KEGGapFiller Example

### Initialisation

In [2]:
from refinegems.classes.gapfill import KEGGapFiller
from refinegems.utility.io import load_model

# SBML file used throughout the KEGGapFiller example.
# A link to this model will be added as soon as it is published.
modelpath = './test_files/JCSC1435.xml'

# The same file is loaded twice: the libSBML object is handed to
# find_missing_genes / fill_model, the COBRA object to find_missing_reactions.
model = load_model(modelpath, 'libsbml')
cmodel = load_model(modelpath, 'cobra')

# Initialise the GapFiller subclass with its required parameter
# ('sha' is presumably the KEGG organism code -- confirm against the KEGGapFiller docs).
gfk = KEGGapFiller('sha')

* 'underscore_attrs_are_private' has been removed


### Missing genes

In [3]:
# Find missing genes (takes the libSBML model).
# Long-running: the progress bar recorded with this notebook shows 1987
# iterations taking roughly 43 minutes.
gfk.find_missing_genes(model)

# Get/show missing genes: the attribute is the cell's last expression,
# so the notebook renders it as the cell output.
gfk.missing_genes

100%|██████████| 1987/1987 [42:40<00:00,  1.29s/it]


Unnamed: 0,orgid:locus,locus_tag,kegg.orthology,ec-code,ncbiprotein,uniprot
0,sha:pSHaeA01,pSHaeA01,[K11210],[2.5.1.-],BAE05988,[Q4L2Y9]
1,sha:pSHaeA02,pSHaeA02,,,BAE05989,[Q4L2Y8]
2,sha:pSHaeA03,pSHaeA03,,,BAE05990,[Q4L2Y7]
3,sha:pSHaeB01,pSHaeB01,[K00561],[2.1.1.184],BAE05991,[Q4L2Y6]
4,sha:pSHaeB02,pSHaeB02,,,BAE05992,[Q4L2Y5]
...,...,...,...,...,...,...
1982,sha:SH2674,SH2674,[K03501],[2.1.1.170],BAE05983,[Q4L2Z4]
1983,sha:SH2675,SH2675,[K03495],,BAE05984,[Q4L2Z3]
1984,sha:SH2676,SH2676,[K03650],[3.6.-.-],BAE05985,[Q4L2Z2]
1985,sha:SH2677,SH2677,[K03536],[3.1.26.5],BAE05986,[Q4L2Z1]


### Missing reactions

In [5]:
# Find missing reactions (takes the COBRA model, not the libSBML one).
gfk.find_missing_reactions(cmodel)

# Get/show missing reactions: the attribute is the cell's last expression,
# so the notebook renders it as the cell output.
gfk.missing_reactions

100%|██████████| 11/11 [00:43<00:00,  3.93s/it]


Unnamed: 0,ec-code,ncbiprotein,id,equation,reference,is_transport,via,add_to_GPR
0,2.3.1.313,[BAE04147],R13186,,{'brenda': ['2.3.1.313']},,KEGG,
1,2.4.99.28,"[BAE04395, BAE04771]",R06178,,"{'kegg.pathway': ['ec00550'], 'brenda': ['2.4....",,KEGG,
2,2.4.99.28,"[BAE04395, BAE04771]",R06179,,"{'kegg.pathway': ['ec00550'], 'brenda': ['2.4....",,KEGG,
3,2.7.10.3,[BAE03698],R02584,,{'brenda': ['2.7.10.3']},,KEGG,
4,2.7.2.18,[BAE04998],R12793,,{'brenda': ['2.7.2.18']},,KEGG,
...,...,...,...,...,...,...,...,...
3616,7.5.2.4,"[BAE04398, BAE05568]",MNXR115570,1 MNXM1@MNXD1 + 1 MNXM21999@MNXD2 + 1 MNXM4033...,seedR:rxn18688,T,MetaNetX,
3617,7.5.2.4,"[BAE04398, BAE05568]",MNXR121346,1 MNXM12982@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM4033...,seedR:rxn41181,T,MetaNetX,
3618,7.5.2.4,"[BAE04398, BAE05568]",MNXR121347,1 MNXM163615@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM403...,seedR:rxn43450,T,MetaNetX,
3619,7.5.2.4,"[BAE04398, BAE05568]",MNXR123981,1 MNXM163613@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM403...,seedR:rxn42597,T,MetaNetX,


In [16]:
# Save the missing-reactions table as a tab-separated file, without the index column.
gfk.missing_reactions.to_csv('./test_files/kegg_missing_reactions.tsv', sep='\t', index=False)

### Fill model

In [7]:
# Fill model
# Gotcha: `model` is rebound to the return value of fill_model, so re-running
# this cell passes the previous result back in rather than the freshly loaded model.
# NOTE(review): the output recorded for this cell ends in
# "AttributeError: 'NoneType' object has no attribute 'getPlugin'" --
# re-run on a fresh kernel to confirm whether it still reproduces.
model = gfk.fill_model(model)

Adding genes to model: 100%|██████████| 18/18 [00:00<00:00, 3138.54it/s]


AttributeError: 'NoneType' object has no attribute 'getPlugin'

### Statistics

In [8]:
# Get raw statistics
# `_statistics` is an underscore-prefixed attribute of the gap filler; it is
# displayed here unprocessed (a nested dict of counts, as the recorded output shows).
gfk._statistics

{'genes': {'missing (before)': 1987,
  'duplicates': 78,
  'added': 18,
  'missing (after)': 0},
 'reactions': {'added (total)': 0, 'failed to build': 0},
 'metabolites': {}}

## GeneGapFiller Example

### Initialisation
#### Get SwissProt DIAMOND database

In [None]:
# Same autoreload set-up as the first cell of the notebook, repeated here
# (presumably so that this example can be started on its own -- confirm).
%load_ext autoreload
%autoreload 2

In [2]:
from refinegems.utility.set_up import download_url

# One-off set-up for the GeneGapFiller example. Both steps are left commented
# out (presumably so that re-running the notebook does not repeat them).

# Step 1 -- Get SwissProt (uncomment to run):
# download_url('SwissProt gapfill', directory='./test_files/test_gapfill')

# Step 2 -- On the command line, build the DIAMOND database from the downloaded FASTA:
# diamond makedb --in ./dev/test_files/test_gapfill/SwissProt.fasta -d ./dev/test_files/test_gapfill/swissprot
#
# NOTE(review): the shell paths above start with ./dev, while the Python paths in
# this notebook start with ./test_files -- presumably the command is meant to be
# run from the repository root rather than from the notebook's directory; confirm.

* 'underscore_attrs_are_private' has been removed


#### Initialise GeneGapFiller

In [2]:
from refinegems.classes.gapfill import GeneGapFiller
from refinegems.utility.io import load_model

# SBML file used throughout the GeneGapFiller example.
# A link to this model will be added as soon as it is published.
modelpath = './test_files/IMITSC147.xml'

# Input files required for the GeneGapFiller
gffpath = './test_files/IMITSC147_genome.gff'
tfasta = './test_files/IMITSC147_proteins_genome.fasta'
spdb = './test_files/test_gapfill/swissprot.dmnd'
spmap = './test_files/test_gapfill/SwissProt_mapping.tsv'

# Extra keyword arguments that are unpacked into find_missing_reactions.
# NOTE(review): apart from 'outdir', these look like DIAMOND search options
# (sensitivity mode, coverage, threads, percentage identity) -- confirm
# against the find_missing_reactions documentation.
kwargs = {
    'outdir': './test_files/test_gapfill/IMITSC147',
    'sens': 'more-sensitive',
    'cov': 95.0,
    't': 4,
    'pid': 90.0,
}

# The same file is loaded twice: the libSBML object is handed to
# find_missing_genes / fill_model, the COBRA object to find_missing_reactions.
model = load_model(modelpath, 'libsbml')
cmodel = load_model(modelpath, 'cobra')

# Initialise the GapFiller subclass (no constructor arguments are needed here)
gfg = GeneGapFiller()

* 'underscore_attrs_are_private' has been removed


### Missing genes

In [3]:
# Find missing genes (takes the GFF path and the libSBML model).
gfg.find_missing_genes(gffpath,model)

# Get/show missing genes: the attribute is the cell's last expression,
# so the notebook renders it as the cell output.
gfg.missing_genes


Unnamed: 0,locus_tag,ncbiprotein,ec-code
0,IMITSC147_000001,extdb:IMITSC147_000001,
1,IMITSC147_000002,extdb:IMITSC147_000002,
2,IMITSC147_000003,extdb:IMITSC147_000003,
3,IMITSC147_000004,extdb:IMITSC147_000004,
4,IMITSC147_000005,extdb:IMITSC147_000005,
...,...,...,...
2439,IMITSC147_002523,extdb:IMITSC147_002523,
2440,IMITSC147_002524,extdb:IMITSC147_002524,
2441,IMITSC147_002525,extdb:IMITSC147_002525,
2442,IMITSC147_002526,,


### Missing reactions

In [4]:
# Find missing reactions.
# Takes the COBRA model plus the protein FASTA, the SwissProt DIAMOND database
# and the SwissProt mapping table; the remaining options come from `kwargs`,
# which is defined in the initialisation cell.
# (The stray `%autoreload 2` that used to open this cell was removed: autoreload
# is already configured in the dedicated set-up cells.)
gfg.find_missing_reactions(
    model=cmodel,
    fasta=tfasta,
    dmnd_db=spdb,
    swissprot_map=spmap,
    **kwargs
)

# Get/show missing reactions: the attribute is the cell's last expression,
# so the notebook renders it as the cell output.
gfg.missing_reactions

100%|██████████| 4/4 [00:12<00:00,  3.06s/it]


Unnamed: 0,ec-code,ncbiprotein,id,equation,reference,is_transport,via,add_to_GPR
4,1.-.-.-,[extdb:IMITSC147_000502],MNXR108769,1 MNXM1@MNXD1 + 1 MNXM736332@MNXD1 + 1 MNXM738...,rheaR:34567,,MetaNetX,
5,1.-.-.-,[extdb:IMITSC147_000502],MNXR109748,1 MNXM10@MNXD1 + 1 MNXM4978@MNXD1 = 3 MNXM1@MN...,keggR:R05668,,MetaNetX,
6,1.-.-.-,[extdb:IMITSC147_000502],MNXR111597,1 MNXM1094296@MNXD1 + 2 MNXM1@MNXD1 + 2 MNXM73...,keggR:R08014,,MetaNetX,
7,1.-.-.-,[extdb:IMITSC147_000502],MNXR111600,1 MNXM1094296@MNXD1 + 2 MNXM10@MNXD1 + 2 MNXM1...,keggR:R08017,,MetaNetX,
8,1.-.-.-,[extdb:IMITSC147_000502],MNXR111623,1 MNXM1094296@MNXD1 + 2 MNXM10@MNXD1 + 2 MNXM1...,keggR:R08042,,MetaNetX,
...,...,...,...,...,...,...,...,...
3482,7.5.2.4,[extdb:IMITSC147_002172],MNXR121346,1 MNXM12982@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM4033...,seedR:rxn41181,T,MetaNetX,
3483,7.5.2.4,[extdb:IMITSC147_002172],MNXR121347,1 MNXM163615@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM403...,seedR:rxn43450,T,MetaNetX,
3484,7.5.2.4,[extdb:IMITSC147_002172],MNXR123981,1 MNXM163613@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM403...,seedR:rxn42597,T,MetaNetX,
3485,7.5.2.4,[extdb:IMITSC147_002172],MNXR191804,1 MNXM164493@MNXD1 + 1 MNXM1@MNXD2 + 1 MNXM403...,metacycR:TRANS-RXN-321,T,MetaNetX,


In [6]:
# Save the missing-reactions table as a tab-separated file, without the index column.
gfg.missing_reactions.to_csv('./test_files/gene_missing_reactions.tsv', sep='\t', index=False)

### Fill model

In [7]:
# Fill model
# Gotcha: `model` is rebound to the return value of fill_model, so re-running
# this cell passes the previous result back in rather than the freshly loaded model.
# NOTE(review): the output recorded for this cell is
# "NameError: name 'gfg' is not defined", which suggests it was last run on a
# kernel where the initialisation cell had not been executed -- re-run the
# section top to bottom to refresh it.
model = gfg.fill_model(model,formula_check='existence')

NameError: name 'gfg' is not defined

### Statistics

In [7]:
# Get raw statistics
# `_statistics` is an underscore-prefixed attribute of the gap filler; it is
# displayed here unprocessed (a nested dict of counts, as the recorded output shows).
gfg._statistics

{'genes': {'missing (before)': 1679,
  'duplicates': 9,
  'added': 76,
  'missing (after)': 0,
  'no locus tag': 0},
 'reactions': {'added (total)': 0,
  'failed to build': 0,
  'no NCBI, no EC': 4,
  'NCBI, no EC': 1369},
 'metabolites': {}}

## BioCycGapFiller Example

### Initialisation

In [1]:
from refinegems.classes.gapfill import BioCycGapFiller
from refinegems.utility.io import load_model

# SBML file used throughout the BioCycGapFiller example.
# A link to this model will be added as soon as it is published.
modelpath = './test_files/ATCC29970.xml'

# Input files required for the BioCycGapFiller
gffpath = './test_files/ATCC29970_RefSeq.gff'
biocyc_gene_tbl_path = './test_files/ATCC29970_Accession-22Reactions.txt'
biocyc_reacs_tbl_path = './test_files/ATCC29970_biocyc_rxns.txt'
# NOTE(review): `fasta` is not referenced by any other cell shown in this notebook.
fasta = './test_files/ATCC29970_proteins_genome.fasta'

# The same file is loaded twice: the libSBML object is handed to
# find_missing_genes / fill_model, the COBRA object to find_missing_reactions.
model = load_model(modelpath, 'libsbml')
cmodel = load_model(modelpath, 'cobra')

# Initialise the GapFiller subclass with its required parameters:
# the two BioCyc tables (genes, reactions) followed by the GFF file.
gfbc = BioCycGapFiller(
    biocyc_gene_tbl_path,
    biocyc_reacs_tbl_path,
    gffpath,
)

* 'underscore_attrs_are_private' has been removed


### Missing genes

In [2]:
# Find missing genes (takes the libSBML model).
gfbc.find_missing_genes(model)

# Get/show missing genes: the attribute is the cell's last expression,
# so the notebook renders it as the cell output.
gfbc.missing_genes

Unnamed: 0,locus_tag,id,name,ncbiprotein
0,EQ029_01155,OLEATE-HYDRATASE-RXN,oleate hydratase,WP_053030667.1
1,EQ029_06240,TRANS-RXN1WO9-20,nucleotide exchange factor GrpE,WP_011275632.1
2,EQ029_07635,PHOSPHAGLYPSYN-RXN,CDP-diacylglycerol--glycerol-3-phosphate 3-pho...,WP_011275916.1
3,EQ029_03370,RXN-8348,molybdopterin molybdotransferase MoeA,WP_011275102.1
4,EQ029_03775,HEME-OXYGENASE-DECYCLIZING-RXN // RXN-18341 //...,heme oxygenase,WP_011275179.1
...,...,...,...,...
230,EQ029_01095,PYROGLUTAMYL-PEPTIDASE-I-RXN,pyroglutamyl-peptidase I,WP_011274622.1
231,EQ029_00835,TEICHOICSYN3-RXN,CDP-glycerol glycerophosphotransferase family ...,WP_057504685.1
232,EQ029_06820,TRNA-CYTIDYLYLTRANSFERASE-RXN,CCA tRNA nucleotidyltransferase,WP_037558740.1
233,EQ029_08625,RXN-18399 // RXN-18428 // RXN-18430,heme A synthase,WP_011276113.1


### Missing reactions

In [3]:
# Find missing reactions (takes the COBRA model, not the libSBML one).
gfbc.find_missing_reactions(cmodel)

# Get/show missing reactions: the attribute is the cell's last expression,
# so the notebook renders it as the cell output.
gfbc.missing_reactions

Unnamed: 0,add_to_GPR,ec-code,equation,id,ncbiprotein,reference,via
0,[3HAD80],[4.2.1.59],(3R)-3-hydroxyoctanoyl-[acp] -> (2E)-oct-2-e...,4.2.1.59-RXN,[WP_011275247.1],,BioCyc
1,[AMMQLT8],[2.1.1.163],S-adenosyl-L-methionine + demethylmenaquinol-8...,ADOMET-DMK-METHYLTRANSFER-RXN,[WP_011275738.1],,BioCyc
2,"[NTP1, ATPM]","[3.6.1.3, 3.6.1.15]",ATP + H2O -> ADP + phosphate + H+,ATPASE-RXN,[WP_016931380.1],,BioCyc
3,[LDH_D],[1.1.1.28],(R)-lactate + NAD+ <- pyruvate + NADH + H+,DLACTDEHYDROGNAD-RXN,[WP_011274818.1],,BioCyc
4,[DHNAOT4],[2.5.1.74],"all-trans-octaprenyl diphosphate + 1,4-dihydro...",DMK-RXN,[WP_016931372.1],,BioCyc
...,...,...,...,...,...,...,...
233,,[6.1.1.2],1 MNXM11@MNXD1 + 1 MNXM167418@MNXD1 + 1 MNXM72...,MNXR124075,[WP_011276231.1],{'metacyc.reaction': 'TRYPTOPHAN--TRNA-LIGASE-...,MetaNetX
234,,[6.1.1.1],1 MNXM11@MNXD1 + 1 MNXM16655@MNXD1 + 1 MNXM728...,MNXR124080,[WP_011275493.1],{'metacyc.reaction': 'TYROSINE--TRNA-LIGASE-RX...,MetaNetX
235,,[6.3.2.10],1 MNXM1104679@MNXD1 + 1 MNXM1446@MNXD1 + 1 MNX...,MNXR146695,[WP_011275264.1],{'metacyc.reaction': 'UDP-NACMURALGLDAPAALIG-R...,MetaNetX
236,,[5.1.3.14],1 MNXM1101258@MNXD1 = 1 MNXM1104529@MNXD1,MNXR151539,[WP_033079611.1],"{'metacyc.reaction': 'UDPGLCNACEPIM-RXN', 'ori...",MetaNetX


### Fill model

In [4]:
# Fill model
# Unlike the KEGG and Gene examples, the return value is not bound to a name
# here; as the cell's last expression it is shown as the cell output instead.
gfbc.fill_model(model)

Adding genes to model: 100%|██████████| 13/13 [00:00<00:00, 8296.71it/s]
Trying to add missing reacs: 100%|██████████| 226/226 [02:29<00:00,  1.51it/s]


Index(['add_to_GPR', 'ec-code', 'equation', 'id', 'ncbiprotein', 'reference',
       'via'],
      dtype='object')


Adding genes to model: 100%|██████████| 88/88 [00:00<00:00, 32306.24it/s]


<Model ATCC29970>

### Statistics

In [5]:
# Get raw statistics
# `_statistics` is an underscore-prefixed attribute of the gap filler; it is
# displayed here unprocessed (a nested dict of counts, as the recorded output shows).
gfbc._statistics

{'genes': {'missing (before)': 235,
  'duplicates': 0,
  'added': 101,
  'missing (after)': 134,
  'missing (unmappable)': 0},
 'reactions': {'added (total)': 183,
  'failed to build': 40,
  'missing (before)': 240,
  'add to GPR (BioCyc)': 11,
  'mapped2MNX': 227,
  'mapped2BiGG': 0,
  'remaining_unmapped': 2},
 'metabolites': {}}