In [1]:
import numpy, pandas, datetime
import cobra, cobra.test

In [2]:
import multiprocessing, multiprocessing.pool
from multiprocessing import Process, Queue

In [3]:
import matplotlib, matplotlib.pyplot
# Global plotting defaults for every figure in this notebook, one setting per line.
matplotlib.rcParams.update(
    {
        'font.size': 20,
        'font.family': 'FreeSans',
        'xtick.labelsize': 30,
        'ytick.labelsize': 30,
        'axes.labelsize': 40,
        'figure.figsize': (12, 8),
    }
)

In [4]:
def growth_coupled_analysis(task):
    
    """
    This function performs the growth-coupled production analysis for a double gene knockout.

    It takes as input a list as [first_gene_index, second_gene_index, metabolite_of_interest, biomass_reaction_label, model]
        - first_gene_index, second_gene_index: positions in model.genes of the two genes to knock out.
        - metabolite_of_interest: value assigned to model.objective for the production step.
        - biomass_reaction_label: id of the reaction whose lower bound is set to the knockout growth.
        - model: the model to analyse. It is used as a context manager; in cobra this
          reverts the changes made inside the block (knockouts, objective, bound) on exit.

    It gives as output a list as [first_gene_index, second_gene_index, growth, min_production, max_production]
        - [i, j, 0, 0, 0] when the knockout growth optimization is not optimal.
        - min_production / max_production is NaN when the corresponding production
          optimization is not optimal, so a failed solve is never mistaken for a real value.
    """
    
    i = task[0]
    j = task[1]
    metabolite_of_interest = task[2]
    biomass_reaction_label = task[3]
    model = task[4]
    
    with model:
                
        # KO
        model.genes[i].knock_out()
        model.genes[j].knock_out()
        solution = model.optimize()
        if solution.status != 'optimal':
            # no optimal growth for this knockout pair: keep the original placeholder row
            return [i, j, 0, 0, 0]

        ko_growth = solution.objective_value

        # growth-coupled production: force at least the KO growth, then bound production
        model.objective = metabolite_of_interest
        model.reactions.get_by_id(biomass_reaction_label).lower_bound = ko_growth

        productions = {}
        for sense in ('maximize', 'minimize'):
            production_solution = model.optimize(objective_sense=sense)
            if production_solution.status == 'optimal':
                productions[sense] = production_solution.objective_value
            else:
                # the objective value of a non-optimal solve is not meaningful
                productions[sense] = float('nan')

        result = [i, j, ko_growth, productions['minimize'], productions['maximize']]

    return result

In [5]:
def printt(message):

    """
    Print a message prefixed with the current local date and time.

    The line has the form "YYYY-MM-DD HH:MM:SS <space><tab><space> message".
    The timestamp is formatted on its own, so a '%' inside the message is
    printed literally instead of being read as a strftime directive.

    Returns None.
    """

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print("{} \t {}".format(timestamp, message))

    return None

# 1. load and explore the model

In [7]:
! git clone https://github.com/steinng/rmarinus

Cloning into 'rmarinus'...
remote: Enumerating objects: 37, done.[K
remote: Counting objects: 100% (37/37), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 37 (delta 6), reused 23 (delta 2), pack-reused 0[K
Receiving objects: 100% (37/37), 2.70 MiB | 5.28 MiB/s, done.
Resolving deltas: 100% (6/6), done.


In [8]:
# Read the SBML model from the cloned rmarinus folder and display its summary.
model = cobra.io.read_sbml_model("rmarinus/Rmarinus_578_model.xml")
model.summary()

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled


Metabolite,Reaction,Flux,C-Number,C-Flux
btn_e,EX_btn_e,5.729e-05,10,0.00%
ca2_e,EX_ca2_e,0.001172,0,0.00%
cbl1_e,EX_cbl1_e,5.729e-05,62,0.03%
cobalt2_e,EX_cobalt2_e,0.0007812,0,0.00%
cu2_e,EX_cu2_e,0.0007812,0,0.00%
fe2_e,EX_fe2_e,0.002,0,0.00%
fe3_e,EX_fe3_e,0.001771,0,0.00%
glc__D_e,EX_glc_e,2.3,6,99.97%
h_e,EX_h_e,4.658,0,0.00%
k_e,EX_k_e,0.04437,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
5mta_c,DM_5mta,-0.009684,11,2.96%
co2_e,EX_co2_e,-3.496,1,97.04%
h2o_e,EX_h2o_e,-11.39,0,0.00%


In [9]:
# Display the objective currently set on the model.
model.objective

<optlang.glpk_interface.Objective at 0x7fd84400cf70>

In [10]:
# Optimize the unmodified (wild-type) model; wt_solution.objective_value is reused
# later as the upper end of the WT biomass range.
wt_solution = model.optimize()
print(wt_solution.objective_value)

0.26039859518718955


In [17]:
# Count the genes in the model; the value is reported in the parallel-run log message.
number_of_genes = len(model.genes)
print(number_of_genes)

578


In [12]:
# create a demand reaction for CAROT_RMAR_c (add_boundary is called with type="demand");
# the resulting reaction is the one later used as the production objective.
# NOTE(review): re-running this cell on the same model object presumably fails
# because the boundary reaction already exists — confirm.
model.add_boundary(model.metabolites.get_by_id("CAROT_RMAR_c"), type="demand")

0,1
Reaction identifier,DM_CAROT_RMAR_c
Name,Carotenoids in R. marinus demand
Memory address,0x07fd855597370
Stoichiometry,CAROT_RMAR_c -->  Carotenoids in R. marinus -->
GPR,
Lower bound,0
Upper bound,1000.0


In [13]:
# Show the model's demand reactions: first the whole collection, then one per line.
print("demands", model.demands)
for reaction in model.demands:
  print(reaction)

demands [<Reaction DM_glcur at 0x7fd843a91cd0>, <Reaction DM_5mta at 0x7fd843bbfd90>, <Reaction DM_CAROT_RMAR_c at 0x7fd855597370>]
DM_glcur: glcur_c --> 
DM_5mta: 5mta_c --> 
DM_CAROT_RMAR_c: CAROT_RMAR_c --> 


# 2. growth-coupled metabolite production exploration

## 2.1. define metabolite of interest and biomass function label

In [14]:
# Despite its name, this is a reaction id (the demand reaction added earlier);
# it is assigned to model.objective for the production step.
metabolite_of_interest = 'DM_CAROT_RMAR_c'
# Id of the reaction whose bounds are constrained to the growth value in the analyses.
biomass_reaction_label = 'BIOMASS'

## 2.2. run serial

The cell below has been running for more than 4500 minutes.

- 5 x 5 genes: 313 ms, no good hits
- 50 x 50 genes: 40 sec, hits are very low (~1e-14)
- 250 x 250 genes: 17 min

In [26]:
# %%time
# number_of_genes = len(model.genes)
# results = []
# for i in range(len(model.genes[:250])):
#     for j in range(len(model.genes[:250])):
#         if i < j:
            
#             task = [i, j, metabolite_of_interest, biomass_reaction_label, model]
#             result = growth_coupled_analysis(task)
#             results.append(result)         



CPU times: user 17min 18s, sys: 5.23 s, total: 17min 23s
Wall time: 17min 25s


In [23]:
# df = pandas.DataFrame(results, columns=['i', 'j', 'KO growth', 'min production', 'max production'])

In [24]:
# df.head()

Unnamed: 0,i,j,KO growth,min production,max production
0,0,1,0.260399,0.0,2.166217e-17
1,0,2,0.260399,0.0,2.166217e-17
2,0,3,0.260399,0.0,2.166217e-17
3,0,4,0.260399,0.0,2.166217e-17
4,0,5,0.260399,0.0,2.166217e-17


In [25]:
# df.sort_values(by=['min production'], ascending=False)

Unnamed: 0,i,j,KO growth,min production,max production
767,19,27,1.201247e-14,1.808115e-14,2.161417e-14
766,19,26,3.686903e-14,1.801723e-14,2.212486e-14
850,22,26,-1.380202e-13,1.179054e-14,-8.074452e-14
851,22,27,-1.581642e-13,1.179054e-14,-8.074452e-14
1112,34,42,-1.282637e-13,2.917762e-16,1.565530e-01
...,...,...,...,...,...
41,0,42,1.272818e-13,-1.773945e-16,1.565530e-01
168,3,28,4.571492e-17,-2.000673e-16,8.407418e-17
641,15,27,3.672151e-16,-2.013038e-16,-5.440382e-17
1133,35,49,2.058432e-16,-2.945813e-16,1.537860e-01


### 2.2.1. plot production envelope

In [None]:
# WT: production reached at each fixed growth rate between 0 and the wild-type optimum
plotting_wt_biomass = []
wt_production = []

biomass_space = numpy.linspace(0, wt_solution.objective_value, 100)

with model:
    model.objective = metabolite_of_interest
    for target in biomass_space:
        # pin the biomass flux to exactly the target value
        model.reactions.get_by_id(biomass_reaction_label).bounds = (target, target)
        solution = model.optimize()
        if solution.status != 'optimal':
            # skip growth values for which no optimal solution exists
            continue
        plotting_wt_biomass.append(target)
        wt_production.append(solution.objective_value)

In [None]:
# KO: min/max production versus minimum required growth for one double knockout
i = 21
j = 126

plotting_ko_biomass = []
max_productions = []
min_productions = []

with model:
    model.genes[i].knock_out()
    model.genes[j].knock_out()
    ko_solution = model.optimize()

    if ko_solution.status != 'optimal':
        # without an optimal KO growth there is no meaningful biomass range to scan
        print('KO {} x {} has no optimal growth solution ({}); envelope left empty'.format(i, j, ko_solution.status))
    else:
        biomass_space = numpy.linspace(0, ko_solution.objective_value, 100)
        with model:
            model.objective = metabolite_of_interest
            biomass_reaction = model.reactions.get_by_id(biomass_reaction_label)
            for target in biomass_space:
                biomass_reaction.lower_bound = target
                max_solution = model.optimize(objective_sense='maximize')
                min_solution = model.optimize(objective_sense='minimize')
                # keep a point only when both solves are optimal, as the WT envelope does;
                # the objective value of a non-optimal solve is not meaningful
                if max_solution.status == 'optimal' and min_solution.status == 'optimal':
                    max_production = max_solution.objective_value
                    min_production = min_solution.objective_value
                    plotting_ko_biomass.append(target)
                    max_productions.append(max_production)
                    min_productions.append(min_production)

In [None]:
# make figure: WT production curve with the KO min/max production band on one set of axes
fig, ax = matplotlib.pyplot.subplots()
ax.plot(plotting_wt_biomass, wt_production, '-', color='black', lw=4, label='WT')
ax.fill_between(plotting_ko_biomass, min_productions, max_productions, color='orange', alpha=0.5, label='KO')

ax.set_xlabel('Growth')
ax.set_ylabel('Production')
ax.grid(ls=':')
ax.legend()

fig.tight_layout()

## 2.3. run in parallel environment

Using multiprocessing can be difficult because errors raised inside the worker function are hard to trace. Consider testing the function with the serial code from the previous section first, to avoid errors while executing the parallel approach.

In [15]:
# Size of the multiprocessing pool created below (its workers are processes, despite the name).
number_of_threads = 8

In [21]:
printt('working with {} genes'.format(number_of_genes))

# one task per unordered gene pair (i < j) among the first five genes of the model
tasks = []
tested_gene_count = len(model.genes[:5])
for i in range(tested_gene_count):
    for j in range(i + 1, tested_gene_count):
        task = [i, j, metabolite_of_interest, biomass_reaction_label, model]
        tasks.append(task)
printt('working with {} gene pairs'.format(len(tasks)))

2022-04-16 17:42:48 	 working with 578 genes
2022-04-16 17:42:48 	 working with 10 gene pairs


In [22]:
%%time
printt('entering a parallel world of {} threads'.format(number_of_threads))
hydra = multiprocessing.pool.Pool(number_of_threads)
hydra_output = hydra.map(growth_coupled_analysis, tasks)
hydra.close()
printt('completed {} tasks'.format(len(hydra_output)))

2022-04-16 17:42:53 	 entering a parallel world of 8 threads


Process SpawnPoolWorker-57:
Traceback (most recent call last):
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/queues.py", line 358, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'growth_coupled_analysis' on <module '__main__' (built-in)>
Process SpawnPoolWorker-58:
Traceback (most recent call last):
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/adrian/anaconda3/lib/

KeyboardInterrupt: 

unk = read(handle, remaining)
KeyboardInterrupt
Process SpawnPoolWorker-69:
Traceback (most recent call last):
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/queues.py", line 355, in get
    with self._rlock:
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
Process SpawnPoolWorker-70:
Traceback (most recent call last):
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/adrian/anaconda3/lib/python3.8/multiprocessing/process.py", line 10

In [None]:
# Tabulate the parallel results and show them ordered by minimum production, highest first.
df = pandas.DataFrame(
    hydra_output,
    columns=['i', 'j', 'KO growth', 'min production', 'max production'],
)
df.sort_values(by='min production', ascending=False)