# info
This notebook finds the gene pairs that are conditionally esential. <br>
Iterate over pairs and state their essentiality likelihood.

In [1]:
import os, pickle, cobra

# 0. user-defined variables

In [2]:
# necio5
simulation_dir = '/Users/adrian/projects/hpc_results/constrained/double_KO/deployments/'
model_file = '/Users/adrian/projects/mevu/data/model/Recon3DModel_301.mat'
heatmap_info_file = '/Users/adrian/projects/mevu/results/heatmap.doubleKO.pickle'

In [3]:
# drangajokull
simulation_dir = '/home/adrian/projects/endomevu/data/hpc_results/constrained/double_KO/deployments/'
model_file = '/home/adrian/projects/endomevu/data/model/Recon3DModel_301.mat'
heatmap_info_file = '/home/adrian/projects/endomevu/results/heatmap.doubleKO.pickle'

# 1. simulate original model

In [4]:
%%time
model = cobra.io.load_matlab_model(model_file)

CPU times: user 2min 33s, sys: 93.4 ms, total: 2min 33s
Wall time: 2min 33s


In [5]:
optimization_results = model.optimize()
original_growth_value = optimization_results.objective_value # 755.0032155506631

In [6]:
print(len(model.genes))
expected_pairs = int(((2248*2248)-2248)/2)
print(expected_pairs)

2248
2525628


# 1. read data

In [7]:
problem_size = len(model.genes)
essentiality = {}
gene_pairs = []

for i in range(problem_size):
    for j in range(problem_size):
        if i < j:
            a = model.genes[i].id
            b = model.genes[j].id
            gene_pair = (a, b)
            
            gene_pairs.append(gene_pair)
            essentiality[gene_pair] = []
                
print('', len(gene_pairs))
print(gene_pairs[:10])

 2525628
[('26.1', '8639.1'), ('26.1', '314.2'), ('26.1', '314.1'), ('26.1', '1591.1'), ('26.1', '1594.1'), ('26.1', '10993.1'), ('26.1', '6818.1'), ('26.1', '89874.1'), ('26.1', '160287.1'), ('26.1', '55293.1')]


In [8]:
simulation_folders = next(os.walk(simulation_dir))[1]
simulation_folders.sort()

## 1.1. iterate over samples

In [12]:

### necio5 | 10 conditions | Wall time: 24 min
### drangajokull | 10 conditions | Wall time: 27 min
### drangajokull | 100 conditions | Wall time: x hours | maybe 4.5 hours
### drangajokull | all conditions (xx) | Wall time: xx hours | maybe 4.8 days

number_of_working_conditions = 100 # should be len(simulation_folders) = 2590
for i in range(number_of_working_conditions):
    
    #print(i, end=' ')
    simulation_folder = simulation_folders[i]
    print(i)
    print(simulation_folder)
    
    # 1. read the jar file
    pickle_files = os.listdir(simulation_dir + simulation_folder + '/results/')
    condition_name = pickle_files[0]
    #print('working with {}...'.format(condition_name))
    if len(pickle_files) != 1:
        raise ValueError('Found a diffent number of expected files')
    jar = simulation_dir + simulation_folder + '/results/' + condition_name
    f = open(jar,'rb')
    [sampleID, result, double_ko_results] = pickle.load(f)
    f.close()
    
    # 2. add gene essentiality to each pair
    filtered = double_ko_results[double_ko_results['growth'] < original_growth_value/2]
    #print(filtered.shape)
    for pair in filtered['ids']:
        cobra_pair_list = list(pair)
        if len(cobra_pair_list) == 2:
            a = cobra_pair_list[0]; b = cobra_pair_list[1]
            forward = (a, b); reverse = (b, a)

            if forward in gene_pairs:
                essentiality[forward].append(condition_name)
            if reverse in gene_pairs:
                essentiality[reverse].append(condition_name)

0
a30
1
a31
2
a32
3
a33
4
a34
5
a35
6
a36
7
a37
8
a38
9
a39
10
a40
11
a41
12
a42
13
a43
14
a44
15
a45
16
a46
17
a47
18
a48
19
a49
20
a50
21
a51
22
a52
23
a53
24
a54
25
a55


IndexError: list index out of range

# 2. store data

In [10]:
jar = heatmap_info_file
f = open(jar,'wb')
pickle.dump(essentiality, f)
f.close()