# **OLIVIA - Immunization**

In [51]:
import sys
sys.path.append('../../olivia')

!pip install -r requirements.txt



# BIOCONDUCTOR TEST

### Immunization

In [52]:
from olivia.model import OliviaNetwork
# Explicit names instead of `import *`: every helper and metric class used in this
# notebook can be traced back to this import, and a missing name fails right here.
from olivia.immunization import (
    DependentsCount,
    Impact,
    Reach,
    Surface,
    failure_vulnerability,
    immunization_delta,
    iset_delta_set_reach,
    iset_naive_ranking,
    iset_random,
    iset_sap,
)

# Load the Bioconductor network model from the .olv file; all cells below use it.
bioconductor = OliviaNetwork(r'aux_data/bioconductor_model.olv')

In [53]:
# Failure vulnerability of the network before any package is immunized.
failure_vulnerability(bioconductor)

Computing Reach
     Processing node: 3K      


24.817326873753206

In [54]:
# Immunize two packages at once, using the default cost metric and algorithm.
immunization_delta(bioconductor,{'methods','BiocGenerics'})

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 3K      


1.1658592191507537

In [55]:
# Immunization target reused by the two timing cells that follow.
target = {'methods','BiocGenerics'}

In [56]:
# Time the default algorithm (its log shows a model rebuild and a Reach recomputation).
%time immunization_delta(bioconductor, target)

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 3K      
CPU times: user 242 ms, sys: 272 µs, total: 243 ms
Wall time: 239 ms


1.1658592191507537

In [57]:
# Time the 'analytic' algorithm on the same target, for comparison with the default algorithm.
%time immunization_delta(bioconductor, target, algorithm='analytic')

CPU times: user 45.8 ms, sys: 0 ns, total: 45.8 ms
Wall time: 44.6 ms


1.1658592191507553

In [58]:
# Same two-package target, measured with the Impact metric as cost metric.
immunization_delta(bioconductor, {'methods', 'BiocGenerics'}, cost_metric=Impact)

Computing Impact
     Processing node: 2K      

     Processing node: 3K      
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Impact
     Processing node: 3K      


6.757195782274152

In [59]:
# Impact-based measurement for a single package.
immunization_delta(bioconductor,{'a4'}, cost_metric = Impact)

Impact retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Impact
     Processing node: 3K      


0.022513536620117236

### Selecting Immunization targets - Subcritical networks


In [60]:
# Baseline vulnerability again, as the reference value for the target selections in this section.
failure_vulnerability(bioconductor)

Reach retrieved from metrics cache


24.817326873753206

In [61]:
# Immunize a set produced by iset_random with size argument 20.
# NOTE(review): no random seed is set in the visible cells, so this value presumably differs between runs.
immunization_delta(bioconductor, iset_random(bioconductor, 20), algorithm='analytic')

0.963237389569678

In [62]:
# Same as the 20-element draw, but with size argument 1000.
# NOTE(review): presumably not reproducible between runs, since no seed is set in the visible cells.
immunization_delta(bioconductor, iset_random(bioconductor, 1000), algorithm='analytic')

12.744941578797379

In [63]:
# Size argument 20 again, this time with indirect=True; in the recorded output the
# result is far higher than for the plain 20-element draw (7.06 vs 0.96).
# NOTE(review): presumably not reproducible between runs, since no seed is set in the visible cells.
immunization_delta(bioconductor, iset_random(bioconductor, 20, indirect=True), algorithm='analytic')

7.063265887717298

In [64]:
# Top 20 entries of the DependentsCount metric, shown below as (name, count) pairs.
# NOTE: this rebinds `target`, which held a set of package names in earlier cells.
target = bioconductor.get_metric(DependentsCount).top(20)
target

Computing Dependents Count


[('R', 1795),
 ('methods', 1492),
 ('stats', 1293),
 ('utils', 1063),
 ('ggplot2', 717),
 ('S4Vectors', 661),
 ('graphics', 639),
 ('grDevices', 593),
 ('GenomicRanges', 539),
 ('IRanges', 515),
 ('SummarizedExperiment', 509),
 ('BiocGenerics', 480),
 ('Biobase', 459),
 ('dplyr', 433),
 ('BiocParallel', 323),
 ('GenomeInfoDb', 308),
 ('data.table', 298),
 ('Biostrings', 291),
 ('parallel', 270),
 ('Matrix', 267)]

In [65]:
# Keep only the package names from the (name, count) pairs in `target`.
immunization_delta(bioconductor, {name for name, _count in target}, algorithm='analytic')

10.490738102023368

In [66]:
# Top 20 by DependentsCount again, this time selected through iset_naive_ranking;
# the recorded result equals the one obtained from the manually built set (10.4907...).
immunization_delta(bioconductor, 
                   iset_naive_ranking(20, bioconductor.get_metric(DependentsCount)),
                   algorithm='analytic')

DependentsCount retrieved from metrics cache


10.490738102023368

In [67]:
# Same ranking size (20), but ranked by the Reach metric instead of DependentsCount.
immunization_delta(bioconductor, 
                   iset_naive_ranking(20, bioconductor.get_metric(Reach)),
                   algorithm='analytic')

Reach retrieved from metrics cache


8.719293245939014

In [68]:
# Build an immunization set with iset_delta_set_reach (its log shows that the Reach
# and Surface metrics are used) and print the selected package names.
delta_set = iset_delta_set_reach(bioconductor)
print(delta_set)

Reach retrieved from metrics cache
Computing Surface
     Processing node: 0K      
Reach retrieved from metrics cache
Surface retrieved from metrics cache
{'ensembldb', 'Biostrings', 'limma', 'GenomicAlignments', 'AnnotationHub', 'XVector', 'GSEABase', 'Biobase', 'rtracklayer', 'DelayedArray', 'IRanges', 'VariantAnnotation', 'BiocParallel', 'S4Vectors', 'BiocIO', 'biomaRt', 'GenomicFeatures', 'BiocGenerics', 'DelayedMatrixStats', 'DESeq2', 'SingleCellExperiment', 'bumphunter', 'MatrixGenerics', 'BiocFileCache', 'annotate', 'AnnotationDbi', 'GenomicRanges', 'KEGGREST', 'minfi', 'genefilter', 'biovizBase', 'SummarizedExperiment', 'scuttle', 'Gviz', 'clusterProfiler', 'enrichplot', 'Rsamtools', 'BSgenome', 'HDF5Array', 'GenomeInfoDb'}


In [69]:
# Immunize the complete delta set.
immunization_delta(bioconductor, delta_set, algorithm='analytic')

13.260758050726702

In [70]:
# Top 4 by DependentsCount with subset=delta_set; all four printed names are members
# of delta_set, so the subset argument presumably restricts the candidates to it.
smaller = iset_naive_ranking(4, bioconductor.get_metric(DependentsCount), subset = delta_set)
print(smaller)
immunization_delta(bioconductor, smaller, algorithm='analytic')

DependentsCount retrieved from metrics cache
{'SummarizedExperiment', 'IRanges', 'GenomicRanges', 'S4Vectors'}


2.7771444856084355

Notice that the SAP approach is completely ineffective in subcritical networks.

In [71]:
# Immunize the set returned by iset_sap (the SAP approach mentioned in the note above).
immunization_delta(bioconductor, iset_sap(bioconductor), algorithm='analytic')

0.0

Even the immunization of the largest SCC in full has a negligible effect on the network's vulnerability

In [72]:
# sorted_clusters()[0] is the first cluster of the sorted list; per the note above it is the largest SCC.
immunization_delta(bioconductor, bioconductor.sorted_clusters()[0], algorithm='analytic')

0.03562268452550584

### Selecting Immunization targets - Advanced

#### Manipulating immunization sets

In [73]:
# Immunization sets can be combined with set operators: here, the members of
# delta_set that are also among the top 100 by DependentsCount.
delta_set & iset_naive_ranking(100, bioconductor.get_metric(DependentsCount))

DependentsCount retrieved from metrics cache


{'AnnotationDbi',
 'AnnotationHub',
 'BSgenome',
 'Biobase',
 'BiocFileCache',
 'BiocGenerics',
 'BiocParallel',
 'Biostrings',
 'DESeq2',
 'DelayedArray',
 'GSEABase',
 'GenomeInfoDb',
 'GenomicAlignments',
 'GenomicFeatures',
 'GenomicRanges',
 'Gviz',
 'IRanges',
 'Rsamtools',
 'S4Vectors',
 'SingleCellExperiment',
 'SummarizedExperiment',
 'VariantAnnotation',
 'biomaRt',
 'limma',
 'rtracklayer'}

In [74]:
# Recompute the intersection instead of pasting the previous cell's output, so the
# target keeps following delta_set and the DependentsCount ranking if the model changes.
top_ranked_delta = delta_set & iset_naive_ranking(100, bioconductor.get_metric(DependentsCount))
immunization_delta(bioconductor, top_ranked_delta, algorithm='analytic')

11.684525505842121

In [75]:
# Top 10 by DependentsCount alone, without intersecting with delta_set.
immunization_delta(bioconductor,
                   iset_naive_ranking(10, bioconductor.get_metric(DependentsCount)), 
                   algorithm='analytic')

DependentsCount retrieved from metrics cache


4.9344542604730695

#### Ranking by compound metrics

In [76]:
# Metric objects are multiplied here (Reach * Surface) and the product is passed
# to iset_naive_ranking; the top 50 of that compound ranking are immunized.
upper = iset_naive_ranking(50, bioconductor.get_metric(Reach)*bioconductor.get_metric(Surface))
immunization_delta(bioconductor, upper, algorithm='analytic')

Reach retrieved from metrics cache
Surface retrieved from metrics cache


15.991165574237675

In [77]:
# Number of packages in delta_set.
len(delta_set)

40

In [78]:
# Rebind `upper` to the top 18 of the same Reach * Surface ranking (it held the top 50 before).
upper = iset_naive_ranking(18, bioconductor.get_metric(Reach)*bioconductor.get_metric(Surface))

Reach retrieved from metrics cache
Surface retrieved from metrics cache


In [79]:
# Members of delta_set that are not in `upper`.
# NOTE(review): this was labelled "void set", but the recorded output lists 22 names.
# With 40 packages in delta_set and presumably 18 in `upper`, it is `upper - delta_set`
# that would be empty -- confirm which expression was intended.
delta_set-upper

{'AnnotationHub',
 'BiocFileCache',
 'BiocIO',
 'BiocParallel',
 'DESeq2',
 'DelayedMatrixStats',
 'GSEABase',
 'Gviz',
 'HDF5Array',
 'MatrixGenerics',
 'SingleCellExperiment',
 'VariantAnnotation',
 'annotate',
 'biovizBase',
 'bumphunter',
 'clusterProfiler',
 'enrichplot',
 'ensembldb',
 'genefilter',
 'limma',
 'minfi',
 'scuttle'}

#### Brute force and Greedy selection

In [80]:
# Brute force: evaluate every member of delta_set on its own.
[
    (package, immunization_delta(bioconductor, {package}, algorithm='analytic'))
    for package in delta_set
]

[('ensembldb', 0.10686805357651753),
 ('Biostrings', 1.1137076090054145),
 ('limma', 0.18324308919920204),
 ('GenomicAlignments', 0.40267882587631804),
 ('AnnotationHub', 0.19606725562838415),
 ('XVector', 0.7056141350812197),
 ('GSEABase', 0.14933029353092048),
 ('Biobase', 0.4970076944998575),
 ('rtracklayer', 0.6702764320319179),
 ('DelayedArray', 0.31689940153889995),
 ('IRanges', 0.4066685665431747),
 ('VariantAnnotation', 0.3120547164434312),
 ('BiocParallel', 1.17041892277002),
 ('S4Vectors', 0.4351667141635794),
 ('BiocIO', 0.12852664576802508),
 ('biomaRt', 1.048731832430892),
 ('GenomicFeatures', 0.8466799658022228),
 ('BiocGenerics', 0.6015958962667427),
 ('DelayedMatrixStats', 0.0957537760045597),
 ('DESeq2', 0.13337133086349387),
 ('SingleCellExperiment', 0.12653177543459676),
 ('bumphunter', 0.14733542319749215),
 ('MatrixGenerics', 0.42832715873468225),
 ('BiocFileCache', 0.6027358221715589),
 ('annotate', 0.16984895981761186),
 ('AnnotationDbi', 1.207181533200342),
 ('G

In [81]:
# Delta set with 'Biostrings' removed; compare with the value recorded for the full delta set (13.2607...).
immunization_delta(bioconductor, delta_set-{'Biostrings'}, algorithm='analytic')

12.834995725277857

#### Using other centrality measures

In [82]:
import networkx as nx

# Using the NetworkX implementation of betweenness centrality.
# The NetworkX network underlying the repository is available through the `network`
# attribute of the OliviaNetwork object (here: bioconductor.network).
# Here we restrict the centrality computation to the subgraph induced by the SAP set:
sap = iset_sap(bioconductor)
sap_betweenness = nx.betweenness_centrality(bioconductor.network.subgraph(sap))

In [83]:
from olivia.packagemetrics import MetricStats

# Build a MetricStats object to use in iset_naive_ranking.
# The MetricStats constructor accepts a dictionary of values such as those produced by NetworkX.
# NOTE(review): this rebinds sap_betweenness from the NetworkX result to a MetricStats
# object, so re-running this cell on its own would wrap an already wrapped object.
sap_betweenness = MetricStats(sap_betweenness)

In [84]:
# Immunize the top 10 of the betweenness ranking. No algorithm is passed, so the
# default one runs (the log shows a model rebuild and a Reach recomputation).
immunization_delta(bioconductor, iset_naive_ranking(10, sap_betweenness))

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 3K      


0.0