# **OLIVIA - Immunization**

In [10]:
import sys
sys.path.append('../../olivia')

!pip install -r requirements.txt



# BIOCONDUCTOR TEST

### Immunization

In [11]:
from olivia.model import OliviaNetwork
from olivia.immunization import *

bioconductor = OliviaNetwork(r'../olivia_finder/results/olivia_prebuilts/bioconductor.olv')

In [12]:
failure_vulnerability(bioconductor)

Computing Reach
     Processing node: 3K      


24.817326873753206

In [13]:
immunization_delta(bioconductor,{'methods','BiocGenerics'})

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 3K      


1.1658592191507537

In [14]:
target = {'methods','BiocGenerics'}

In [15]:
%time immunization_delta(bioconductor, target)

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 3K      
CPU times: user 307 ms, sys: 11.5 ms, total: 318 ms
Wall time: 313 ms


1.1658592191507537

In [16]:
%time immunization_delta(bioconductor, target, algorithm='analytic')

CPU times: user 44.5 ms, sys: 225 µs, total: 44.7 ms
Wall time: 43.6 ms


1.1658592191507553

In [17]:
immunization_delta(
    bioconductor,
    {'methods','BiocGenerics'}, 
    cost_metric = Impact
)

Computing Impact
     Processing node: 3K      
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Impact
     Processing node: 3K      


6.757195782274152

In [18]:
immunization_delta(bioconductor,{'a4'}, cost_metric = Impact)

Impact retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Impact
     Processing node: 3K      


0.022513536620117236

### Selecting Immunization targets - Subcritical networks


In [19]:
failure_vulnerability(bioconductor)

Reach retrieved from metrics cache


24.817326873753206

In [20]:
immunization_delta(bioconductor, iset_random(bioconductor, 20), algorithm='analytic')

0.28213166144200624

In [21]:
immunization_delta(bioconductor, iset_random(bioconductor, 1000), algorithm='analytic')

13.868623539469935

In [22]:
immunization_delta(bioconductor, iset_random(bioconductor, 20, indirect=True), algorithm='analytic')

5.237389569677971

In [23]:
target = bioconductor.get_metric(DependentsCount).top(20)
target

Computing Dependents Count


[('R', 1795),
 ('methods', 1492),
 ('stats', 1293),
 ('utils', 1063),
 ('ggplot2', 717),
 ('S4Vectors', 661),
 ('graphics', 639),
 ('grDevices', 593),
 ('GenomicRanges', 539),
 ('IRanges', 515),
 ('SummarizedExperiment', 509),
 ('BiocGenerics', 480),
 ('Biobase', 459),
 ('dplyr', 433),
 ('BiocParallel', 323),
 ('GenomeInfoDb', 308),
 ('data.table', 298),
 ('Biostrings', 291),
 ('parallel', 270),
 ('Matrix', 267)]

In [24]:
immunization_delta(bioconductor, {l[0] for l in target}, algorithm='analytic')

10.490738102023368

In [25]:
immunization_delta(bioconductor, 
                   iset_naive_ranking(20, bioconductor.get_metric(DependentsCount)),
                   algorithm='analytic')

DependentsCount retrieved from metrics cache


10.490738102023368

In [26]:
immunization_delta(bioconductor, 
                   iset_naive_ranking(20, bioconductor.get_metric(Reach)),
                   algorithm='analytic')

Reach retrieved from metrics cache


8.719293245939014

In [27]:
delta_set = iset_delta_set_reach(bioconductor)
print(delta_set)

Reach retrieved from metrics cache
Computing Surface
     Processing node: 0K      
Reach retrieved from metrics cache
Surface retrieved from metrics cache
{'DelayedArray', 'BiocIO', 'SummarizedExperiment', 'GenomeInfoDb', 'rtracklayer', 'IRanges', 'clusterProfiler', 'limma', 'annotate', 'genefilter', 'BiocGenerics', 'DelayedMatrixStats', 'VariantAnnotation', 'GenomicAlignments', 'AnnotationDbi', 'XVector', 'ensembldb', 'bumphunter', 'S4Vectors', 'Biostrings', 'BiocFileCache', 'KEGGREST', 'MatrixGenerics', 'biovizBase', 'GSEABase', 'Gviz', 'Rsamtools', 'minfi', 'enrichplot', 'HDF5Array', 'DESeq2', 'GenomicRanges', 'GenomicFeatures', 'scuttle', 'BSgenome', 'Biobase', 'SingleCellExperiment', 'AnnotationHub', 'BiocParallel', 'biomaRt'}


In [28]:
immunization_delta(bioconductor, delta_set, algorithm='analytic')

13.260758050726702

In [29]:
smaller = iset_naive_ranking(4, bioconductor.get_metric(DependentsCount), subset = delta_set)
print(smaller)
immunization_delta(bioconductor, smaller, algorithm='analytic')

DependentsCount retrieved from metrics cache
{'S4Vectors', 'SummarizedExperiment', 'GenomicRanges', 'IRanges'}


2.7771444856084355

### 03 - Selecting Immunization targets - Supercritical networks

Supercritical packet dependency networks are those that contain a strongly connected (SCC) component of significant size. Maven is one of them.

In [30]:
maven = OliviaNetwork(r'data/maven-2020-01-12.olv')

FileNotFoundError: [Errno 2] No such file or directory: 'data/maven-2020-01-12.olv'

Supercritical networks are much more vulnerable, because the big SCC contributes greatly to the propagation of defects.

In [None]:
failure_vulnerability(maven)

Computing Reach
     Processing node: 124K      


1805.5391236430194

Lets see what happens if we immunize the delta set for Maven:

In [None]:
maven_delta_set = iset_delta_set_reach (maven)
immunization_delta(maven, maven_delta_set)

Reach retrieved from metrics cache
Computing Surface
     Processing node: 0K        
Reach retrieved from metrics cache
Surface retrieved from metrics cache
Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 119K      


1800.3279159303206

In [None]:
len(maven_delta_set)

6540

In [None]:
sap = iset_sap(maven)

immunization_delta(maven, sap)

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 125K      


1695.3186300807877

In [None]:
len(sap)

351

In [None]:
immunization_delta(maven, iset_naive_ranking(100, maven.get_metric(DependentsCount), subset=sap))

DependentsCount retrieved from metrics cache
Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 125K      


1544.4391883362787

Notice that the SAP approach is completely innefective in subcritical networks

In [None]:
immunization_delta(bioconductor, iset_sap(bioconductor), algorithm='analytic')

0.013040223771815782

Even the immunization of the largest SCC in full has a negligible effect on the network's vulnerability

In [None]:
immunization_delta(bioconductor, bioconductor.sorted_clusters()[0], algorithm='analytic')

0.019717921443485796

### Selecting Immunization targets - Advanced

#### Manipulating immunization sets

In [None]:
delta_set & iset_naive_ranking(100, bioconductor.get_metric(DependentsCount))

DependentsCount retrieved from metrics cache


{'AnnotationDbi',
 'AnnotationHub',
 'BSgenome',
 'Biobase',
 'BiocFileCache',
 'BiocGenerics',
 'BiocParallel',
 'Biostrings',
 'DESeq2',
 'DelayedArray',
 'GSEABase',
 'GenomeInfoDb',
 'GenomicAlignments',
 'GenomicFeatures',
 'GenomicRanges',
 'Gviz',
 'IRanges',
 'Rsamtools',
 'S4Vectors',
 'SingleCellExperiment',
 'SummarizedExperiment',
 'VariantAnnotation',
 'biomaRt',
 'limma',
 'rtracklayer'}

In [None]:
immunization_delta(bioconductor, {'boto3',
                         'configparser',
                         'cryptography',
                         'jsonschema',
                         'packaging',
                         'pbr',
                         'pytest',
                         'requests',
                         'setuptools',
                         'wheel'}, algorithm='analytic')

5.7339360989638735

In [None]:
immunization_delta(bioconductor,
                   iset_naive_ranking(10, bioconductor.get_metric(DependentsCount)), 
                   algorithm='analytic')

DependentsCount retrieved from metrics cache


3.7312768388291375

#### Ranking by compound metrics

In [None]:
upper = iset_naive_ranking(50, bioconductor.get_metric(Reach)*bioconductor.get_metric(Surface))
immunization_delta(bioconductor, upper, algorithm='analytic')

Reach retrieved from metrics cache
Surface retrieved from metrics cache


8.024760666587873

In [None]:
len(delta_set)

18

In [None]:
upper = iset_naive_ranking(18, bioconductor.get_metric(Reach)*bioconductor.get_metric(Surface))

Reach retrieved from metrics cache
Surface retrieved from metrics cache


In [None]:
# void set
delta_set-upper

set()

#### Brute force and Greedy selection

In [None]:
[(i, immunization_delta(bioconductor, {i}, algorithm='analytic')) for i in delta_set]

[('Biostrings', 1.1137076090054145),
 ('HDF5Array', 0.08064975776574522),
 ('BiocParallel', 1.17041892277002),
 ('GenomeInfoDb', 1.0048446850954689),
 ('clusterProfiler', 0.23311484753491024),
 ('genefilter', 0.3174693644913081),
 ('ensembldb', 0.10686805357651753),
 ('BiocIO', 0.12852664576802508),
 ('IRanges', 0.4066685665431747),
 ('DESeq2', 0.13337133086349387),
 ('GenomicAlignments', 0.40267882587631804),
 ('Rsamtools', 0.3579367341122827),
 ('biomaRt', 1.048731832430892),
 ('GenomicFeatures', 0.8466799658022228),
 ('BiocGenerics', 0.6015958962667427),
 ('limma', 0.18324308919920204),
 ('Biobase', 0.4970076944998575),
 ('biovizBase', 0.16414933029353093),
 ('scuttle', 0.03163294385864919),
 ('KEGGREST', 0.8757480763750356),
 ('VariantAnnotation', 0.3120547164434312),
 ('MatrixGenerics', 0.42832715873468225),
 ('bumphunter', 0.14733542319749215),
 ('DelayedMatrixStats', 0.0957537760045597),
 ('S4Vectors', 0.4351667141635794),
 ('XVector', 0.7056141350812197),
 ('Gviz', 0.3052151610

In [None]:
immunization_delta(bioconductor, delta_set-{'Biostrings'}, algorithm='analytic')

12.834995725277857

#### Using other centrality measures

In [None]:
import networkx as nx

# Using the NetworkX implementation of betweenness centrality
# You can access the full NetworkX network underlying the repository using OliviaModel.network.
# Here we restrict the centrality computation to the subgraph induced by the SAP set:
sap_betweenness = nx.betweenness_centrality(maven.network.subgraph(sap))

In [None]:
from olivia.packagemetrics import MetricStats

# Build a MetricStats object to use in iset_naive_ranking
# MetricStats constructor admits a dictionary of values such as those produced by NetworkX
sap_betweenness = MetricStats(sap_betweenness)

In [None]:
immunization_delta(maven, iset_naive_ranking(10, sap_betweenness))

Reach retrieved from metrics cache
Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done
Computing Reach
     Processing node: 125K      


471.4254134057055