# **OLIVIA - Model**

In [3]:
# Add olivia to the python path
import sys
sys.path.append('../../olivia')

# Install the requirements
!pip install -r requirements.txt



## BIOCONDUCTOR TEST

Build the Bioconductor model from the scraped adjacency list and save it to disk.

In [4]:
from olivia.model import OliviaNetwork
# NOTE(review): wildcard import; none of the cells visible in this notebook use
# a name from olivia.immunization — confirm before narrowing or removing it.
from olivia.immunization import *
import networkx as nx
import pandas as pd


# Load the scraped adjacency list; the 'dependency' and 'name' columns are used below.
bioconductor_df = pd.read_csv('aux_data/bioconductor_adjlist_scraping.csv')
# Directed graph with one edge per row, pointing from the 'dependency' column
# to the 'name' column.
bioconductor_G = nx.from_pandas_edgelist(bioconductor_df, source='dependency', target='name', create_using=nx.DiGraph())
# Instantiate OliviaNetwork without a file and build the model from the graph.
bioconductor = OliviaNetwork()
bioconductor.build_model(bioconductor_G)

# Save the model; the "Basic use" section reloads it from this same path.
bioconductor.save('aux_data/bioconductor_model.olv')

Building Olivia Model
     Finding strongly connected components (SCCs)...
     Building condensation network...
     Adding structural meta-data...
     Done


### Basic use

In [5]:
from olivia.model import OliviaNetwork

In [6]:
# Load the model from the file written by bioconductor.save(...) in the build section.
bioconductor = OliviaNetwork(r'aux_data/bioconductor_model.olv')

In [7]:
# Number of entries in the model (iterating it yields package names, so this is
# presumably the package count); later cells use it to normalise metrics.
len(bioconductor)

3509

In [8]:
# Print every package whose name starts with a lowercase 'b'.
# str.startswith is used instead of package[0] so that an empty name is simply
# skipped rather than raising IndexError.
for package in bioconductor:
    if package.startswith('b'):
        print(package)

betareg
biocViews
biovizBase
broom
base64enc
biomaRt
bit64
bsseq
biglm
beanplot
boot
bioDist
biclust
bitops
basecallQC
batchelor
beachmat
benchdamic
biobtreeR
biscuiteer
biscuiteerData
branchpointer
bumphunter
basilisk
bezier
bamsignals
bigmemory
biodb
biodbChebi
binom
bbmle
batchtools
bookdown
ballgown
bayNorm
bigPint
biocthis
biomformat
bluster
brendaDb
biwt
beadarray
biganalytics
beeswarm
bibtex
bslib
base
bestNormalize
bigrquery
bnlearn
bsplus
baseline
brglm
bootstrap
banocc
bcSeq
bigmelon
biocGraph
biodbKegg
biomvRCNS
bnbc
bugsigdbr
basilisk.utils
brew
bamlss
benchmarkme
bandle
biodbNci
blacksheepr
baySeq
breastCancerVDX
bench
backbone
bs4Dash
bdsmatrix
barcodetrackR
bioCancer
biodbHmdb
biodbUniprot
blima
breakpointR
breakpointRdata
bit
biosigner
base64url
bladderbatch
biobroom
bambu
bgx
biodbLipidmaps
bnem
base64
blockmodeling
beadarraySNP
bioassayR
biodbExpasy
biodbNcbi
brainflowprobes
binr
blme
babelgene
bnstruct
bigstatsr
biomartr
bacon
beer
biodbMirbase
biotmle
borealis
broom

### Package properties

Indexing the network with a package name (`__getitem__`) returns a special view object:

In [9]:
# Look up one package by name; the repr shows the result is an olivia.model.PackageInfoView.
bioconductor['BiocGenerics']

<olivia.model.PackageInfoView at 0x7f07dbac6e80>

In [10]:
# direct_dependants() returns a sized collection; report how many packages it holds for BiocGenerics.
print(f"BiocGenerics has {len(bioconductor['BiocGenerics'].direct_dependants())} direct dependants")

BiocGenerics has 480 direct dependants


In [11]:
# Same count for transitive_dependants(); per the message text, this collection
# also contains the direct dependants.
print(f"BiocGenerics has {len(bioconductor['BiocGenerics'].transitive_dependants())} transitive dependants (includes direct dependants)")

BiocGenerics has 1704 transitive dependants (includes direct dependants)


In [12]:
# Subtracting the two collections (set difference) leaves the dependants that
# are reached only indirectly.
# NOTE(review): this prints the entire set; consider showing its size or a
# small sorted sample instead to keep the notebook output readable.
print(bioconductor['BiocGenerics'].transitive_dependants() - bioconductor['BiocGenerics'].direct_dependants())

{'beer', 'CNTools', 'Trendy', 'DMRcate', 'vissE', 'escape', 'cytoMEM', 'NanoStringDiff', 'oncoscanR', 'BioQC', 'VaSP', 'CHETAH', 'MGFR', 'tradeSeq', 'DaMiRseq', 'discordant', 'RITAN', 'wpm', 'phantasus', 'mCSEA', 'proActiv', 'segmenter', 'crisprVerse', 'CoGAPS', 'BgeeCall', 'affyContam', 'CTdata', 'LowMACA', 'PICS', 'MWASTools', 'cBioPortalData', 'RefPlus', 'metagenomeSeq', 'divergence', 'consensusOV', 'vidger', 'zellkonverter', 'progeny', 'normr', 'affyILM', 'SigFuge', 'FCBF', 'CrispRVariants', 'DominoEffect', 'alabaster', 'missRows', 'omicsViewer', 'miRspongeR', 'scBFA', 'Risa', 'rbsurv', 'EventPointer', 'BANDITS', 'combi', 'OncoSimulR', 'karyoploteR', 'miRmine', 'sangeranalyseR', 'treekoR', 'SplicingFactory', 'traviz', 'MatrixQCvis', 'qmtools', 'stJoincount', 'BEARscc', 'chihaya', 'GeneExpressionSignature', 'clustifyr', 'SeqSQC', 'motifmatchr', 'RBioinf', 'CRImage', 'BiFET', 'bioCancer', 'cogena', 'dce', 'a4', 'QDNAseq', 'GenomicOZone', 'GenomicDataCommons', 'VariantExperiment', 'tr

In [13]:
# The opposite direction from the dependants calls: what BiocGenerics itself
# depends on directly, displayed as a set of package names.
bioconductor['BiocGenerics'].direct_dependencies()

{'R', 'graphics', 'methods', 'stats', 'utils'}

In [14]:
# transitive_dependencies() for a different package, MetaboCoreUtils; the
# result is displayed as a set of package names.
bioconductor['MetaboCoreUtils'].transitive_dependencies()

{'BiocGenerics',
 'MASS',
 'MsCoreUtils',
 'R',
 'S4Vectors',
 'clue',
 'graphics',
 'methods',
 'stats',
 'stats4',
 'utils'}

### Package metrics

In [15]:
# reach() gives 1705 here, one more than the 1704 transitive dependants
# reported earlier — presumably the package itself is counted; confirm against
# the olivia documentation.
bioconductor['BiocGenerics'].reach()

1705

In [16]:
from olivia.packagemetrics import Reach
# Request the Reach metric through get_metric, passing the metric class itself;
# the result is kept in a variable for the cells that follow.
bioconductor_reach = bioconductor.get_metric(Reach)

Computing Reach
     Processing node: 3K      


In [17]:
# Display the object returned by get_metric (a MetricStats instance, per its repr).
bioconductor_reach

<olivia.packagemetrics.MetricStats at 0x7f07d731a220>

In [18]:
# The metric object can be indexed by package name to read that package's value.
bioconductor_reach['BiocGenerics']

1705

In [19]:
# Requesting the same metric again: the printed message shows it is served from
# the metrics cache rather than recomputed.
bioconductor_reach = bioconductor.get_metric(Reach)

Reach retrieved from metrics cache


In [20]:
# get_metric can also be called inline and indexed directly, without keeping
# the result in a variable.
bioconductor.get_metric(Reach)['BiocGenerics']

Reach retrieved from metrics cache


1705

In [21]:
from olivia.packagemetrics import Impact

# %time reports how long the first request for Impact takes; no earlier cell in
# this notebook has requested this metric.
%time bioconductor.get_metric(Impact)['BiocGenerics']

Computing Impact
     Processing node: 3K      
CPU times: user 88.8 ms, sys: 558 µs, total: 89.4 ms
Wall time: 85 ms


7357

In [22]:
# Time the identical call again to compare against the first run now that the
# metric is in the cache.
%time bioconductor.get_metric(Impact)['BiocGenerics']

Impact retrieved from metrics cache
CPU times: user 71 µs, sys: 0 ns, total: 71 µs
Wall time: 74.1 µs


7357

In [23]:
# Impact for a second package, BANDITS, for comparison.
bioconductor.get_metric(Impact)['BANDITS']

Impact retrieved from metrics cache


1

In [24]:
# Impact for a third package, GenomicFeatures.
bioconductor.get_metric(Impact)['GenomicFeatures']

Impact retrieved from metrics cache


462

In [25]:
from olivia.packagemetrics import Surface

# Surface is requested the same way as Reach and Impact: through get_metric,
# then indexed by package name.
bioconductor.get_metric(Surface)['BiocGenerics']

Computing Surface
     Processing node: 0K      


6

In [26]:
# top(10): the ten (package, value) pairs with the largest Reach, listed in
# descending order.
bioconductor_reach.top(10)

[('R', 2109),
 ('stats', 1997),
 ('methods', 1982),
 ('utils', 1957),
 ('graphics', 1860),
 ('BiocGenerics', 1705),
 ('stats4', 1533),
 ('grDevices', 1490),
 ('S4Vectors', 1462),
 ('Biobase', 1437)]

In [27]:
# bottom() without an argument returns a single (package, value) pair here —
# presumably the default is one entry with the smallest value; confirm.
bioconductor_reach.bottom()

[('metaseqR2', 1)]

In [28]:
# Same ranking for Surface; the metric was computed in an earlier cell, so it
# comes from the cache.
bioconductor.get_metric(Surface).top(10)

Surface retrieved from metrics cache


[('singleCellTK', 210),
 ('ChAMP', 168),
 ('benchdamic', 152),
 ('miRSM', 151),
 ('methylGSA', 144),
 ('netZooR', 141),
 ('microbiomeMarker', 141),
 ('FLAMES', 139),
 ('DMRcate', 138),
 ('epimutacions', 135)]

In [29]:
# Dividing a metric by a number yields an object that still supports top();
# the values are rescaled by the network size and the ranking stays the same.
normalized_reach = bioconductor.get_metric(Reach)/len(bioconductor)
normalized_reach.top(10)

Reach retrieved from metrics cache


[('R', 0.6010259333143345),
 ('stats', 0.5691080079794814),
 ('methods', 0.5648332858364207),
 ('utils', 0.5577087489313195),
 ('graphics', 0.5300655457395269),
 ('BiocGenerics', 0.48589341692789967),
 ('stats4', 0.43687660302080367),
 ('grDevices', 0.42462239954402964),
 ('S4Vectors', 0.41664291821031635),
 ('Biobase', 0.4095183813052152)]

In [30]:
# Same normalisation applied to Surface, without an intermediate variable.
(bioconductor.get_metric(Surface)/len(bioconductor)).top(10)

Surface retrieved from metrics cache


[('singleCellTK', 0.05984611000284981),
 ('ChAMP', 0.04787688800227985),
 ('benchdamic', 0.04331718438301511),
 ('miRSM', 0.043032202906811055),
 ('methylGSA', 0.04103733257338273),
 ('netZooR', 0.04018238814477059),
 ('microbiomeMarker', 0.04018238814477059),
 ('FLAMES', 0.039612425192362494),
 ('DMRcate', 0.03932744371615845),
 ('epimutacions', 0.038472499287546306)]

In [31]:
from olivia.packagemetrics import DependentsCount

# Mean of the metric's `.values` (presumably one value per package — confirm).
mean_degree = bioconductor.get_metric(DependentsCount).values.mean()
# Squared deviation of each value from that mean; the arithmetic returns an
# object that still supports top().
degree_divergence = (bioconductor.get_metric(DependentsCount)-mean_degree)**2
# The five packages with the largest squared deviation.
degree_divergence.top(5)

Computing Dependents Count
DependentsCount retrieved from metrics cache


[('R', 3193116.4110936164),
 ('methods', 2202046.240389712),
 ('stats', 1651043.3692013395),
 ('utils', 1112875.8798881448),
 ('ggplot2', 502580.78726916516)]

In [32]:
# Impact / Reach ratio: divide one metric object by another (presumably matched
# by package name — confirm), then list the five largest ratios.
(bioconductor.get_metric(Impact)/bioconductor.get_metric(Reach)).top(5)

Impact retrieved from metrics cache
Reach retrieved from metrics cache


[('methods', 5.011604439959637),
 ('R', 4.87624466571835),
 ('stats', 4.815723585378067),
 ('utils', 4.7884517118037815),
 ('graphics', 4.51505376344086)]