In [1]:
library(OmnipathR)
library(nichenetr)
library(tidyverse)
library(mlrMBO)
library(parallelMap)
library(dplyr)

── [1mAttaching packages[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0      [32m✔[39m [34mpurrr  [39m 0.3.5 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.5.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: mlr

Loading required package: ParamHelpers

Future development will only happe

In [2]:
## Expand an interaction table so that every data source gets its own row.
##
## Arguments:
##   InputDf          data frame / tibble with the columns `from`, `to` and
##                    `sources`; `sources` holds one or more source names
##                    separated by ";".
##   InteractionType  kept for backward compatibility with existing callers;
##                    it is not used to build the output.
##
## Returns a tibble with the character columns `from`, `to`, `source` and
## `database`: one row per (interaction, source) pair, where `source` and
## `database` both carry the individual source name.
##
## Edge cases:
##   * a zero-row input yields a zero-row tibble with the four columns;
##   * an NA in `sources` is kept as a single row with NA source/database;
##   * an empty string in `sources` contributes no rows, because strsplit()
##     returns no pieces for it.
interactionFormatTransf <- function(InputDf, InteractionType){

    requiredCols <- c("from", "to", "sources")
    missingCols <- setdiff(requiredCols, names(InputDf))
    if (length(missingCols) > 0) {
        stop("InputDf is missing required column(s): ",
            paste(missingCols, collapse = ", "), call. = FALSE)
    }

    ## as.character() guards against factor columns, which strsplit() rejects.
    sourceNodes <- as.character(InputDf[["from"]])
    targetNodes <- as.character(InputDf[["to"]])
    sourceStrings <- as.character(InputDf[["sources"]])

    ## Split every record at once instead of growing a tibble row by row.
    splitSources <- strsplit(sourceStrings, ";", fixed = TRUE)
    nPerRecord <- lengths(splitSources)

    ## as.character() turns the NULL that unlist() gives for an empty list
    ## into character(0), so the columns survive for zero-row input.
    flatSources <- as.character(unlist(splitSources, use.names = FALSE))

    tibble::tibble(
        from = rep(sourceNodes, times = nPerRecord),
        to = rep(targetNodes, times = nPerRecord),
        source = flatSources,
        database = flatSources)
}

In [3]:
## Load the interaction table from the CPDB directory.
lr <- read.csv(file = "../CPDB/intersect_db.csv")

In [4]:
## Keep the two partner columns (renamed to `from` / `to`) together with
## `sources`, then drop self-pairs and duplicated rows.
lr <- lr %>%
    select(from = protein_name_a, to = protein_name_b, sources) %>%
    filter(from != to) %>%
    distinct()

In [5]:
## One row per (from, to, source) for the ligand-receptor table, without
## duplicated rows.
lr_Network_Omnipath <- dplyr::distinct(
    interactionFormatTransf(lr, InteractionType = "LigrecExtra"))

In [6]:
## Persist the ligand-receptor network.
saveRDS(object = lr_Network_Omnipath,
    file = "NNET_Custom/lig_rec_sources.rds")

In [7]:
## Next, fetch the protein-protein (post-translational) interactions from the
## datasets available in Omnipath, excluding "ligrecextra". Keep only the gene
## symbols (as `from` / `to`) and `sources`, and drop self-interactions and
## duplicated rows.
AllInteractions <-
    import_post_translational_interactions(exclude = "ligrecextra") %>%
    dplyr::select(from = source_genesymbol, to = target_genesymbol, sources) %>%
    dplyr::filter(from != to) %>%
    dplyr::distinct()

In [8]:
## Distinct values of the `from` column of the ligand-receptor table.
ligands <- unique(lr[["from"]])

In [9]:
# sig_Network_Omnipath <- sig_Network_Omnipath_raw

In [10]:
## Original Omnipath interactions, expanded to one row per
## (from, to, source) and de-duplicated.
sig_Network_Omnipath <- AllInteractions %>%
    interactionFormatTransf(InteractionType = "Signalling") %>%
    dplyr::distinct()

## Remove self-interactions from the signaling network, as well as records
## whose source or target gene is an empty string: those records cause
## problems when running the model.
sig_Network_Omnipath <- dplyr::filter(
    sig_Network_Omnipath,
    from != to,
    from != "",
    to != "")

# ## Disabled step: remove interactions going to ligands. See Methods of the
# ## Nichenet paper.
# sig_Network_Omnipath <- sig_Network_Omnipath %>%
#     dplyr::filter(!(to %in% ligands))

## Remove signaling interactions whose (from, to) pair is already present in
## the ligand-receptor network.
sig_Network_Omnipath <- sig_Network_Omnipath %>%
    dplyr::anti_join(lr_Network_Omnipath, by = c("from", "to"))

nrow(sig_Network_Omnipath)

In [11]:
## Persist the signaling network.
saveRDS(object = sig_Network_Omnipath,
    file = "NNET_Custom/sig_Network.rds")

In [12]:
## DoRothEA interactions of levels A, B and C, reduced to unique, non-self
## (from, to, sources) records keyed by gene symbol.
gr_Interactions_Omnipath <-
    import_dorothea_interactions(dorothea_levels = c("A", "B", "C")) %>%
    select(from = source_genesymbol, to = target_genesymbol, sources) %>%
    filter(from != to) %>%
    distinct()

## One row per (from, to, source), without duplicated rows.
gr_Network_Omnipath <- gr_Interactions_Omnipath %>%
    interactionFormatTransf(InteractionType = "Dorothea") %>%
    dplyr::distinct()
nrow(gr_Network_Omnipath)
## [1] 113897

In [13]:
## Persist the gene-regulatory network.
saveRDS(object = gr_Network_Omnipath,
    file = "NNET_Custom/gr_Network.rds")

In [14]:
## Load the expression settings that are later passed to the optimization.
expression_settings_validation <- readRDS(file = "expression_settings.rds")

## Disabled step: subsetting of the settings based on their `from` entries
## with respect to "IFNA1".
# index <- which(!unlist(lapply(expression_settings_validation, 
#     function(x) any(x$from != "IFNA1"))))

# expression_settings_validation <- expression_settings_validation[-index]

In [15]:
## Every distinct data source found in the three networks.
All_sources <- unique(c(
    lr_Network_Omnipath$source,
    sig_Network_Omnipath$source,
    gr_Network_Omnipath$source))

## Each source starts with a weight of 1.
my_source_weights_df <- tibble(
    source = All_sources,
    weight = rep(1, times = length(All_sources)))

## Fixed arguments forwarded to the optimization alongside the tuned
## parameters.
additional_arguments_topology_correction <- list(
    source_names = unique(my_source_weights_df$source),
    algorithm = "PPR",
    correct_topology = FALSE,
    lr_network = lr_Network_Omnipath,
    sig_network = sig_Network_Omnipath,
    gr_network = gr_Network_Omnipath,
    settings = lapply(
        expression_settings_validation,
        convert_expression_settings_evaluation),
    secondary_targets = FALSE,
    remove_direct_links = "no",
    cutoff_method = "quantile")

## One weight is optimized per data source.
nr_datasources <- length(additional_arguments_topology_correction$source_names)

## Objective function with four objectives, all of them maximized, defined
## over the source weights and four scalar hyperparameters.
obj_fun_multi_topology_correction <- makeMultiObjectiveFunction(
    name = "nichenet_optimization",
    description = "data source weight and hyperparameter optimization: expensive black-box function",
    fn = model_evaluation_optimization,
    par.set = makeParamSet(
        makeNumericVectorParam(
            "source_weights", len = nr_datasources,
            lower = 0, upper = 1, tunable = FALSE),
        makeNumericVectorParam(
            "lr_sig_hub", len = 1,
            lower = 0, upper = 1, tunable = TRUE),
        makeNumericVectorParam(
            "gr_hub", len = 1,
            lower = 0, upper = 1, tunable = TRUE),
        makeNumericVectorParam(
            "ltf_cutoff", len = 1,
            lower = 0.9, upper = 0.999, tunable = TRUE),
        makeNumericVectorParam(
            "damping_factor", len = 1,
            lower = 0.01, upper = 0.99, tunable = TRUE)),
    has.simple.signature = FALSE,
    n.objectives = 4,
    noisy = FALSE,
    minimize = c(FALSE, FALSE, FALSE, FALSE))

## A single optimization run (first argument 1); the result is kept as a
## one-element list.
optimization_results <- lapply(
    1,
    mlrmbo_optimization,
    obj_fun = obj_fun_multi_topology_correction,
    niter = 8,
    ncores = 8,
    nstart = 160,
    additional_arguments = additional_arguments_topology_correction)

saveRDS(optimization_results, "NNET_Custom/Optimization_results.rds")

Loading required package: mco

Loading required package: emoa


Attaching package: ‘emoa’


The following object is masked from ‘package:dplyr’:

    coalesce


Starting parallelization in mode=multicore with cpus=8.



    source_weights1 source_weights2 source_weights3 source_weights4
1       0.429172704     0.767242255     0.559948773     0.816136240
2       0.117665040     0.641618487     0.354844160     0.534250153
3       0.046326490     0.949232979     0.936780758     0.461484132
4       0.321873540     0.955745813     0.609488768     0.646500303
5       0.801878797     0.874020626     0.929448671     0.410328241
6       0.157113841     0.389417361     0.946860306     0.667455032
7       0.052636179     0.361518839     0.202547398     0.076130915
8       0.666241606     0.323001632     0.318854848     0.269876401
9       0.590494499     0.214779118     0.834688228     0.433435940
10      0.276961765     0.601350234     0.784987848     0.786423330
11      0.464647569     0.973339155     0.031872677     0.093900082
12      0.795282319     0.047582621     0.630898907     0.621444613
13      0.469932277     0.854752248     0.550955379     0.136718548
14      0.077070611     0.001930634     0.064894

Computing y column(s) for design. Not provided.

Mapping in parallel: mode = multicore; level = mlrMBO.feval; cpus = 8; elements = 160.

[mbo] 0: source_weights=0.429,0.767,...; lr_sig_hub=0.241; gr_hub=0.128; ltf_cutoff=0.942; damping_factor=0.257 : y_1 = 0.603, y_2 = 0.0282, y_3 = 0.81, y_4 = 0.0518 : 561.7 secs : initdesign

[mbo] 0: source_weights=0.118,0.642,...; lr_sig_hub=0.543; gr_hub=0.0472; ltf_cutoff=0.994; damping_factor=0.806 : y_1 = 0.569, y_2 = 0.0272, y_3 = 0.68, y_4 = 0.0189 : 485.3 secs : initdesign

[mbo] 0: source_weights=0.0463,0.949...; lr_sig_hub=0.833; gr_hub=0.875; ltf_cutoff=0.97; damping_factor=0.879 : y_1 = 0.592, y_2 = 0.0212, y_3 = 0.71, y_4 = 0.0162 : 516.3 secs : initdesign

[mbo] 0: source_weights=0.322,0.956,...; lr_sig_hub=0.0649; gr_hub=0.322; ltf_cutoff=0.988; damping_factor=0.548 : y_1 = 0.593, y_2 = 0.0303, y_3 = 0.73, y_4 = 0.0359 : 690.1 secs : initdesign

[mbo] 0: source_weights=0.802,0.874,...; lr_sig_hub=0.967; gr_hub=0.235; ltf_cutoff=0.916;

[mbo] 0: source_weights=0.134,0.505,...; lr_sig_hub=0.959; gr_hub=0.988; ltf_cutoff=0.986; damping_factor=0.413 : y_1 = 0.568, y_2 = 0.0227, y_3 = 0.71, y_4 = 0.0137 : 523.9 secs : initdesign

[mbo] 0: source_weights=0.334,0.256,...; lr_sig_hub=0.875; gr_hub=0.476; ltf_cutoff=0.972; damping_factor=0.312 : y_1 = 0.586, y_2 = 0.024, y_3 = 0.7, y_4 = 0.0189 : 557.0 secs : initdesign

[mbo] 0: source_weights=0.684,0.666,...; lr_sig_hub=0.62; gr_hub=0.484; ltf_cutoff=0.921; damping_factor=0.287 : y_1 = 0.601, y_2 = 0.0234, y_3 = 0.7, y_4 = 0.025 : 539.2 secs : initdesign

[mbo] 0: source_weights=0.317,0.0273...; lr_sig_hub=0.21; gr_hub=0.362; ltf_cutoff=0.968; damping_factor=0.419 : y_1 = 0.601, y_2 = 0.0276, y_3 = 0.78, y_4 = 0.0304 : 534.8 secs : initdesign

[mbo] 0: source_weights=0.067,0.236,...; lr_sig_hub=0.454; gr_hub=0.681; ltf_cutoff=0.931; damping_factor=0.96 : y_1 = 0.613, y_2 = 0.0288, y_3 = 0.68, y_4 = 0.0189 : 644.1 secs : initdesign

[mbo] 0: source_weights=0.859,0.377,...; l

[mbo] 0: source_weights=0.609,0.919,...; lr_sig_hub=0.908; gr_hub=0.194; ltf_cutoff=0.955; damping_factor=0.0154 : y_1 = 0.596, y_2 = 0.0274, y_3 = 0.76, y_4 = 0.0293 : 615.5 secs : initdesign

[mbo] 0: source_weights=0.183,0.886,...; lr_sig_hub=0.324; gr_hub=0.0058; ltf_cutoff=0.93; damping_factor=0.717 : y_1 = 0.614, y_2 = 0.0305, y_3 = 0.77, y_4 = 0.0428 : 695.8 secs : initdesign

[mbo] 0: source_weights=0.819,0.0581...; lr_sig_hub=0.0286; gr_hub=0.822; ltf_cutoff=0.911; damping_factor=0.214 : y_1 = 0.6, y_2 = 0.0126, y_3 = 0.7, y_4 = 0.0221 : 622.0 secs : initdesign

[mbo] 0: source_weights=0.511,0.194,...; lr_sig_hub=0.71; gr_hub=0.475; ltf_cutoff=0.936; damping_factor=0.947 : y_1 = 0.605, y_2 = 0.0277, y_3 = 0.684, y_4 = 0.0221 : 545.9 secs : initdesign

[mbo] 0: source_weights=0.892,0.295,...; lr_sig_hub=0.182; gr_hub=0.593; ltf_cutoff=0.927; damping_factor=0.354 : y_1 = 0.608, y_2 = 0.0237, y_3 = 0.75, y_4 = 0.0359 : 636.5 secs : initdesign

[mbo] 0: source_weights=0.364,0.182,

[mbo] 0: source_weights=0.208,0.167,...; lr_sig_hub=0.0513; gr_hub=0.265; ltf_cutoff=0.983; damping_factor=0.262 : y_1 = 0.593, y_2 = 0.0268, y_3 = 0.78, y_4 = 0.0304 : 542.2 secs : initdesign

[mbo] 0: source_weights=0.638,0.517,...; lr_sig_hub=0.0381; gr_hub=0.334; ltf_cutoff=0.977; damping_factor=0.972 : y_1 = 0.608, y_2 = 0.0295, y_3 = 0.76, y_4 = 0.0301 : 553.8 secs : initdesign

[mbo] 0: source_weights=0.0306,0.568...; lr_sig_hub=0.569; gr_hub=0.135; ltf_cutoff=0.981; damping_factor=0.934 : y_1 = 0.6, y_2 = 0.0335, y_3 = 0.69, y_4 = 0.0221 : 577.0 secs : initdesign

[mbo] 0: source_weights=0.112,0.421,...; lr_sig_hub=0.801; gr_hub=0.72; ltf_cutoff=0.975; damping_factor=0.275 : y_1 = 0.588, y_2 = 0.0223, y_3 = 0.724, y_4 = 0.0258 : 470.7 secs : initdesign

[mbo] 0: source_weights=0.881,0.676,...; lr_sig_hub=0.39; gr_hub=0.0625; ltf_cutoff=0.991; damping_factor=0.858 : y_1 = 0.584, y_2 = 0.0307, y_3 = 0.72, y_4 = 0.0189 : 429.0 secs : initdesign

[mbo] 0: source_weights=0.101,0.364

[mbo] 2: source_weights=0.486,0.256,...; lr_sig_hub=0.633; gr_hub=0.563; ltf_cutoff=0.99; damping_factor=0.356 : y_1 = 0.572, y_2 = 0.0303, y_3 = 0.76, y_4 = 0.0258 : 486.2 secs : infill_dib

[mbo] 2: source_weights=0.958,0.648,...; lr_sig_hub=0.083; gr_hub=0.705; ltf_cutoff=0.962; damping_factor=0.284 : y_1 = 0.6, y_2 = 0.0213, y_3 = 0.76, y_4 = 0.0281 : 542.8 secs : infill_dib

[mbo] 2: source_weights=0.718,0.717,...; lr_sig_hub=0.691; gr_hub=0.899; ltf_cutoff=0.973; damping_factor=0.684 : y_1 = 0.591, y_2 = 0.0209, y_3 = 0.714, y_4 = 0.0161 : 550.5 secs : infill_dib

[mbo] 2: source_weights=0.5,0.271,0....; lr_sig_hub=0.518; gr_hub=0.932; ltf_cutoff=0.968; damping_factor=0.256 : y_1 = 0.595, y_2 = 0.0186, y_3 = 0.694, y_4 = 0.0213 : 483.0 secs : infill_dib

[mbo] 2: source_weights=0.0808,0.412...; lr_sig_hub=0.871; gr_hub=0.883; ltf_cutoff=0.916; damping_factor=0.831 : y_1 = 0.6, y_2 = 0.0191, y_3 = 0.68, y_4 = 0.0116 : 557.2 secs : infill_dib

[mbo] 2: source_weights=0.372,0.701,..

[mbo] 7: source_weights=0.537,0.253,...; lr_sig_hub=0.061; gr_hub=0.657; ltf_cutoff=0.958; damping_factor=0.28 : y_1 = 0.599, y_2 = 0.0168, y_3 = 0.76, y_4 = 0.0221 : 546.6 secs : infill_dib

[mbo] 7: source_weights=0.338,0.0879...; lr_sig_hub=0.365; gr_hub=0.384; ltf_cutoff=0.964; damping_factor=0.677 : y_1 = 0.602, y_2 = 0.0302, y_3 = 0.76, y_4 = 0.0304 : 565.4 secs : infill_dib

[mbo] 7: source_weights=0.576,0.706,...; lr_sig_hub=0.468; gr_hub=0.942; ltf_cutoff=0.921; damping_factor=0.301 : y_1 = 0.598, y_2 = 0.0153, y_3 = 0.71, y_4 = 0.0221 : 524.5 secs : infill_dib

[mbo] 7: source_weights=0.896,0.679,...; lr_sig_hub=0.541; gr_hub=0.214; ltf_cutoff=0.91; damping_factor=0.537 : y_1 = 0.604, y_2 = 0.0298, y_3 = 0.82, y_4 = 0.0359 : 746.7 secs : infill_dib

[mbo] 7: source_weights=0.721,0.18,0...; lr_sig_hub=0.646; gr_hub=0.0903; ltf_cutoff=0.948; damping_factor=0.871 : y_1 = 0.61, y_2 = 0.033, y_3 = 0.755, y_4 = 0.0304 : 512.7 secs : infill_dib

Mapping in parallel: mode = multicore