In [1]:
library(OmnipathR)
library(nichenetr)
library(tidyverse)
library(mlrMBO)
library(parallelMap)
library(dplyr)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.2      [32m✔[39m [34mpurrr  [39m 0.3.5 
[32m✔[39m [34mtibble [39m 3.2.1      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.5.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: mlr

Loading required package: ParamHelpers

Future development will only happen in 'mlr3'
(<https://mlr3.mlr-org.com>). Due to the focus on 'mlr3' there might be
uncaught bugs meanwhile in {mlr} - please consider switching.

Loading required package: smoof

Loading required package: checkmate



In [2]:
## Expand a ';'-separated `sources` column into one row per data source.
##
## Arguments:
##   InputDf         data frame / tibble with the columns `from`, `to` and
##                   `sources`; `sources` holds the data sources of an
##                   interaction joined by ';'.
##   InteractionType kept for backward compatibility with existing calls; it
##                   is currently not used (the suffixing of source names with
##                   the interaction type is disabled).
##
## Returns a tibble with the character columns `from`, `to`, `source` and
## `database` (the last two hold the same value), containing one row per
## (interaction, data source) pair in the original row order. An input without
## rows yields a tibble without rows, and an interaction whose `sources` entry
## is the empty string contributes no rows.
interactionFormatTransf <- function(InputDf, InteractionType){

    ## Fail early with a readable message when a required column is absent.
    required_cols <- c("from", "to", "sources")
    missing_cols <- setdiff(required_cols, names(InputDf))
    if (length(missing_cols) > 0) {
        stop("InputDf is missing required column(s): ",
            paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    ## as.character() keeps strsplit() working when a column is a factor.
    split_sources <- strsplit(as.character(InputDf[["sources"]]), ";",
        fixed = TRUE)

    ## Number of data sources per interaction; used to repeat from/to so that
    ## they line up with the flattened source vector.
    n_per_row <- lengths(split_sources)

    ## as.character() turns the NULL produced by unlist(list()) into
    ## character(0), so empty input still gives character columns.
    flat_sources <- as.character(unlist(split_sources, use.names = FALSE))

    tibble::tibble(
        from = rep(as.character(InputDf[["from"]]), times = n_per_row),
        to = rep(as.character(InputDf[["to"]]), times = n_per_row),
        source = flat_sources,
        database = flat_sources)
}

In [6]:
## Load intersect_db.csv; the cells below use it as the ligand-receptor
## interaction list.
lr <- read.csv(
    file = "../../run_CellPhoneDB/build_customDB/intersect_db.csv")

In [8]:
## Keep the two partner columns (named `from` / `to`) plus `sources`, then
## drop self-pairs and duplicated rows.
lr <- lr %>%
    dplyr::select(from = protein_name_a, to = protein_name_b, sources) %>%
    dplyr::filter(from != to) %>%
    dplyr::distinct()

In [9]:
## Ligand-receptor network with one row per (from, to, data source).
lr_Network_Omnipath <- dplyr::distinct(
    interactionFormatTransf(lr, InteractionType = "LigrecExtra"))

In [10]:
## Persist the ligand-receptor network.
saveRDS(
    object = lr_Network_Omnipath,
    file = "../../../../../results/method_comparison/build_customDB/NicheNet/lig_rec_sources.rds")

In [11]:
## Next, fetch the protein-protein interactions from the datasets available in
## Omnipath, leaving out "ligrecextra". Keep the gene-symbol pair (named
## `from` / `to`) and its sources, without self-interactions or duplicates.
AllInteractions <-
    import_post_translational_interactions(exclude = "ligrecextra") %>%
    dplyr::select(
        from = source_genesymbol,
        to = target_genesymbol,
        sources) %>%
    dplyr::filter(from != to) %>%
    dplyr::distinct()

In [12]:
## Distinct `from` entries of the ligand-receptor table.
ligands <- unique(lr[["from"]])

In [9]:
# sig_Network_Omnipath <- sig_Network_Omnipath_raw

In [14]:
# ## Original Omnipath interactions
# sig_Network_Omnipath <- 
#     interactionFormatTransf(AllInteractions, InteractionType="Signalling") %>%
#     dplyr::distinct() 

# ## I have to remove self-interactions in the signaling network
# sig_Network_Omnipath <- sig_Network_Omnipath %>% 
#     dplyr::filter(from != to)

# # ## I also have to remove interactions going to ligands. See Methods Nichenet 
# # ## paper
# # sig_Network_Omnipath <- sig_Network_Omnipath %>% 
# #     dplyr::filter(!(to %in% ligands))

# ## There are in addition some records containing not input gene, we remove them
# ## since they are giving problems with running the model.
# sig_Network_Omnipath <- sig_Network_Omnipath %>% 
#     dplyr::filter(from != "") %>% 
#     dplyr::filter(to != "")


# ## We also remove signaling interactions that are already in the lig-receptor 
# ## network. 
# sig_Network_Omnipath <- dplyr::anti_join(
#   sig_Network_Omnipath, 
#   lr_Network_Omnipath, 
#   by = c("from" = "from", "to" = "to"))

# nrow(sig_Network_Omnipath)

In [15]:
## Signalling network built from the original Omnipath interactions, one row
## per (from, to, data source). The filters remove, in turn:
##   - self-interactions;
##   - interactions going to ligands (see Methods of the NicheNet paper);
##   - records without a gene on either side, which cause problems when
##     running the model.
## Finally, interactions already present in the ligand-receptor network are
## removed as well.
sig_Network_Omnipath <- AllInteractions %>%
    interactionFormatTransf(InteractionType = "Signalling") %>%
    dplyr::distinct() %>%
    dplyr::filter(
        from != to,
        !(to %in% ligands),
        from != "",
        to != "") %>%
    dplyr::anti_join(
        lr_Network_Omnipath,
        by = c("from" = "from", "to" = "to"))

nrow(sig_Network_Omnipath)

In [None]:
202063

In [16]:
## Persist the signalling network.
saveRDS(
    object = sig_Network_Omnipath,
    file = "../../../../../results/method_comparison/build_customDB/NicheNet/sig_Network.rds")

In [17]:
## DoRothEA interactions (levels A, B and C): gene-symbol pair named
## `from` / `to` plus its sources, without self-interactions or duplicates.
gr_Interactions_Omnipath <-
    import_dorothea_interactions(dorothea_levels = c("A","B","C")) %>%
    dplyr::select(
        from = source_genesymbol,
        to = target_genesymbol,
        sources) %>%
    dplyr::filter(from != to) %>%
    dplyr::distinct()

## Gene-regulatory network with one row per (from, to, data source).
gr_Network_Omnipath <- dplyr::distinct(
    interactionFormatTransf(
        gr_Interactions_Omnipath,
        InteractionType = "Dorothea"))
nrow(gr_Network_Omnipath)
## [1] 113897

In [2]:
## Persist the gene-regulatory network.
saveRDS(
    object = gr_Network_Omnipath,
    file = "../../../../../results/method_comparison/build_customDB/NicheNet/gr_Network.rds")

In [20]:
## Expression settings stored earlier as an .rds file; the optimization cell
## converts each element with convert_expression_settings_evaluation().
expression_settings_validation <- readRDS(
    file = "../../../../../results/method_comparison/build_customDB/NicheNet/expression_settings.rds")

# index <- which(!unlist(lapply(expression_settings_validation, 
#     function(x) any(x$from != "IFNA1"))))

# expression_settings_validation <- expression_settings_validation[-index]

The process described above requires an internet connection to retrieve data. However, our compute nodes on the server do not have an internet connection. Therefore, it is necessary to run the initial steps locally, where we have access to the internet.

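Once these initial steps are completed, we can export the variables in a format that can be easily transferred to the server. Here the variables needed downstream are bundled into a single `.RData` file with `save()` and restored on the server with `load()`; the individual networks are additionally written as `.rds` files with `saveRDS()`. Both are native R serialization formats that can be read back in another R session.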

After exporting the variables, they can be transferred to the server and loaded for further processing. Since the subsequent steps are compute-intensive, running them on the server's dedicated compute nodes can take advantage of their higher processing power and resources.

In summary, the initial steps requiring an internet connection should be run locally, or on a server where internet access is available. The variables can then be exported and transferred to the server for subsequent compute-intensive steps. This approach allows us to leverage the server's resources while ensuring the availability of necessary data.

In [23]:
## List the objects currently defined in the workspace (the next cell saves
## a subset of them).
ls()

In [24]:
## Bundle the objects needed by the optimization step into one file.
save(
    expression_settings_validation,
    lr_Network_Omnipath,
    sig_Network_Omnipath,
    gr_Network_Omnipath,
    file = "myvariables.RData")

In [1]:
## Restore the objects written by save() into the current workspace.
load(file = "myvariables.RData")

In [15]:
## Every data source named in any of the three networks.
All_sources <- unique(c(
    lr_Network_Omnipath$source,
    sig_Network_Omnipath$source,
    gr_Network_Omnipath$source))

## One row per data source, each with weight 1.
my_source_weights_df <- tibble(
    source = All_sources,
    weight = rep(1, length(All_sources)))

## Fixed arguments passed to mlrmbo_optimization() as `additional_arguments`.
additional_arguments_topology_correction <- list(
    source_names = unique(my_source_weights_df$source),
    algorithm = "PPR",
    correct_topology = FALSE,
    lr_network = lr_Network_Omnipath,
    sig_network = sig_Network_Omnipath,
    gr_network = gr_Network_Omnipath,
    settings = lapply(
        expression_settings_validation,
        convert_expression_settings_evaluation),
    secondary_targets = FALSE,
    remove_direct_links = "no",
    cutoff_method = "quantile")

## The search space holds one source weight per data source.
nr_datasources <- length(
    additional_arguments_topology_correction$source_names)

## Objective with four outputs, none of which is minimized. The parameter set
## has one weight in [0, 1] per data source plus four scalar hyperparameters.
obj_fun_multi_topology_correction <- makeMultiObjectiveFunction(
    name = "nichenet_optimization",
    description = "data source weight and hyperparameter optimization: expensive black-box function",
    fn = model_evaluation_optimization,
    par.set = makeParamSet(
        makeNumericVectorParam(
            "source_weights", len = nr_datasources,
            lower = 0, upper = 1, tunable = FALSE),
        makeNumericVectorParam(
            "lr_sig_hub", len = 1,
            lower = 0, upper = 1, tunable = TRUE),
        makeNumericVectorParam(
            "gr_hub", len = 1,
            lower = 0, upper = 1, tunable = TRUE),
        makeNumericVectorParam(
            "ltf_cutoff", len = 1,
            lower = 0.9, upper = 0.999, tunable = TRUE),
        makeNumericVectorParam(
            "damping_factor", len = 1,
            lower = 0.01, upper = 0.99, tunable = TRUE)),
    has.simple.signature = FALSE,
    n.objectives = 4,
    noisy = FALSE,
    minimize = c(FALSE, FALSE, FALSE, FALSE))

## A single call to mlrmbo_optimization() (input 1), wrapped in lapply() so
## the result is a list of length one.
optimization_results <- lapply(
    X = 1,
    FUN = mlrmbo_optimization,
    obj_fun = obj_fun_multi_topology_correction,
    niter = 8,
    ncores = 8,
    nstart = 160,
    additional_arguments = additional_arguments_topology_correction)

saveRDS(
    object = optimization_results,
    file = "../../../../../results/method_comparison/build_customDB/NicheNet/Optimization_results.rds")

Loading required package: mco

Loading required package: emoa


Attaching package: ‘emoa’


The following object is masked from ‘package:dplyr’:

    coalesce


Starting parallelization in mode=multicore with cpus=8.



    source_weights1 source_weights2 source_weights3 source_weights4
1       0.573086530     0.197161192      0.84716006     0.673123345
2       0.835907162     0.733864774      0.79758742     0.885643229
3       0.538408570     0.809848102      0.29117820     0.068182781
4       0.881976855     0.934818693      0.28682059     0.698499324
5       0.957792874     0.466725397      0.21268005     0.910012755
6       0.106095174     0.278167165      0.10441599     0.597878822
7       0.426559470     0.022788042      0.77997195     0.122584433
8       0.607309964     0.821902719      0.26350140     0.976788360
9       0.535489195     0.764853560      0.87952276     0.927923836
10      0.948147720     0.804371007      0.48468673     0.205832290
11      0.308958887     0.685656322      0.44004282     0.251964406
12      0.734762657     0.252156479      0.38420661     0.556088028
13      0.924486880     0.267172686      0.14170283     0.438851193
14      0.460601223     0.563334274      0.67191

Computing y column(s) for design. Not provided.

Mapping in parallel: mode = multicore; level = mlrMBO.feval; cpus = 8; elements = 160.

[mbo] 0: source_weights=0.573,0.197,...; lr_sig_hub=0.303; gr_hub=0.363; ltf_cutoff=0.991; damping_factor=0.797 : y_1 = 0.588, y_2 = 0.0298, y_3 = 0.7, y_4 = 0.0221 : 469.7 secs : initdesign

[mbo] 0: source_weights=0.836,0.734,...; lr_sig_hub=0.8; gr_hub=0.242; ltf_cutoff=0.914; damping_factor=0.267 : y_1 = 0.605, y_2 = 0.0276, y_3 = 0.76, y_4 = 0.0361 : 597.3 secs : initdesign

[mbo] 0: source_weights=0.538,0.81,0...; lr_sig_hub=0.294; gr_hub=0.988; ltf_cutoff=0.966; damping_factor=0.301 : y_1 = 0.597, y_2 = 0.0181, y_3 = 0.684, y_4 = 0.019 : 563.7 secs : initdesign

[mbo] 0: source_weights=0.882,0.935,...; lr_sig_hub=0.713; gr_hub=0.0969; ltf_cutoff=0.908; damping_factor=0.697 : y_1 = 0.606, y_2 = 0.031, y_3 = 0.79, y_4 = 0.0429 : 634.1 secs : initdesign

[mbo] 0: source_weights=0.958,0.467,...; lr_sig_hub=0.74; gr_hub=0.0589; ltf_cutoff=0.928; dam

[mbo] 0: source_weights=0.0224,0.355...; lr_sig_hub=0.767; gr_hub=0.484; ltf_cutoff=0.917; damping_factor=0.433 : y_1 = 0.601, y_2 = 0.0261, y_3 = 0.776, y_4 = 0.0359 : 727.7 secs : initdesign

[mbo] 0: source_weights=0.987,0.507,...; lr_sig_hub=0.897; gr_hub=0.129; ltf_cutoff=0.921; damping_factor=0.045 : y_1 = 0.605, y_2 = 0.0268, y_3 = 0.76, y_4 = 0.0337 : 520.2 secs : initdesign

[mbo] 0: source_weights=0.595,0.371,...; lr_sig_hub=0.548; gr_hub=0.452; ltf_cutoff=0.987; damping_factor=0.26 : y_1 = 0.577, y_2 = 0.0277, y_3 = 0.78, y_4 = 0.0359 : 603.8 secs : initdesign

[mbo] 0: source_weights=0.319,0.0962...; lr_sig_hub=0.574; gr_hub=0.297; ltf_cutoff=0.903; damping_factor=0.0208 : y_1 = 0.603, y_2 = 0.0232, y_3 = 0.684, y_4 = 0.0359 : 592.8 secs : initdesign

[mbo] 0: source_weights=0.742,0.789,...; lr_sig_hub=0.166; gr_hub=0.304; ltf_cutoff=0.904; damping_factor=0.632 : y_1 = 0.612, y_2 = 0.0299, y_3 = 0.75, y_4 = 0.0281 : 774.2 secs : initdesign

[mbo] 0: source_weights=0.876,0.3

[mbo] 0: source_weights=0.151,0.166,...; lr_sig_hub=0.656; gr_hub=0.423; ltf_cutoff=0.969; damping_factor=0.919 : y_1 = 0.6, y_2 = 0.033, y_3 = 0.69, y_4 = 0.0189 : 581.7 secs : initdesign

[mbo] 0: source_weights=0.174,0.285,...; lr_sig_hub=0.0992; gr_hub=0.799; ltf_cutoff=0.915; damping_factor=0.943 : y_1 = 0.608, y_2 = 0.0175, y_3 = 0.76, y_4 = 0.0221 : 672.0 secs : initdesign

[mbo] 0: source_weights=0.952,0.0177...; lr_sig_hub=0.615; gr_hub=0.0845; ltf_cutoff=0.946; damping_factor=0.0838 : y_1 = 0.596, y_2 = 0.0224, y_3 = 0.78, y_4 = 0.026 : 517.3 secs : initdesign

[mbo] 0: source_weights=0.635,0.544,...; lr_sig_hub=0.867; gr_hub=0.418; ltf_cutoff=0.959; damping_factor=0.112 : y_1 = 0.591, y_2 = 0.0221, y_3 = 0.72, y_4 = 0.0258 : 517.1 secs : initdesign

[mbo] 0: source_weights=0.613,0.54,0...; lr_sig_hub=0.843; gr_hub=0.111; ltf_cutoff=0.955; damping_factor=0.539 : y_1 = 0.597, y_2 = 0.0297, y_3 = 0.75, y_4 = 0.0258 : 510.9 secs : initdesign

[mbo] 0: source_weights=0.592,0.904,

[mbo] 0: source_weights=0.691,0.317,...; lr_sig_hub=0.56; gr_hub=0.373; ltf_cutoff=0.921; damping_factor=0.505 : y_1 = 0.603, y_2 = 0.0278, y_3 = 0.8, y_4 = 0.0412 : 515.6 secs : initdesign

[mbo] 0: source_weights=0.356,0.312,...; lr_sig_hub=0.112; gr_hub=0.0627; ltf_cutoff=0.954; damping_factor=0.463 : y_1 = 0.609, y_2 = 0.0283, y_3 = 0.765, y_4 = 0.0518 : 445.5 secs : initdesign

[mbo] 0: source_weights=0.217,0.813,...; lr_sig_hub=0.732; gr_hub=0.53; ltf_cutoff=0.973; damping_factor=0.34 : y_1 = 0.588, y_2 = 0.0222, y_3 = 0.72, y_4 = 0.0221 : 555.9 secs : initdesign

[mbo] 0: source_weights=0.279,0.0321...; lr_sig_hub=0.784; gr_hub=0.324; ltf_cutoff=0.975; damping_factor=0.422 : y_1 = 0.591, y_2 = 0.0331, y_3 = 0.776, y_4 = 0.0258 : 628.4 secs : initdesign

[mbo] 0: source_weights=0.77,0.999,0...; lr_sig_hub=0.673; gr_hub=0.469; ltf_cutoff=0.977; damping_factor=0.763 : y_1 = 0.592, y_2 = 0.0284, y_3 = 0.72, y_4 = 0.0189 : 605.6 secs : initdesign

[mbo] 0: source_weights=0.561,0.208,

[mbo] 2: source_weights=0.6,0.552,0....; lr_sig_hub=0.882; gr_hub=0.276; ltf_cutoff=0.962; damping_factor=0.686 : y_1 = 0.599, y_2 = 0.0314, y_3 = 0.78, y_4 = 0.0304 : 622.3 secs : infill_dib

[mbo] 2: source_weights=0.16,0.0743,...; lr_sig_hub=0.091; gr_hub=0.581; ltf_cutoff=0.956; damping_factor=0.347 : y_1 = 0.602, y_2 = 0.0236, y_3 = 0.78, y_4 = 0.0304 : 545.8 secs : infill_dib

[mbo] 2: source_weights=0.0667,0.698...; lr_sig_hub=0.39; gr_hub=0.25; ltf_cutoff=0.956; damping_factor=0.692 : y_1 = 0.61, y_2 = 0.0312, y_3 = 0.72, y_4 = 0.0246 : 507.4 secs : infill_dib

[mbo] 2: source_weights=0.503,0.354,...; lr_sig_hub=0.518; gr_hub=0.178; ltf_cutoff=0.911; damping_factor=0.801 : y_1 = 0.608, y_2 = 0.0285, y_3 = 0.8, y_4 = 0.0429 : 757.9 secs : infill_dib

[mbo] 2: source_weights=0.649,0.72,0...; lr_sig_hub=0.126; gr_hub=0.772; ltf_cutoff=0.94; damping_factor=0.631 : y_1 = 0.6, y_2 = 0.0161, y_3 = 0.73, y_4 = 0.0189 : 663.2 secs : infill_dib

[mbo] 2: source_weights=0.82,0.161,0...; l

[mbo] 7: source_weights=0.939,0.706,...; lr_sig_hub=0.845; gr_hub=0.101; ltf_cutoff=0.924; damping_factor=0.96 : y_1 = 0.609, y_2 = 0.031, y_3 = 0.82, y_4 = 0.0359 : 524.6 secs : infill_dib

[mbo] 7: source_weights=0.85,0.641,0...; lr_sig_hub=0.1; gr_hub=0.778; ltf_cutoff=0.988; damping_factor=0.316 : y_1 = 0.585, y_2 = 0.0217, y_3 = 0.737, y_4 = 0.0258 : 518.6 secs : infill_dib

[mbo] 7: source_weights=0.547,0.561,...; lr_sig_hub=0.215; gr_hub=0.875; ltf_cutoff=0.968; damping_factor=0.736 : y_1 = 0.602, y_2 = 0.0188, y_3 = 0.72, y_4 = 0.0161 : 586.5 secs : infill_dib

[mbo] 7: source_weights=0.546,0.252,...; lr_sig_hub=0.453; gr_hub=0.448; ltf_cutoff=0.929; damping_factor=0.861 : y_1 = 0.614, y_2 = 0.0329, y_3 = 0.684, y_4 = 0.0221 : 660.9 secs : infill_dib

[mbo] 7: source_weights=0.238,0.421,...; lr_sig_hub=0.928; gr_hub=0.881; ltf_cutoff=0.94; damping_factor=0.338 : y_1 = 0.59, y_2 = 0.0199, y_3 = 0.7, y_4 = 0.0179 : 825.3 secs : infill_dib

Mapping in parallel: mode = multicore; l