In [1]:
library(OmnipathR)
library(nichenetr)
library(tidyverse)
library(mlrMBO)
library(parallelMap)
library(dplyr)
library(community)

── [1mAttaching core tidyverse packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<ht

In [2]:
#' Expand semicolon-separated source annotations into one row per source.
#'
#' Every input interaction carries a `sources` string such as "db1;db2". The
#' result repeats the interaction once for each individual source, in input
#' order.
#'
#' @param InputDf Data frame (or tibble) with the columns `from`, `to` and
#'   `sources`.
#' @param InteractionType Label of the interaction layer. Currently unused: the
#'   source name is stored as-is, without any suffix. Kept so that existing
#'   callers keep working.
#' @return A tibble with the character columns `from`, `to`, `source` and
#'   `database` (`source` and `database` hold the same value). An input with
#'   zero rows yields a zero-row tibble. A row whose `sources` string is empty
#'   contributes no rows; a row whose `sources` is NA yields one row with an NA
#'   source.
interactionFormatTransf <- function(InputDf, InteractionType){

    sourceNodes <- as.character(dplyr::pull(InputDf, from))
    targetNodes <- as.character(dplyr::pull(InputDf, to))
    sourceStrings <- as.character(dplyr::pull(InputDf, sources))

    # One character vector of individual sources per input row.
    splitSources <- strsplit(sourceStrings, ";", fixed = TRUE)
    sourcesPerRow <- lengths(splitSources)

    # unlist() returns NULL when there is nothing to flatten; as.character()
    # turns that into character(0) so the column types stay stable.
    flatSources <- as.character(unlist(splitSources, use.names = FALSE))

    # Repeat each edge once per source instead of growing a tibble row by row.
    tibble::tibble(
        from = rep(sourceNodes, times = sourcesPerRow),
        to = rep(targetNodes, times = sourcesPerRow),
        source = flatSources,
        database = flatSources)
}

In [6]:
# Load the `LR_database` dataset into the workspace.
# NOTE(review): presumably shipped with one of the packages attached above --
# confirm which one.
data(LR_database)

In [7]:
# Work on a copy named `lr`; the loaded `LR_database` object itself is not
# modified by the steps that follow.
lr <- LR_database

In [8]:
# Reduce the ligand-receptor table to the columns needed downstream and switch
# to the generic `from`/`to` edge naming. The dplyr verbs are
# namespace-qualified so that another attached package masking `select()`,
# `rename()` or `filter()` cannot change the result.
lr <- lr %>%
    dplyr::select(from = Ligand, to = Receptor, sources) %>%
    # Drop self-interactions (ligand identical to receptor).
    dplyr::filter(from != to) %>%
    dplyr::distinct()

In [9]:
# Expand the ligand-receptor table to one row per individual source and drop
# duplicated rows.
lr_Network_Omnipath <- dplyr::distinct(
    interactionFormatTransf(lr, InteractionType = "LigrecExtra"))

In [10]:
# saveRDS() does not create missing directories, so make sure the output
# folder exists before writing (a no-op when it is already there).
dir.create("NNET_Custom", showWarnings = FALSE, recursive = TRUE)
saveRDS(lr_Network_Omnipath, 
    "NNET_Custom/lig_rec_sources.rds")

In [13]:
## Next, fetch the protein-protein (post-translational) interactions from the
## different datasets available in Omnipath, leaving out "ligrecextra".
## Only the gene-symbol endpoints (renamed to from/to) and the source
## annotation are kept; self-interactions and duplicated rows are dropped.
AllInteractions <- 
    import_post_translational_interactions(exclude = "ligrecextra") %>% 
    dplyr::select(from = source_genesymbol, to = target_genesymbol, sources) %>% 
    dplyr::filter(from != to) %>% 
    dplyr::distinct() 

In [None]:
# Distinct values of the `from` column of the ligand-receptor table.
ligands <- unique(lr[["from"]])

In [None]:
# sig_Network_Omnipath <- sig_Network_Omnipath_raw

In [None]:
# ## Original Omnipath interactions
# sig_Network_Omnipath <- 
#     interactionFormatTransf(AllInteractions, InteractionType="Signalling") %>%
#     dplyr::distinct() 

# ## I have to remove self-interactions in the signaling network
# sig_Network_Omnipath <- sig_Network_Omnipath %>% 
#     dplyr::filter(from != to)

# # ## I also have to remove interactions going to ligands. See Methods Nichenet 
# # ## paper
# # sig_Network_Omnipath <- sig_Network_Omnipath %>% 
# #     dplyr::filter(!(to %in% ligands))

# ## There are in addition some records containing not input gene, we remove them
# ## since they are giving problems with running the model.
# sig_Network_Omnipath <- sig_Network_Omnipath %>% 
#     dplyr::filter(from != "") %>% 
#     dplyr::filter(to != "")


# ## We also remove signaling interactions that are already in the lig-receptor 
# ## network. 
# sig_Network_Omnipath <- dplyr::anti_join(
#   sig_Network_Omnipath, 
#   lr_Network_Omnipath, 
#   by = c("from" = "from", "to" = "to"))

# nrow(sig_Network_Omnipath)

In [None]:
## Signalling network built from the original Omnipath interactions: one row
## per individual source, duplicated rows removed, then filtered in one pass.
sig_Network_Omnipath <- AllInteractions %>%
    interactionFormatTransf(InteractionType = "Signalling") %>%
    dplyr::distinct() %>%
    dplyr::filter(
        ## No self-interactions in the signalling network.
        from != to,
        ## No interactions going to ligands (see the Methods section of the
        ## NicheNet paper).
        !(to %in% ligands),
        ## No records with an empty gene symbol on either side; they cause
        ## problems when running the model.
        from != "",
        to != "") %>%
    ## No interactions that are already part of the ligand-receptor network.
    dplyr::anti_join(lr_Network_Omnipath, by = c("from", "to"))

nrow(sig_Network_Omnipath)

In [None]:
# NOTE(review): scratch comparison of two hard-coded numbers (evaluates to
# FALSE); presumably network row counts from two different runs -- confirm or
# remove.
202063 == 163533

In [None]:
# Write the filtered signalling network to disk as an .rds file.
saveRDS(object = sig_Network_Omnipath,
    file = "NNET_Custom/sig_Network.rds")

In [None]:
## DoRothEA interactions of levels A, B and C, reduced to the gene-symbol
## endpoints (renamed to from/to) and the source annotation. The dplyr verbs are
## namespace-qualified so that another attached package masking `select()`,
## `rename()` or `filter()` cannot change the result.
gr_Interactions_Omnipath <- 
    import_dorothea_interactions(dorothea_levels = c("A","B","C")) %>%  
    dplyr::select(from = source_genesymbol, to = target_genesymbol, sources) %>%
    ## Drop self-interactions.
    dplyr::filter(from != to) %>%
    dplyr::distinct()  

## Expand to one row per individual source and drop duplicated rows.
gr_Network_Omnipath <- 
    interactionFormatTransf(
        gr_Interactions_Omnipath, 
        InteractionType="Dorothea") %>%
    dplyr::distinct() 
nrow(gr_Network_Omnipath)
## Recorded output, presumably from an earlier run: [1] 113897

In [None]:
# NOTE(review): bare number left in a scratch cell; presumably a row count
# noted during an earlier run -- confirm or remove.
82767

In [None]:
# Write the gene-regulatory network to disk as an .rds file.
saveRDS(object = gr_Network_Omnipath,
    file = "NNET_Custom/gr_Network.rds")

In [None]:
# Download the serialized expression settings from Zenodo and read them in.
expression_settings_validation <-
    "https://zenodo.org/record/8010790/files/expression_settings" %>%
    url() %>%
    readRDS()

# Disabled: drop every setting whose `from` entries are all "IFNA1".
# index <- which(!unlist(lapply(expression_settings_validation, 
#     function(x) any(x$from != "IFNA1"))))

# expression_settings_validation <- expression_settings_validation[-index]

The process described above requires an internet connection to retrieve data. However, our compute nodes on the server do not have an internet connection. Therefore, it is necessary to run the initial steps locally, where we have access to the internet.

Once these initial steps are completed, we can export the variables in a format that can be easily transferred to the server. For example, exporting the variables in .rds format ensures compatibility across different environments.

After exporting the variables, they can be transferred to the server and loaded for further processing. Since the subsequent steps are compute-intensive, running them on the server's dedicated compute nodes can take advantage of their higher processing power and resources.

In summary, the initial steps requiring an internet connection should be run locally, or on a server where internet access is available. The variables can then be exported and transferred to the server for subsequent compute-intensive steps. This approach allows us to leverage the server's resources while ensuring the availability of necessary data.

In [None]:
# Print the names of the objects currently defined in the workspace.
ls()

In [None]:
# Bundle the expression settings and the three networks into one .RData file.
save(expression_settings_validation,
     lr_Network_Omnipath,
     sig_Network_Omnipath,
     gr_Network_Omnipath,
     file = "myvariables.RData")

In [None]:
# load("myvariables.RData")

In [None]:
# Every data source that appears in any of the three networks, listed once.
All_sources <- c(
    lr_Network_Omnipath$source,
    sig_Network_Omnipath$source,
    gr_Network_Omnipath$source) %>%
    unique()

# One row per data source, each with the same weight of 1.
my_source_weights_df <- tibble(
    source = All_sources,
    weight = rep_len(1, length(All_sources)))

# Fixed arguments handed to the objective function on every evaluation: the
# data source names, the three networks, the converted expression settings and
# the model options.
additional_arguments_topology_correction <- list(
    source_names = unique(my_source_weights_df$source),
    algorithm = "PPR",
    correct_topology = FALSE,
    lr_network = lr_Network_Omnipath,
    sig_network = sig_Network_Omnipath,
    gr_network = gr_Network_Omnipath,
    settings = lapply(
        expression_settings_validation,
        convert_expression_settings_evaluation),
    secondary_targets = FALSE,
    remove_direct_links = "no",
    cutoff_method = "quantile")

# Number of data sources; used as the length of the source-weight vector.
nr_datasources <- length(additional_arguments_topology_correction$source_names)

# Multi-objective function for the optimizer: four objectives, none of them
# minimized, evaluated by `model_evaluation_optimization`. The parameter set
# holds one weight per data source plus four scalar hyperparameters.
obj_fun_multi_topology_correction <- makeMultiObjectiveFunction(
    name = "nichenet_optimization",
    description = "data source weight and hyperparameter optimization: expensive black-box function",
    fn = model_evaluation_optimization,
    par.set = makeParamSet(
        # One weight in [0, 1] per data source.
        makeNumericVectorParam(
            id = "source_weights", len = nr_datasources,
            lower = 0, upper = 1, tunable = FALSE),
        makeNumericVectorParam(
            id = "lr_sig_hub", len = 1,
            lower = 0, upper = 1, tunable = TRUE),
        makeNumericVectorParam(
            id = "gr_hub", len = 1,
            lower = 0, upper = 1, tunable = TRUE),
        makeNumericVectorParam(
            id = "ltf_cutoff", len = 1,
            lower = 0.9, upper = 0.999, tunable = TRUE),
        makeNumericVectorParam(
            id = "damping_factor", len = 1,
            lower = 0.01, upper = 0.99, tunable = TRUE)),
    has.simple.signature = FALSE,
    n.objectives = 4,
    noisy = FALSE,
    minimize = rep(FALSE, 4))

# The objective is described as an expensive black-box function, and
# saveRDS() does not create missing directories. Make sure the output folder
# exists *before* starting, so the results cannot be lost to a failed write
# at the very end.
dir.create("NNET_Custom", showWarnings = FALSE, recursive = TRUE)

# Single optimization run (lapply over 1), so the result is a one-element list.
optimization_results <- 
    lapply(1, mlrmbo_optimization, obj_fun = obj_fun_multi_topology_correction, 
           niter = 8, ncores = 8, nstart = 160, 
           additional_arguments = additional_arguments_topology_correction)

saveRDS(optimization_results, "NNET_Custom/Optimization_results.rds")

[mbo] 0: source_weights=0.151,0.166,...; lr_sig_hub=0.656; gr_hub=0.423; ltf_cutoff=0.969; damping_factor=0.919 : y_1 = 0.6, y_2 = 0.033, y_3 = 0.69, y_4 = 0.0189 : 581.7 secs : initdesign

[mbo] 0: source_weights=0.174,0.285,...; lr_sig_hub=0.0992; gr_hub=0.799; ltf_cutoff=0.915; damping_factor=0.943 : y_1 = 0.608, y_2 = 0.0175, y_3 = 0.76, y_4 = 0.0221 : 672.0 secs : initdesign

[mbo] 0: source_weights=0.952,0.0177...; lr_sig_hub=0.615; gr_hub=0.0845; ltf_cutoff=0.946; damping_factor=0.0838 : y_1 = 0.596, y_2 = 0.0224, y_3 = 0.78, y_4 = 0.026 : 517.3 secs : initdesign

[mbo] 0: source_weights=0.635,0.544,...; lr_sig_hub=0.867; gr_hub=0.418; ltf_cutoff=0.959; damping_factor=0.112 : y_1 = 0.591, y_2 = 0.0221, y_3 = 0.72, y_4 = 0.0258 : 517.1 secs : initdesign

[mbo] 0: source_weights=0.613,0.54,0...; lr_sig_hub=0.843; gr_hub=0.111; ltf_cutoff=0.955; damping_factor=0.539 : y_1 = 0.597, y_2 = 0.0297, y_3 = 0.75, y_4 = 0.0258 : 510.9 secs : initdesign

[mbo] 0: source_weights=0.592,0.904,

[mbo] 0: source_weights=0.691,0.317,...; lr_sig_hub=0.56; gr_hub=0.373; ltf_cutoff=0.921; damping_factor=0.505 : y_1 = 0.603, y_2 = 0.0278, y_3 = 0.8, y_4 = 0.0412 : 515.6 secs : initdesign

[mbo] 0: source_weights=0.356,0.312,...; lr_sig_hub=0.112; gr_hub=0.0627; ltf_cutoff=0.954; damping_factor=0.463 : y_1 = 0.609, y_2 = 0.0283, y_3 = 0.765, y_4 = 0.0518 : 445.5 secs : initdesign

[mbo] 0: source_weights=0.217,0.813,...; lr_sig_hub=0.732; gr_hub=0.53; ltf_cutoff=0.973; damping_factor=0.34 : y_1 = 0.588, y_2 = 0.0222, y_3 = 0.72, y_4 = 0.0221 : 555.9 secs : initdesign

[mbo] 0: source_weights=0.279,0.0321...; lr_sig_hub=0.784; gr_hub=0.324; ltf_cutoff=0.975; damping_factor=0.422 : y_1 = 0.591, y_2 = 0.0331, y_3 = 0.776, y_4 = 0.0258 : 628.4 secs : initdesign

[mbo] 0: source_weights=0.77,0.999,0...; lr_sig_hub=0.673; gr_hub=0.469; ltf_cutoff=0.977; damping_factor=0.763 : y_1 = 0.592, y_2 = 0.0284, y_3 = 0.72, y_4 = 0.0189 : 605.6 secs : initdesign

[mbo] 0: source_weights=0.561,0.208,

[mbo] 2: source_weights=0.6,0.552,0....; lr_sig_hub=0.882; gr_hub=0.276; ltf_cutoff=0.962; damping_factor=0.686 : y_1 = 0.599, y_2 = 0.0314, y_3 = 0.78, y_4 = 0.0304 : 622.3 secs : infill_dib

[mbo] 2: source_weights=0.16,0.0743,...; lr_sig_hub=0.091; gr_hub=0.581; ltf_cutoff=0.956; damping_factor=0.347 : y_1 = 0.602, y_2 = 0.0236, y_3 = 0.78, y_4 = 0.0304 : 545.8 secs : infill_dib

[mbo] 2: source_weights=0.0667,0.698...; lr_sig_hub=0.39; gr_hub=0.25; ltf_cutoff=0.956; damping_factor=0.692 : y_1 = 0.61, y_2 = 0.0312, y_3 = 0.72, y_4 = 0.0246 : 507.4 secs : infill_dib

[mbo] 2: source_weights=0.503,0.354,...; lr_sig_hub=0.518; gr_hub=0.178; ltf_cutoff=0.911; damping_factor=0.801 : y_1 = 0.608, y_2 = 0.0285, y_3 = 0.8, y_4 = 0.0429 : 757.9 secs : infill_dib

[mbo] 2: source_weights=0.649,0.72,0...; lr_sig_hub=0.126; gr_hub=0.772; ltf_cutoff=0.94; damping_factor=0.631 : y_1 = 0.6, y_2 = 0.0161, y_3 = 0.73, y_4 = 0.0189 : 663.2 secs : infill_dib

[mbo] 2: source_weights=0.82,0.161,0...; l

[mbo] 7: source_weights=0.939,0.706,...; lr_sig_hub=0.845; gr_hub=0.101; ltf_cutoff=0.924; damping_factor=0.96 : y_1 = 0.609, y_2 = 0.031, y_3 = 0.82, y_4 = 0.0359 : 524.6 secs : infill_dib

[mbo] 7: source_weights=0.85,0.641,0...; lr_sig_hub=0.1; gr_hub=0.778; ltf_cutoff=0.988; damping_factor=0.316 : y_1 = 0.585, y_2 = 0.0217, y_3 = 0.737, y_4 = 0.0258 : 518.6 secs : infill_dib

[mbo] 7: source_weights=0.547,0.561,...; lr_sig_hub=0.215; gr_hub=0.875; ltf_cutoff=0.968; damping_factor=0.736 : y_1 = 0.602, y_2 = 0.0188, y_3 = 0.72, y_4 = 0.0161 : 586.5 secs : infill_dib

[mbo] 7: source_weights=0.546,0.252,...; lr_sig_hub=0.453; gr_hub=0.448; ltf_cutoff=0.929; damping_factor=0.861 : y_1 = 0.614, y_2 = 0.0329, y_3 = 0.684, y_4 = 0.0221 : 660.9 secs : infill_dib

[mbo] 7: source_weights=0.238,0.421,...; lr_sig_hub=0.928; gr_hub=0.881; ltf_cutoff=0.94; damping_factor=0.338 : y_1 = 0.59, y_2 = 0.0199, y_3 = 0.7, y_4 = 0.0179 : 825.3 secs : infill_dib

Mapping in parallel: mode = multicore; l