In [1]:
### Script to compare the results from different MOFA runs

#############################################
# Prerequisites - Load Libraries

In [2]:
source('MS1_Functions.r')

In [3]:
### Inform about execution start
popup_function_pos('07_Compare_Models: Execution Started')

In [4]:
source('MS0_Libraries.r')

[1] "/opt/conda/envs/mofa_analysis/lib/R/library"



Attaching package: 'SeuratObject'


The following objects are masked from 'package:base':

    intersect, t



Attaching package: 'dplyr'


The following objects are masked from 'package:data.table':

    between, first, last


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


-- [1mAttaching core tidyverse packages[22m -------------------------------------------- tidyverse 2.0.0 --
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mreadr    [39m 2.1.4
[32mv[39m [34mggplot2  [39m 3.5.1     [32mv[39m [34mtibble   [39m 3.2.1
[32mv[39m [34mlubridate[39m 1.9.3     [32mv[39m [34mtidyr    [39m 1.3.1
[32mv[39m [34mpurrr    [39m 1.0.2     
-- [1mConflicts[22m -------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mbetween()[39m     masks [34mdata.table[39m::between()
[31mx

In [5]:
source('MS2_Plot_Config.r')

"[1m[22mThe `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
[36mi[39m Please use the `linewidth` argument instead."


###############################################
# Prerequisites: Configurations & Parameters

In [76]:
### Load the parameters that are set via the configuration files

In [77]:
### Read the global configuration table (one row per parameter/value pair)
global_configs <- read.csv(file = 'configurations/Data_Configs.csv', sep = ',')

“incomplete final line found by readTableHeader on 'configurations/Data_Configs.csv'”


In [78]:
head(global_configs,2)

Unnamed: 0_level_0,parameter,value
Unnamed: 0_level_1,<chr>,<chr>
1,data_path,/lustre/groups/epigenereg01/workspace/projects/jove/input_data/
2,result_path,/lustre/groups/epigenereg01/workspace/projects/jove/results/


In [79]:
# Value of the 'data_path' entry in the global configuration table
data_path <- with(global_configs, value[parameter == 'data_path'])

In [80]:
data_path

In [81]:
# Value of the 'result_path' entry in the global configuration table.
# Every result file of this script is read from or written below this path,
# so fail early with a clear message when the entry is missing or ambiguous.
result_path <- global_configs$value[global_configs$parameter == 'result_path']
if (length(result_path) != 1 || is.na(result_path)) {
    stop("configurations/Data_Configs.csv must contain exactly one 'result_path' entry", call. = FALSE)
}

In [82]:
result_path

In [83]:
### Load the configuration file that specifies which MOFA models should be compared

In [84]:
# One row per MOFA run to compare: its result name and the comma-separated factors to compare
comparison_configs <- read.csv(file = 'configurations/07_Comparison_Configs.csv', sep = ',')

“incomplete final line found by readTableHeader on 'configurations/07_Comparison_Configs.csv'”


In [85]:
head(comparison_configs,2)

Unnamed: 0_level_0,mofa_result_name,compare_factors
Unnamed: 0_level_1,<chr>,<chr>
1,MI_v1_MOFA,"Factor1,Factor2,Factor3,Factor4,Factor5"
2,MI_v1_MOFA_10,"Factor1,Factor2,Factor3,Factor4,Factor5"


In [86]:
### Generate the result data directory if it does not exist yet
# dir.exists() only matches directories (file.exists() is also TRUE for a plain
# file with that name); recursive = TRUE also creates missing parent folders.
results_dir <- paste0(result_path, '07_results')
if (!dir.exists(results_dir)) {
    dir.create(results_dir, recursive = TRUE)
}

# The figures of this script are written to 'figures/07_figures/';
# pdf() cannot open a file in a folder that does not exist.
figures_dir <- file.path('figures', '07_figures')
if (!dir.exists(figures_dir)) {
    dir.create(figures_dir, recursive = TRUE)
}

# Load Data 

## MOFA Models

In [87]:
### Load the trained MOFA Models

In [88]:
model_list = list()

In [89]:
comparison_configs$mofa_result_name

In [90]:
# Load one trained MOFA model per configured result name into model_list
# (list names = mofa_result_name).
for (result_name in as.character(comparison_configs$mofa_result_name)) {
    # Expected location: <result_path>/03_results/03_MOFA_MODEL_<result name>.hdf5
    # (the result name may carry the number of factors, e.g. 'MI_v1_MOFA_10')
    model_file <- paste0(result_path, '/03_results/', '03_MOFA_MODEL_', result_name, '.hdf5')

    # Fail with the offending path instead of an opaque HDF5 read error
    if (!file.exists(model_file)) {
        stop('MOFA model file not found for "', result_name, '": ', model_file, call. = FALSE)
    }

    model_list[[result_name]] <- load_model(model_file, verbose = TRUE)
}

Loading data...

Loading expectations for 2 nodes...

Loading model options...

Loading training options and statistics...

Assigning names to the different dimensions...

Re-ordering factors by their variance explained...

Doing quality control...

Checking views names...

Checking groups names...

Checking samples names...

Checking features names...

Checking dimensions...

Checking there are no features with complete missing values...

Checking sample covariates...

Checking expectations...

Checking for intercept factors...

“Factor(s) 5, 9, 18 are strongly correlated with the total number of expressed features for at least one of your omics. Such factors appear when there are differences in the total 'levels' between your samples, *sometimes* because of poor normalisation in the preprocessing steps.
”
Checking for highly correlated factors...

Loading data...

Loading expectations for 2 nodes...

Loading model options...

Loading training options and statistics...

Assigning name

In [91]:
names(model_list)

In [92]:
#str(model_list)

In [93]:
print(length(model_list))

[1] 4


# Downstream Analysis of generated models

## Compare total explained variance

In [94]:
### Compare the explained variance per view between the models and plot the figure

In [95]:
# Per model: mean of the total explained variance values of the first group
lapply(model_list, function(model) {
    r2_first_group <- model@cache$variance_explained$r2_total[[1]]
    mean(r2_first_group)
})
### 20 Factor model used: 35.7461964551804

In [96]:
### compare explained variance per view

In [97]:
# Per model: the cached total explained variance (r2_total), kept as stored in the model
var_explained_per_view <- lapply(model_list, function(model) {
    variance_cache <- model@cache[["variance_explained"]]
    variance_cache$r2_total
})

In [98]:
# Build one long table (columns: data, view, configuration) with the total
# explained variance of every view of group 'group1' for every compared model.
#
# The value column is named 'data' explicitly because the plotting code maps
# y = data; before, that name only arose from as.data.frame() deparsing the
# name of the loop variable. The per-model tables are bound once instead of
# growing the result with rbind() in every iteration.
# rev() keeps the previous row order (last configured model first).
var_explained_tables <- lapply(rev(names(var_explained_per_view)), function(config_name) {
    r2_by_view <- var_explained_per_view[[config_name]][['group1']]
    if (is.null(r2_by_view)) {
        stop('No explained variance for group "group1" in model "', config_name, '"', call. = FALSE)
    }

    view_names <- names(r2_by_view)
    if (is.null(view_names)) {
        view_names <- as.character(seq_along(r2_by_view))
    }
    # strip blanks and dots from the view labels
    view_names <- str_replace_all(view_names, ' ', '')
    view_names <- str_replace_all(view_names, '\\.', '')

    data.frame(
        data = as.numeric(r2_by_view),
        view = view_names,
        configuration = config_name,
        stringsAsFactors = FALSE
    )
})

var_explained_per_view_data <- if (length(var_explained_tables) > 0) {
    do.call(rbind, var_explained_tables)
} else {
    data.frame()
}

In [99]:
head(var_explained_per_view_data,2)

Unnamed: 0_level_0,data,view,configuration
Unnamed: 0_level_1,<dbl>,<chr>,<chr>
B.cell,40.33109,Bcell,MI_v1_MOFA_25
CD14.Mono,63.11763,CD14Mono,MI_v1_MOFA_25


In [100]:
unique(var_explained_per_view_data$configuration)

In [101]:
# Base name (without extension) of the exported figure
figure_name <- "FIG07_Variance_Model_Comparison"

In [102]:
# Width and height passed to pdf() for the exported figure
width_par <- 6
height_par <- 4

In [103]:
# Horizontal grouped bar chart: total explained variance per view, one bar per
# model configuration.
# Removed: the unused `order` aesthetic and the first ylab(), which was always
# overridden by the second one. geom_col() is geom_bar(stat = "identity").
plot <- ggplot(var_explained_per_view_data, aes(x = view, y = data, fill = configuration)) +
    geom_col(position = "dodge") +
    plot_config +
    scale_fill_okabe_ito() +
    coord_flip() +
    ylab('Total percentage of explained variance')
# TBD: improve coloring + selection

#plot
# ggsave() opens and closes the PDF device itself, so no device is left open
# when rendering the plot fails.
ggsave(
    filename = paste0('figures/07_figures/', figure_name, '.pdf'),
    plot = plot,
    device = 'pdf',
    width = width_par,
    height = height_par
)

## Compare factor weights of samples

In [104]:
## Compare the sample factor weights by correlating them (it is necessary that the factors match between the different models)

In [105]:
# Per model: factor values of group 'group1' for all factors
factor_weights <- lapply(model_list, function(model) {
    factors_by_group <- get_factors(model, factors = 'all')
    factors_by_group$group1
})

In [106]:
# Collect the sample factor values of all models in one long table
# (columns: configuration, sample, variable = factor name, value), keeping per
# model only the factors listed in comparison_configs$compare_factors.
# The per-model tables are bound once instead of growing the result with
# rbind() in every iteration; rev() keeps the previous row order
# (last configured model first).
factor_weight_tables <- lapply(rev(names(factor_weights)), function(config_name) {
    # comma-separated factor names configured for this model; trimmed so that
    # entries written as "Factor1, Factor2" still match
    configured_factors <- comparison_configs$compare_factors[comparison_configs$mofa_result_name == config_name]
    selected_factors <- trimws(unlist(str_split(configured_factors, ',')))

    factor_table <- as.data.frame(factor_weights[[config_name]])

    # Requested factors that the model does not have would otherwise be dropped silently
    missing_factors <- selected_factors[!(selected_factors %in% colnames(factor_table))]
    if (length(missing_factors) > 0) {
        warning('Model "', config_name, '" has no factor(s): ',
                paste(missing_factors, collapse = ', '), call. = FALSE)
    }

    factor_table$configuration <- config_name
    factor_table$sample <- rownames(factor_table)

    # explicit id.vars: every remaining (factor) column is melted into variable/value
    long_table <- melt(factor_table, id.vars = c('configuration', 'sample'))
    ## filter on relevant factors
    long_table[long_table$variable %in% selected_factors, ]
})

factor_weight_data <- if (length(factor_weight_tables) > 0) {
    do.call(rbind, factor_weight_tables)
} else {
    data.frame()
}

Using configuration, sample as id variables

Using configuration, sample as id variables

Using configuration, sample as id variables

Using configuration, sample as id variables



In [107]:
# Label every row as '<factor>_<model configuration>'; these labels become the correlation columns
factor_weight_data$configuration <- paste(
    factor_weight_data$variable,
    factor_weight_data$configuration,
    sep = '_'
)

In [108]:
head(factor_weight_data,2)

Unnamed: 0_level_0,configuration,sample,variable,value
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<dbl>
1,Factor1_MI_v1_MOFA_25,k1,Factor1,0.3985386
2,Factor1_MI_v1_MOFA_25,k10,Factor1,0.7256701


In [109]:
unique(factor_weight_data$variable)

In [110]:
# Wide table: one row per sample, one column per '<factor>_<configuration>' label
cor_data <- dcast(factor_weight_data, sample ~ configuration, value.var = 'value')

In [111]:
head(cor_data,2)

Unnamed: 0_level_0,sample,Factor1_MI_v1_MOFA,Factor1_MI_v1_MOFA_10,Factor1_MI_v1_MOFA_15,Factor1_MI_v1_MOFA_25,Factor2_MI_v1_MOFA,Factor2_MI_v1_MOFA_10,Factor2_MI_v1_MOFA_15,Factor2_MI_v1_MOFA_25,Factor3_MI_v1_MOFA,⋯,Factor3_MI_v1_MOFA_15,Factor3_MI_v1_MOFA_25,Factor4_MI_v1_MOFA,Factor4_MI_v1_MOFA_10,Factor4_MI_v1_MOFA_15,Factor4_MI_v1_MOFA_25,Factor5_MI_v1_MOFA,Factor5_MI_v1_MOFA_10,Factor5_MI_v1_MOFA_15,Factor5_MI_v1_MOFA_25
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,k1,0.1201561,1.0237531,0.005441715,0.3985386,-1.6391856,0.8016962,-2.085861,-1.031475,-0.4479716,⋯,-0.3240436,-0.2473916,0.6821012,-0.4546323,0.4506769,0.4819728,2.02569211,0.15213588,2.2563563,0.3875304
2,k10,0.6551561,0.8464392,0.598345285,0.7256701,-0.9849971,-0.9084269,-1.040898,-1.050191,0.3236092,⋯,0.304481,0.4881818,0.2551028,0.394553,0.3189836,0.3420605,-0.09817868,0.06452674,-0.1051354,-0.1249448


In [112]:
# Remove the sample id column, then correlate all remaining columns pairwise
cor_data[['sample']] <- NULL
correlation <- cor(x = cor_data, use = 'pairwise.complete.obs')

In [113]:
head(correlation,2)

Unnamed: 0,Factor1_MI_v1_MOFA,Factor1_MI_v1_MOFA_10,Factor1_MI_v1_MOFA_15,Factor1_MI_v1_MOFA_25,Factor2_MI_v1_MOFA,Factor2_MI_v1_MOFA_10,Factor2_MI_v1_MOFA_15,Factor2_MI_v1_MOFA_25,Factor3_MI_v1_MOFA,Factor3_MI_v1_MOFA_10,Factor3_MI_v1_MOFA_15,Factor3_MI_v1_MOFA_25,Factor4_MI_v1_MOFA,Factor4_MI_v1_MOFA_10,Factor4_MI_v1_MOFA_15,Factor4_MI_v1_MOFA_25,Factor5_MI_v1_MOFA,Factor5_MI_v1_MOFA_10,Factor5_MI_v1_MOFA_15,Factor5_MI_v1_MOFA_25
Factor1_MI_v1_MOFA,1.0,0.9917842,0.9992723,0.9976071,-0.04182525,-0.02340533,-0.05049043,-0.02899298,-0.002821845,-0.03182495,-0.003175061,0.02380544,-0.05582956,-0.10248938,-0.05283172,-0.05153494,0.0497603,-0.01926264,0.0520093,0.05587321
Factor1_MI_v1_MOFA_10,0.9917842,1.0,0.9917693,0.9946613,-0.0668267,-0.03018986,-0.07796975,-0.05027997,0.015652356,-0.01492875,0.017552735,0.04385107,-0.02073846,-0.07609096,-0.02079976,-0.01920835,0.06805772,-0.003919279,0.07214009,0.06063978


In [114]:
## Base name (without extension) of the exported figure
figure_name <- "FIG07_Factor_Correlations"

In [115]:
# Width and height passed to pdf() for the exported figure
width_par <- 8
height_par <- 5

In [116]:
options(repr.plot.width = 40, repr.plot.height = 20)
pdf(paste0('figures/07_figures/', figure_name, '.pdf'), width = width_par, height = height_par)
# corrplot() draws with base graphics, so ggplot2 components cannot be added to
# it; the former `+ plot_config + theme(...)` did not change the figure and was
# removed. tryCatch(finally = ) closes the PDF device even if plotting fails.
invisible(tryCatch(
    corrplot(correlation, method = 'number', type = 'upper', col = COL2('BrBG'),
             tl.col = 'black', tl.cex = 0.6, number.cex = 0.4, tl.offset = 0.2, diag = FALSE),
    finally = dev.off()
))

NULL

In [117]:
## Generate a correlation table and save it 

In [118]:
# Long format: one row per pair of correlation-matrix row and column
correlation <- melt(data = correlation)

In [119]:
# remove diagonal entries (pairs whose two labels are the same)
correlation <- correlation[!(correlation$Var1 == correlation$Var2), ]

In [120]:
# Descriptive column names for the exported table
names(correlation) <- c('Factor_Config1', 'Factor_Config2', 'Pearson_Correlation')

In [121]:
head(correlation ,2)

Unnamed: 0_level_0,Factor_Config1,Factor_Config2,Pearson_Correlation
Unnamed: 0_level_1,<fct>,<fct>,<dbl>
2,Factor1_MI_v1_MOFA_10,Factor1_MI_v1_MOFA,0.9917842
3,Factor1_MI_v1_MOFA_15,Factor1_MI_v1_MOFA,0.9992723


In [122]:
## Write the pairwise factor correlation table to the 07 results folder
write.csv(
    x = correlation,
    file = paste0(result_path, '/07_results/07_Factor_Correlations', '.csv'),
    row.names = FALSE
)

## Compare feature weights

In [123]:
## Compare the feature factor weights by correlating them (it is necessary that the features match between the different models)

In [124]:
# Per model: feature weights of all views and all factors
weights <- lapply(model_list, function(model) {
    get_weights(model, views = "all", factors = "all")
})

In [125]:
weight_data = data.frame()

In [126]:
# Collect the feature weights of every model and view in one long table
# (columns: view, configuration, feature, variable = factor name, value),
# keeping per model only the factors listed in comparison_configs$compare_factors.
# The pieces are gathered in a list and bound once, instead of re-copying the
# growing table with rbind() in every iteration of the nested loop.
weight_tables <- list()

for (config_name in names(weights)) {
    # comma-separated factor names configured for this model; trimmed so that
    # entries written as "Factor1, Factor2" still match
    configured_factors <- comparison_configs$compare_factors[comparison_configs$mofa_result_name == config_name]
    selected_factors <- trimws(unlist(str_split(configured_factors, ',')))

    for (view_name in names(weights[[config_name]])) {
        view_weights <- data.frame(weights[[config_name]][[view_name]])
        view_weights$view <- view_name
        view_weights$configuration <- config_name
        view_weights$feature <- rownames(view_weights)

        # explicit id.vars: every remaining (factor) column is melted into variable/value
        view_weights <- melt(view_weights, id.vars = c('view', 'configuration', 'feature'))

        # filter on relevant factors for comparison
        weight_tables[[length(weight_tables) + 1]] <- view_weights[view_weights$variable %in% selected_factors, ]
    }
}

weight_data <- if (length(weight_tables) > 0) {
    do.call(rbind, weight_tables)
} else {
    data.frame()
}

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, configuration, feature as id variables

Using view, 

In [127]:
head(weight_data,2)

Unnamed: 0_level_0,view,configuration,feature,variable,value
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<fct>,<dbl>
1,B.cell,MI_v1_MOFA,B.cell__ACTB,Factor1,-0.18622572
2,B.cell,MI_v1_MOFA,B.cell__ACTG1,Factor1,-0.05929283


In [128]:
# Label every row as '<factor>_<model configuration>'; these labels become the correlation columns
weight_data$configuration <- paste(weight_data$variable, weight_data$configuration, sep = '_')
# Row key '<view>__<feature>' with blanks and dots removed
weight_data$view_feature <- str_replace_all(
    str_replace_all(paste0(weight_data$view, '__', weight_data$feature), ' ', ''),
    '\\.', ''
)

In [129]:
head(weight_data,2)

Unnamed: 0_level_0,view,configuration,feature,variable,value,view_feature
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<fct>,<dbl>,<chr>
1,B.cell,Factor1_MI_v1_MOFA,B.cell__ACTB,Factor1,-0.18622572,Bcell__Bcell__ACTB
2,B.cell,Factor1_MI_v1_MOFA,B.cell__ACTG1,Factor1,-0.05929283,Bcell__Bcell__ACTG1


In [130]:
# Wide table: one row per view/feature key, one column per '<factor>_<configuration>' label
cor_data <- dcast(weight_data, view_feature ~ configuration, value.var = 'value')

In [131]:
head(cor_data,2)

Unnamed: 0_level_0,view_feature,Factor1_MI_v1_MOFA,Factor1_MI_v1_MOFA_10,Factor1_MI_v1_MOFA_15,Factor1_MI_v1_MOFA_25,Factor2_MI_v1_MOFA,Factor2_MI_v1_MOFA_10,Factor2_MI_v1_MOFA_15,Factor2_MI_v1_MOFA_25,Factor3_MI_v1_MOFA,⋯,Factor3_MI_v1_MOFA_15,Factor3_MI_v1_MOFA_25,Factor4_MI_v1_MOFA,Factor4_MI_v1_MOFA_10,Factor4_MI_v1_MOFA_15,Factor4_MI_v1_MOFA_25,Factor5_MI_v1_MOFA,Factor5_MI_v1_MOFA_10,Factor5_MI_v1_MOFA_15,Factor5_MI_v1_MOFA_25
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Bcell__Bcell__ACTB,-0.18622572,-0.14889293,-0.17715216,-0.1687454,0.2532893,0.28560919,0.2579995,0.2543167,0.1664119,⋯,0.1274168,0.1784684,-0.052115239,-0.08084082,-0.073455153,-0.05960435,-0.128509,0.03363583,-0.07004255,-0.1827076
2,Bcell__Bcell__ACTG1,-0.05929283,-0.02043719,-0.05333556,-0.0464,-0.0210286,0.01072397,-0.0209563,-0.0225373,0.129417,⋯,0.1283897,0.1553799,-0.006415541,-0.01441073,0.001627082,0.01020229,-0.1012587,-7.926349e-05,-0.06687017,-0.1193687


In [132]:
# Remove the key column, then correlate all remaining columns pairwise
cor_data[['view_feature']] <- NULL
correlation <- cor(x = cor_data, use = 'pairwise.complete.obs')

In [133]:
head(correlation,2)

Unnamed: 0,Factor1_MI_v1_MOFA,Factor1_MI_v1_MOFA_10,Factor1_MI_v1_MOFA_15,Factor1_MI_v1_MOFA_25,Factor2_MI_v1_MOFA,Factor2_MI_v1_MOFA_10,Factor2_MI_v1_MOFA_15,Factor2_MI_v1_MOFA_25,Factor3_MI_v1_MOFA,Factor3_MI_v1_MOFA_10,Factor3_MI_v1_MOFA_15,Factor3_MI_v1_MOFA_25,Factor4_MI_v1_MOFA,Factor4_MI_v1_MOFA_10,Factor4_MI_v1_MOFA_15,Factor4_MI_v1_MOFA_25,Factor5_MI_v1_MOFA,Factor5_MI_v1_MOFA_10,Factor5_MI_v1_MOFA_15,Factor5_MI_v1_MOFA_25
Factor1_MI_v1_MOFA,1.0,0.9951273,0.9996448,0.9982365,-0.04343067,-0.02189787,-0.05714933,-0.02801734,-0.04015491,-0.07291318,-0.04537929,-0.015137841,-0.06078813,-0.11835754,-0.06902499,-0.06229357,0.02361604,-0.04000875,0.03371613,0.02534697
Factor1_MI_v1_MOFA_10,0.9951273,1.0,0.9954975,0.9970414,-0.05843081,-0.03199902,-0.07216818,-0.0422653,-0.02925894,-0.0574544,-0.03375204,-0.003197549,-0.0255085,-0.08166777,-0.03528404,-0.02823617,0.02681204,-0.05048719,0.03692064,0.02795384


In [134]:
# Base name (without extension) of the exported figure
figure_name <- "FIG07_Feature_Correlations"

In [135]:
# Width and height passed to pdf() for the exported figure
width_par <- 8
height_par <- 5

In [136]:
options(repr.plot.width = 40, repr.plot.height = 20)
pdf(paste0('figures/07_figures/', figure_name, '.pdf'), width = width_par, height = height_par)
# corrplot() draws with base graphics, so ggplot2 components cannot be added to
# it; the former `+ plot_config + theme(...)` did not change the figure and was
# removed. tryCatch(finally = ) closes the PDF device even if plotting fails.
invisible(tryCatch(
    corrplot(correlation, method = 'number', type = 'lower', col = COL2('BrBG'),
             tl.col = 'black', tl.cex = 0.6, tl.srt = 90, number.cex = 0.4,
             tl.offset = 0.2, diag = FALSE),
    finally = dev.off()
))

NULL

In [137]:
## Generate Table

In [138]:
# Long format: one row per pair of correlation-matrix row and column
correlation <- melt(data = correlation)

In [139]:
# remove diagonal entries (pairs whose two labels are the same)
correlation <- correlation[!(correlation$Var1 == correlation$Var2), ]

In [140]:
# Descriptive column names for the exported table
names(correlation) <- c('Factor_Config1', 'Factor_Config2', 'Pearson_Correlation')

In [141]:
head(correlation ,2)

Unnamed: 0_level_0,Factor_Config1,Factor_Config2,Pearson_Correlation
Unnamed: 0_level_1,<fct>,<fct>,<dbl>
2,Factor1_MI_v1_MOFA_10,Factor1_MI_v1_MOFA,0.9951273
3,Factor1_MI_v1_MOFA_15,Factor1_MI_v1_MOFA,0.9996448


In [142]:
## Write the pairwise feature-weight correlation table to the 07 results folder
write.csv(
    x = correlation,
    file = paste0(result_path, '/07_results/07_Feature_Correlations', '.csv'),
    row.names = FALSE
)

In [None]:
### Inform about execution finalization
popup_function_pos('07_Compare_Models: Execution Finished')

In [None]:
# Pause for 20 seconds, then call popup_function_info() for this script.
# NOTE(review): popup_function_info() is not defined in this file (presumably it
# comes from MS1_Functions.r) and the reason for the delay is not visible here - confirm.
Sys.sleep(20)
popup_function_info('07_Compare_Models')