In [1]:
### Evaluate the results from different configurations and compare them

#############################################
# Prerequisites - Load Libraries

In [None]:
source('MS0_Libraries.r')

“incomplete final line found by readTableHeader on '../conda_environment/Environment_Configs.csv'”


[1] "/home/icb/corinna.losert/miniconda3/envs/jove_r//lib/R/library"


‘SeuratObject’ was built with package ‘Matrix’ 1.6.3 but the current
version is 1.6.4; it is recomended that you reinstall ‘SeuratObject’ as
the ABI for ‘Matrix’ may have changed


Attaching package: ‘SeuratObject’


The following object is masked from ‘package:base’:

    intersect




In [None]:
source('MS2_Plot_Config.r')

In [None]:
source('MS1_Functions.r')

###############################################
# Prerequisites - Configurations & Parameters

In [None]:
### Load configurations file
# Global settings are stored as parameter/value rows.
global_configs <- read.csv('configurations/Data_Configs.csv', sep = ',')

In [None]:
head(global_configs,2)

In [None]:
# Input data location as given by the 'data_path' row of the global configuration
data_path <- global_configs$value[global_configs$parameter == 'data_path']

In [None]:
data_path

In [None]:
# Root folder of all result files as given by the 'result_path' row
result_path <- global_configs$value[global_configs$parameter == 'result_path']

# Fail fast: a missing or duplicated row would otherwise be pasted silently into
# paths such as "/03_results/..." (paste0 treats zero-length input as "").
if (length(result_path) != 1 || is.na(result_path) || !nzchar(as.character(result_path))) {
  stop(
    "Expected exactly one non-empty 'result_path' entry in configurations/Data_Configs.csv",
    call. = FALSE
  )
}

In [None]:
result_path

In [None]:
###

In [None]:
# One row per MOFA result that takes part in the comparison
comparison_configs <- read.csv('configurations/07_Comparison_Configs.csv', sep = ',')

# Both columns are read further down in this notebook; stop here if one is absent
# instead of continuing with an empty model list.
missing_columns <- setdiff(c('mofa_result_name', 'compare_factors'), colnames(comparison_configs))
if (length(missing_columns) > 0) {
  stop(
    "configurations/07_Comparison_Configs.csv lacks column(s): ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

In [None]:
head(comparison_configs,2)

# Load Data 

## MOFA Models

In [None]:
# Container for the loaded models, keyed by the result name of each configuration
model_list <- list()

In [None]:
comparison_configs$mofa_result_name

In [None]:
# Load the stored model of every configuration listed in the comparison config
for (result_name in comparison_configs$mofa_result_name) {
  # File name pattern: 03_MOFA_MODEL_<result name>.hdf5
  model_file <- paste0("03_MOFA_MODEL_", result_name, '.hdf5')
  model_path <- file.path(paste0(result_path, '/03_results/', model_file))
  model_list[[as.character(result_name)]] <- load_model(model_path, verbose = TRUE)
}

In [None]:
names(model_list)

In [None]:
#str(model_list)

In [None]:
print(length(model_list))

# Downstream Analysis of generated models

## Compare total explained variance

In [None]:
# Mean of the first entry of the cached `r2_total` values, per model
lapply(model_list, function(model) {
  r2_first_entry <- model@cache$variance_explained$r2_total[[1]]
  mean(r2_first_entry)
})
### 20 Factor model used: 35.7461964551804

In [None]:
### compare explained variance per view

In [None]:
# Keep the complete cached `r2_total` entry of every model for the per-view comparison
var_explained_per_view <- lapply(model_list, function(model) {
  model@cache[["variance_explained"]]$r2_total
})

In [None]:
# Long table: one row per (view, configuration) holding the `group1` variance values
var_explained_per_view_data <- data.frame()

for (config_name in names(var_explained_per_view)) {
  # NOTE(review): the variable has to stay named `data`. For a plain vector,
  # as.data.frame() names the value column after the variable, and the plot
  # further down reads that column via aes(y = data) -- confirm before renaming.
  data <- var_explained_per_view[[config_name]]$group1
  data <- as.data.frame(data)

  # View labels come from the row names, with blanks and dots stripped
  data$view <- rownames(data) %>%
    str_replace_all(' ', '') %>%
    str_replace_all('\\.', '')
  data$configuration <- config_name

  # Newest configuration goes on top of the rows collected so far
  var_explained_per_view_data <- rbind(data, var_explained_per_view_data)
}

In [None]:
head(var_explained_per_view_data, n = 2)

In [None]:
unique(var_explained_per_view_data$configuration)

In [None]:
figure_name <- "FIG07_Variance_Model_Comparison"

In [None]:
# Sizes of the plot
width_par <- 6
height_par <- 4

In [None]:
# Dodged bars: explained variance per view, one bar per configuration.
# Removed from the earlier version: a first ylab() that the final ylab() replaced
# anyway, and the `order` mapping, which is not a ggplot2 aesthetic.
plot <- ggplot(var_explained_per_view_data, aes(x = view, y = data, fill = configuration)) +
  geom_bar(position = "dodge", stat = "identity") +
  plot_config +
  scale_fill_okabe_ito() +
  coord_flip() +
  ylab('Total percentage of explained variance')
# TBD: improve coloring + selection

#plot
figure_file <- paste0('figures/07_figures/', figure_name, '.pdf')
# pdf() cannot open a file in a missing directory, so create it when needed
dir.create(dirname(figure_file), recursive = TRUE, showWarnings = FALSE)
pdf(figure_file, width = width_par, height = height_par)
# Close the device even if rendering fails, so no PDF device stays open
invisible(tryCatch(print(plot), finally = dev.off()))

## Compare factor weights of samples

In [None]:
# Factor values of `group1` for every model (rows are used as samples below)
factor_weights <- lapply(model_list, function(model) {
  get_factors(model, factors = 'all')$group1
})

In [None]:
# Long table with one row per (sample, factor, configuration), restricted to the
# factors listed in `compare_factors` for the respective configuration.
factor_weight_data <- data.frame()

for (i in names(factor_weights)) {
  data <- as.data.frame(factor_weights[[i]])
  data$configuration <- i
  data$sample <- rownames(data)
  # Name the id columns explicitly instead of letting melt() guess them
  data <- melt(data, id.vars = c('configuration', 'sample'))

  ## filter on relevant factors
  # trimws() makes entries such as "Factor1, Factor2" match as well
  selected_factors <- comparison_configs$compare_factors[comparison_configs$mofa_result_name == i]
  selected_factors <- trimws(unlist(str_split(selected_factors, ',')))
  data <- data[data$variable %in% selected_factors, ]

  if (nrow(data) == 0) {
    warning(
      "No factor of configuration '", i, "' matches its 'compare_factors' entry",
      call. = FALSE
    )
  }

  factor_weight_data <- rbind(data, factor_weight_data)
}

In [None]:
# Label every series as "<factor>_<configuration>"
factor_weight_data$configuration <- with(
  factor_weight_data,
  paste0(variable, '_', configuration)
)

In [None]:
head(factor_weight_data, n = 2)

In [None]:
unique(factor_weight_data$variable)

In [None]:
# Wide layout: one row per sample, one column per factor/configuration label
cor_data <- dcast(factor_weight_data, sample ~ configuration, value.var = 'value')

In [None]:
head(cor_data, n = 2)

In [None]:
# Drop the sample identifier so that only value columns are correlated
cor_data[["sample"]] <- NULL
correlation <- cor(cor_data, use = 'pairwise.complete.obs')

In [None]:
head(correlation, n = 2)

In [None]:
figure_name <- "FIG07_Factor_Correlations"

In [None]:
# Sizes of the plot
width_par <- 8
height_par <- 5

In [None]:
options(repr.plot.width = 40, repr.plot.height = 20)
figure_file <- paste0('figures/07_figures/', figure_name, '.pdf')
# pdf() cannot open a file in a missing directory, so create it when needed
dir.create(dirname(figure_file), recursive = TRUE, showWarnings = FALSE)
pdf(figure_file, width = width_par, height = height_par)
# corrplot() draws with base graphics, so ggplot2 objects (plot_config, theme())
# cannot be added to it; text sizes are set through the tl.cex / number.cex arguments.
# tryCatch(finally = ...) closes the PDF device even when plotting fails.
invisible(tryCatch(
  corrplot(
    correlation,
    method = 'number',
    type = 'upper',
    col = COL2('BrBG'),
    tl.col = 'black',
    tl.cex = 0.6,
    number.cex = 0.4,
    tl.offset = 0.2,
    diag = FALSE
  ),
  finally = dev.off()
))

In [None]:
## Generate Table

In [None]:
# Correlation matrix -> long format (columns Var1, Var2, value)
correlation <- melt(correlation)

In [None]:
# remove diagonal entries
is_off_diagonal <- correlation$Var1 != correlation$Var2
correlation <- correlation[is_off_diagonal, ]

In [None]:
names(correlation) <- c('Factor_Config1', 'Factor_Config2', 'Pearson_Correlation')

In [None]:
head(correlation, n = 2)

In [None]:
## Save result
factor_correlation_file <- paste0(result_path, '/07_results/07_Factor_Correlations', '.csv')
write.csv(correlation, file = factor_correlation_file, row.names = FALSE)

## Compare feature weights

In [None]:
# Feature weights of all views and factors for every model
weights <- lapply(model_list, function(model) {
  get_weights(model, views = "all", factors = "all")
})

In [None]:
# Long table with one row per (feature, view, factor, configuration)
weight_data <- data.frame()

In [None]:
for (j in names(weights)) {
  weights_factor_amount <- weights[[j]]

  # filter on relevant factors for comparison
  # Resolved once per configuration, not once per view; trimws() makes entries
  # such as "Factor1, Factor2" match as well.
  selected_factors <- comparison_configs$compare_factors[comparison_configs$mofa_result_name == j]
  selected_factors <- trimws(unlist(str_split(selected_factors, ',')))

  for (i in names(weights_factor_amount)) {
    data <- data.frame(weights_factor_amount[[i]])
    data$view <- i
    data$configuration <- j
    data$feature <- rownames(data)

    # Name the id columns explicitly instead of letting melt() guess them
    data <- melt(data, id.vars = c('view', 'configuration', 'feature'))
    data <- data[data$variable %in% selected_factors, ]

    weight_data <- rbind(weight_data, data)
  }
}

In [None]:
head(weight_data,2)

In [None]:
# Label every series as "<factor>_<configuration>" and build a per-feature key
# "<view>__<feature>" with blanks and dots stripped
weight_data$configuration <- with(weight_data, paste0(variable, '_', configuration))
weight_data$view_feature <- with(weight_data, paste0(view, '__', feature)) %>%
  str_replace_all(' ', '') %>%
  str_replace_all('\\.', '')

In [None]:
head(weight_data, n = 2)

In [None]:
# Wide layout: one row per view/feature key, one column per factor/configuration label
cor_data <- dcast(weight_data, view_feature ~ configuration, value.var = 'value')

In [None]:
head(cor_data, n = 2)

In [None]:
# Drop the key column so that only value columns are correlated
cor_data[["view_feature"]] <- NULL
correlation <- cor(cor_data, use = 'pairwise.complete.obs')

In [None]:
head(correlation, n = 2)

In [None]:
figure_name <- "FIG07_Feature_Correlations"

In [None]:
# Sizes of the plot
width_par <- 8
height_par <- 5

In [None]:
options(repr.plot.width = 40, repr.plot.height = 20)
figure_file <- paste0('figures/07_figures/', figure_name, '.pdf')
# pdf() cannot open a file in a missing directory, so create it when needed
dir.create(dirname(figure_file), recursive = TRUE, showWarnings = FALSE)
pdf(figure_file, width = width_par, height = height_par)
# corrplot() draws with base graphics, so ggplot2 objects (plot_config, theme())
# cannot be added to it; text sizes are set through the tl.cex / number.cex arguments.
# tryCatch(finally = ...) closes the PDF device even when plotting fails.
invisible(tryCatch(
  corrplot(
    correlation,
    method = 'number',
    type = 'lower',
    col = COL2('BrBG'),
    tl.col = 'black',
    tl.cex = 0.6,
    tl.srt = 90,
    number.cex = 0.4,
    tl.offset = 0.2,
    diag = FALSE
  ),
  finally = dev.off()
))

In [None]:
## Generate Table

In [None]:
# Correlation matrix -> long format (columns Var1, Var2, value)
correlation <- melt(correlation)

In [None]:
# remove diagonal entries
is_off_diagonal <- correlation$Var1 != correlation$Var2
correlation <- correlation[is_off_diagonal, ]

In [None]:
names(correlation) <- c('Factor_Config1', 'Factor_Config2', 'Pearson_Correlation')

In [None]:
head(correlation, n = 2)

In [None]:
## Save result
feature_correlation_file <- paste0(result_path, '/07_results/07_Feature_Correlations', '.csv')
write.csv(correlation, file = feature_correlation_file, row.names = FALSE)