In [587]:
### Run MOFA Model on the data generated in the previous script (02)

#############################################
# Prerequisites - Load Libraries

In [588]:
source('MS0_Libraries.r')

“incomplete final line found by readTableHeader on '../conda_environment/Environment_Configs.csv'”


[1] "/home/icb/corinna.losert/miniconda3/envs/mofa_analysis//lib/R/library"


In [589]:
source('MS1_Functions.r')

In [590]:
source('MS2_Plot_Config.r')

In [591]:
#py_config() # - To check the configuration which python package will be used for MOFA

###############################################
# Prerequisites - Configurations & Parameters

In [592]:
### Load the parameters that are set via the configuration files

In [593]:
### Load configurations file
global_configs = read.csv('configurations/Data_Configs.csv', sep = ',')

“incomplete final line found by readTableHeader on 'configurations/Data_Configs.csv'”


In [594]:
head(global_configs,2)

Unnamed: 0_level_0,parameter,value
Unnamed: 0_level_1,<chr>,<chr>
1,data_path,/lustre/groups/epigenereg01/workspace/projects/jove/example_data/
2,result_path,/lustre/groups/epigenereg01/workspace/projects/jove/example_results/


In [595]:
# Look up the input data directory in the parameter/value configuration table.
data_path = global_configs$value[global_configs$parameter == 'data_path']
# Fail early: a missing or duplicated 'data_path' row would otherwise yield a
# zero-length or multi-element path that only breaks much later.
stopifnot(length(data_path) == 1)

In [596]:
data_path

In [597]:
# Look up the result directory in the parameter/value configuration table.
result_path = global_configs$value[global_configs$parameter == 'result_path']
# Fail early: every output path below is built with paste0(result_path, ...),
# which silently returns character(0) when the parameter row is missing.
stopifnot(length(result_path) == 1)

In [598]:
result_path

In [599]:
## MOFA Model Configurations

In [600]:
# One row per MOFA configuration; each row is trained as a separate model below.
mofa_configs = read.csv( 'configurations/03_MOFA_Configs.csv', sep = ',')

# Validate the schema up front so that a renamed or missing column produces a
# clear error here instead of NULL values further down the notebook.
required_mofa_columns = c('configuration_name', 'mofa_result_name', 'amount_of_factors',
                          'weighting_of_views', 'scale_views')
missing_mofa_columns = setdiff(required_mofa_columns, colnames(mofa_configs))
if(length(missing_mofa_columns) > 0){
    stop('03_MOFA_Configs.csv is missing column(s): ', paste(missing_mofa_columns, collapse = ', '))
    }

“incomplete final line found by readTableHeader on 'configurations/03_MOFA_Configs.csv'”


In [601]:
head(mofa_configs,2)

Unnamed: 0_level_0,configuration_name,mofa_result_name,amount_of_factors,weighting_of_views,scale_views
Unnamed: 0_level_1,<chr>,<chr>,<int>,<lgl>,<lgl>
1,MI_v1,MI_v1_MOFA,10,False,True


In [602]:
### Generate the result data directory if it does not exist yet
# file.path() inserts the separator itself, so this also works when result_path
# has no trailing slash (later cells write to paste0(result_path, '/03_results/', ...)).
# recursive = TRUE additionally creates result_path when it is missing.
results_dir_03 = file.path(result_path, '03_results')
if(!dir.exists(results_dir_03)){
    dir.create(results_dir_03, recursive = TRUE, showWarnings = FALSE)
    }

# Load Data 

## Prepared combined data

In [603]:
### Load the data that was generated in the previous script using the name specified in the configuration file

In [604]:
input_data = list()

In [605]:
# Load the combined long-format table written by script 02 for every configuration row.
# seq_len() iterates zero times for an empty configuration table
# (1:nrow() would iterate over c(1, 0) and fail on a non-existent row).
for(i in seq_len(nrow(mofa_configs))){
    path = paste0(result_path, '/02_results/02_Combined_Data_', mofa_configs$configuration_name[i] ,'_INTEGRATED.csv')
    data_long = read.csv(path)
    data_long$X = NULL   # drop the column named X if present (no-op otherwise)
    # Log which file was read and when it was last modified, to spot stale inputs.
    print(path)
    print(file.info(path)$mtime)
    input_data[[i]]= data_long
    }

[1] "/lustre/groups/epigenereg01/workspace/projects/jove/example_results//02_results/02_Combined_Data_MI_v1_INTEGRATED.csv"
[1] "2024-05-10 22:27:00 CEST"


In [606]:
length(unique(input_data[[1]]$variable))

In [607]:
unique(input_data[[1]]$type)

In [608]:
length(unique(input_data[[1]]$sample_id))

# Train MOFA Model

## Prepare data list

In [609]:
### Adjust single-cell types to correspond to cell-types

In [610]:
head(input_data[[1]],2)

Unnamed: 0_level_0,sample_id,variable,value,type,gene
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<chr>,<chr>
1,D1,adipocyte__ABI1,-1.1243382,adipocyte,ABI1
2,D2,adipocyte__ABI1,-0.8572543,adipocyte,ABI1


In [611]:
### Prepare data list for MOFA (adjust format of input data to be used as input for MOFA)

In [612]:
input_data_list = list()

In [613]:
data_list = list()

In [614]:
# For every configuration, convert the long-format table into a named list of
# matrices (features as rows, samples as columns), one matrix per value of `type`.
input_data_list = lapply(input_data, function(x){

    # All views must expose the same sample columns; this does not depend on the
    # view, so compute it once per configuration.
    samples = unique(x$sample_id)

    # Local accumulator, so the function no longer depends on a `data_list`
    # object having been created in the global environment by an earlier cell.
    view_matrices = list()

    for(view in unique(x$type)){
        view_data = x[x$type == view, ]

        view_data$type = NULL
        view_data$cell_type = NULL   # no-op when the column is absent

        # `value.var` is the dcast argument that names the column to spread;
        # the former `value = "value"` is not a dcast argument.
        view_data = view_data %>% dcast(variable ~ sample_id, value.var = "value")
        rownames(view_data) = view_data$variable
        colnames(view_data) = str_replace(colnames(view_data), 'value\\.', '')
        view_data$variable = NULL

        # Samples without any measurement in this view become all-NA columns.
        view_data[setdiff( samples, names(view_data))] = NA

        # drop = FALSE keeps a data.frame even when only one sample column remains.
        view_data = view_data[, order(colnames(view_data)), drop = FALSE]
        view_data = view_data[, colnames(view_data) %in% samples, drop = FALSE]

        view_matrices[[view]] = as.matrix(view_data)
        }

    return(view_matrices)
    })

In [615]:
head(input_data_list[[1]][[1]],2)

Unnamed: 0,D1,D2,D4,D5,D6,D7,DL2,DL3,DT4,H01,⋯,H51,H53,H55,H56,H57,H58,H59,H6,H67,H7
adipocyte__ABI1,-1.1243382,-0.8572543,-0.5751093,0.5119362,0.2197229,-0.6406669,-1.51239,,1.0271543,0.3911963,⋯,0.9388143,-1.02715427,-1.51239,,1.359737,,-0.3911963,-0.1642108,,-0.3330051
adipocyte__ABI2,-0.1642108,0.4507442,0.7810338,1.6040835,-0.5751093,-0.2197229,-1.074478,,0.5119362,0.3043391,⋯,-0.5119362,0.05451891,-1.074478,,-1.074478,,0.0,0.9388143,,1.0271543


In [616]:
#str(input_data_list)

## Create MOFA object

In [617]:
### Create a MOFA object to run the MOFA model on it

In [618]:
names(input_data_list[[1]])

In [619]:
# Build one MOFA object per configuration from its list of view matrices.
mofa_object = lapply(input_data_list, function(view_matrices){
    create_mofa(view_matrices)
    })

Creating MOFA object from a list of matrices (features as rows, sample as columns)...




In [620]:
### Plot the Data Overview showing the input used for the MOFA Model

In [621]:
# Specific Text Descriptions:
xlabel = xlab('Samples') 
ylabel = ylab('View')

In [622]:
# Sizes of the plot
width_par = 5
height_par =5

In [623]:
options(repr.plot.width=30, repr.plot.height=10)

# One data-overview plot per MOFA object, themed with the shared plot_config and
# the xlabel / ylabel objects that are current when this cell runs.
mofa_overview = lapply(mofa_object, function(mofa_obj){
    overview_plot = plot_data_overview(mofa_obj)
    overview_plot +
        plot_config +
        theme(axis.text.y = element_text(hjust = 0, vjust = 0.5)) +
        xlabel +
        ylabel +
        theme(axis.text.x = element_blank())   # hide the x-axis tick labels
    })

In [624]:
#mofa_overview[[1]]

In [625]:
# Extract the data-type (view) colors from the overview plots so that later plots
# can reuse the same colors.
type_colors = list()
# seq_along() iterates zero times for an empty list (1:length() would not).
for(i in seq_along(mofa_overview)){
    built_plot = ggplot_build(mofa_overview[[i]])
    # Distinct fill values of the first layer of the built plot.
    fill_colors = unique(built_plot$data[[1]][["fill"]])
    # 'grey' entries are dropped so that only the view colors remain.
    type_colors[[i]] = fill_colors[fill_colors != 'grey']
    }
    

In [626]:
type_colors

In [627]:
figure_name = "FIG03_Overview_MOFA_Input_"

In [628]:

# Write one overview PDF per configuration.
# Create the target directory first so pdf() does not fail on a fresh checkout.
dir.create('figures/03_figures', recursive = TRUE, showWarnings = FALSE)

for(i in seq_along(mofa_overview)){
    # Use the full column name: `$mofa_result` only resolved through partial
    # matching against `mofa_result_name`.
    pdf(paste0('figures/03_figures/', figure_name, mofa_configs$mofa_result_name[i],  '.pdf'), width =width_par, height =height_par)
    # Close the device even if rendering fails, so no PDF device is left open.
    tryCatch(print(mofa_overview[[i]] ), finally = dev.off())
    }

## Set MOFA Training Options and run the Model Training

In [629]:
### Define the MOFA parameters for training and run the model training with the set parameters
### Some parameters are taken from the configuration file
### Others are currently hard-coded below but can be modified

In [630]:
model_result = list()

In [631]:
# Train one MOFA model per configuration row and collect the trained objects in
# model_result (same order as mofa_object / mofa_configs).
for(i in seq_along(mofa_object)){
    
    ## Set other parameters of MOFA Model
    mefisto_opts = get_default_mefisto_options(mofa_object[[i]])
    
    ## Data Options
    data_opts = get_default_data_options(mofa_object[[i]])
    data_opts$scale_views = mofa_configs$scale_views[i] # decide whether to scale the data
    data_opts$use_float32 = FALSE
    print(data_opts)
    
    ## Model Options
    model_opts = get_default_model_options(mofa_object[[i]])
    model_opts$num_factors = mofa_configs$amount_of_factors[i] # define number of factors
    model_opts$spikeslab_weights = TRUE
    # model_opts$likelihoods['neutrophil'] = 'poisson' - example to modify distribution for one specific view
    print(model_opts)
    
    ## Training Options
    train_opts  = get_default_training_options(mofa_object[[i]])
    train_opts$maxiter = 50000
    train_opts$verbose = TRUE
    train_opts$seed = 42   # fixed seed, hard-coded here
    train_opts$weight_views = mofa_configs$weighting_of_views[i]   # taken from the configuration file
    print(train_opts)
    
    ## Build and train the model
    MOFAobject = prepare_mofa(
      object = mofa_object[[i]],
      data_options = data_opts,
      model_options = model_opts,
      mefisto_options = mefisto_opts,
      training_options = train_opts #,
      #stochastic_options = stoch_options
    )
    
    # Use the full column name: `$mofa_result` only resolved through partial
    # matching against `mofa_result_name`.
    model_name = paste0("03_MOFA_MODEL_", mofa_configs$mofa_result_name[i], '.hdf5')
    # The path is built with paste0 alone; wrapping a single string in
    # file.path() returned it unchanged.
    outfile = paste0(result_path, '/03_results/',  model_name)
    print(outfile)
    # use_basilisk = FALSE: the Python side is provided through reticulate
    # (see the message printed by run_mofa).
    MOFAobject.trained = run_mofa(MOFAobject, outfile, use_basilisk = FALSE)
    

    model_result[[i]] = MOFAobject.trained
    
    }
    

$scale_views
[1] TRUE

$scale_groups
[1] FALSE

$center_groups
[1] TRUE

$use_float32
[1] FALSE

$views
[1] "adipocyte"                    "cardiac.muscle.cell"         
[3] "cardiac.neuron"               "endothelial.cell"            
[5] "fibroblast.of.cardiac.tissue" "lymphocyte"                  
[7] "mast.cell"                    "mural.cell"                  
[9] "myeloid.cell"                

$groups
[1] "group1"

$likelihoods
                   adipocyte          cardiac.muscle.cell 
                  "gaussian"                   "gaussian" 
              cardiac.neuron             endothelial.cell 
                  "gaussian"                   "gaussian" 
fibroblast.of.cardiac.tissue                   lymphocyte 
                  "gaussian"                   "gaussian" 
                   mast.cell                   mural.cell 
                  "gaussian"                   "gaussian" 
                myeloid.cell 
                  "gaussian" 

$num_factors
[1] 10

$spikes

Checking data options...

Checking training options...

Checking model options...



[1] "/lustre/groups/epigenereg01/workspace/projects/jove/example_results//03_results/03_MOFA_MODEL_MI_v1_MOFA.hdf5"



Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... 
    Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE)
    If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'


“Factor(s) 2, 8 are strongly correlated with the total number of expressed features for at least one of your omics. Such factors appear when there are differences in the total 'levels' between your samples, *sometimes* because of poor normalisation in the preprocessing steps.
”


In [632]:
#str(model_result)

In [633]:
reticulate::py_config()

python:         /home/icb/corinna.losert/miniconda3/envs/mofa_analysis/bin/python
libpython:      /home/icb/corinna.losert/miniconda3/envs/mofa_analysis/lib/libpython3.9.so
pythonhome:     /home/icb/corinna.losert/miniconda3/envs/mofa_analysis:/home/icb/corinna.losert/miniconda3/envs/mofa_analysis
version:        3.9.18 | packaged by conda-forge | (main, Aug 30 2023, 03:49:32)  [GCC 12.3.0]
numpy:          /home/icb/corinna.losert/.local/lib/python3.9/site-packages/numpy
numpy_version:  1.26.4

NOTE: Python version was forced by RETICULATE_PYTHON

# Extract and prepare data for plots

In [634]:
### Extract generated data for the model to use for later downstream analysis

## Extract Variance decomposition

In [635]:
# Extract the total explained variance per view and factor

In [636]:
model_result[[1]]@cache[["variance_explained"]]$r2_total  # per view

In [637]:
rowMeans(model_result[[1]]@cache$variance_explained$r2_per_factor[[1]]) # per factor

In [638]:
# Mean total variance explained

In [639]:
mean(model_result[[1]]@cache$variance_explained$r2_total[[1]])

In [640]:
# Save the explained variance

In [641]:
# Write the per-factor explained variance of the first group of every model to CSV.
for(i in seq_along(model_result)){
    # Use the full column name: `$mofa_result` only resolved through partial
    # matching against `mofa_result_name`.
    write.csv(model_result[[i]]@cache$variance_explained$r2_per_factor[[1]], paste0(result_path, '/03_results/03_MOFA_Variance_Decomposition_',mofa_configs$mofa_result_name[i], '.csv'))
    }
    

## Extract factor and weight data

In [642]:
#### Extract sample factors  values and save

In [643]:
# Export the factor values of every trained model, with the sample id as a column.
for(i in seq_along(model_result)){
    factors = get_factors(model_result[[i]], factors = "all")
    factors = factors$group1   # only the element named 'group1' is exported
    
    # (A bare head(factors, 2) used to sit here; inside a for loop its value is
    #  neither printed nor used, so it was removed.)
    factors = as.data.frame(factors)
    factors$sample_id = rownames(factors)
    
    # Save as csv
    # Use the full column name: `$mofa_result` only resolved through partial
    # matching against `mofa_result_name`.
    write.csv(factors, paste0(result_path, '/03_results/03_Factor_Data_' , mofa_configs$mofa_result_name[i],  '.csv'), row.names = FALSE)
    }

In [644]:
### Extract weight data (feature factor weights) and save

In [645]:
# Export the feature weights of every trained model as one table per model,
# stacking all views and tagging each row with its view name in `type`.
for(j in seq_along(model_result)){
    weights = get_weights(model_result[[j]], views = "all", factors = "all")
    weight_data = data.frame()
    
    for (view_name in names(weights)){
        view_weights = data.frame(weights[[view_name]])
        view_weights$type = view_name
        weight_data = rbind(weight_data, view_weights)
        }
    # Row names of the stacked table become an explicit column, because the CSV
    # is written with row.names = FALSE.
    weight_data$variable_name = rownames(weight_data)
    
    # Save as csv
    # Use the full column name: `$mofa_result` only resolved through partial
    # matching against `mofa_result_name`.
    write.csv(weight_data, paste0(result_path, '/03_results/03_Weight_Data_' ,mofa_configs$mofa_result_name[j], '.csv'), row.names = FALSE)
    }
    

# Diagnostic Result Plots

In [646]:
### Make the explained variance plot to analyze the model result

## Plot explained variance overview

In [647]:
## Prepare the data format

In [648]:
# For every model build a long table with one row per (factor, view) pair:
#   Var1 = row label of r2_per_factor, Var2 = column label (view),
#   value = explained variance of that pair, total_variance = total per view.
explained_variance = lapply(model_result, function(x) {
    r2_long = x@cache$variance_explained$r2_per_factor[[1]]
    r2_long = melt(r2_long)
    
    r2_total = x@cache[["variance_explained"]]$r2_total$group1
    # View labels: names() for a plain named vector, rownames() for anything
    # carrying a dim attribute. The former rownames(r2_total, 2) passed a stray
    # `2` as do.NULL and returns NULL for a plain named vector.
    view_names = if(is.null(dim(r2_total))) names(r2_total) else rownames(r2_total)
    total_variance = data.frame( view = view_names,
                                 total_variance = as.numeric(r2_total))
    r2_long = merge(r2_long, total_variance, by.x = 'Var2', by.y = 'view')

    # Order views alphabetically, both as factor levels and as row order.
    r2_long$Var2 = as.character(r2_long$Var2)
    r2_long$Var2 = factor(r2_long$Var2, levels = sort(unique(r2_long$Var2)))
    r2_long[order(r2_long$Var2),]
    }
                            )

In [649]:
#### Plot complete explained variance (Heatmap)

In [650]:
# Heatmap of explained variance: factors (Var1) on the x-axis, views (Var2) on
# the y-axis, fill = explained variance.
var_decomp = lapply(explained_variance, function(x){
    ggplot() + 
        scale_fill_gradient(low="white", high="black") + 
        # Explicit labels: this cell used to read the global xlabel / ylabel, which
        # at this point still hold 'Samples' / 'View' from the input overview plot
        # and would change with the execution order of the cells.
        xlab('Factor') + 
        ylab('View') +
        plot_config + theme(axis.text.x = element_text(angle = 90), legend.position = 'right')+ 
        geom_tile(data = x, mapping = aes(Var1,  Var2, fill= value))
    })

In [651]:
### Combine the plot with total variance barplot per dimension

In [652]:
# Specific Text Descriptions:
xlabel = xlab('View') 
ylabel = ylab('Explained Variance')

In [653]:
# Horizontal bar plot of the total explained variance per view, one plot per model.
# Iterating over the index lets each plot use the colors extracted for its own
# configuration; unlist(type_colors) concatenated the colors of all configurations.
comp_variance = lapply(seq_along(explained_variance), function(i){
    # One row per view (total_variance is repeated for every factor in the long table).
    plot_complete = unique(explained_variance[[i]][,c('Var2', 'total_variance')])
    ggplot(plot_complete, aes(x=Var2, y = total_variance, fill = Var2)) + 
        geom_bar(stat="identity") + coord_flip() + 
        xlabel + 
        ylabel +
        plot_config + scale_fill_manual(values = type_colors[[i]])  ## currently uses same coloring as MOFA overview
    })

In [654]:
#comp_variance[[1]]

In [655]:
### Combine both visualization

In [656]:
# File-name prefix for the combined variance-decomposition figure.
# NOTE(review): unlike "FIG03_Overview_MOFA_Input_" this prefix has no trailing
# underscore, and the result name is appended directly after it when the PDF path
# is built - confirm whether a separator is intended before changing the file name.
figure_name = "FIG03_Overview_Variance_Decomposition"

In [657]:
# Sizes of the plot
width_par = 8.07
height_par = 4  # 2.8

In [658]:
# Combine the variance heatmap and the total-variance bar plot side by side,
# attach the heatmap legend on the right, save as PDF and show inline.
# Create the target directory first so pdf() does not fail on a fresh checkout.
dir.create('figures/03_figures', recursive = TRUE, showWarnings = FALSE)

for(i in seq_along(explained_variance)){
    # Named heatmap_legend so that the graphics function legend() is not shadowed.
    heatmap_legend = get_legend(var_decomp[[i]])
    
    combination1 = ggarrange(var_decomp[[i]] + theme(legend.position = 'none'),
                     comp_variance[[i]] + theme(axis.text.y = element_blank(),axis.ticks.y = element_blank(),axis.title.y = element_blank(), legend.position = 'none' ), 
                         align = 'h', nrow=1, widths = c(4,1))
    # Annotate Figure
    combination1_ann = annotate_figure(
      combination1,
      right = heatmap_legend
    )
    
    # Use the full column name: `$mofa_result` only resolved through partial
    # matching against `mofa_result_name`.
    pdf(paste0('figures/03_figures/', figure_name,  mofa_configs$mofa_result_name[i],  '.pdf'), width =width_par, height =height_par)
    # Close the device even if rendering fails, so no PDF device is left open.
    tryCatch(print(combination1_ann), finally = dev.off())
    # Second print renders the figure inline in the notebook.
    print(combination1_ann)
    
    }
    

ERROR: Error in Cairo::Cairo(width, height, tf, "png", pointsize, bg, "transparent", : Graphics API version mismatch


plot without title

In [None]:
## Save view colors for further usage

In [None]:
# Persist the flattened view colors as a one-column table (color_code).
write.csv(
    data.frame(color_code = unlist(type_colors)),
    file = 'configurations/03_Type_Color_Codes.csv'
)