# This step will run EnDecon.

### Note: this is R code running in Jupyter!
### Use Kernel: R

In [7]:
library(EnDecon)

In [2]:
# Optional, only if you want to double check your library has been installed
library(devtools)
library (curl)
library (RcppGSL)
library(Rfast)
library(fastmatrix)
library(BiocManager)
library(Seurat)
library(CARD)
library(reticulate)
library(TOAST)

Loading required package: usethis

Using libcurl 7.68.0 with OpenSSL/1.1.1f

Loading required package: Rcpp


Attaching package: ‘Rcpp’


The following object is masked from ‘package:RcppGSL’:

    LdFlags


Loading required package: RcppZiggurat


Rfast:  2.0.8

 ___ __ __ __ __    __ __ __ __ __ _             _               __ __ __ __ __     __ __ __ __ __ __   
|  __ __ __ __  |  |  __ __ __ __ _/            / \             |  __ __ __ __ /   /__ __ _   _ __ __\  
| |           | |  | |                         / _ \            | |                        / /          
| |           | |  | |                        / / \ \           | |                       / /          
| |           | |  | |                       / /   \ \          | |                      / /          
| |__ __ __ __| |  | |__ __ __ __           / /     \ \         | |__ __ __ __ _        / /__/\          
|    __ __ __ __|  |  __ __ __ __|         / /__ _ __\ \        |_ __ __ __ _   |      / ___  /           
|

## Step 1 Input file
### adjust the ref_id, tail, sim_id depending on your scenarios

#### Input files for Scenario 2

In [2]:
# Identifier of the reference dataset used for Scenario 2.
ref_id <- "feff311061feb64e82aaf93071ed1d86"

# When several reference datasets exist, each one is selected by a suffix
# ("tail") appended to the reference file name; with a single reference the
# tail is the empty string.
# tail_of_ref=['']  # No tail for the reference adata, e.g. Reference_for_deconv.h5ad
tail_list <- c(
    "_Lafyatis_Rojas_2019",
    "_Teichmann_Meyer_2019",
    "_Krasnow_2020",
    "_Meyer_2019",
    "_Banovich_Kropski_2020"
)

# Simulated datasets to process; earlier selections are kept for reference.
#sim_id = '49db3a23106485d79a11c5f9ec2be108'
#sim_id_list <- c('49db3a23106485d79a11c5f9ec2be108','4555a63ca602ee279f81713e3f30adfb','24bc51ffa07c227a8aaab7f616fa7166',)
sim_id_list <- c("b78ba209152da0bcf71ec2356d867145")

#### Input file for Scenario 1 or 3

In [4]:
# For scenario 3:
ref_id <- "6702e4f7a944d6095c2c2d1496311866"
sim_id_list <- c(
    "43885de192f6b406821c09b949f8e5ea_mixlevel2",
    "43885de192f6b406821c09b949f8e5ea_mixlevel0",
    "43885de192f6b406821c09b949f8e5ea_mixlevel1",
    "43885de192f6b406821c09b949f8e5ea_mixlevel3"
)
# The run loop iterates over tail_list. If no tail_list has been defined yet
# (i.e. the Scenario 2 cell was not run), fall back to a single empty suffix so
# the reference files are read without a tail instead of failing with
# "object 'tail_list' not found".
# NOTE(review): if the Scenario 2 cell WAS run earlier in this session, its
# tail_list is still in effect - reset it by hand if that is not intended.
if (!exists("tail_list")) {
    tail_list <- c("")
}
## You need to change ref_id and sim_id_list for scenario 1


## Step 2 run EnDecon
Here you need to adjust the code to choose which deconvolution methods to run. I suggest running cell2location separately, since it takes a long time; if the kernel dies during the run, you will still have the output of the other methods saved. Each dataset usually takes 30-50 minutes.

In [None]:
# Run EnDecon for every (reference tail, simulated dataset) combination and
# write the results to disk.
#
# Reads from the session: ref_id, tail_list, sim_id_list.
# Writes per combination:
#   <output_path>/RData/simulated_SRT_dataset_<sim_id><tail>_Results_Deconv_C2L.RData
#   <output_path>/RData/simulated_SRT_dataset_<sim_id><tail>_Runtime_C2L.csv
#   <output_path>/simulated_SRT_dataset_<sim_id><tail>_<method>_prop.csv

# Read a count CSV whose first column holds the gene names and return an
# integer matrix with genes as row names.
read_count_matrix <- function(csv_path) {
    counts <- read.csv(csv_path, header = TRUE)
    count_matrix <- as.matrix(counts[, -1, drop = FALSE])
    rownames(count_matrix) <- as.character(counts[[1]])
    # Values must be integer for the downstream methods; mixed-reference or
    # atlas inputs may hold non-integer values, which are truncated here.
    storage.mode(count_matrix) <- "integer"
    count_matrix
}

# Print the top-left corner of a matrix. Values are not auto-printed inside a
# for loop, so previews need an explicit print().
print_corner <- function(x, n_rows, n_cols) {
    print(x[seq_len(min(n_rows, nrow(x))), seq_len(min(n_cols, ncol(x))), drop = FALSE])
}

# Settings that do not change between iterations.
# Reference data path (storing processed sc_meta.csv and sc_count.csv)
sc_path <- paste0("/home/oneai/data/EnDecon_input/", ref_id, "/")
# Output path for EnDecon
output_path <- "/home/oneai/data/EnDecon_output/"
rdata_dir <- paste0(output_path, "RData/")
# Python interpreter handed to EnDecon (path on our Ubuntu machine; adjust as needed)
python_env <- "~/envs/EnDecon_GPU_New/bin/python"

# Create the output folders up front so a long run cannot fail at save time
# just because a directory is missing.
dir.create(rdata_dir, recursive = TRUE, showWarnings = FALSE)

for (tail_of_ref in tail_list) {
    print("NEW REFERENCE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!:")
    print("Now run EnDecon for ref_id:")
    print(tail_of_ref)

    #### STEP1 single-cell count data
    # Reading the reference is slow and depends only on tail_of_ref, so it is
    # done once per reference rather than once per simulated dataset.
    sc_count_full <- read_count_matrix(paste0(sc_path, "sc_count", tail_of_ref, ".csv"))
    print_corner(sc_count_full, 6L, 10L)

    #### STEP2 single-cell metadata
    # Named vector: names are taken from column "cellID", values from column
    # "cell_type".
    sc_meta_table <- read.csv(paste0(sc_path, "sc_meta", tail_of_ref, ".csv"), header = TRUE)
    if (!all(c("cellID", "cell_type") %in% names(sc_meta_table))) {
        stop("sc_meta", tail_of_ref, ".csv must contain the columns 'cellID' and 'cell_type'",
             call. = FALSE)
    }
    sc_meta_syn <- setNames(sc_meta_table[["cell_type"]], sc_meta_table[["cellID"]])
    print(head(sc_meta_syn, 3))

    for (sim_id in sim_id_list) {
        print("Now run EnDecon for sim_id:")
        print(sim_id)

        # File location for synthetic data (processed for CARD/EnDecon)
        spatial_path <- paste0("/home/oneai/data/EnDecon_input/", ref_id, "/",
                               "simulated_SRT_dataset_", sim_id, "/")

        #### STEP3 spatial transcriptomics count data
        spatial_count_syn <- read_count_matrix(paste0(spatial_path, "spatial_count.csv"))
        print_corner(spatial_count_syn, 5L, 5L)

        #### STEP4 spatial location
        spatial_location_syn <- read.csv(paste0(spatial_path, "spatial_location.csv"), header = TRUE)
        # First column becomes the row names and is then removed from the data.
        row.names(spatial_location_syn) <- spatial_location_syn[, 1]
        spatial_location_syn <- spatial_location_syn[, -1, drop = FALSE]
        # Revert the y axis (R and Python orientation differ).
        # NOTE(review): rev() reverses the ORDER of the y values across rows;
        # presumably this relies on the spots being stored as a complete
        # regular grid - confirm, otherwise a flip such as max(y) + min(y) - y
        # may be what is intended.
        spatial_location_syn$y <- rev(spatial_location_syn$y)
        print(head(spatial_location_syn, 4))

        #### Keep only the genes present in both the reference and the spatial data
        common_row_names <- intersect(rownames(sc_count_full), rownames(spatial_count_syn))
        if (length(common_row_names) == 0L) {
            stop("No overlapping genes between reference '", tail_of_ref,
                 "' and simulated dataset '", sim_id, "'", call. = FALSE)
        }
        sc_count_syn <- sc_count_full[common_row_names, , drop = FALSE]
        spatial_count_syn <- spatial_count_syn[common_row_names, , drop = FALSE]

        #### Run EnDecon
        # cell2location is enabled in this configuration (the saved files carry
        # the "_C2L" suffix). To run it separately, set cell2location = FALSE
        # here (and, if needed, DWLS / SpatialDWLS = FALSE as in earlier runs).
        Results.dec.mouse <- EnDecon_individual_methods(
            sc_exp = sc_count_syn,
            sc_label = sc_meta_syn,
            spot_exp = spatial_count_syn,
            spot_loc = spatial_location_syn,
            python_env = python_env,
            use_gpu = TRUE,
            gene_det_in_min_cells_per = 0.01,
            RCTD.CELL_MIN_INSTANCE = 5,
            saving_results = FALSE,

            SCDC = TRUE, RCTD = TRUE, DeconRNASeq = TRUE,
            DestVI = TRUE, DWLS = TRUE, SPOTlight = TRUE, SpatialDWLS = TRUE,
            cell2location = TRUE, CARD = TRUE, STdeconvolve = TRUE,

            Stereoscope = FALSE, MuSiC = FALSE, # These two methods won't work

            cell2location.sc_max_epoches = 800,
            cell2location.sc_lr = 0.002,
            cell2location.st_N_cells_per_location = 10,
            cell2location.st_detection_alpha = 200.00,
            cell2location.st_max_epoches = 8000
        )

        print(Results.dec.mouse[[2]])

        #### Save RData and runtime into <output_path>/RData
        # Element 1 is saved under the object name "rdata"; element 2 is
        # written as a Method/Runtime table.
        rdata <- Results.dec.mouse[[1]]
        runtime <- Results.dec.mouse[[2]]
        df <- data.frame(Method = names(runtime), Runtime = as.character(runtime),
                         stringsAsFactors = FALSE)
        file_stem <- paste0("simulated_SRT_dataset_", sim_id, tail_of_ref)
        save(rdata, file = paste0(rdata_dir, file_stem, "_Results_Deconv_C2L.RData"))
        write.csv(df, file = paste0(rdata_dir, file_stem, "_Runtime_C2L.csv"), row.names = FALSE)

        #### Save one <method>_prop.csv per method
        # seq_along() is safe when no method returned a result. Single-bracket
        # subsetting is kept on purpose so the CSV layout matches earlier runs.
        for (i in seq_along(rdata)) {
            print(i)
            write.csv(rdata[i],
                      file = paste0(output_path, file_stem, "_", names(rdata[i]), "_prop.csv"),
                      row.names = TRUE)
        }
    }
}