# Settings

In [1]:
# Load Reticulate function
Sys.setenv(RETICULATE_PYTHON="/home/luca/anaconda3/envs/reticulate/bin/python")
library(reticulate)
reticulate::use_python("/home/luca/anaconda3/envs/reticulate/bin/python")
reticulate::use_condaenv("/home/luca/anaconda3/envs/reticulate")
reticulate::py_module_available(module='anndata') #needs to be TRUE
reticulate::import('anndata') #good to make sure this doesn't error
reticulate::py_module_available(module='leidenalg') #needs to be TRUE
reticulate::import('leidenalg') #good to make sure this doesn't error

Module(anndata)

Module(leidenalg)

In [2]:
## Patch for annotations in R4.1
# BiocManager::install("Bioconductor/GenomeInfoDb",lib = "/home/luca/R/x86_64-pc-linux-gnu-library/4.1",force = TRUE)
# library(GenomeInfoDb,lib.loc="/home/luca/R/x86_64-pc-linux-gnu-library/4.1")

In [3]:
# Load packages
pacman::p_load(dplyr, stringr, data.table, tidyr, data.table, Matrix,
               hdf5r, Seurat, Signac,harmony, knitr, SoupX,
               EnsDb.Hsapiens.v86, 
               logr, parallel, 
               ggplot2, ggpubr, ggrepel, ggbreak, gridExtra)

In [9]:
# Set directories
base.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/"
assets.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/Assets/"

cell.ranger.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/cellranger.symlinks/"

step1.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/1_preprocessing/"
step2.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/2_PeaksReformat/"
step3.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/Analysys/3_SoupX/"
step4.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/Analysys/4_Doublet_cleanup/scrublet/"
log.dir = "/nfs/lab/projects/mega_heart/CAREHF/multiome/log/"

In [5]:
# start log
options("logr.on" = TRUE, "logr.notes" = TRUE)
options("logr.autolog" = TRUE)
options("logr.compact" = TRUE)
options("logr.traceback" = TRUE)
log.file = paste(base.dir, Sys.Date(),".06_Scrublet.log", sep="")

In [6]:
log_open(log.file)

# Prepare files for scrublet

In [10]:
# Load sample info
sample.info = read.table(paste(assets.dir, "sample.info", sep = ""), sep = "\t", header = TRUE)
# Build variables
sample.ls = sample.info$ID
cellranger.outs.ls = paste0(sample.info$CellRanger,
                            sample.info$Chamber, "/",
                            sample.info$ID, "/outs/")

# Check the paths r right
sample.ls[1]
cellranger.outs.ls[1]

In [11]:
for (i in seq_along(sample.ls)){
    gc(reset = TRUE)
    # Set sample variable
    sample = sample.ls[i]
    log_print(paste("Processing sample: ", sample))
    # Load data
    adata <- readRDS(file = paste(step3.dir, sample, "_pre.filt.peaks.SoupX.rds", sep = ""))
    
    #pull out RNA counts to a separate object and writeMM
    DefaultAssay(adata) <- 'RNA'
    rna.counts <- GetAssayData(adata,slot='counts')       
    file <- paste(step4.dir, sample, "_matrix.mtx", sep = "")
    writeMM(rna.counts, file)
    
    #also export the gene list    
    file <- paste(step4.dir, sample, "_genes.tsv", sep = "")
    write(row.names(rna.counts),file,sep='\n')
    
    # (and barcodes just in case) 
    file <- paste(step4.dir, sample, "_barcodes.tsv", sep = "")              
    write(colnames(rna.counts),file,sep='\n')                     
}

[1] "Processing sample:  KA_49_1_2_KA_45_1_2"


"[1m[22mThe `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
[36mℹ[39m Please use the `layer` argument instead."


[1] "Processing sample:  QY_1880_1_2_QY_1879_1_2"
[1] "Processing sample:  QY_1882_1_2_QY_1881_1_2"
[1] "Processing sample:  QY_1920_1_2_QY_1919_1_2"
[1] "Processing sample:  QY_1971_1_2_QY_1970_1_2"
[1] "Processing sample:  QY_2015_1_2_QY_2014_1_2"
[1] "Processing sample:  QY_2039_1_2_QY_2038_1_2"
[1] "Processing sample:  QY_2047_1_2_QY_2046_1_2"
[1] "Processing sample:  QY_2051_1_2_QY_2050_1_2"
[1] "Processing sample:  JB_631_1_2_JB_627_1_2"
[1] "Processing sample:  JB_632_1_2_JB_628_1_2"
[1] "Processing sample:  JB_633_1_2_JB_629_1_2"
[1] "Processing sample:  JB_634_1_2_3_JB_630_1_2_3"
[1] "Processing sample:  MM_507_1_2_MM_512_1_2"
[1] "Processing sample:  QY_1856_1_2_QY_1855_1_2"
[1] "Processing sample:  QY_1854_1_2_QY_1853_1_2"
[1] "Processing sample:  QY_1858_1_2_QY_1857_1_2"
[1] "Processing sample:  QY_1860_1_2_QY_1859_1_2"
[1] "Processing sample:  KA_47_1_2_KA_43_1_2"
[1] "Processing sample:  KA_50_1_2_KA_46_1_2"
[1] "Processing sample:  QY_1886_1_2_QY_1885_1_2"
[1] "Processin