# Settings

In [1]:
# Load Reticulate function
# Conda environment that provides the Python modules used by this notebook.
# The path is defined once so the interpreter and environment cannot drift apart.
reticulate.env <- "/home/luca/anaconda3/envs/reticulate"
reticulate.python <- file.path(reticulate.env, "bin/python")

# RETICULATE_PYTHON is set before reticulate is attached (same order as before).
Sys.setenv(RETICULATE_PYTHON = reticulate.python)
library(reticulate)
reticulate::use_python(reticulate.python)
reticulate::use_condaenv(reticulate.env)

# Both modules need to be available; stop here with an explicit message
# instead of relying on someone reading a printed TRUE/FALSE.
stopifnot(
    "Python module 'anndata' is not available in the reticulate environment" =
        reticulate::py_module_available(module = "anndata"),
    "Python module 'leidenalg' is not available in the reticulate environment" =
        reticulate::py_module_available(module = "leidenalg")
)

# Importing confirms the modules actually load (good to make sure this doesn't error).
reticulate::import("anndata")
reticulate::import("leidenalg")

Module(anndata)

Module(leidenalg)

In [2]:
## Patch for annotations in R4.1
# BiocManager::install("Bioconductor/GenomeInfoDb",lib = "/home/luca/R/x86_64-pc-linux-gnu-library/4.1",force = TRUE)
# library(GenomeInfoDb,lib.loc="/home/luca/R/x86_64-pc-linux-gnu-library/4.1")

In [3]:
# Load packages
# All packages are loaded through a single pacman::p_load() call.
# Each package is listed once (data.table was previously listed twice).
pacman::p_load(
    # data handling
    dplyr, stringr, data.table, tidyr, Matrix,
    # single-cell I/O and analysis
    hdf5r, Seurat, Signac, harmony, knitr, SoupX,
    # logging / parallelism
    logr, parallel,
    # plotting
    ggplot2, ggpubr, ggrepel, ggbreak, gridExtra
)

In [4]:
# Set directories
# Project root; the pipeline directories further down are built from it.
base.dir <- "/nfs/lab/projects/COVID_mouse/"

# Fixed input locations.
fastq.dir <- "/nfs/lab/projects/COVID_mouse/raw_data/230616_A01535_0344_BH7LF2DSX7_2/"
assets.dir <- "/nfs/lab/projects/COVID_mouse/assets/"

# Top-level working directories under the project root.
cell.ranger.dir <- paste0(base.dir, "cell.ranger/")
seurat.dir <- paste0(base.dir, "seurat/")

# One sub-directory of seurat.dir per pipeline step.
step1.dir <- paste0(seurat.dir, "1_pre.processing/")
step2.dir <- paste0(seurat.dir, "2_SoupX/")
step3.dir <- paste0(seurat.dir, "3_scrublet/")

In [5]:
# start log
# Set all logr options in one call.
options(
    logr.on = TRUE,
    logr.notes = TRUE,
    logr.autolog = TRUE,
    logr.compact = TRUE,
    logr.traceback = TRUE
)

# Log file name: project root + today's date + a fixed suffix for this step.
log.file <- paste0(base.dir, Sys.Date(), ".03_Scrublet_SamplePrep.log")

In [6]:
# Open the logr log using the file name built in log.file.
# NOTE(review): no matching log_close() is visible in this section; confirm
# the log is closed once the notebook has finished.
log_open(log.file)

# Prepare files for scrublet

In [8]:
# Load sample list
# The working directory is switched to cell.ranger.dir and every entry found
# there is taken as a sample ID.
# NOTE(review): any stray file or folder in cell.ranger.dir would also be
# picked up as a sample; confirm the directory only holds per-sample folders.
setwd(cell.ranger.dir)
sample.ls <- list.files()
sample.ls

In [9]:
# Export the per-sample input files for scrublet.
# For every sample ID in sample.ls this loads the SoupX-filtered object from
# step2.dir, pulls the RNA count matrix and writes three files to step3.dir:
#   <sample>_matrix.mtx    count matrix (Matrix::writeMM)
#   <sample>_genes.tsv     row names of the count matrix, one per line
#   <sample>_barcodes.tsv  column names of the count matrix, one per line

# Make sure the output directory exists before any sample is processed;
# this is a no-op when the directory is already there.
dir.create(step3.dir, recursive = TRUE, showWarnings = FALSE)

for (sample.ID in sample.ls) {
    gc(reset = TRUE)
    log_print(paste("Processing sample: ", sample.ID))

    # Load data; fail with an explicit message when the SoupX output is missing
    rds.path <- paste0(step2.dir, sample.ID, "_SoupX_filt.rds")
    if (!file.exists(rds.path)) {
        stop("SoupX-filtered object not found: ", rds.path, call. = FALSE)
    }
    adata <- readRDS(file = rds.path)

    # pull out RNA counts to a separate object and writeMM
    DefaultAssay(adata) <- 'RNA'
    rna.counts <- GetAssayData(adata, slot = 'counts')
    matrix.path <- paste0(step3.dir, sample.ID, "_matrix.mtx")
    writeMM(rna.counts, matrix.path)

    # also export the gene list
    genes.path <- paste0(step3.dir, sample.ID, "_genes.tsv")
    write(row.names(rna.counts), genes.path, sep = '\n')

    # (and barcodes just in case)
    barcodes.path <- paste0(step3.dir, sample.ID, "_barcodes.tsv")
    write(colnames(rna.counts), barcodes.path, sep = '\n')
}

[1] "Processing sample:  1_GFP1"
[1] "Processing sample:  10_G1C1"
[1] "Processing sample:  11_G1C2"
[1] "Processing sample:  12_G1C3"
[1] "Processing sample:  2_GFP2"
[1] "Processing sample:  3_GFP3"
[1] "Processing sample:  4_RBD1"
[1] "Processing sample:  5_RBD2"
[1] "Processing sample:  6_RBD3"
[1] "Processing sample:  7_G1B1"
[1] "Processing sample:  8_G1B2"
[1] "Processing sample:  9_G1B3"
