# How to set up a pipeline to run CellBender with modified settings

This documentation contains instructions on how to set up the CellBender pipeline with modified settings. In addition to running CellBender with default settings as part of the preprocessing pipeline (see `1_run_processing_pipeline.ipynb`), we also ran it with modified settings to further correct for ambient RNA.

## Step 1: create a modified CellBender Nextflow pipeline

The modified pipeline is in `scripts/cellbender_modified`, along with instructions on how to run the pipeline.

## Step 2: example code to create a library.config file for the pipeline

In [1]:
suppressPackageStartupMessages(library("plyr"))
suppressPackageStartupMessages(library("dplyr"))

In [2]:
# Load the per-sample table from the default CellBender run. The file has no
# header row, so the two columns are named explicitly afterwards.
df <- read.table(
    "/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/nCells_defaultCB.txt",
    header = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
    sep = "\t"
)
colnames(df) <- c("sample", "expected_cells")
head(df)

Unnamed: 0_level_0,sample,expected_cells
Unnamed: 0_level_1,<chr>,<int>
1,HPAP-019,2804
2,HPAP-020,6460
3,HPAP-021,3740
4,HPAP-022,2694
5,HPAP-023,442
6,HPAP-024,1355


In [3]:
# Load the list of samples selected for optimization (tab-separated, with a
# header row).
samples <- read.table(
    "/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/results/samplesForOpt.txt",
    header = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
    sep = "\t"
)

# Keep only the selected samples, then attach each sample's `Source`.
# `sample` is the only column shared by both tables, so it is the join key.
df <- df[df$sample %in% samples$sample, ]
df <- inner_join(df, samples[, c("sample", "Source")])

# Append the "-hg38" suffix only after joining: `samples$sample` holds the
# un-suffixed names.
df$sample <- paste0(df$sample, "-hg38")
head(df)

[1m[22mJoining with `by = join_by(sample)`


Unnamed: 0_level_0,sample,expected_cells,Source
Unnamed: 0_level_1,<chr>,<int>,<chr>
1,HPAP-029-hg38,1692,HPAP
2,HPAP-035-hg38,1207,HPAP
3,HPAP-036-hg38,1271,HPAP
4,HPAP-037-hg38,2439,HPAP
5,HPAP-040-hg38,1731,HPAP
6,HPAP-043-hg38,935,HPAP


In [4]:
# Use the default learning rate for every sample; adjust if needed.
df[["learning_rate"]] <- 0.0001

In [5]:
# Add solo_out: each sample's path of the form
# <starsolo dir>/<sample>/<sample>.Solo.out/
df[["solo_out"]] <- with(
    df,
    paste0(
        "/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/",
        sample, "/", sample, ".Solo.out/"
    )
)
head(df)

Unnamed: 0_level_0,sample,expected_cells,Source,learning_rate,solo_out
Unnamed: 0_level_1,<chr>,<int>,<chr>,<dbl>,<chr>
1,HPAP-029-hg38,1692,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-029-hg38/HPAP-029-hg38.Solo.out/
2,HPAP-035-hg38,1207,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-035-hg38/HPAP-035-hg38.Solo.out/
3,HPAP-036-hg38,1271,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-036-hg38/HPAP-036-hg38.Solo.out/
4,HPAP-037-hg38,2439,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-037-hg38/HPAP-037-hg38.Solo.out/
5,HPAP-040-hg38,1731,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-040-hg38/HPAP-040-hg38.Solo.out/
6,HPAP-043-hg38,935,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-043-hg38/HPAP-043-hg38.Solo.out/


In [6]:
# total_droplets_included = mean(end cliff, inflection)
# For each sample, read its knee table from the emptyDrops results directory
# and average the inflection rank and the end-of-cliff rank. vapply() builds
# the result in one pass (no growing a vector inside a loop) and guarantees
# exactly one numeric value per sample.
total_droplets_included <- vapply(
    df$sample,
    function(sample_id) {
        # Knee files are named with the sample ID without the "-hg38" suffix.
        s <- gsub("-hg38", "", sample_id, fixed = TRUE)
        knee <- read.table(
            paste0("/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/results/emptyDrops/results/", s, "_knee.txt"),
            header = TRUE,
            sep = "\t"
        )
        mean(c(knee$inflection_rank, knee$end_cliff_rank))
    },
    FUN.VALUE = numeric(1),
    USE.NAMES = FALSE # keep the column unnamed, as with the original loop
)

# Round up to a whole number of droplets; ceiling(x) gives the same result as
# plyr::round_any(x, 1, ceiling).
df$total_droplets_included <- ceiling(total_droplets_included)
head(df)

Unnamed: 0_level_0,sample,expected_cells,Source,learning_rate,solo_out,total_droplets_included
Unnamed: 0_level_1,<chr>,<int>,<chr>,<dbl>,<chr>,<dbl>
1,HPAP-029-hg38,1692,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-029-hg38/HPAP-029-hg38.Solo.out/,3116
2,HPAP-035-hg38,1207,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-035-hg38/HPAP-035-hg38.Solo.out/,2401
3,HPAP-036-hg38,1271,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-036-hg38/HPAP-036-hg38.Solo.out/,2252
4,HPAP-037-hg38,2439,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-037-hg38/HPAP-037-hg38.Solo.out/,5557
5,HPAP-040-hg38,1731,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-040-hg38/HPAP-040-hg38.Solo.out/,3272
6,HPAP-043-hg38,935,HPAP,0.0001,/nfs/turbo/umms-scjp-pank/vthihong/0_cellbender_optimize/data/starsolo/HPAP-043-hg38/HPAP-043-hg38.Solo.out/,2518
