# CNV Calling with InferCNV

Based on the InferCNV package tutorials: https://github.com/broadinstitute/infercnv/

In [None]:
# Pipeline parameters, supplied through environment variables.
reference <- Sys.getenv("SNAKEMAKE_REFERENCE_LIST")  # Comma-separated list of reference cell types
input_file <- Sys.getenv("SNAKEMAKE_INPUT_FILE")  # Input h5ad file
gene_order_file <- Sys.getenv("SNAKEMAKE_GENE_ORDER_FILE")  # Gene order file for InferCNV
celltype_column <- Sys.getenv("SNAKEMAKE_CELLTYPE_COLUMN", "celltype")  # Column in adata with cell type information
output_dir <- Sys.getenv("SNAKEMAKE_OUTPUT_DIRECTORY")  # Output directory for InferCNV results
threads_raw <- Sys.getenv("SNAKEMAKE_NUM_THREADS", "1")
# as.integer() returns NA (with a warning) for non-numeric text; the NA is
# checked explicitly below, so the coercion warning itself is suppressed.
threads <- suppressWarnings(as.integer(threads_raw))  # Number of threads to use

# Check that the input files exist (an unset variable reads as "" and fails here too)
if (!file.exists(input_file)) {
  stop(paste("Input file does not exist:", input_file))
}

if (!file.exists(gene_order_file)) {
  stop(paste("Gene order file does not exist:", gene_order_file))
}

# Fail early instead of handing an empty output path to InferCNV
if (!nzchar(output_dir)) {
  stop("Output directory is not set (SNAKEMAKE_OUTPUT_DIRECTORY)")
}

# Fail early instead of handing NA or a non-positive thread count to InferCNV
if (is.na(threads) || threads < 1L) {
  stop(paste("Number of threads must be a positive integer, got:", threads_raw))
}

cat(paste("Reference cell types:", reference, "\n"))
cat(paste("Input file:", input_file, "\n"))
cat(paste("Gene order file:", gene_order_file, "\n"))
cat(paste("Cell type column:", celltype_column, "\n"))
cat(paste("Output directory:", output_dir, "\n"))
cat(paste("Number of threads:", threads, "\n"))

In [None]:
# Read the h5ad input file into R as a SingleCellExperiment object
library(zellkonverter)
setZellkonverterVerbose(TRUE)  # turn on zellkonverter's verbose output
adata <- readH5AD(file = input_file)
# Display the loaded object
adata

In [None]:
# Generate inputs
# First, grab the expression matrix and make sure it is dense.
# readH5AD() returns a SingleCellExperiment, on which `$` looks up cell
# metadata (colData) columns rather than assays, so the AnnData X matrix has
# to be fetched as the assay named "X". SummarizedExperiment is called via
# `::` because library(zellkonverter) does not attach it.
library(Matrix)
expression <- SummarizedExperiment::assay(adata, "X")
if (!is.matrix(expression)) {
  # Covers dgCMatrix as well as any other non-base matrix representation
  expression <- as.matrix(expression)
}

# `[[` on the object looks up a cell metadata column; NULL means it is absent
cell_types <- adata[[celltype_column]]
if (is.null(cell_types)) {
  stop(paste("Cell type column not found in input data:", celltype_column))
}

# Write the annotations file (tab-separated, no header; cell barcode in the
# first column, cell type in the second)
annotations <- data.frame(
  cell = colnames(adata),
  celltype = as.character(cell_types)
)
write.table(
  annotations,
  file = "annotations.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
# Clear the adata object to free up memory
rm(adata)

In [None]:
# Create the object for InferCNV
library(infercnv)

# The reference groups arrive as one comma-separated string. Strip surrounding
# whitespace and drop empty entries so that e.g. "T cell, B cell" yields
# "T cell" and "B cell" rather than "T cell" and " B cell".
ref_groups <- trimws(strsplit(reference, ",")[[1]])
ref_groups <- ref_groups[nzchar(ref_groups)]
if (length(ref_groups) == 0) {
  # NOTE(review): NULL is presumably how InferCNV is told that there are no
  # reference groups -- confirm against the installed version.
  ref_groups <- NULL
}

# NOTE(review): the former `gene_mode = "gene_symbol"` argument was dropped;
# it does not appear among CreateInfercnvObject()'s documented arguments and
# would fail with "unused argument" -- confirm against the installed version.
infercnv_obj <- CreateInfercnvObject(
  raw_counts_matrix = expression,
  gene_order_file = gene_order_file,
  annotations_file = "annotations.txt",
  ref_group_names = ref_groups,
  delim = "\t"
)

In [None]:
# Run the InferCNV analysis on the prepared object
infercnv_obj <- infercnv::run(
  infercnv_obj,
  # Where results go and how many threads to use
  out_dir = output_dir,
  num_threads = threads,
  # Cutoff for CNV detection (0.1 is a common value for 10X Genomics data)
  cutoff = 0.1,
  # Subcluster mode, see
  # https://github.com/broadinstitute/inferCNV/wiki/infercnv-tumor-subclusters
  analysis_mode = "subclusters",
  HMM = TRUE,
  denoise = TRUE,  # denoise the data
  cluster_by_groups = TRUE,  # cluster by cell type groups
  output_format = "pdf"  # file format of the plots
)
infercnv_obj

In [None]:
# Print the R version, platform and attached/loaded package versions so the
# run can be reproduced
sessionInfo()