R environment

In [None]:
library(Seurat)
library(dplyr)
library(future)
library(data.table)
# Fix the RNG seed so stochastic steps give the same result on every run.
set.seed(1)

# `plan("multiprocess")` is deprecated (future >= 1.20.0) and defunct in later
# releases. Select explicitly what it used to resolve to: forked workers where
# forking is supported, background R sessions otherwise.
if (future::supportsMulticore()) {
  plan(multicore, workers = 8)
} else {
  plan(multisession, workers = 8)
}

# Upper bound, in bytes, on the globals exported to each future.
# NOTE(review): 1000 * 1024^5 is 1000 PiB, i.e. effectively no limit; the
# usual idiom is 1024^2 (MiB) or 1024^3 (GiB) -- confirm this is intentional.
options(future.globals.maxSize = 1000 * 1024^5)

Attaching SeuratObject


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last




In [None]:
# Load the serialized object that the rest of the notebook works on.
gbm <- readRDS(file = "data/mapped_Ruiz2021_to_core_GBmap.rds")

In [None]:
# Write the "counts" slot as a tab-separated file for inferCNV.
# Row names and column names are both written, unquoted. as.matrix() converts
# the counts to a base matrix before writing.
# NOTE(review): presumably this densifies a sparse matrix and can be memory
# hungry for large datasets -- confirm.
write.table(
  as.matrix(GetAssayData(object = gbm, slot = "counts")),
  file = "data/gbm-count-matrix.tsv",
  sep = "\t",
  row.names = TRUE, # spelled out: T/F are ordinary variables and can be reassigned
  col.names = TRUE,
  quote = FALSE
)

In [None]:
# Use the `predicted.high_hierarchy` metadata column as the cell identities.
# `[[` matches the column name exactly, whereas `$` on a data.frame allows
# partial matching and could silently pick up a different column; the check
# makes a missing column fail loudly here rather than further downstream.
stopifnot("predicted.high_hierarchy" %in% colnames(gbm@meta.data))
Idents(gbm) <- gbm@meta.data[["predicted.high_hierarchy"]]

In [None]:
# Annotation table built from the current identities: the row names are kept
# as the first column ("rn"), followed by the identity of each entry.
anno_file <- as.data.table(as.data.frame(Idents(gbm)), keep.rownames = TRUE)

In [None]:
# Write the annotation table as a headerless, unquoted, tab-separated file
# (no row names). This path is later passed to CreateInfercnvObject() as
# `annotations_file`.
write.table(
  anno_file,
  file = "data/gbm-anno-file.tsv",
  sep = "\t",
  col.names = FALSE, # spelled out: T/F are ordinary variables and can be reassigned
  row.names = FALSE,
  quote = FALSE
)

### inferCNV analysis

In [None]:
# Attach inferCNV, then print the session details (R version, platform,
# attached and loaded packages) so the run environment is recorded.
library(infercnv)
sessionInfo()

R version 4.0.3 (2020-10-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /hpc/pmc_stunnenberg/cruiz/miniconda3/envs/r_pHGG_project/lib/libopenblasp-r0.3.12.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] infercnv_1.9.0

loaded via a namespace (and not attached):
  [1] nlme_3.1-152                bitops_1.0-7               
  [3] matrixStats_0.59.0          doParallel_1.0.16          
  [5] RColorBrewer_1.1-2          GenomeInfoDb_1.26.7        
  [7] repr_1.1.3              

In [None]:
# create the infercnv object
infercnv_obj = CreateInfercnvObject(raw_counts_matrix='data/gbm-count-matrix.tsv',
                                    annotations_file='data/gbm-anno-file.tsv',
                                    delim="\t",
                                    gene_order_file="data/gencode_v38_gene_pos.txt",
                                    ref_group_names=c('CD4/CD8', 'DC', 'Endothelial', 'Mono',
                                                     'Oligodendrocyte','Pericyte', 
                                                     'TAM-BDM', 'TAM-MG')
                                    # Did not include predicted OPC, neuron and astrocytes to test whether
                                    # the assigment of normal cells was correct 
                                    )

INFO [2021-07-25 09:48:02] Parsing matrix: data/gbm-count-matrix.tsv
INFO [2021-07-25 09:55:55] Parsing gene order file: /hpc/pmc_stunnenberg/cruiz/scRNA/markers-and-databases/gencode_v38_gene_pos.txt
INFO [2021-07-25 09:55:55] Parsing cell annotations file: data/gbm-anno-file.tsv
INFO [2021-07-25 09:55:55] ::order_reduce:Start.
INFO [2021-07-25 09:56:00] .order_reduce(): expr and order match.
INFO [2021-07-25 09:56:05] ::process_data:order_reduce:Reduction from positional data, new dimensions (r,c) = 25387,38830 Total=227223243 Min=0 Max=4059.
INFO [2021-07-25 09:56:10] num genes removed taking into account provided gene ordering list: 823 = 3.24181667782723% removed.
INFO [2021-07-25 09:56:10] -filtering out cells < 100 or > Inf, removing 0 % of cells
INFO [2021-07-25 09:56:46] validating infercnv_obj


In [None]:
# perform infercnv operations to reveal cnv signal
infercnv_obj = infercnv::run(infercnv_obj,
                             cutoff=0.1, 
                             out_dir="infercnv_FINAL", 
                             cluster_by_groups=F, 
                             denoise=TRUE,
                             HMM=TRUE,
                             num_threads = 8
                             )

INFO [2021-07-25 09:56:46] ::process_data:Start
INFO [2021-07-25 09:56:46] Checking for saved results.
INFO [2021-07-25 09:56:46] Trying to reload from step 17
INFO [2021-07-25 09:57:01] Using backup HMM from step 17
INFO [2021-07-25 09:57:01] Trying to reload from step 15
INFO [2021-07-25 09:57:28] Using backup from step 15
INFO [2021-07-25 09:57:28] 

	STEP 1: incoming data

INFO [2021-07-25 09:57:28] 

	STEP 02: Removing lowly expressed genes

INFO [2021-07-25 09:57:28] 

	STEP 03: normalization by sequencing depth

INFO [2021-07-25 09:57:28] 

	STEP 04: log transformation of data

INFO [2021-07-25 09:57:28] 

	STEP 08: removing average of reference data (before smoothing)

INFO [2021-07-25 09:57:28] 

	STEP 09: apply max centered expression threshold: 3

INFO [2021-07-25 09:57:28] 

	STEP 10: Smoothing data per cell by chromosome

INFO [2021-07-25 09:57:28] 

	STEP 11: re-centering data across chromosome after smoothing

INFO [2021-07-25 09:57:28] 

	STEP 12: removing average of re

In [None]:
# Persist the inferCNV object so it can be reloaded without re-running.
saveRDS(object = infercnv_obj, file = "data/Ruiz2021_infercnv.rds")