Set up working environment

In [None]:
import scanpy as sc
import infercnvpy as cnv
import matplotlib as plt
import pandas as pd
import numpy as np
import scipy as sp

sc.set_figure_params(figsize=(4,4))
sc.set_figure_params(dpi=200)
sc.settings.n_jobs = 1 #nCores/CPUs for scanpy

# for white background of figures (only for docs rendering)
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

#hpc figures
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # print multiple outputs per code cell (not just last)

### Step 0: prepare input data

In [None]:
# Read the 10x-format HDF5 matrix found at h5_path into `adata`.
h5_path = "/scratch/gent/vo/000/gvo00027/projects/Single_Cell_Neuroblastoma/NBAtlas/01_Import_Preprocessing_NBAtlas/h5objects/nb_matrix_NBAtlas.h5"
adata = sc.read_10x_h5(h5_path)
# bare expression: displays the object summary as cell output
adata

In [None]:
# Read the metadata table that is assigned to adata.obs further down;
# the first CSV column becomes the DataFrame index.
meta_path = "/scratch/gent/vo/000/gvo00027/projects/Single_Cell_Neuroblastoma/NBAtlas/03c_post_scVI_R_plots/Tables/03c_post_scVI_R_plots_covSample_MetaDataForInferCNV.csv"
metaData = pd.read_csv(meta_path, index_col = 0)

In [None]:
# Replace the observation annotations of adata with the metadata table.
# NOTE(review): no alignment check is done here — presumably the rows of
# metaData are in the same order as the cells of the loaded matrix; confirm.
adata.obs = metaData

# cast the obs index to strings
adata.obs.index = adata.obs.index.astype(str)

# bare expression: displays the updated object summary
adata

In [None]:
#save raw counts reparately

adata.layers["counts"] = adata.X

In [None]:
#normalize and transform

sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
#annotate highly variable genes

sc.pp.highly_variable_genes(
    adata,
    n_top_genes=5000,
    subset=False,
    layer="counts",
    flavor="seurat",
    batch_key="Study"
)

In [None]:
#PCA
# compute the PCA on adata, then plot the variance ratio of the first 50 components

sc.tl.pca(adata)
sc.pl.pca_variance_ratio(adata, n_pcs=50)

In [None]:
# build the neighbor graph (20 PCs, 20 neighbors), then compute the UMAP embedding
sc.pp.neighbors(adata, n_pcs=20, n_neighbors=20)
sc.tl.umap(adata, min_dist=0.3)

In [None]:
#compare cells colored by study (left panel) and by cell type (right panel)
# `plt` is the top-level matplotlib package in this notebook, hence `plt.pyplot`

fig, (ax1, ax2) = plt.pyplot.subplots(
    nrows=1,
    ncols=2,
    figsize=(15, 5),
    gridspec_kw={"wspace": 1},
)

# show=False keeps both panels on the shared figure instead of rendering each call separately
sc.pl.umap(adata, color=["Study"], frameon=True, show=False, ax=ax1)

sc.pl.umap(adata, color=["annot_NBN_iCNV"], ncols=2, show=False, ax=ax2)

In [None]:
#check cell types
# number of cells per label in the "annot_NBN_iCNV" annotation column

adata.obs["annot_NBN_iCNV"].value_counts()

In [None]:
# labels of the "annot_NBN_iCNV" column that are passed to infercnv as reference categories
ref_cells = ["T/NK cell", "Myeloid", "B cell", "Endothelial", "Stromal other", "pDC", "Plasma"]

### Step 1: annotate anndata genes with their position in the chromosomes


In [None]:
#annotate genomic location of genes
# positions are taken from the GENCODE v43 GTF and written onto adata;
# gtf_gene_id="gene_name" selects the GTF attribute used to identify genes
# (presumably matched against adata.var_names — confirm)

cnv.io.genomic_position_from_gtf(
    gtf_file= "/kyukon/scratch/gent/vo/000/gvo00027/projects/CBIGR/22VZZ_singlecellNB/metaanalysis_atlas/data/annot_files_and_more/gencode.v43.annotation.gtf.gz", 
    adata=adata, 
    gtf_gene_id="gene_name")

### Step 2: run CNV inference

In [None]:
#actual calculation of CNVs happens here

%%time

cnv.tl.infercnv(
    adata,
    reference_key="annot_NBN_iCNV",
    reference_cat= ref_cells,
    window_size=250, 
    n_jobs=2, 
    step=1
)

In [None]:
#if needed, include chromosome positions based on gene ordering
# Hard-coded start offset of every chromosome (chr1..chr22), stored under
# adata.uns["cnv"]["chr_pos"].
# NOTE(review): this assignment replaces anything already stored in
# adata.uns["cnv"] — confirm the offsets match the matrix in adata.obsm["X_cnv"].

adata.uns["cnv"] = {
    "chr_pos": dict(
        chr1=0,
        chr2=2361,
        chr3=3928,
        chr4=5329,
        chr5=6259,
        chr6=7355,
        chr7=8587,
        chr8=9703,
        chr9=10488,
        chr10=11356,
        chr11=12328,
        chr12=13608,
        chr13=14825,
        chr14=15389,
        chr15=16223,
        chr16=16953,
        chr17=17893,
        chr18=19207,
        chr19=19560,
        chr20=21053,
        chr21=21699,
        chr22=21972,
    )
}

In [None]:
#Save or load inferCNV result
# If the cached matrix exists on disk it is loaded into adata.obsm["X_cnv"]
# (same as before). If it does not exist yet, the matrix currently held in
# adata.obsm["X_cnv"] is written to that file instead of failing with
# FileNotFoundError, so a fresh "Restart & Run All" creates the cache.
try:
    adata.obsm["X_cnv"] = sp.sparse.load_npz("NBAtlas_X_cnv_step1_wind250.npz")
except FileNotFoundError:
    sp.sparse.save_npz("NBAtlas_X_cnv_step1_wind250.npz", adata.obsm["X_cnv"], compressed=True)

In [None]:
# bare expression: displays the object summary after the X_cnv matrix was set
adata

In [None]:
#save or load results
# adata_dest is the .h5ad file used for the whole AnnData object; both the write
# and the read call are left commented out — uncomment the one that is needed.
adata_dest = "/kyukon/scratch/gent/vo/000/gvo00027/projects/CBIGR/22VZZ_singlecellNB/metaanalysis_atlas/data/NBAtlas_h5ad/NBAtlas_CNVpydata.h5ad"
#adata.write(adata_dest)
#adata = sc.read(adata_dest)

In [None]:
# bare expression: displays the final object summary
adata

In [None]:
# Report the session's package versions as plain text (html=False),
# including dependencies (dependencies=True).
import session_info
session_info.show(html=False, dependencies=True)