In [1]:
# Author: Antti Kiviaho
# Date: 20.1.2023
# A script for running normalization and sample integration clustering.
# Uses the scbi integration environment and pipeline:
#
#
# 1. Cell and gene filtering
# 2. scran normalization through R interface using
# 3. batch-aware scaling with scib
# 4. batch-aware HVGs with scib
# 5. scanorama integration into PCA, clustering, UMAP

In [2]:
import os
os.chdir('/lustre/scratch/kiviaho/prostate_spatial/')
import numpy as np
import anndata as ad
import scanpy as sc
import pandas as pd
import seaborn as sns
import scib
#import scanorama

import matplotlib.pyplot as plt
from scripts.utils import get_sample_ids, save_to_pickle

In [3]:
# Download data from spaceranger output files in bulk
samples = get_sample_ids()
adata_dict = {}
for idx,sample_id in enumerate(samples):
    adata_sample = sc.read_visium('./results/'+sample_id+'/outs/',library_id=sample_id)
    adata_sample.var_names_make_unique()
    adata_dict[sample_id] = adata_sample

  utils.warn_names_duplicates("var")


In [4]:
# Produce QC plots as done at https://scanpy-tutorials.readthedocs.io/en/latest/spatial/integration-scanorama.html
# Save the QC plots to a path
from pathlib import Path
dir_path = './plots/qc-plots-for-spatial-sections'
Path(dir_path).mkdir(parents=True, exist_ok=True)
for name in samples:
    adata = adata_dict[name]
    sc.pp.calculate_qc_metrics(adata, inplace=True)
    fig, axs = plt.subplots(1, 4, figsize=(24, 6))
    fig.suptitle(f"Covariates for filtering: {name}")

    sns.distplot(adata.obs["total_counts"], kde=False, ax=axs[0])
    sns.distplot(
        adata.obs["total_counts"][adata.obs["total_counts"] < 20000],
        kde=False,
        bins=40,
        ax=axs[1],
    )
    sns.distplot(adata.obs["n_genes_by_counts"], kde=False, bins=60, ax=axs[2])
    sns.distplot(
        adata.obs["n_genes_by_counts"][adata.obs["n_genes_by_counts"] < 4000],
        kde=False,
        bins=60,
        ax=axs[3],
    )
    fig.tight_layout()
    fig.set_dpi(200)
    plt.savefig(dir_path+'/'+name+'_qc_metrics'+'.png')
    plt.clf()


`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  if sys.path[0] == "":

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  app.launch_new_instance()

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level funct

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

<Figure size 4800x1200 with 0 Axes>

In [5]:
def qc_and_normalize(adata):
    # QC and normalize
    sc.pp.filter_genes(adata, min_cells=5)
    sc.pp.filter_cells(adata, min_counts=500)
    scib.preprocessing.normalize(adata,precluster=False)
    return adata

In [6]:
for sample_id in samples:
    adata = adata_dict[sample_id]
    adata = qc_and_normalize(adata)

    # add ids to the data for use after data concatenation
    adata_dict[sample_id].obs['sample_id'] = sample_id
    adata_dict[sample_id].obs_names = sample_id + '_' + adata_dict[sample_id].obs_names

  values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))


Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalization.
Note! Performing log1p-transformation after normalizatio

In [7]:
save_to_pickle(adata_dict,'./data/normalized_visium_data.pickle')

In [9]:
""" adata_concat = sc.concat(adata_dict)
adata_concat.obs.sample_id = adata_concat.obs.sample_id.astype('category')
scib.preprocessing.scale_batch(adata_concat,batch='sample_id')
hvg_list = scib.preprocessing.hvg_batch(adata_concat,batch_key='sample_id',target_genes=3000,flavor='seurat',adataOut=False)

adata_scanorama = scib.integration.scanorama(adata_concat,batch='sample_id',hvg=hvg_list)

save_to_pickle(adata_scanorama,'./data/visium_after_scanorama.pickle') """

" adata_concat = sc.concat(adata_dict)\nadata_concat.obs.sample_id = adata_concat.obs.sample_id.astype('category')\nscib.preprocessing.scale_batch(adata_concat,batch='sample_id')\nhvg_list = scib.preprocessing.hvg_batch(adata_concat,batch_key='sample_id',target_genes=3000,flavor='seurat',adataOut=False)\n\nadata_scanorama = scib.integration.scanorama(adata_concat,batch='sample_id',hvg=hvg_list)\n\nsave_to_pickle(adata_scanorama,'./data/visium_after_scanorama.pickle') "