In [1]:
# Author: Antti Kiviaho
# Date: 20.1.2023
# A script for running normalization and sample integration clustering.
# Uses the scib integration environment and pipeline:
#
#
# 1. Cell and gene filtering
# 2. scran normalization through the R interface using scib
# 3. batch-aware scaling with scib
# 4. batch-aware HVGs with scib

In [1]:
import os
os.chdir('/lustre/scratch/kiviaho/prostate_spatial/')
import numpy as np
import anndata as ad
import scanpy as sc
import pandas as pd
import seaborn as sns
import scib
#import scanorama

import matplotlib.pyplot as plt

# Added spot exclusion information 28.2.2024
from scripts.utils import get_sample_ids_reorder, save_to_pickle, get_include_exclude_info

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-zplkst6x because the default path (/run/cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
def qc_and_normalize(adata, min_cells=5, min_counts=500):
    """Filter genes and observations of one sample, then normalize it with scib.

    The three steps are run in this order:
      1. ``sc.pp.filter_genes(adata, min_cells=min_cells)``
      2. ``sc.pp.filter_cells(adata, min_counts=min_counts)``
      3. ``scib.preprocessing.normalize(adata, precluster=False)``

    Parameters
    ----------
    adata
        Object handed to the scanpy / scib calls above. It is passed to them
        without copying, so with their default in-place behaviour the caller's
        object is modified.
    min_cells : int, default 5
        Forwarded to ``sc.pp.filter_genes`` as ``min_cells``.
    min_counts : int, default 500
        Forwarded to ``sc.pp.filter_cells`` as ``min_counts``.

    Returns
    -------
    The ``adata`` object that was passed in, after filtering and normalization.
    """
    # Gene filter first, then the per-observation count filter.
    sc.pp.filter_genes(adata, min_cells=min_cells)
    sc.pp.filter_cells(adata, min_counts=min_counts)

    # precluster=False is kept from the original call.
    scib.preprocessing.normalize(adata, precluster=False)
    return adata

In [3]:
# Revised on 28.2.2024:
# - regions carrying an exclude/include annotation are removed (exclude_info)
# - each sample is saved separately for better memory management

# Fetch the sample ids and keep only the samples from the Tampere cohort,
# i.e. drop every id that contains 'P320'
samples = list(filter(lambda sid: 'P320' not in sid, get_sample_ids_reorder()))

# Include/exclude annotation used for spot exclusion
exclude_info = get_include_exclude_info()


  utils.warn_names_duplicates("var")
  adata.var['n_cells'] = number
  values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))


Note! Performing log1p-transformation after normalization.
BPH_665 done


In [3]:
## Revised on 27.4.2024
## Process only the MET samples: this assignment replaces any
## previously defined `samples` list

samples = [
    'MET_A3',
    'MET_GP12',
    'MET_A14',
    'MET_A16',
]

In [6]:
# Per-sample preprocessing: read the spaceranger output of every id in `samples`,
# prefix the observation names with the sample id, run QC + normalization and
# write one .h5ad file per sample. Samples are written individually rather than
# collected into a single pickled dict.
normalized_dir = 'data/normalized_visium'

# Create the output directory up front so that a missing folder cannot make
# write_h5ad fail after a sample has already been normalized.
os.makedirs(normalized_dir, exist_ok=True)

for sample_id in samples:

    adata_sample = sc.read_visium(f'./results/{sample_id}/outs/', library_id=sample_id)
    adata_sample.var_names_make_unique()

    # add ids to the data for use after data concatenation
    adata_sample.obs_names = sample_id + '_' + adata_sample.obs_names

    # Only use this with Tampere cohort samples
    # Subset spots from a single sample
    #sample_exclude_info = exclude_info.loc[adata_sample.obs_names].copy()
    #sample_spots_to_keep =  sample_exclude_info[~sample_exclude_info['Pathology'].isin(['Exclude','Lumen'])].index
    # Subset the sample with spots to keep
    #adata_sample = adata_sample[sample_spots_to_keep]

    # QC and normalize – this filters out more spots
    adata_sample = qc_and_normalize(adata_sample)

    # Save the object
    adata_sample.write_h5ad(os.path.join(normalized_dir, f'{sample_id}_normalized.h5ad'))

    print(sample_id + ' done')

    # Drop the reference before the next iteration loads another sample
    del adata_sample

  values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))


Note! Performing log1p-transformation after normalization.
MET_A3 done


  utils.warn_names_duplicates("var")


Note! Performing log1p-transformation after normalization.
MET_GP12 done
Note! Performing log1p-transformation after normalization.
MET_A14 done
Note! Performing log1p-transformation after normalization.
MET_A16 done


In [41]:
# Write every ARNEO sample to its own .h5ad file, next to the other
# normalized samples
from scripts.utils import load_from_pickle

# NOTE(review): load_from_pickle presumably unpickles the file; only load
# pickles from a trusted source.
adata_vis_arneo = load_from_pickle('./arneo/data/normalized_arneo_visium_data.pickle')

for sample_id in adata_vis_arneo:
    # Same file naming scheme as the other normalized samples
    arneo_out_path = 'data/normalized_visium/' + sample_id + '_normalized.h5ad'
    adata_vis_arneo[sample_id].write_h5ad(arneo_out_path)