In [None]:
# Author: Antti Kiviaho
# Date: 20.1.2023
# A script for running normalization and sample integration clustering.
# Uses the scib integration environment and pipeline:
#
#
# 1. Cell and gene filtering
# 2. scran normalization through the R interface using scib.preprocessing.normalize


In [None]:
import os
os.chdir('/lustre/scratch/kiviaho/prostate_spatial/')
import numpy as np
import anndata as ad
import scanpy as sc
import pandas as pd
import seaborn as sns
import scib

import matplotlib.pyplot as plt

# Added spot exclusion information 28.2.2024
from scripts.utils import get_sample_ids_reorder, save_to_pickle, get_include_exclude_info

In [None]:
def qc_and_normalize(adata, min_cells=5, min_counts=500):
    """Filter genes and spots of one sample, then normalize it with scib.

    The filtering and normalization calls operate on ``adata`` itself; the
    same object is returned so callers can re-bind the result.

    Parameters
    ----------
    adata
        AnnData object to process (one Visium sample in this notebook).
    min_cells
        Forwarded to ``sc.pp.filter_genes`` as ``min_cells``. Defaults to 5.
    min_counts
        Forwarded to ``sc.pp.filter_cells`` as ``min_counts``. Defaults to 500.

    Returns
    -------
    The ``adata`` object that was passed in, after QC and normalization.
    """
    # Genes are filtered before spots, so the per-spot count threshold is
    # applied after the gene filter has already run.
    sc.pp.filter_genes(adata, min_cells=min_cells)
    sc.pp.filter_cells(adata, min_counts=min_counts)
    # precluster stays disabled, exactly as in the original pipeline.
    scib.preprocessing.normalize(adata, precluster=False)
    return adata


In [None]:
# This non-pathology-filtered dictionary was used for cell2location,
# as the pathology annotation wasn't available at the time of mapping.

samples = get_sample_ids_reorder(['BPH','untreated','bicalutamide','goserelin','CRPC'])
samples_dict = {} # Maps sample id -> QC-filtered and normalized sample object
for sample_id in samples:
    adata_sample = sc.read_visium('./results/'+sample_id+'/outs/',library_id=sample_id)
    adata_sample.var_names_make_unique()

    # Prefix the spot names with the sample id so they stay unique
    # after the samples are concatenated
    adata_sample.obs_names = sample_id + '_' + adata_sample.obs_names

    # QC and normalize – this filters out more spots.
    # The result is stored directly: no other reference to the object is kept
    # past this iteration, so an extra .copy() would only double peak memory.
    samples_dict[sample_id] = qc_and_normalize(adata_sample)
    print(sample_id + ' done')

save_to_pickle(samples_dict,'./data/normalized_no_pathology_filter_visium_data.pkl')



In [None]:
# Revised on 28.2.2024:
# Remove regions with exclude/include annotation (exclude_info)
# Save each sample separately for better memory management

samples = get_sample_ids_reorder()
exclude_info = get_include_exclude_info()

# Make sure the output folder exists before the first sample is written
normalized_dir = 'data/normalized_visium/'
os.makedirs(normalized_dir, exist_ok=True)

obs_data_list = [] # Collects the obs table of every sample (spots that passed all filters)
for sample_id in samples:
    adata_sample = sc.read_visium('./results/'+sample_id+'/outs/',library_id=sample_id)
    adata_sample.var_names_make_unique()

    # Add ids to the data for use after data concatenation
    adata_sample.obs_names = sample_id + '_' + adata_sample.obs_names

    # Only use this with Tampere cohort samples
    # Look up the annotation rows of this sample's spots; .loc raises a KeyError
    # if a spot has no entry in exclude_info
    sample_exclude_info = exclude_info.loc[adata_sample.obs_names]
    is_excluded = sample_exclude_info['Pathology'].isin(['Exclude','Lumen'])
    sample_spots_to_keep = sample_exclude_info[~is_excluded].index

    # Subset the sample with spots to keep. Subsetting returns a view, so take
    # an explicit copy before qc_and_normalize modifies the object in place.
    adata_sample = adata_sample[sample_spots_to_keep].copy()

    # QC and normalize – this filters out more spots
    adata_sample = qc_and_normalize(adata_sample)

    # Save the object
    adata_sample.write_h5ad(normalized_dir+sample_id+'_normalized.h5ad')

    # Keep the obs table; only its index is used after the loop
    obs_data_list.append(adata_sample.obs)

    print(sample_id + ' done')
    del adata_sample

# Save the ids of all spots that passed the qc (110681 in the original run)
pd.DataFrame(index=pd.concat(obs_data_list).index).to_csv('./data/post_qc_and_pathology_annot_valid_spots.csv')

