In [None]:
# 18.1.2023
# A space for functions and snippets that are not quite dead and dusted
# but appear redundant.

In [None]:
import numpy as np
import anndata as ad
import scanpy as sc
import squidpy as sq
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
from pathlib import Path
# Raise Pillow's pixel-count limit (to 1e9 pixels) so that very large images can
# be opened without tripping its oversized-image check. Presumably needed for
# the high-resolution tissue images -- TODO confirm.
Image.MAX_IMAGE_PIXELS = 1000000000 

from  scripts.utils import get_sample_ids

In [None]:
# Build one AnnData per sample from the SpotClean count tables and attach the
# spot annotations / spatial metadata of the matching original object.
adata_spotclean_dict = {}
for sample_id in samples:
    adata = adata_dict[sample_id]
    spotclean_data = pd.read_csv('./results/after_spotclean/'+sample_id+'_counts_after_spotclean.csv')
    # The table's column names use '.' where the original barcodes use '-'.
    # The replacement has to be literal: interpreted as a regular expression,
    # '.' matches every character and would turn each name into a run of '-'.
    spotclean_data.columns = spotclean_data.columns.str.replace('.','-',regex=False)
    adata_spotclean = ad.AnnData(X=spotclean_data.T)

    # Index.equals checks elements and order, and simply returns False when the
    # lengths differ (an element-wise `==` would raise instead).
    if adata.obs_names.equals(adata_spotclean.obs_names):
        print(str(sample_id) + ' has matching spot order. Copying spatial information...')
        # Copy obs/uns so that results written to the SpotClean object later
        # (cluster labels, colour palettes) do not leak into the original object.
        # The uns copy is shallow: nested entries are still shared.
        adata_spotclean.obs = adata.obs.copy()
        adata_spotclean.uns = dict(adata.uns)
        adata_spotclean.obsm = adata.obsm
        adata_spotclean_dict[sample_id] = adata_spotclean
    else:
        # Make the skip visible; otherwise the sample is silently missing from
        # adata_spotclean_dict.
        print(str(sample_id) + ' spot order does not match the original data. Skipping sample.')

In [None]:
# Post-SpotClean clustering sweep. The data should have less variation present.
# For every sample, cluster at a fixed resolution over a range of spatial
# proximity weights and save one spatial plot per (weight, resolution) setting.
#
# Parameters to tune:
#   weight (spatial proximity weight) ~0.1-0.3
#   resolution                        ~0.3-1.0

resolution = 0.8
for sample_id in samples:
    adata_spotclean = adata_spotclean_dict[sample_id]

    # Since spotclean data is already normalized, there is only need to look for spatial neighbors and
    # do PCA + clustering
    sq.gr.spatial_neighbors(adata_spotclean, n_rings=2, coord_type="grid", n_neighs=6,transform='cosine')
    sc.pp.pca(adata_spotclean, n_comps=15)
    sc.pp.neighbors(adata_spotclean)

    for weight in [0.1,0.15,0.2,0.25,0.3]:
        dir_path = './plots/spotclean_spatial_weight_'+str(weight)+'_resolution_'+str(resolution)
        Path(dir_path).mkdir(parents=True, exist_ok=True)
        adata_spotclean = joint_cluster(adata_spotclean,proximity_weight=weight,res=resolution)
        # A palette cached in .uns by an earlier plot may not fit the clusters
        # of the new run, which can make plotting fail. Clear it up front so a
        # fresh palette is generated, instead of catching every exception and
        # retrying. The default makes the pop safe when no palette is cached.
        adata_spotclean.uns.pop('joint_leiden_clusters_colors', None)
        sq.pl.spatial_scatter(adata_spotclean,color=['joint_leiden_clusters'],size=1.2,figsize=(8,8),dpi=120)
        plt.savefig(dir_path+'/'+sample_id+'_clusters_proximity_weight_'+str(weight)+'_resolution_'+str(resolution)+'.png')
        # Close each figure so the sweep does not accumulate open figures.
        plt.close()
        

In [None]:
# Save the clustering results of one chosen parameter combination.
# For every sample a spatial cluster plot and a CSV of cluster assignments are
# written; the CSV is exported in a format compatible with Loupe browser.
# resolution: 0.8
# weight: 0.2
resolution = 0.8
weight = 0.2

# The output directory does not depend on the sample, so create it once.
dir_path = './clustering/'
Path(dir_path).mkdir(parents=True, exist_ok=True)

for sample_id in samples:
    #sample_id = 'PC_7875OIK'
    adata = adata_dict[sample_id]
    # NOTE: calculate_neighbors filters, normalizes and log-transforms the object
    # it is given in place, so the entry in adata_dict is modified as well and
    # re-running this cell would preprocess the same data a second time.
    adata = calculate_neighbors(adata)
    adata = joint_cluster(adata,proximity_weight=weight,res=resolution)

    # A palette cached in .uns by an earlier plot may not fit the clusters of
    # this run, which can make plotting fail. Clear it up front so a fresh
    # palette is generated, instead of catching every exception and retrying.
    adata.uns.pop('joint_leiden_clusters_colors', None)
    sq.pl.spatial_scatter(adata,color=['joint_leiden_clusters'],size=1.2,figsize=(8,8),dpi=120)
    plt.savefig(dir_path+'/'+sample_id+'_section_space_clusters_proximity_weight_'+str(weight)+'_resolution_'+str(resolution)+'.png')
    plt.close()
    # Disabled UMAP-space variant of the plot above, kept for reference:
    #    sc.pl.scatter(adata,color=['joint_leiden_clusters'],size=1.2,figsize=(8,8),dpi=120)
    #    plt.savefig(dir_path+'/'+sample_id+'_UMAP_space_clusters_proximity_weight_'+str(weight)+'_resolution_'+str(resolution)+'.png')

    # One row per spot: its barcode and a 'Cluster <label>' string.
    df_to_save = pd.DataFrame({'Barcode':adata.obs.index,'Joint Leiden': ['Cluster '+ cl for cl in adata.obs.joint_leiden_clusters]})
    df_to_save.to_csv(dir_path+'/'+sample_id+'_clusters_proximity_weight_'+str(weight)+'.csv',index=False)


In [None]:
# Tag every spot with its sample id (as an obs column and as a barcode prefix)
# and preprocess each sample with the standard scanpy steps:
# gene/spot filtering, total-count normalization, log1p, highly variable genes.
#
# NOTE: the preprocessing below works in place on the objects in adata_dict and
# is not safe to run twice on the same data (counts would be normalized and
# log-transformed again).
for k, adata_k in adata_dict.items():
    print(k)
    adata_k.obs['sample_id'] = k
    # Add the sample prefix only once, so that re-running the cell does not
    # stack prefixes ('<sample>_<sample>_<barcode>').
    prefix = k + '_'
    if not adata_k.obs_names.str.startswith(prefix).all():
        adata_k.obs_names = prefix + adata_k.obs_names
    sc.pp.filter_genes(adata_k,min_cells=50)
    sc.pp.filter_cells(adata_k,min_genes=500)
    sc.pp.normalize_total(adata_k, target_sum=1e4)
    sc.pp.log1p(adata_k)
    sc.pp.highly_variable_genes(adata_k, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Aggregate data to a single adata object
adata_concat = sc.concat(adata_dict)

In [None]:
def calculate_neighbors(adata):
    """Preprocess ``adata`` in place and build its spatial and expression graphs.

    No clustering is performed here. The function first builds the spatial
    neighbor graph with squidpy, then runs the scanpy steps gene filtering,
    total-count normalization, log1p, highly-variable-gene detection, PCA and
    the expression neighbor graph.

    Every step is applied to the object that was passed in; the same object is
    returned for convenience.
    """
    # Spatial graph on the spot grid.
    spatial_graph_options = dict(n_rings=2, coord_type="grid", n_neighs=6, transform='cosine')
    sq.gr.spatial_neighbors(adata, **spatial_graph_options)

    # Expression pipeline, executed strictly in the listed order.
    expression_steps = (
        (sc.pp.filter_genes, dict(min_cells=3)),
        (sc.pp.normalize_total, dict(target_sum=1e4)),
        (sc.pp.log1p, dict()),
        (sc.pp.highly_variable_genes, dict(min_mean=0.0125, max_mean=3, min_disp=0.5)),
        (sc.pp.pca, dict(n_comps=15)),
        (sc.pp.neighbors, dict(random_state=42)),
    )
    for step, options in expression_steps:
        step(adata, **options)

    return adata

def joint_cluster(adata,proximity_weight=0.0,res=1):
    """Run Leiden clustering on a blend of the spatial and expression graphs.

    The adjacency handed to Leiden is
    ``spatial_connectivities * proximity_weight + connectivities``, so a weight
    of 0 uses the expression graph alone.

    Parameters
    ----------
    adata
        Object whose ``.obsp`` holds both 'spatial_connectivities' and
        'connectivities'.
    proximity_weight
        Factor applied to the spatial graph before it is added to the
        expression graph.
    res
        Leiden resolution.

    Returns
    -------
    The same ``adata``; the labels are stored under
    ``adata.obs['joint_leiden_clusters']``.

    Raises
    ------
    KeyError
        If either graph is missing from ``adata.obsp``.
    """
    # Fail early with a message that says which graph is missing and how to get it.
    required_graphs = {
        'spatial_connectivities': 'sq.gr.spatial_neighbors',
        'connectivities': 'sc.pp.neighbors',
    }
    missing = [
        f"'{key}' (run {producer} first)"
        for key, producer in required_graphs.items()
        if key not in adata.obsp
    ]
    if missing:
        raise KeyError('adata.obsp is missing ' + ', '.join(missing))

    # Define the joint adjacency weighting
    joint_adj = adata.obsp['spatial_connectivities']*proximity_weight + adata.obsp['connectivities']
    sc.tl.leiden(adata,adjacency=joint_adj,key_added='joint_leiden_clusters',resolution=res,random_state=42)
    return adata

In [None]:
# Calculate spatial graph structures from individual Visium experiments just to be sure
for s in samples:
    adata_scan = adata_scanorama[adata_scanorama.obs['sample_id']==s]
    # Index.equals checks elements and order and returns False on a length
    # mismatch (an element-wise `==` would raise instead).
    if adata_scan.obs_names.equals(normalized_adata[s].obs_names):
        sq.gr.spatial_neighbors(normalized_adata[s], n_rings=2, coord_type="grid", n_neighs=6,transform='cosine')
    else:
        print(str(s) + ': spot names differ between adata_scanorama and normalized_adata. Spatial graph not calculated.')

# Insert these results into the scanorama adata structure for later use.
# pairwise=True makes concat carry the per-sample .obsp graphs over.
# A dedicated temporary name is used so that a session-wide `adata` is left untouched.
adata_spatial = ad.concat(normalized_adata,pairwise=True)
adata_spatial = adata_spatial[adata_spatial.obs.sort_index().index]
if adata_spatial.obs_names.equals(adata_scanorama.obs_names):
    adata_scanorama.obsp['spatial_connectivities'] = adata_spatial.obsp['spatial_connectivities'].copy()
    adata_scanorama.obsp['spatial_distances'] = adata_spatial.obsp['spatial_distances'].copy()
else:
    print('Spot names of the concatenated samples do not match adata_scanorama. Spatial graphs not copied.')
del adata_spatial

# adata_scanorama should now have connectivities and distances for both scanorama integrated expression and spatial data
adata_scanorama

In [None]:
# Highlight scanorama-derived Leiden cluster(s) on a single spatial section
sample = 'BPH_651'
# `cluster` is not assigned in this cell; it has to exist in the session already.
cl_nmbr = [cluster]

adata_vis = normalized_adata[sample].copy()
# Join the integrated Leiden labels onto this section's spots by index
# (merge defaults to an inner join).
leiden_labels = adata_scanorama.obs['leiden']
adata_vis.obs = adata_vis.obs.merge(leiden_labels, left_index=True, right_index=True)
# Restrict the categories to the chosen cluster(s) for display.
adata_vis.obs['visualization'] = adata_vis.obs['leiden'].cat.set_categories(cl_nmbr)
sq.pl.spatial_scatter(
    adata_vis,
    color=['visualization'],
    size=1.2,
    figsize=(6,6),
    dpi=120,
    legend_loc='',
)


In [None]:
# Count cells per (dataset, broad cell type) and draw the counts as stacked bars.
# Work on a copy: adding a column to a selection of adata.obs would otherwise be
# an assignment on a slice.
meta = adata.obs[['broad_celltypes','dataset']].copy()
meta['count'] = 1

# observed=False pins the current handling of categorical group keys.
# The former axis=0 argument was the default and is deprecated in recent pandas.
grouped_meta = meta.groupby(['dataset','broad_celltypes'],observed=False).sum().reset_index()
grouped_meta = grouped_meta.pivot(index='dataset',columns='broad_celltypes',values='count')
# Fix the order in which the datasets appear on the x-axis.
grouped_meta = grouped_meta.loc[['dong_2020','chen_2021','song_2022','hirz_2023']]
grouped_meta.plot.bar(stacked=True,grid=False,yticks=(0,2e4,4e4,14e4))