In [None]:
from anndata import AnnData
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import leidenalg

from numpy.random import default_rng

import matplotlib.pyplot as plt

from matplotlib.pyplot import figure

figure(figsize=(20, 20), dpi=80)

In [None]:
re=pd.read_excel("/mnt/c/Users/Sasha/Desktop/test/SCRINSHOT-Healthy-Atlas-all-samples.xlsx")

In [None]:
re.columns

In [None]:
re

In [None]:
#Filter cells for more than 25 marker dots
resub=re[re.iloc[:,14:68].sum(axis=1)>25]

In [None]:
resub

In [None]:
#define metadata and data of interest (genes of all datasets)
metadata=resub.iloc[:,0:14]
expdata=resub.iloc[:,14:68]

In [None]:
expdata

In [None]:
metadata

In [None]:
adata = sc.AnnData(expdata)
adata.obs=metadata

In [None]:
adata.obs_names = pd.RangeIndex(stop=adata.n_obs)
adata

In [None]:
adataraw=adata

In [None]:
sc.pp.normalize_total(adata)

In [None]:
adatanorm=adata

In [None]:
expdata_norm=(adatanorm)

In [None]:
adatanorm

In [None]:
adatanorm.obs=metadata

In [None]:
adatanorm.obs_names = pd.RangeIndex(stop=adatanorm.n_obs)
adatanorm

In [None]:
sc.pp.neighbors(adatanorm, n_neighbors=20, n_pcs=7)
sc.tl.leiden(adatanorm,resolution=1.5,key_added='clusters-pc7')

In [None]:
adatanorm.obs['clusters-pc7'].unique()

In [None]:
sc.tl.paga(adatanorm, groups='clusters-pc7')
sc.pl.paga(adatanorm, color=['clusters-pc7'])

In [None]:
sc.tl.umap(adatanorm,min_dist=0.3)#,init_pos='paga')
sc.set_figure_params(scanpy=True, dpi=200)

In [None]:
#the following plots are to visualize the data
sc.pl.umap(adatanorm,color=['clusters-pc7'],cmap='viridis')#,save='.svg')

In [None]:
sc.pl.umap(adatanorm,color=adata.var.index[0:75],cmap='viridis',vmax=10)

In [None]:
sc.pl.umap(adatanorm,color=adata.var.index[0:75],cmap='viridis',vmax=20)

In [None]:
sc.pl.umap(adatanorm,color=['Region'], s=1)

In [None]:
sc.pl.umap(adatanorm,color=['Sample'], s=1)

In [None]:
sc.pl.umap(adatanorm,color=['Position'], s=1)

In [None]:
sc.pl.umap(adatanorm,color=['Donor'], s=1)

In [None]:
sc.pl.umap(adatanorm,color=['Cell.class'], s=1)

In [None]:
sc.pl.umap(adatanorm,color=['Cell.type'], s=1)

In [None]:
sc.pl.umap(adatanorm,color=['Cell.subtype'], s=1)

In [None]:
#to check if clustering is affected by sample
sc.tl.rank_genes_groups(adatanorm,groupby='Sample')

In [None]:
sc.pl.rank_genes_groups_dotplot(adatanorm)

In [None]:
sc.pl.rank_genes_groups(adatanorm)

In [None]:
#to check if clustering is affected by donor
sc.tl.rank_genes_groups(adatanorm,groupby='Donor')

In [None]:
sc.pl.rank_genes_groups_dotplot(adatanorm)

In [None]:
sc.pl.rank_genes_groups(adatanorm)

In [None]:
#to define differentially expressed genes by cluster
sc.tl.rank_genes_groups(adatanorm, groupby='clusters-pc7', method='wilcoxon')
sc.tl.dendrogram(adatanorm,groupby='clusters-pc7')
sc.pl.rank_genes_groups_dotplot(adatanorm, n_genes=5, swap_axes=True)#,save='deg.pdf')

In [None]:
#for pre-defined cell types only
sc.tl.rank_genes_groups(adatanorm, groupby='Cell.type', method='wilcoxon')
sc.tl.dendrogram(adatanorm,groupby='Cell.type')
sc.pl.rank_genes_groups_dotplot(adatanorm, n_genes=5, swap_axes=True)#,save='deg.pdf')

In [None]:
# Export the per-cell annotation table (adatanorm.obs) to an Excel workbook.
# The context manager saves and closes the file on exit; the explicit
# ExcelWriter.save() call was deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('All-SCRINSHOT-clusters-pc7-upd.xlsx') as writer:
    adatanorm.obs.to_excel(writer)

In [None]:
#to visulaize clusters by position
colors=dict(zip(np.unique(adatanorm.obs['clusters-pc7'].astype(int)),adatanorm.uns['clusters-pc7_colors']))
#cl.apply(lambda x: colors[x])
cl=adatanorm.obs['clusters-pc7'].astype(int)

In [None]:
import seaborn as sns
import matplotlib as plt
plt.pyplot.figure(figsize=(40,40))
plt.pyplot.scatter(re.X,re.Y,s=20,c='#FFFFFF',linewidths=0,edgecolors=None)
plt.pyplot.scatter(x=adatanorm.obs.X,y=adatanorm.obs.Y,c=cl.apply(lambda x: colors[x]),s=20,linewidths=0, edgecolors=None)
ax=plt.pyplot.gca()                            # get the axis
ax.set_ylim(ax.get_ylim()[::-1])        # invert the axis
ax.xaxis.tick_top()                     # and move the X-Axis      
ax.yaxis.tick_left() 

In [None]:
#to create subclusters 
adatasub1=adatanorm[adatanorm.obs["clusters-pc7"].isin(["13"])]

In [None]:
sc.pp.neighbors(adatasub1, n_neighbors=10, n_pcs=0)
sc.tl.leiden(adatasub1,resolution=0.8,key_added='Level2-cluster13')

In [None]:
sc.tl.paga(adatasub1, groups='Level2-cluster13')
sc.pl.paga(adatasub1, color=['Level2-cluster13'])

In [None]:
sc.tl.umap(adatasub1,min_dist=0.3)#,init_pos='paga')
sc.set_figure_params(scanpy=True, dpi=200)

In [None]:
#to visualize subclusters of cluster 13
sc.pl.umap(adatasub1,color=['Level2-cluster13'],cmap='viridis')#,save='epith_clusters_0.svg')

In [None]:
sc.pl.umap(adatasub1,color=adatasub1.var.index[0:74],cmap='viridis',vmax=10)

In [None]:
sc.tl.rank_genes_groups(adatasub1, groupby='Level2-cluster13', method='wilcoxon')
sc.tl.dendrogram(adatasub1,groupby='Level2-cluster13')
sc.pl.rank_genes_groups_dotplot(adatasub1, n_genes=10, swap_axes=True)#,save='deg.pdf')

In [None]:
sc.tl.rank_genes_groups(adatasub1,groupby='Position')

In [None]:
sc.pl.rank_genes_groups_dotplot(adatasub1)

In [None]:
# Export the per-cell annotation table of the cluster-13 subset to an Excel workbook.
# The context manager saves and closes the file on exit; the explicit
# ExcelWriter.save() call was deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('cluster13-Lev2.xlsx') as writer:
    adatasub1.obs.to_excel(writer)