***Import libraries and data, set up stuff***

In [None]:
import scanpy as sc
import pandas as pd
import scvelo as scv
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import anndata as ad

In [None]:
# Default figure size (width, height) used by the plots in this notebook.
plt.rcParams.update({'figure.figsize': (6, 6)})

In [None]:
# Load the dataset to annotate. The file name suggests it was already
# filtered, normalised and dimensionality-reduced upstream -- TODO confirm.
sample = sc.read('../../../Data/notebooks_data/crypto_123.filt.norm.red.h5ad')

Clustering

In [None]:
# Marker genes per expected cell type, keyed by the cell-type label.
# Insertion order matches the order in which the groups are plotted below.
markers = {
    ### SPERMATOCYTOGENESIS
    'SpermatogoniaA': ['ID4'],
    'SpermatogoniaB': ['MKI67', 'DMRT1', 'STRA8'],
    'SpermatocytesI': ['MEIOB', 'SYCP1', 'TEX101'],
    'SpermatocytesII': ['PIWIL1', 'SPATA16', 'CLGN'],
    ### SPERMIOGENESIS
    'Round.Spt': ['SPATA9', 'SPAM1'],  # round spermatids
    'Elong.Spt': ['PRM1', 'PRM2', 'PRM3', 'AKAP4'],  # elongated spermatids
    ### SOMATIC CELLS
    'Sertoli': ['VIM', 'CTSL'],
    'Macroph': ['CD14'],
    'Leydig': ['CFD'],
    'Endothelial': ['CD34'],
    'Myoid': ['ACTA2'],
}

# Re-apply the 6x6 figure size (the same value set at the start of the notebook).
plt.rcParams.update({'figure.figsize': (6, 6)})

In [None]:
# Keep only the marker genes that exist in the dataset. np.intersect1d also
# de-duplicates and sorts each list, so the stored order becomes alphabetical.
for cell_type in markers:
    markers[cell_type] = np.intersect1d(markers[cell_type], sample.var_names)

In [None]:
# UMAP coloured by the 'SpermatogoniaA' marker genes.
sc.pl.umap(sample, color=markers['SpermatogoniaA'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the 'SpermatogoniaB' marker genes.
sc.pl.umap(sample, color=markers['SpermatogoniaB'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the 'SpermatocytesI' marker genes.
sc.pl.umap(sample, color=markers['SpermatocytesI'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the 'SpermatocytesII' marker genes.
sc.pl.umap(sample, color=markers['SpermatocytesII'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the round-spermatid marker genes.
sc.pl.umap(sample, color=markers['Round.Spt'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the elongated-spermatid marker genes
# (note the different colour range: vmin=0, vmax=5).
sc.pl.umap(sample, color=markers['Elong.Spt'], s=30, vmin=0, vmax=5)

In [None]:
# UMAP coloured by the 'Sertoli' marker genes (colour range -1 to 5).
sc.pl.umap(sample, color=markers['Sertoli'], s=30, vmin=-1, vmax=5)

In [None]:
# UMAP coloured by the 'Macroph' marker genes (colour range -1 to 5).
sc.pl.umap(sample, color=markers['Macroph'], s=30, vmin=-1, vmax=5)

In [None]:
# UMAP coloured by the 'Endothelial' marker genes.
sc.pl.umap(sample, color=markers['Endothelial'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the 'Myoid' marker genes.
sc.pl.umap(sample, color=markers['Myoid'], s=30, vmin=-1, vmax=3)

In [None]:
# UMAP coloured by the 'Leydig' marker genes.
sc.pl.umap(sample, color=markers['Leydig'], s=30, vmin=-1, vmax=3)

In [None]:
# Leiden clustering of all cells, with a fixed random_state.
sc.tl.leiden(sample, random_state=12345, resolution=0.4)

In [None]:
# UMAP coloured by Leiden cluster id, with the ids drawn on top of the clusters.
sc.pl.umap(sample, color=['leiden'], legend_fontsize=20, legend_loc='on data')

In [None]:
# Work on a standalone categorical built from the Leiden labels, so renaming
# does not touch sample.obs until the result is assigned back.
clusters = pd.Categorical(sample.obs['leiden'])

In [None]:
# Display the cluster ids that need a name.
clusters.categories

In [None]:
# Cell-type name for each Leiden cluster id. Cluster '7' is deliberately left
# unnamed; cells still labelled '7' are removed later in this notebook.
new_names = {
    '0': 'SpermatogoniaA',
    '1': 'RoundSpermatids',
    '2': 'SpermatocitesI',
    '3': 'SpermatocitesII',
    '4': 'Myoid',
    '5': 'ElongSpermatids',
    '6': 'SpermatogoniaB',
    '8': 'Endothelial',
}

In [None]:
# Apply the id -> name mapping; ids missing from new_names keep their label.
clusters=clusters.rename_categories(new_names)

In [None]:
# Drop everything from the first '.' onward in each label, so labels that only
# differ by a '.<suffix>' collapse into a single category.
cluster_array = np.asarray(clusters)
split_array = [label.partition('.')[0] for label in cluster_array]
clusters = pd.Categorical(split_array)

In [None]:
# Store the named clusters on the dataset under obs['clusters'].
sample.obs['clusters']=clusters.copy()

In [None]:
# Remove the cells still labelled '7' (the id that has no entry in new_names).
# .copy() turns the subset into an independent object rather than a view.
sample = sample[sample.obs['clusters'] != '7'].copy()

In [None]:
# UMAP coloured by the named clusters, with names drawn on top of the clusters.
sc.pl.umap(sample, legend_loc='on data', color=['clusters'])

In [None]:
# Save the annotated dataset (written before X is overwritten further below).
sample.write('../../../Data/notebooks_data/crypto_123.filt.norm.red.clst.h5ad')

In [None]:
# Replace X with a copy of the 'umi_sct' layer and log1p-transform it, so the
# gene ranking below (use_raw=False) runs on these values. The layer itself is
# left untouched because X is assigned a copy.
sample.X = sample.layers['umi_sct'].copy()
sc.pp.log1p(sample)

In [None]:
# Rank genes per named cluster with a Wilcoxon test, keeping the top 10 genes
# per group; results are stored in sample.uns['DE_clusters'].
sc.tl.rank_genes_groups(
    sample,
    groupby='clusters',
    method='wilcoxon',
    n_genes=10,
    use_raw=False,
    key_added='DE_clusters',
)

In [None]:
# Display the ranked gene names as a table.
pd.DataFrame(sample.uns['DE_clusters']['names'])

In [None]:
# Flatten the ranking into one table with three columns per group:
# '<group>_N' (names), '<group>_P' (pvals_adj) and '<group>_L' (logfoldchanges).
result = sample.uns['DE_clusters']
groups = result['names'].dtype.names
X = pd.DataFrame({
    f'{group}_{key[0].upper()}': result[key][group]
    for group in groups
    for key in ('names', 'pvals_adj', 'logfoldchanges')
})
X

In [None]:
# Export the per-cluster differential-expression table (row index omitted).
X.to_csv('../../../Data/results/diff_expression_clusters_crypto.csv', header=True, index=False)

In [None]:
# Named clusters that are sub-clustered further below. The spelling must match
# the labels stored in sample.obs['clusters'].
SUBGROUPS = [
    'SpermatogoniaB',
    'SpermatocitesI',
    'SpermatocitesII',
]

In [None]:
# Marker genes for the spermatocyte sub-stages inspected below.
# Genes absent from the dataset are dropped, mirroring the var_names filtering
# applied to the main marker lists, so the UMAP calls that colour by these
# lists do not fail on an unknown gene name. The listed order is preserved.
substage_markers = {
    'Leptotene': ['SYCE2', 'SCML1'],
    'Zygotene': ['LY6K', 'SYCP1'],
    'Pachytene': ['PIWIL1', 'CCDC112'],
    'Diplotene': ['OVOL2', 'CCNA1', 'CDK1', 'AURKA'],
}
for stage, genes in substage_markers.items():
    markers[stage] = [gene for gene in genes if gene in sample.var_names]

In [None]:
# UMAP restricted to the cells of the SUBGROUPS clusters, coloured by the
# 'Leptotene' markers.
sc.pl.umap(
    sample[sample.obs['clusters'].isin(SUBGROUPS)],
    color=markers['Leptotene'],
)

In [None]:
# UMAP restricted to the cells of the SUBGROUPS clusters, coloured by the
# 'Zygotene' markers.
sc.pl.umap(
    sample[sample.obs['clusters'].isin(SUBGROUPS)],
    color=markers['Zygotene'],
)

In [None]:
# UMAP restricted to the cells of the SUBGROUPS clusters, coloured by the
# 'Pachytene' markers.
sc.pl.umap(
    sample[sample.obs['clusters'].isin(SUBGROUPS)],
    color=markers['Pachytene'],
)

In [None]:
# UMAP restricted to the cells of the SUBGROUPS clusters, coloured by the
# 'Diplotene' markers.
sc.pl.umap(
    sample[sample.obs['clusters'].isin(SUBGROUPS)],
    color=markers['Diplotene'],
)

In [None]:
# Second Leiden run, restricted to the cells whose 'clusters' label is in
# SUBGROUPS; the resulting labels are stored in obs['clusters_spc'].
sc.tl.leiden(
    sample,
    restrict_to=('clusters', SUBGROUPS),
    resolution=0.28,
    key_added='clusters_spc',
    random_state=12345,
)

In [None]:
# UMAP of the SUBGROUPS cells only, coloured by the new sub-cluster labels.
sc.pl.umap(
    sample[sample.obs['clusters'].isin(SUBGROUPS)],
    color=['clusters_spc'],
    legend_fontsize=15,
)

In [None]:
# Shorten the sub-cluster labels: a label containing a comma is reduced to the
# field right after the first comma (presumably the sub-cluster id appended by
# the restricted Leiden run -- verify); any other label is kept unchanged.
clusters = sample.obs['clusters_spc']
cluster_array = np.asarray(clusters)
split_array = [
    label if ',' not in label else label.split(',')[1]
    for label in cluster_array
]
clusters = pd.Categorical(split_array)
sample.obs['clusters_spc'] = clusters.copy()

In [None]:
# Same subset UMAP, now with the shortened labels drawn on top of the clusters.
sc.pl.umap(
    sample[sample.obs['clusters'].isin(SUBGROUPS)],
    color=['clusters_spc'],
    legend_loc='on data',
    legend_fontsize=18,
)

In [None]:
# Reset X from a fresh copy of the 'umi_sct' layer before log1p, so the
# transform is applied once to the layer values rather than stacked on
# whatever X currently holds.
sample.X = sample.layers['umi_sct'].copy()
sc.pp.log1p(sample)

In [None]:
# Rank genes per 'clusters_spc' group with a Wilcoxon test (top 20 per group);
# results are stored in sample.uns['DE_clusters_spc'].
sc.tl.rank_genes_groups(
    sample,
    groupby='clusters_spc',
    method='wilcoxon',
    n_genes=20,
    use_raw=False,
    key_added='DE_clusters_spc',
)

In [None]:
# Display the ranked gene names for the sub-cluster comparison as a table.
pd.DataFrame(sample.uns['DE_clusters_spc']['names'])

In [None]:
# Flatten the sub-cluster ranking into one table with three columns per group:
# '<group>_N' (names), '<group>_P' (pvals_adj) and '<group>_L' (logfoldchanges).
result = sample.uns['DE_clusters_spc']
groups = result['names'].dtype.names
X = pd.DataFrame({
    f'{group}_{key[0].upper()}': result[key][group]
    for group in groups
    for key in ('names', 'pvals_adj', 'logfoldchanges')
})

In [None]:
# Inspect gene names, log fold changes and adjusted p-values for group '3'.
X[ ['3_N','3_L','3_P'] ]

In [None]:
# Standalone categorical of the sub-cluster labels, to be renamed below.
clusters = pd.Categorical(sample.obs['clusters_spc'])

In [None]:
# Display the labels currently in use (sub-cluster ids plus the untouched names).
clusters.categories

In [None]:
# Stage name for each sub-cluster id. Ids '1' and '2' carry a '.<n>' suffix
# that is stripped afterwards (split('.')[0]), so both end up in the single
# category 'Diplotene'. The base name must therefore be spelled identically
# for both entries: the previous 'Dyplotene.2' produced a separate, misspelled
# category instead of merging with 'Diplotene.1'.
new_names = {
    '0':'Zygotene',
    '1':'Diplotene.1',
    '2':'Diplotene.2',
    '3':'Leptotene',
    '4':'Pachytene',
}

In [None]:
# Apply the id -> stage mapping; labels missing from new_names are kept as is.
clusters=clusters.rename_categories(new_names)

In [None]:
# Drop everything from the first '.' onward in each label, so the '.1' / '.2'
# variants of a stage collapse into one category.
cluster_array = np.asarray(clusters)
split_array = [label.partition('.')[0] for label in cluster_array]
clusters = pd.Categorical(split_array)

In [None]:
# Store the named sub-clusters back on the dataset under obs['clusters_spc'].
sample.obs['clusters_spc']=clusters.copy()

In [None]:
# Side-by-side UMAPs: coarse clusters and the refined sub-cluster annotation.
sc.pl.umap(sample, legend_loc='on data', color=['clusters', 'clusters_spc'])

In [None]:
# Redo the ranking now that the sub-clusters carry their final names.
# X is reset from a copy of the 'umi_sct' layer before log1p; the result
# overwrites sample.uns['DE_clusters_spc'], this time with the top 30 genes.
sample.X = sample.layers['umi_sct'].copy()
sc.pp.log1p(sample)
sc.tl.rank_genes_groups(
    sample,
    groupby='clusters_spc',
    method='wilcoxon',
    n_genes=30,
    use_raw=False,
    key_added='DE_clusters_spc',
)

In [None]:
# Flatten the final ranking into one table ('<group>_N' names, '<group>_P'
# pvals_adj, '<group>_L' logfoldchanges) and export it without the row index.
result = sample.uns['DE_clusters_spc']
groups = result['names'].dtype.names
X = pd.DataFrame({
    f'{group}_{key[0].upper()}': result[key][group]
    for group in groups
    for key in ('names', 'pvals_adj', 'logfoldchanges')
})
X.to_csv(
    '../../../Data/results/diff_expression_subclusters_crypto.csv',
    header=True,
    index=False,
)

In [None]:
# Save the dataset with both annotations. NOTE(review): at this point X holds
# the log1p-transformed 'umi_sct' values assigned above, not the X that was
# loaded from disk -- confirm that this is what downstream notebooks expect.
sample.write('../../../Data/notebooks_data/crypto_123.filt.norm.red.clst.2.h5ad')