In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
#import episcanpy as epi
import anndata as ad
import matplotlib.pyplot as plt
import seaborn as sns

import episcanpy as epi


import time
import pickle
import os
import scipy
import glob

from pathlib import Path

# Global plotting defaults. (The 'seaborn' matplotlib style is intentionally left disabled.)
#plt.style.use('seaborn')
# NOTE(review): sc.set_figure_params is called right after this with scanpy=True and dpi=80;
# it presumably overrides the figure size / dpi chosen here — confirm which values are intended.
plt.rcParams.update({
    "figure.figsize": (6, 5),
    "figure.dpi": 600,
})

# Scanpy figure settings: on-screen dpi 80, saved figures at dpi 250 in PDF format.
sc.set_figure_params(
    scanpy=True,
    dpi=80,
    dpi_save=250,
    frameon=True,
    vector_friendly=True,
    color_map="YlGnBu",
    format='pdf',
    transparent=False,
    ipython_format='png2x',
)

In [None]:
# Load the gene expression AnnData object from disk.
# ad.read_h5ad is the documented h5ad reader; the bare `ad.read` alias is deprecated.
adata = ad.read_h5ad('gex.h5ad')
adata

In [None]:
# Display the AnnData summary again (same object as shown by the loading cell above).
adata

In [None]:
# Basic filtering: keep cells with at least 200 genes (min_genes),
# then keep genes found in at least 3 cells (min_cells).
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

In [None]:
# Flag mitochondrial genes (gene names starting with 'MT-') in adata.var['mt'],
# then compute QC metrics with 'mt' as an extra QC variable; results are stored on adata (inplace=True).
is_mito_gene = adata.var_names.str.startswith('MT-')
adata.var['mt'] = is_mito_gene
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)


In [None]:
# Inspect the object after the QC metrics were written into it (inplace=True in the cell above).
adata

In [None]:
# Violin plots of the per-cell QC metrics, one panel per metric.
qc_keys = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt']
sc.pl.violin(adata, qc_keys, jitter=0.4, multi_panel=True)


In [None]:
# Display the AnnData summary.
adata

In [None]:
# Scatter plots of total counts against mitochondrial percentage and against number of genes.
for y_key in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(adata, x='total_counts', y=y_key)

In [None]:
# Normalize every cell to a total count of 1e4 (target_sum).
# NOTE(review): the un-normalized counts are not copied anywhere in this notebook before this step — confirm that is intended.
sc.pp.normalize_total(adata, target_sum=1e4)



In [None]:


# Log-transform the normalized data (log1p), applied directly to adata.
sc.pp.log1p(adata)



In [None]:
# Dimensionality reduction with scanpy defaults: PCA, then the neighborhood graph, then UMAP.
# Each step uses the result of the previous one, so the order matters.
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [None]:
# Leiden clustering with default settings; later cells read the result from adata.obs['leiden'].
sc.tl.leiden(adata)

In [None]:
# UMAP colored by Leiden cluster.
sc.pl.umap(adata, color='leiden')

In [None]:
# Display the AnnData summary after embedding and clustering.
adata

In [None]:
# Convert the predicted_doublet values to strings.
# NOTE(review): 'predicted_doublet' is not created in this notebook — presumably it is already in gex.h5ad; confirm.
adata.obs['predicted_doublet'] = list(map(str, adata.obs['predicted_doublet']))

In [None]:
# UMAP colored by doublet score and by the (string-converted) predicted doublet flag.
# NOTE(review): 'doublet_score' is not computed in this notebook — presumably it comes with the input file; confirm.
sc.pl.umap(adata, color=['doublet_score', 'predicted_doublet'])

In [None]:
# Checkpoint: save the processed object; the next cell reloads it from this file for annotation.
adata.write("processed_data_for_annotation.h5ad")

In [None]:
# Reload the processed checkpoint so annotation can start from here without redoing the preprocessing.
# ad.read_h5ad is the documented h5ad reader; the bare `ad.read` alias is deprecated.
adata = ad.read_h5ad("processed_data_for_annotation.h5ad")
adata

In [None]:
#oligodendroctye_genes = ["OLIG1", "OLIG2", "PDGFRA"]
#sc.pl.umap(adata, color=oligodendroctye_genes+['leiden'])

In [None]:
# Astrocyte marker genes, plotted on the UMAP alongside the Leiden clusters.
astro_genes = ["GFAP", "AQP4"]
sc.pl.umap(adata, color=[*astro_genes, 'leiden'])

In [None]:
# Microglia marker genes, plotted on the UMAP alongside the Leiden clusters.
microglia_genes = [
    "CD68",
    "CX3CR1",
    'CD14',
    'CD80',
    'SALL1',
    'OLFML3',
]
sc.pl.umap(adata, color=[*microglia_genes, 'leiden'])

In [None]:
# Neuronal marker gene(s), plotted on the UMAP alongside the Leiden clusters.
neuronal_genes = ["SATB2"]
sc.pl.umap(adata, color=[*neuronal_genes, 'leiden'])

In [None]:
# Additional genes of interest, shown as two separate UMAP panels next to the Leiden clusters.
# Candidates currently disabled: DYTN, ATOH1, IL22 (first panel) and KRT74 (second panel).
other_genes = [
    "WNT2B", "RSPO1", "RSPO3", "SLFN13", "CALCB",
    "IGF1", "NRK", "CALB1",
]
other_genes2 = ['SST', 'TLL2']
for marker_panel in (other_genes, other_genes2):
    sc.pl.umap(adata, color=[*marker_panel, 'leiden'])

In [None]:
# Markers from the Macosko paper.
# These lists use title-case gene symbols, unlike the upper-case symbols used by the
# plotting cells; they are only defined here and not plotted (the plot below is commented out).
Purkinje_genes = ['Ppp1r17', 'Prkcd', 'Klhl1']
Granule_gene = ['Gabra6']
UBC_gene = ['Eomes']
Golgi_gene = ['Lgi2']
bergman_gene = ['Gdf10']
ODC_gene = ['Mobp']
OPC_gene = ["Ppfibp1"]
# Fixed typo: the endothelial marker is 'Flt1' (was spelled 'Fit1', which would never match a gene name).
other_cell_types = ['Dcn', 'Kcnj8', 'Ttr', 'Mrc1', 'C1qa', 'Flt1', 'Foxj1']

# Upper-case variant for plotting, kept disabled:
#other_cell_types = ['DCN', 'KCNJ8', 'TTR', 'MRC1', 'C1QA', 'FLT1', 'FOXJ1']
#sc.pl.umap(adata, color=other_cell_types+['leiden'])

In [None]:
# RL marker genes, plotted on the UMAP alongside the Leiden clusters.
Rl_markers = [
    'PAX6',
    'LMX1A',
    'EOMES',
]
sc.pl.umap(adata, color=[*Rl_markers, 'leiden'])

In [None]:
# Purkinje cell markers, plotted on the UMAP alongside the Leiden clusters.
# Candidates currently disabled: CAB, SKOR2.
PC_cells_markers = [
    'ITPR1',
    'FOXP2',
    'CALB1',
    'BCL11A',
]
sc.pl.umap(adata, color=[*PC_cells_markers, 'leiden'])

In [None]:
# Marker panel plotted on the UMAP below and reused by the dot plots further down.
# The gene order is kept exactly as before, since it determines the plotting order.
# Candidates currently disabled: MKI67 / OTX2 (RL --> Rhombic lip), FLT1 (endothelial),
# BCAS (committed OPC), TTR, HOXB3, OLIG3.
genes = [
    'ITPR1',                 # PC --> Purkinje cell
    'RBFOX3',                # GCP and GN
    'LMX1A',                 # eCN/UBC
    "MEIS2",                 # iCN
    "PAX2",                  # PIP
    "GRIA1",                 # BG
    "AQP4", "SOX2", "GFAP",  # Astro
    "SATB2",
    "CSF1R",                 # microglia
    "DNAH6",
    "DNAH11",
    "PTPRK",                 # MLI
    "PDGFRB",                # pericytes
    "PDGFRA",                # OPC
    "COL3A1",
    "OLIG1",
    "OLIG2",
    "NXPH2",
    "TLE2",
]
sc.pl.umap(adata, color=[*genes, 'leiden'])

In [None]:
# Inhibitory neuron markers, plotted on the UMAP alongside the Leiden clusters.
inhibitory_neuron_markers = [
    'PVALB',
    'SST',
    'MAF',
    'TAC3',
]
sc.pl.umap(adata, color=[*inhibitory_neuron_markers, 'leiden'])

In [None]:
# Manual annotation: cell type label -> Leiden cluster ids assigned to it.
# Cluster ids are strings, and each id appears under one label only.
# Not assigned yet: "iCN - inhibitory cerebellar nuclei ".
celltype_clusters = {
    'astrocyte': ['10', '2'],
    'oligodendrocyte': ['5'],
    'microglia': ['13'],
    'neuronal cell - Purkinje cell -FOXP2': ['14'],
    'neuronal cell - Purkinje cell -ITPR1': ['8'],
    "interneuron - MLI - molecular layer interneurons": ['0', '1', '3', '4', '6', '15'],
    'astrocyte (progenitor)': ['7'],
    'Purkinje layer neuron': ['12'],
    'inhibitory neuron': ['18', '17'],
    'inhibitory neuron - PVALB+ SST+': ['11'],
    'inhibitory neuron - MAF+': ['9'],
    'NA': ['16'],
}

# Invert to a direct cluster id -> label lookup.
cluster_to_celltype = {
    cluster_id: celltype
    for celltype, cluster_ids in celltype_clusters.items()
    for cluster_id in cluster_ids
}

# One label per cell; clusters without an entry keep their cluster id as the label.
label = [cluster_to_celltype.get(cluster, cluster) for cluster in adata.obs['leiden']]
adata.obs['celltype'] = label
sc.pl.umap(adata, color=['leiden', 'celltype'], wspace=0.7)

In [None]:
# Dot plot of the `genes` marker panel, grouped by Leiden cluster.
sc.pl.dotplot(adata, var_names=genes, groupby='leiden')

In [None]:
# Dot plot of the `genes` marker panel, grouped by the manually assigned cell type.
sc.pl.dotplot(adata, var_names=genes, groupby='celltype')

In [None]:
# UMAP with Leiden clusters and cell type labels side by side (same plot as in the labelling cell).
sc.pl.umap(adata, color=['leiden', 'celltype'], wspace=0.7)

In [None]:
# Remove the 'log1p' entry from adata.uns before ranking genes.
# NOTE(review): presumably a workaround for rank_genes_groups failing on the log1p metadata
# after the object was reloaded from h5ad — confirm.
# The membership check keeps this cell re-runnable: a bare `del` raises KeyError on a second run.
if 'log1p' in adata.uns:
    del adata.uns['log1p']

In [None]:
# Rank genes for each Leiden cluster (default test method) and show the top genes as stacked violins.
sc.tl.rank_genes_groups(adata, groupby='leiden')
sc.pl.rank_genes_groups_stacked_violin(adata)

In [None]:
# Matrix plot of the ranked genes per Leiden cluster; the figure is also saved using the given filename suffix.
sc.pl.rank_genes_groups_matrixplot(adata, save="_top_leiden_markers.pdf")

In [None]:
# Rank genes per annotated cell type; this overwrites the Leiden-based ranking computed earlier.
# NOTE(review): the ranking is run on a copy of the column ('celltype2') rather than on 'celltype'
# itself — the reason is not evident from this notebook; confirm whether the copy is needed.
adata.obs['celltype2'] = adata.obs['celltype'] 
sc.tl.rank_genes_groups(adata, groupby='celltype2')
# Matrix plot of the ranked genes per cell type; the figure is also saved using the given filename suffix.
sc.pl.rank_genes_groups_matrixplot(adata, save="_top_celltype_markers.pdf" )

In [None]:
# Display the AnnData summary after annotation and marker ranking.
adata

In [None]:
# UMAP colored by the manually assigned cell type.
sc.pl.umap(adata, color='celltype')

In [None]:
# Save the annotated object (cell type labels and marker ranking included) to disk.
adata.write('annotated_gex.h5ad')