In [1]:
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'


In [2]:
import sys


In [3]:
import numpy as np
import scanpy as sc
import pandas as pd
import anndata as ad
import seaborn as sns
sns.set_style('white')

In [4]:
# Scanpy logging level: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3

# Default appearance for the scanpy figures drawn in this notebook.
sc.settings.set_figure_params(
    dpi=80,
    facecolor='white',
    frameon=False,
    figsize=(5, 5),
)

In [5]:
# Input AnnData file; its obs table carries the `final_clusts` labels used below.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
ADATA_PATH = "/faststorage/brain_aging/rna_analysis/adata_finalclusts.h5ad"
adata = ad.read_h5ad(ADATA_PATH)

In [6]:
# Keep cells whose cluster label is not the literal string 'NA' and whose
# total_counts is below 50,000.
has_cluster_label = adata.obs['final_clusts'] != 'NA'
below_count_cap = adata.obs['total_counts'] < 50000
adata = adata[has_cluster_label & below_count_cap]

In [7]:
# Keep only clusters with more than MIN_CELLS_PER_CLUSTER cells.
# NOTE(review): the previous comment said "< 100 cells" while the code has always
# used 200; the code's threshold is preserved here — confirm which was intended.
MIN_CELLS_PER_CLUSTER = 200

# One pass over the column instead of re-scanning it once per cluster.
cluster_sizes = adata.obs.final_clusts.value_counts()
good_clusts = cluster_sizes.index[cluster_sizes > MIN_CELLS_PER_CLUSTER].tolist()

In [8]:
# Restrict to cells belonging to the clusters listed in `good_clusts`.
in_good_cluster = adata.obs['final_clusts'].isin(good_clusts)
adata = adata[in_good_cluster]

In [9]:
# Cells whose `neuronal` flag is False, copied into a separate AnnData object.
is_non_neuronal = ~adata.obs['neuronal']
A = adata[is_non_neuronal].copy()

In [None]:
n_pcs = 20

# Feature selection: flag the top 2000 variable genes, stash the full object
# in .raw, then keep only the flagged genes.
sc.pp.highly_variable_genes(A, n_top_genes=2000)
A.raw = A
A = A[:, A.var.highly_variable]

print('regressing out')
sc.pp.regress_out(A, ['total_counts'])

print('scaling')
sc.pp.scale(A, max_value=10)

print('pca')
sc.tl.pca(A, n_comps=n_pcs, svd_solver='arpack')

print('neighbors')
sc.pp.neighbors(A, n_pcs=n_pcs, n_neighbors=25)
# Disabled alternative kept for reference: batch-balanced neighbours keyed on 'age'.
# sc.external.pp.bbknn(A, batch_key='age', n_pcs=n_pcs)

print('umap')
sc.tl.umap(A)

print('leiden')
sc.tl.leiden(A, resolution=0.6)


In [None]:
# UMAP of the non-neuronal subset, one panel per metadata column.
sc.pl.umap(
    A,
    color=['age', 'area', 'final_clusts', 'mouse_id'],
    palette=sns.color_palette('gist_ncar', 20),
    use_raw=True,
)

In [None]:
# Marker genes grouped under the labels the notebook author gave them.
# The groups are flattened in this order to form the dot-plot columns.
nonneuronal_marker_sets = (
    ('microglia', ['Csf1r', 'C1qa', 'Hexb', 'Cx3cr1', 'P2ry12', 'Tmem119', 'Tnf', 'Ccl4']),
    ('oligo', ['Sox10', 'Cldn11', 'Mog', 'Plp1']),
    ('astrocyte', ['Aqp4', 'Aldh1l1', 'Gfap', 'Aldoc']),
    ('pericyte', ['Vtn', 'Flt1', 'Pecam1', 'Cldn5', 'Adgrf5']),
    ('vascular', ['Mgp', 'Slc47a1', 'Dapl1', 'Igf2', 'Sema3g', 'Acta2']),
    ('OPC', ['Pdgfra', 'Vcan', 'Cspg4', 'Olig1']),
    ('ependymal', ['Ccdc153', 'Tmem212', 'Hdc', 'Kcnj8']),
    # This last group carried no label in the notebook.
    ('unlabelled', ['Pf4', 'Cd74', 'Cxcl2', 'Lyz2', 'Ms4a7']),
)
nonneuronal_markers = [
    gene
    for _group, genes in nonneuronal_marker_sets
    for gene in genes
]

sc.pl.dotplot(A, nonneuronal_markers, groupby='final_clusts', use_raw=True)

In [None]:
# gene sets
# microglia -- Hexb, Csf1r, C1qa, P2ry12
# OPCs -- Pdgfra, Vcan, Cspg4, Olig1
# Endo -- Vtn, Flt1, Cldn5
# Oligo -- Plp1, Mbp, Cldn11, Mog

In [None]:
# Broad cell class for each non-neuronal cluster, written as
# class -> indices of the 'N.<index>' clusters that belong to it.
_coarse_class_members = {
    'Oligodendrocyte': (0, 1, 2, 14, 16),
    'Astrocyte': (3, 4, 5, 17),
    'OPC': (6,),
    'Microglia': (7, 8),
    'NA': (9, 10, 11, 12),
    # 15: vascular endothelial cells, 19: pericyte, 20: vascular leptomeningeal cells
    'Vascular': (13, 15, 19, 20),
    'Immune': (18,),  # perivascular macrophage
}

# Invert to index -> class, then emit keys 'N.0', 'N.1', ... in ascending order.
_coarse_class_by_index = {
    index: cell_class
    for cell_class, members in _coarse_class_members.items()
    for index in members
}
nonneuronal_mapping = {
    'N.{}'.format(index): _coarse_class_by_index[index]
    for index in sorted(_coarse_class_by_index)
}

# Fine-grained label for each non-neuronal cluster; position i in this list
# is the label for cluster 'N.<i>'.
_fine_labels_in_cluster_order = [
    'Olig1',   # N.0
    'Olig2',   # N.1
    'Olig3',   # N.2
    'Astro1',  # N.3
    'Astro2',  # N.4
    'Astro3',  # N.5
    'OPC',     # N.6
    'Micro1',  # N.7
    'Micro2',  # N.8
    'NA',      # N.9
    'NA',      # N.10
    'NA',      # N.11
    'NA',      # N.12
    'Vlmc1',   # N.13
    'Olig4',   # N.14
    'Peri1',   # N.15
    'Olig5',   # N.16
    'Astro4',  # N.17
    'Macro',   # N.18 perivascular macrophage
    'Peri2',   # N.19 pericyte
    'Vlmc2',   # N.20 vascular leptomeningeal cells
]
finer_nonneuronal_mapping = {
    'N.{}'.format(index): label
    for index, label in enumerate(_fine_labels_in_cluster_order)
}



In [None]:
# UMAP of all cells coloured by cluster label and by a panel of genes.
striatal_umap_panels = [
    'final_clusts',
    'Tac1', 'Tshz1', 'Cxcl14', 'Pdyn', 'Penk',
    'Drd1', 'Drd2', 'Adora2a', 'Calb1', 'Pthlh',
]
sc.pl.umap(adata, color=striatal_umap_panels)

In [None]:
# Identify striatal neurons: dot plot of candidate marker genes per cluster.
striatal_marker_genes = [
    'Otof', 'Cacng5', 'Th', 'Ppp1r1b', 'Drd1',
    'Tac1', 'Tshz1', 'Pdyn', 'Drd2', 'Penk',
    'Adora2a', 'Calb1', 'Pthlh', 'Cxcl14', 'Chat',
]
sc.pl.dotplot(
    adata,
    striatal_marker_genes,
    groupby='final_clusts',
    use_raw=True,
)

In [None]:
# Clusters relabelled with an 'St...' name instead of the generic
# area/type label derived from the cluster id, as (cluster id, label) pairs.
striatal_celltypes = dict([
    ('H.I.7', 'StD1M1'),
    ('H.I.8', 'StD1M2'),
    ('H.I.20', 'StD2M1'),
    ('H.I.27', 'StD1M3'),
    ('P.I.0', 'StD1M4'),
    ('P.I.1', 'StD1M5'),
    ('P.I.2', 'StD2M2'),
    ('P.I.3', 'StD2M3'),
    ('P.I.4', 'StD1M6'),
    ('P.I.5', 'StD2M4'),
    ('P.I.10', 'StD1M7'),
    ('P.I.18', 'StD2M5'),
    ('P.I.19', 'StD1M8'),
])

In [None]:
def _coarse_cell_type(cluster):
    """Return the broad class of a `final_clusts` label.

    Labels found in `nonneuronal_mapping` get their mapped class; every other
    label is reported as 'Neuron'.
    """
    if cluster in nonneuronal_mapping:
        return nonneuronal_mapping[cluster]
    return 'Neuron'


def _fine_cluster_label(cluster):
    """Return the fine-grained display label of a `final_clusts` label.

    Lookup order: `finer_nonneuronal_mapping`, then `striatal_celltypes`.
    Any other label is split on '.' and rebuilt as
    <area><type><number>, where area is 'Hy' when the first part is 'H'
    (otherwise 'Fr'), type is 'In' when the second part is 'I' (otherwise
    'Ex'), and number is the third part plus one.
    """
    if cluster in finer_nonneuronal_mapping:
        return finer_nonneuronal_mapping[cluster]
    if cluster in striatal_celltypes:
        return striatal_celltypes[cluster]
    parts = cluster.split(".")
    area = "Hy" if parts[0] == "H" else "Fr"
    kind = "In" if parts[1] == "I" else "Ex"
    # Cluster ids are shifted by one in the displayed label.
    return area + kind + str(int(parts[2]) + 1)


# Broad class per cell.
cell_types = [_coarse_cell_type(label) for label in adata.obs.final_clusts]
adata.obs['cell_type'] = cell_types

# Fine label per cell, stored as a categorical column.
cell_types_fine = [_fine_cluster_label(label) for label in adata.obs.final_clusts]
adata.obs['clust_label'] = cell_types_fine
adata.obs['clust_label'] = adata.obs['clust_label'].astype('category')

In [None]:
# Drop the cells whose broad class was set to 'NA' (the discarded
# non-neuronal clusters).
is_annotated = ~adata.obs['cell_type'].isin(['NA'])
adata = adata[is_annotated]

In [None]:
# Reprocess: rebuild the object from .raw, then normalise each cell to a total
# of 1e4 and log1p-transform.
# NOTE(review): assumes .raw holds un-normalised counts — confirm upstream.
adata = adata.raw.to_adata()
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Flag the top 3000 variable genes, stash the all-gene object in .raw, then
# keep only the flagged genes.
sc.pp.highly_variable_genes(adata, n_top_genes=3000)
adata.raw = adata
hvg_mask = adata.var['highly_variable']
adata = adata[:, hvg_mask]


In [None]:
# Regress the per-cell total_counts covariate out of the expression matrix.
sc.pp.regress_out(adata, keys=['total_counts'])

In [None]:
# Number of principal components computed, plotted and fed to the neighbour graph.
N_COMPONENTS = 50

sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack', n_comps=N_COMPONENTS)

# Variance explained per component, on a log scale.
sc.pl.pca_variance_ratio(adata, n_pcs=N_COMPONENTS, log=True)

sc.pp.neighbors(adata, n_pcs=N_COMPONENTS, n_neighbors=10)
sc.tl.umap(adata)

In [None]:
# final final clust information
#adata.obs.to_csv("final_clusts.csv")

In [None]:
# UMAP coloured by two per-cell QC columns.
sc.pl.umap(
    adata,
    color=['doublet_score', 'total_counts'],
    add_outline=False,
    size=1,
)

In [None]:
# UMAP coloured by broad cell class, drawn on a 10x10 inch axes.
fig, ax = plt.subplots(figsize=(10, 10))
sc.pl.umap(
    adata,
    color='cell_type',
    palette=sns.color_palette('Pastel1'),
    size=10,
    add_outline=True,
    ax=ax,
)

In [None]:
def _count_with_prefix(prefix, labels):
    """Return how many entries of `labels` start with `prefix`."""
    # Anchor the match at the start of the label: the argument is a prefix, and
    # a substring test could also count labels that merely contain it.
    return sum(1 for label in labels if label.startswith(prefix))


def gen_light_palette(prefix, color_name, uniq_clusts):
    """Build one light-to-`color_name` shade per cluster label starting with `prefix`.

    Parameters
    ----------
    prefix : str
        Leading text shared by the cluster labels of one group (e.g. "FrEx").
    color_name : str
        Colour passed to `sns.light_palette`.
    uniq_clusts : iterable of str
        All cluster labels; only those starting with `prefix` are counted.

    Returns
    -------
    list
        As many colours as there are matching labels. Two extra shades are
        requested and the first two are discarded (presumably because they
        are too pale to see — confirm).
    """
    n = _count_with_prefix(prefix, uniq_clusts)
    return sns.light_palette(color_name, n_colors=n + 2)[2:]


def gen_dark_palette(prefix, color_name, uniq_clusts):
    """Build one dark-to-`color_name` shade per cluster label starting with `prefix`.

    Same contract as `gen_light_palette`, but built with `sns.dark_palette`;
    the first two shades of the generated ramp are likewise discarded.
    """
    n = _count_with_prefix(prefix, uniq_clusts)
    return sns.dark_palette(color_name, n_colors=n + 2)[2:]

# Sorted unique fine cluster labels; colours are later paired with these by position.
uniq_clusts = np.sort(adata.obs.clust_label.unique())

# One colour ramp per label group, sized by how many labels match the prefix.
print("Prefrontal excite")
fr_ex_pal = gen_light_palette("FrEx", "darkgreen", uniq_clusts)

print("Prefrontal inhib")
fr_in_pal = gen_light_palette("FrIn", "navy", uniq_clusts)

# hy_ex_pal / hy_in_pal were referenced in `pals` below but never defined in this
# notebook, so the cell raised NameError on a fresh kernel.
# NOTE(review): the base colours for the two 'Hy' groups are a placeholder
# choice — the original colours are not recoverable from this notebook.
print("HyEx")
hy_ex_pal = gen_light_palette("HyEx", "darkred", uniq_clusts)

print("HyIn")
hy_in_pal = gen_light_palette("HyIn", "deeppink", uniq_clusts)

print("Striatal")
st_pal = gen_light_palette("St", "indigo", uniq_clusts)

print("Microglial")
micro_pal = gen_light_palette('Micro', 'dodgerblue', uniq_clusts)

print("Macro")
macro_pal = gen_light_palette('Macro', 'blue', uniq_clusts)

print("Astrocyte")
astro_pal = gen_light_palette('Astro', 'darkorange', uniq_clusts)

print("Peri")
peri_pal = gen_light_palette('Peri', 'lime', uniq_clusts)

print("VLMC")
vlmc_pal = gen_light_palette('Vlmc', 'aqua', uniq_clusts)

print("OPC")
opc_pal = gen_dark_palette('OPC', 'black', uniq_clusts)

print("Oligo")
oligo_pal = gen_light_palette('Olig', 'darkgray', uniq_clusts)

# The palettes are listed in the sort order of their label prefixes
# (Astro, FrEx, FrIn, HyEx, HyIn, Macro, Micro, OPC, Olig, Peri, St, Vlmc).
# The colours are stacked and paired with `uniq_clusts` by position later on,
# so this order has to follow the sorted labels.
pals = [astro_pal, fr_ex_pal, fr_in_pal, hy_ex_pal, hy_in_pal, macro_pal,
        micro_pal, opc_pal, oligo_pal, peri_pal, st_pal, vlmc_pal]

# Non-fatal sanity check: one colour per label is needed for positional pairing.
n_palette_colors = sum(len(p) for p in pals)
if n_palette_colors != len(uniq_clusts):
    print("WARNING: %d palette colours for %d cluster labels; "
          "label colours will be misaligned" % (n_palette_colors, len(uniq_clusts)))

for i in pals:
    sns.palplot(i)

In [None]:
from cycler import cycler

# Stack every group palette into one colour cycle, in the order of `pals`.
pal = cycler(color=np.vstack(pals))

# Pair the i-th entry of the sorted cluster labels with the i-th colour.
label_colors = {
    uniq_clusts[position]: entry['color']
    for position, entry in enumerate(pal)
}

In [None]:
# UMAP coloured by fine cluster label, using the colour cycle built above.
fig, ax = plt.subplots(figsize=(10, 10))
sc.pl.umap(
    adata,
    color='clust_label',
    palette=pal,
    size=10,
    add_outline=True,
    ax=ax,
)

In [None]:
# UMAP coloured by age with a two-colour 'Set2' palette.
fig, ax = plt.subplots(figsize=(10, 10))
sc.pl.umap(
    adata,
    color='age',
    palette=sns.color_palette('Set2', 2),
    size=10,
    add_outline=True,
    ax=ax,
)

In [None]:
# UMAP coloured by the `area` column.
fig, ax = plt.subplots(figsize=(10, 10))
sc.pl.umap(
    adata,
    color='area',
    size=10,
    add_outline=True,
    ax=ax,
)

In [None]:
#adata.write("adata_finalclusts_annot.h5ad")