# Mouse Hippocampus snRNA Integrative Analysis
### Data
- [Hippocampus data table](https://github.com/erebboah/ENC4_Mouse_SingleCell/blob/master/snrna/ref/hippocampus_minimal_metadata.tsv)

### Aims
1. Read in pre-processed Parse and 10x data and merge the count matrices across experiments (within the same technology) for each tissue.
2. Combine Parse standard, Parse deep, and 10x data by CCA integration.
3. Use an [external 10x brain atlas](https://portal.brain-map.org/atlases-and-data/rnaseq/mouse-whole-cortex-and-hippocampus-10x) to predict celltype labels.
4. Manual celltype annotation by assigning each cluster to the celltype predicted for the majority of cells in the cluster, then adjusting the labels as we see fit.

### Results
- Seurat CCA works pretty well for integrating the 3 types of experiments: Parse standard, Parse deep, and 10x multiome. 
- We decided on 3 levels of annotation: `gen_celltype` or general celltypes (e.g. "Neuron"), `celltypes` for higher resolution (e.g. "Inhibitory"), and finally `subtypes` for the highest resolution of celltype annotations (e.g. "Pvalb"). 
- The external atlas did not separate their oligodendrocytes into OPCs, MFOLs, and MOLs, but we use our expertise with the brain to check marker genes and assign cell type labels.


In [10]:
library(Matrix)
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(viridis))
library(glmGamPoi)
library(RColorBrewer)
# Raise the future package's size limit for exported globals to
# 10000 * 1024^2 bytes.
options(future.globals.maxSize = 10000 * 1024^2)
# NOTE(review): this only creates a variable named `future.seed`; nothing
# visible in this notebook reads it, so on its own it does not configure the
# future package -- confirm the intent.
future.seed=TRUE

# Every relative path used below (seurat/, plots/, ref/) is resolved from here.
setwd("/share/crsp/lab/seyedam/share/enc4_mouse/snrna/")

In [11]:
# Create the output directory for the hippocampus plots.
# dir.create() replaces the shell call to `mkdir`: it also creates the parent
# `plots/` directory when it is missing, and re-running the cell is harmless
# when the directory already exists.
dir.create("plots/hippocampus", recursive = TRUE, showWarnings = FALSE)

# Plotting: check integration and clustering

In [3]:
# Load the integrated Seurat object that every later cell operates on.
combined.sct <- readRDS(file = "seurat/hippocampus_Parse_10x_integrated.rds")


In [4]:
# One colour per Seurat cluster, interpolated from the 9-colour "Set1" palette.
nclusters <- n_distinct(combined.sct$seurat_clusters)
cluster_cols <- colorRampPalette(colors = brewer.pal(n = 9, name = "Set1"))(nclusters)

In [5]:
# UMAP of the integrated object drawn twice on one page: coloured by
# technology (left) and by the active identity class (right).
pdf(file = "plots/hippocampus/UMAP_Parse_10x.pdf", width = 20, height = 8)

umap_by_technology <- DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "technology"
)
umap_by_identity <- DimPlot(
  combined.sct,
  reduction = "umap",
  label = TRUE,
  repel = TRUE,
  cols = cluster_cols
)
umap_by_technology + umap_by_identity

dev.off()

In [6]:
# Two pages: the cluster UMAP split by `depth2`, then the fraction of each
# cluster contributed by every `depth2` group.
pdf(
  file = "plots/hippocampus/Parse_10x_experiment_distribution.pdf",
  width = 20, height = 6
)

DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "seurat_clusters",
  split.by = "depth2",
  label = TRUE,
  label.size = 6,
  repel = TRUE,
  shuffle = TRUE,
  cols = cluster_cols
)

# position = "fill" rescales every bar to 1, so bars show proportions.
ggplot(combined.sct@meta.data, aes(x = seurat_clusters, fill = depth2)) +
  geom_bar(position = "fill") +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_text(size = 20)
  )

dev.off()


In [7]:
# Fix the sample order used in legends and bars. The level names are built by
# recycling: each of the six timepoint codes is repeated four times, combined
# with M, M, F, F and replicate suffixes 1, 2.
sample_levels <- paste0(
  "HC_",
  rep(c("10", "14", "25", "36", "2m", "18m"), each = 4),
  rep(c("_M", "_F"), each = 2),
  c("_1", "_2")
)
combined.sct$sample <- factor(combined.sct$sample, levels = sample_levels)

pdf(
  file = "plots/hippocampus/UMAP_cluster_sample_barplot.pdf",
  width = 20, height = 10
)

# Left panel: clusters on the UMAP.
cluster_umap <- DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "seurat_clusters",
  label = TRUE,
  label.size = 8,
  repel = TRUE,
  cols = cluster_cols
)

# Right panel: per-cluster sample composition as horizontal proportion bars.
sample_bars <- ggplot(
  combined.sct@meta.data,
  aes(x = seurat_clusters, fill = sample)
) +
  geom_bar(position = "fill") +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_text(size = 20)
  ) +
  coord_flip()

gridExtra::grid.arrange(
  cluster_umap, sample_bars,
  widths = c(2, 1.6),
  layout_matrix = matrix(c(1, 2), nrow = 1)
)

dev.off()

In [8]:
# 2 x 2 page: UMAP and per-cluster proportion bars for timepoint (top row)
# and for sex (bottom row).
pdf(file = "plots/hippocampus/age_sex_barplot.pdf", width = 18, height = 19)

timepoint_umap <- DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "timepoint",
  label = TRUE,
  label.size = 5,
  repel = TRUE
)
timepoint_bars <- ggplot(
  combined.sct@meta.data,
  aes(x = seurat_clusters, fill = timepoint)
) +
  geom_bar(position = "fill") +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_text(size = 20)
  ) +
  coord_flip()

sex_umap <- DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "sex",
  label = TRUE,
  label.size = 5,
  repel = TRUE,
  shuffle = TRUE
)
sex_bars <- ggplot(
  combined.sct@meta.data,
  aes(x = seurat_clusters, fill = sex)
) +
  geom_bar(position = "fill") +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_text(size = 20)
  ) +
  coord_flip()

gridExtra::grid.arrange(
  timepoint_umap, timepoint_bars, sex_umap, sex_bars,
  widths = c(2, 1),
  layout_matrix = matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)
)

dev.off()

In [9]:
# Goal: the Vip+ and Sncg+ clusters should be separate, and likewise the Sst+
# and Pvalb+ clusters. Plot the four markers to check.
pdf(
  file = "plots/hippocampus/inhib_neuron_featureplots.pdf",
  width = 35, height = 20
)

# Gene expression is visualised from the SCT assay, NOT the integrated assay.
DefaultAssay(combined.sct) <- "SCT"

# `&` applies the colour scale and theme tweaks to every panel of the grid.
FeaturePlot(
  combined.sct,
  features = c("Sst", "Pvalb", "Vip", "Sncg"),
  ncol = 2,
  pt.size = 0.1,
  order = TRUE
) &
  scale_colour_gradientn(colours = viridis(11)) &
  NoAxes() &
  theme(text = element_text(size = 20))

dev.off()

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.



# Plotting: check predicted celltypes

In [173]:
# UMAP coloured by the per-cell atlas prediction, one interpolated "Set1"
# colour per predicted label, without a legend.
pdf(file = "plots/hippocampus_umaps_predictions.pdf", width = 15, height = 12)

# Note: this overwrites the `nclusters` value computed for the Seurat clusters.
nclusters <- length(unique(combined.sct$atlas_predictions))

DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "atlas_predictions",
  label = TRUE,
  label.size = 6,
  repel = TRUE,
  cols = colorRampPalette(brewer.pal(9, "Set1"))(nclusters)
) + NoLegend()

dev.off()

In [174]:
# Per-cell QC metrics on the UMAP, three panels per row.
pdf(file = "plots/hippocampus_qc_featureplots.pdf", width = 25, height = 10)

# `&` applies the colour scale and theme tweaks to every panel of the grid.
FeaturePlot(
  combined.sct,
  features = c(
    "nFeature_RNA",
    "percent.mt",
    "percent.ribo",
    "doublet_scores",
    "G2M.Score"
  ),
  ncol = 3,
  pt.size = 0.1,
  order = TRUE
) &
  scale_colour_gradientn(colours = viridis(11)) &
  NoAxes() &
  theme(text = element_text(size = 20))

dev.off()


Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.

Scale for 'colour' is already present. Adding another scale for 'colour',
which will replace the existing scale.



# Rename clusters based on maximum predicted celltype

In [175]:
# Label every cluster with the atlas cell type predicted for the largest
# number of its cells, keeping the cluster number as a suffix
# ("<celltype>.<cluster>"). The result becomes the active identity and is
# stored in the `atlas_celltypes` metadata column.
Idents(combined.sct) <- "seurat_clusters"

# Contingency table: rows are clusters, columns are predicted atlas cell types.
mat <- as.matrix(table(Idents(combined.sct), combined.sct$atlas_predictions))

# Most frequent prediction per cluster. ties.method = "first" keeps the result
# reproducible; the default breaks ties at random.
ct <- colnames(mat)[max.col(mat, ties.method = "first")]
# Key the labels by the actual cluster names rather than assuming 0..n-1.
names(ct) <- rownames(mat)

# Build each cell's label with a direct lookup on its cluster id. Repeated
# regex substitution on bare cluster numbers is avoided because a later pass
# can rewrite digits inside labels that were already substituted
# (e.g. the "3" in "L2/3 IT PPP").
cluster_ids <- as.character(Idents(combined.sct))
Idents(combined.sct) <- paste0(unname(ct[cluster_ids]), ".", cluster_ids)

combined.sct[["atlas_celltypes"]] <- Idents(combined.sct)

In [176]:
# UMAP coloured by the per-cluster majority label stored in `atlas_celltypes`.
pdf(
  file = "plots/hippocampus_umaps_maximum_predictions.pdf",
  width = 15, height = 12
)

# Note: `nclusters` is reassigned to the number of distinct labels.
nclusters <- length(unique(combined.sct$atlas_celltypes))

DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "atlas_celltypes",
  label = TRUE,
  label.size = 6,
  repel = TRUE,
  cols = colorRampPalette(brewer.pal(9, "Set1"))(nclusters)
) & NoLegend()

dev.off()

# Manual celltype annotation

In [178]:
# Start the manual annotation from the per-cluster atlas labels; the cells
# below edit `subtypes` in place.
combined.sct$subtypes <- combined.sct$atlas_celltypes

In [179]:
# Dot plot of selected marker genes, grouped by the current `subtypes` labels.
pdf(file = "plots/hippocampus_dotplot.pdf", width = 12, height = 12)

Idents(combined.sct) <- "subtypes"

marker_genes <- c(
  "Dnah6", "Dnah12",            # ependymal
  "Prox1",                      # early DG
  "Pcdh15", "Sox6",             # OPC
  "Plp1", "Mbp",                # MFOL
  "Mag", "Mog",                 # MOL
  "Tmem119", "Csf1r", "Cx3cr1"  # microglia
)
DotPlot(combined.sct, features = marker_genes)

dev.off()

## Fix oligodendrocyte clusters

In [180]:
# Split the atlas "Oligo" clusters into OPC / MFOL / MOL.
# Each entry maps a regex for one "<label>.<cluster>" string to its new label;
# the substitutions run in the order listed.
oligo_relabels <- c(
  "\\<Oligo.20\\>" = "OPC.20",
  "\\<Oligo.11\\>" = "OPC.11",
  "\\<Oligo.23\\>" = "MFOL.23",
  "\\<Oligo.9\\>"  = "MOL.9",
  "\\<Oligo.44\\>" = "MOL.44"
)

subtype_labels <- as.character(combined.sct$subtypes)
for (pattern in names(oligo_relabels)) {
  subtype_labels <- gsub(pattern, oligo_relabels[[pattern]], subtype_labels)
}
combined.sct$subtypes <- subtype_labels


## Add early DG and ependymal clusters

In [181]:
# Relabel the clusters identified as early dentate gyrus and ependymal cells.
# Each entry maps a regex for one "<label>.<cluster>" string to its new label;
# the substitutions run in the order listed.
# Fix: the DG.8 pattern was written "\\DG.8\\>", i.e. it started with `\D`
# (any non-digit) instead of the word-start anchor `\<` used everywhere else.
early_dg_ependymal_relabels <- c(
  "\\<DG.8\\>"         = "DG_early.8",
  "\\<Vip.7\\>"        = "DG_early.7",
  "\\<Micro-PVM.41\\>" = "DG_early.41",
  "\\<CA3.39\\>"       = "Ependymal.39",
  "\\<Sncg.45\\>"      = "Ependymal.45"
)

subtype_labels <- as.character(combined.sct$subtypes)
for (pattern in names(early_dg_ependymal_relabels)) {
  subtype_labels <- gsub(
    pattern,
    early_dg_ependymal_relabels[[pattern]],
    subtype_labels
  )
}
combined.sct$subtypes <- subtype_labels


In [182]:
# Drop the cluster number: keep only the text before the first "." of each
# "<label>.<cluster>" string.
label_parts <- strsplit(as.character(combined.sct$subtypes), "[.]")
combined.sct$subtypes <- vapply(
  label_parts,
  function(parts) parts[1],
  character(1)
)

In [183]:
# Expand abbreviated atlas labels and fold "Sst Chodl" into "Sst".
# Substitutions run in the order listed.
label_renames <- c(
  "\\<Astro\\>"     = "Astrocyte",
  "\\<Endo\\>"      = "Endothelial",
  "\\<Sst Chodl\\>" = "Sst"
)

subtype_labels <- as.character(combined.sct$subtypes)
for (pattern in names(label_renames)) {
  subtype_labels <- gsub(pattern, label_renames[[pattern]], subtype_labels)
}
combined.sct$subtypes <- subtype_labels


In [184]:
# UMAP coloured by the preliminary `subtypes` labels, one interpolated "Set1"
# colour per label.
pdf(file = "plots/hippocampus_prelim_subtypes.pdf", width = 15, height = 10)

# Note: `nclusters` is reassigned to the number of distinct subtype labels.
nclusters <- length(unique(combined.sct$subtypes))

DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "subtypes",
  label = TRUE,
  label.size = 8,
  repel = TRUE,
  cols = colorRampPalette(brewer.pal(9, "Set1"))(nclusters)
)

dev.off()

# Add celltypes and gen_celltype metadata
Based on the subtypes annotation, we can group the cells into broader categories.

In [185]:
# Derive the mid-resolution `celltypes` annotation from `subtypes`.
# Each list entry names the broader label and holds the whole-word regexes of
# the subtype labels collapsed into it. Labels not listed are kept unchanged.
# Substitutions run in the order listed.
celltype_groups <- list(
  Excitatory = c(
    "\\<CA1-ProS\\>",
    "\\<CA3\\>",
    "\\<CR\\>",
    "\\<CT SUB\\>",
    "\\<Car3\\>",
    "\\<DG\\>",
    "\\<DG_early\\>",
    "\\<L2 IT ENTm\\>",
    "\\<L2/3 IT PPP\\>",
    "\\<L3 IT ENT\\>",
    "\\<L6 CT CTX\\>",
    "\\<NP SUB\\>",
    "\\<SUB-ProS\\>"
  ),
  Inhibitory = c(
    "\\<Lamp5\\>",
    "\\<Pvalb\\>",
    "\\<Sncg\\>",
    "\\<Sst\\>"
  ),
  Oligodendrocyte = c(
    "\\<MFOL\\>",
    "\\<MOL\\>"
  ),
  Smooth_muscle = c("\\<SMC-Peri\\>"),
  Microglia = c("\\<Micro-PVM\\>")
)

celltype_labels <- as.character(combined.sct$subtypes)
for (broad_label in names(celltype_groups)) {
  for (pattern in celltype_groups[[broad_label]]) {
    celltype_labels <- gsub(pattern, broad_label, celltype_labels)
  }
}
combined.sct$celltypes <- celltype_labels


In [186]:
# Cell counts per `celltypes` label (sanity check of the mapping above).
table(combined.sct@meta.data$celltypes)


      Astrocyte     Endothelial       Ependymal      Excitatory      Inhibitory 
           4561             521             244           29383            3157 
      Microglia             OPC Oligodendrocyte   Smooth_muscle            VLMC 
            575            1849            1935             118             270 

In [187]:
# Derive the coarsest `gen_celltype` annotation from `celltypes`.
# Each list entry names the general label and holds the whole-word regexes of
# the celltypes collapsed into it. Labels not listed are kept unchanged.
# Substitutions run in the order listed.
gen_celltype_groups <- list(
  Neuron = c("\\<Excitatory\\>", "\\<Inhibitory\\>"),
  Glial = c("\\<Astrocyte\\>", "\\<OPC\\>", "\\<Oligodendrocyte\\>"),
  Myeloid = c("\\<Microglia\\>"),
  Stromal = c("\\<Ependymal\\>")
)

gen_labels <- as.character(combined.sct$celltypes)
for (general_label in names(gen_celltype_groups)) {
  for (pattern in gen_celltype_groups[[general_label]]) {
    gen_labels <- gsub(pattern, general_label, gen_labels)
  }
}
combined.sct$gen_celltype <- gen_labels



In [188]:
# Cell counts per `gen_celltype` label (sanity check of the mapping above).
table(combined.sct@meta.data$gen_celltype)


  Endothelial         Glial       Myeloid        Neuron Smooth_muscle 
          521          8345           575         32540           118 
      Stromal          VLMC 
          244           270 

# Plotting the 3 levels of annotations

In [189]:
# Reference table of annotation labels and their colours; `color_ref` is
# reused by the following plotting cells.
color_ref <- read.csv("ref/enc4_mouse_snrna_celltypes_c2c12.csv")

# Keep the distinct (label, colour) pairs of the hippocampus rows, then order
# them like the sorted labels present in the object so the colour vector lines
# up with the groups drawn by DimPlot.
# NOTE(review): `X...tissue` is the tissue column as named by read.csv for
# this file -- confirm it matches the header.
is_hippocampus <- color_ref$X...tissue == "Hippocampus"
gen_celltype_colors <- unique(
  color_ref[is_hippocampus, c("gen_celltype", "gen_celltype_color")]
)
rownames(gen_celltype_colors) <- gen_celltype_colors$gen_celltype
gen_celltype_colors <- gen_celltype_colors[
  sort(unique(combined.sct$gen_celltype)),
]

pdf(file = "plots/hippocampus_gen_celltype.pdf", width = 15, height = 10)

DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "gen_celltype",
  label = TRUE,
  label.size = 8,
  repel = TRUE,
  cols = gen_celltype_colors$gen_celltype_color
)

dev.off()

In [190]:
# Distinct (celltype, colour) pairs of the hippocampus rows, ordered like the
# sorted celltypes present in the object. `celltype_colors` is reused by the
# proportion plot further down.
is_hippocampus <- color_ref$X...tissue == "Hippocampus"
celltype_colors <- unique(
  color_ref[is_hippocampus, c("celltypes", "celltype_color")]
)
rownames(celltype_colors) <- celltype_colors$celltypes
celltype_colors <- celltype_colors[sort(unique(combined.sct$celltypes)), ]

pdf(file = "plots/hippocampus_celltypes.pdf", width = 15, height = 10)

DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "celltypes",
  label = TRUE,
  label.size = 8,
  repel = TRUE,
  cols = celltype_colors$celltype_color
)

dev.off()

In [191]:
# Distinct (subtype, colour) pairs of the hippocampus rows, ordered like the
# sorted subtypes present in the object.
subtype_colors <- unique(
  color_ref[
    color_ref$X...tissue == "Hippocampus",
    c("subtypes", "subtype_color")
  ]
)
rownames(subtype_colors) <- subtype_colors$subtypes
subtype_colors <- subtype_colors[sort(unique(combined.sct$subtypes)), ]

pdf(file = "plots/hippocampus_subtypes.pdf", width = 15, height = 10)

# Fix: take the colours from `subtype_colors`. The previous code read
# `celltype_colors$subtype_color`, a column that table does not have, so the
# reference subtype colours were never applied.
DimPlot(
  combined.sct,
  reduction = "umap",
  group.by = "subtypes",
  label = TRUE,
  label.size = 8,
  repel = TRUE,
  cols = subtype_colors$subtype_color
)

dev.off()

## Proportion plot of celltypes over timepoint

In [192]:
# Per-timepoint cell type proportions, computed on the Parse cells only.
# Result: `df` with columns celltypes, Freq (cell count), percentage (fraction
# of that timepoint's cells) and timepoint (1-based index into the sorted
# timepoints).
combined.sct_parse <- subset(combined.sct, subset = technology == "Parse")

parse_meta <- combined.sct_parse@meta.data
samples <- sort(unique(parse_meta$timepoint))

# Use one shared set of levels so that every timepoint gets a row for every
# cell type (count 0 when absent). Otherwise timepoints missing a cell type
# yield tables of different shapes and level orders, which breaks the stacked
# area plot built from `df`.
celltype_levels <- sort(unique(as.character(parse_meta$celltypes)))

# seq_along() avoids iterating over c(1, 0) when there are no timepoints, and
# %in% keeps rows with a missing timepoint out of every subset.
dflist <- lapply(seq_along(samples), function(i) {
  tp <- parse_meta[parse_meta$timepoint %in% samples[i], ]
  tp_df <- as.data.frame(table(factor(tp$celltypes, levels = celltype_levels)))
  tp_df$percentage <- tp_df$Freq / nrow(tp)
  tp_df$timepoint <- rep(i, nrow(tp_df))
  tp_df
})

df <- do.call(rbind, dflist)
df <- df[order(df$timepoint), ]
colnames(df) <- c("celltypes", "Freq", "percentage", "timepoint")



In [193]:
# Stacked area plot of cell type proportions across the six timepoints.

# Name every colour by its cell type. `celltype_colors` was ordered by the
# sorted celltypes of `combined.sct`, so the same sorted labels are its names.
# With unnamed values ggplot2 assigns colours by position in the fill levels,
# which can differ from that order and colour the wrong cell type.
celltype_palette <- setNames(
  celltype_colors$celltype_color,
  sort(unique(combined.sct$celltypes))
)

pdf(
  file = "plots/hippocampus_timepoint_celltypes_proportions.pdf",
  width = 15, height = 10
)

ggplot(df, aes(x = timepoint, y = percentage, fill = celltypes)) +
  geom_area() +
  scale_fill_manual(values = celltype_palette) +
  # `timepoint` is a 1-based index; label it with the timepoint names.
  scale_x_continuous(
    breaks = c(1, 2, 3, 4, 5, 6),
    labels = c("PND_10", "PND_14", "PND_25", "PND_36", "PNM_02", "PNM_18-20")
  ) +
  scale_y_continuous(
    breaks = (0:10) / 10,
    labels = paste0(seq(0, 100, by = 10), "%")
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 30),
    axis.text.x = element_text(size = 30, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 30)
  )

dev.off()

In [167]:
# Persist the annotated object. This is the same path the object was read from
# at the top of the notebook, so the input file is overwritten.
saveRDS(
  object = combined.sct,
  file = "seurat/hippocampus_Parse_10x_integrated.rds"
)
