In [None]:
# Load required libraries
library(dplyr)
library(Seurat)
library(patchwork)
library(DoubletFinder)
library(DropletUtils)
library(ggplot2)
library(RColorBrewer)

In [None]:
# Read the saved data object from disk (it is used as a Seurat object below).
#   The path is a placeholder: replace it with the location of your own .rds file.
object <- readRDS("/path/to/postLabel/object.rds")

In [None]:
# Quick overview of the full object:
#   1. the first rows of the per-cell metadata
#   2. the number of cells per "orig.ident" value
#   3. the existing UMAP embedding
head(x = object@meta.data, n = 6L)
as.data.frame(x = table(object@meta.data$orig.ident))
DimPlot(object = object, reduction = "umap")

In [None]:
# Isolate the major cell type that will be subclustered
#   "CELLTYPE": replace (globally) with the full name of the cell type to isolate
#   "ABR": replace with the abbreviation of this cell type
ABR <- subset(x = object, subset = cellType == "CELLTYPE")

# Cell counts per "model" value in the subset, then a preview of its metadata
data.frame(table(ABR@meta.data$model))
head(x = ABR@meta.data, n = 6L)

In [None]:
# UMAP of the isolated cell type as it is currently clustered:
#   default grouping (labeled), then colored by "orig.ident" and by "model"
DimPlot(object = ABR, reduction = "umap", label = TRUE)
DimPlot(object = ABR, reduction = "umap", group.by = "orig.ident")
DimPlot(object = ABR, reduction = "umap", group.by = "model")

In [None]:
# Start reclustering to see whether smaller subpopulations (likely subtypes)
# separate: re-run PCA on the subset's variable features, then draw the elbow
# plot used to pick the number of principal components in the next cell
ABR <- RunPCA(ABR, features = VariableFeatures(ABR))
ElbowPlot(object = ABR)

In [None]:
# Continue clustering. Replace the two placeholders below:
#   X: the max number of principal components to use, based on the elbow plot above
#   Y: your desired resolution
# Each placeholder is assigned exactly once so that the neighbor graph and the
# UMAP are always computed from the same set of principal components.
n_pcs <- X
cluster_resolution <- Y
pc_dims <- seq_len(n_pcs)

ABR <- FindNeighbors(ABR, dims = pc_dims)
ABR <- FindClusters(ABR, resolution = cluster_resolution)
ABR <- RunUMAP(object = ABR, reduction = "pca", dims = pc_dims)

In [None]:
# Inspect the new clustering before evaluating subtype markers
#   Check whether any replicates/genotypes cluster separately from the rest;
#   this can point to contamination
DimPlot(object = ABR, reduction = "umap", label = TRUE)
DimPlot(object = ABR, reduction = "umap", group.by = "orig.ident")
DimPlot(object = ABR, reduction = "umap", group.by = "model")

In [None]:
# Same UMAP views as above, plus two plots grouped by the
# "predicted.subclass_DropViz" and "predicted.subclass_1M" fields
# (presumably metadata columns added upstream -- confirm they exist in your object).
#   To write a plot to the "plots" subfolder, uncomment the pdf()/dev.off()
#   pair around it. Each plot has its own pair and its own file name.
DimPlot(ABR, reduction = "umap", label = TRUE)
#pdf("plots/CELLTYPESubclusters_bySample_DATE.pdf")
DimPlot(ABR, reduction = "umap", group.by="orig.ident")
#dev.off()
#pdf("plots/CELLTYPESubclusters_byGenotype_DATE.pdf")
DimPlot(ABR, reduction = "umap", group.by="model")
#dev.off()
#pdf("plots/CELLTYPESubclusters_byPredictedSubclassDropViz_DATE.pdf")
DimPlot(ABR, group.by="predicted.subclass_DropViz")
#dev.off()
#pdf("plots/CELLTYPESubclusters_byPredictedSubclass1M_DATE.pdf")
DimPlot(ABR, group.by="predicted.subclass_1M")
#dev.off()

In [None]:
# Evaluate subtype marker genes in order to label each cluster by its subtype
#   The example below uses marker genes for Hippocampal Excitatory Neuron subtypes
#   Please email me (anicolel@broadinstitute.org) for a spreadsheet of marker genes
#   for other cell types and brain regions

# CA1 markers
FeaturePlot(object = ABR, features = "Mpped1", label = TRUE)
FeaturePlot(object = ABR, features = "Fibcd1", label = TRUE)
FeaturePlot(object = ABR, features = "Ndst3", label = TRUE)

# CA2 markers
FeaturePlot(object = ABR, features = "Map3k15", label = TRUE)
FeaturePlot(object = ABR, features = "Lsm11", label = TRUE)
FeaturePlot(object = ABR, features = "Homer3", label = TRUE)

# CA3 markers
FeaturePlot(object = ABR, features = "Cdh24", label = TRUE)

# DG markers
FeaturePlot(object = ABR, features = "Prox1", label = TRUE)
FeaturePlot(object = ABR, features = "C1ql2", label = TRUE)
FeaturePlot(object = ABR, features = "Dsp", label = TRUE)
FeaturePlot(object = ABR, features = "Npnt", label = TRUE)
FeaturePlot(object = ABR, features = "Dgkh", label = TRUE)

In [None]:
# Assign the subtype labels to each cluster
#   new.cluster.ids2: the subtype label of every cluster, listed in cluster order
#   i.e. if cluster 0 is DG, cluster 1 is CA1, and cluster 2 is CA2, use:
#       new.cluster.ids2 <- c("DG", "CA1", "CA2")
new.cluster.ids2 <- c()

# Fail early with a clear message: an empty vector makes names<- error with
# "attempt to set an attribute on NULL", and a vector that is too short would
# be silently padded with NA names.
if (length(new.cluster.ids2) != length(levels(ABR))) {
    stop(
        "new.cluster.ids2 has ", length(new.cluster.ids2),
        " label(s) but ABR has ", length(levels(ABR)),
        " cluster(s); supply exactly one label per cluster, in cluster order.",
        call. = FALSE
    )
}

# Name each label by the cluster it replaces, rename, and plot the result
names(new.cluster.ids2) <- levels(ABR)
ABR <- RenameIdents(ABR, new.cluster.ids2)
DimPlot(ABR, reduction = "umap", label = TRUE)

In [None]:
# Save plots of the labeled subtypes into the "plots" subfolder
#   The folder is created if it does not exist yet (pdf() cannot create it).
#   Plots are print()ed explicitly so they are drawn into the open PDF device
#   even when this code is run through source(), which does not auto-print.
dir.create("plots", showWarnings = FALSE)

pdf("plots/CELLTYPESubclusters_bySample_DATE.pdf")
print(DimPlot(ABR, reduction = "umap", group.by = "orig.ident"))
dev.off()

pdf("plots/CELLTYPESubclusters_byGenotype_DATE.pdf")
print(DimPlot(ABR, reduction = "umap", group.by = "model"))
dev.off()

pdf("plots/CELLTYPESubclusters_bySubtype_DATE.pdf")
print(DimPlot(ABR, reduction = "umap", label = TRUE))
dev.off()

In [None]:
# Create subtype distribution plot by genotype ("model")
#   NOTE: this overwrites the "cellType" metadata column with the current
#   identities (the subtype labels assigned above)
ABR@meta.data$cellType <- Idents(ABR)

# One row per (subtype, genotype) pair with its cell count in "Freq"
pt <- as.data.frame(table(ABR$cellType, ABR$model))
pt$cellType <- as.character(pt$Var1)
pt$Model <- pt$Var2
dim(pt)
pt

# Fraction (0-1) of each genotype made up by each subtype
#   Computed per genotype group, so it works for any number of subtypes and
#   genotypes; no row numbers need to be adjusted
pt$Percentage <- pt$Freq / ave(pt$Freq, pt$Model, FUN = sum)
pt

# Stacking order of the subtypes in the plot
#   Optionally list your subtype names in "my_levels2" in order of descending
#   frequency. If it is left empty, the order is derived from the data
#   (subtypes sorted by descending total cell count).
my_levels2 <- c()
if (length(my_levels2) == 0) {
    subtype_totals <- tapply(pt$Freq, pt$cellType, sum)
    my_levels2 <- names(sort(subtype_totals, decreasing = TRUE))
}
pt$cellType <- factor(pt$cellType, levels = my_levels2)
pt$cellType

#pdf("plots/project_ABRPercentSubcluster_byGenotype_DATE.pdf")
ggplot(pt, aes(x = Model, y = Percentage, fill = cellType, label = round(Percentage, 2))) +
    geom_bar(stat = "identity") +
    geom_text(size = 3, position = position_stack(vjust = 0.5))
#dev.off()

In [None]:
# Create subtype distribution plot by replicate ("orig.ident")
#   NOTE: this overwrites the "cellType" metadata column with the current
#   identities (the subtype labels assigned above)
ABR@meta.data$cellType <- Idents(ABR)

# One row per (subtype, replicate) pair with its cell count in "Freq"
pt <- as.data.frame(table(ABR$cellType, ABR$orig.ident))
pt$cellType <- as.character(pt$Var1)

# Map each "orig.ident" value to a shortened replicate name for the x-axis
#   "from": the replicate names stored in "object@meta.data$orig.ident"
#   "to": the corresponding shortened name of each replicate, in the same order
#   i.e.
#       replicate <- plyr::mapvalues(
#           x = pt$Var2,
#           from = c("Exp_HT1", "Exp_KO1", "Exp_WT1"),
#           to = c("HT1", "KO1", "WT1")
#       )
#   With "from" and "to" left empty the original names are kept unchanged.
replicate <- plyr::mapvalues(
    x = pt$Var2,
    from = c(),
    to = c()
)
replicate
pt$Sample <- replicate
pt

# Fraction (0-1) of each replicate made up by each subtype
#   Computed per replicate group, so it works for any number of samples and
#   subtypes; no row numbers need to be adjusted
pt$Percentage <- pt$Freq / ave(pt$Freq, pt$Var2, FUN = sum)
pt

# Stacking order of the subtypes in the plot
#   Optionally list your subtype names in "my_levels2" in order of descending
#   frequency. If it is left empty, the order is derived from the data
#   (subtypes sorted by descending total cell count).
my_levels2 <- c()
if (length(my_levels2) == 0) {
    subtype_totals <- tapply(pt$Freq, pt$cellType, sum)
    my_levels2 <- names(sort(subtype_totals, decreasing = TRUE))
}
pt$cellType <- factor(pt$cellType, levels = my_levels2)
pt$cellType

#pdf("plots/project_ABRPercentSubcluster_byReplicate_DATE.pdf")
ggplot(pt, aes(x = Sample, y = Percentage, fill = cellType, label = round(Percentage, 2))) +
    geom_bar(stat = "identity") +
    geom_text(size = 3, position = position_stack(vjust = 0.5))
#dev.off()

In [None]:
# Write the subclustered object, now carrying the subtype labels, to disk
#   Uncomment the readRDS() line to load it back in a later session
saveRDS(object = ABR, file = "project_CELLTYPESubclusters_DATE.rds")
#ABR <- readRDS("project_CELLTYPESubclusters_DATE.rds")