# Standard pre-processing

The present study is based on the 10X scRNA-seq dataset published by the Allen Institute for Brain Science and publicly available at: https://portal.brain-map.org/atlases-and-data/RNA-seq/mouse-whole-cortex-and-hippocampus-10x. The data were clustered, and cluster names were assigned based on the Allen Institute proposal for cell type nomenclature (https://portal.brain-map.org/explore/classes/nomenclature). The topology of the taxonomy made it possible to define the sex of the mouse from which the cells were isolated, the regions of interest, the cell classes (glutamatergic, GABAergic or Non-Neuronal) and the subclasses. This information was stored in the metadata table.

# Description

Here we describe how we used the metadata to subset the cells of the hippocampus region from the gene expression matrix. We selected 13 subclasses of hippocampal cells. The resulting hippocampus gene count matrix consisted of 77001 cells and 26139 genes, and was pre-processed in R v3.6.1 following the Seurat v3.1.5 standard pre-processing workflow for quality control, normalization, and analysis of scRNA-seq data, as described below. We performed principal component analysis (PCA) and selected the top 50 PCs as input for the t-distributed stochastic neighbor embedding (t-SNE) dimensional reduction.

# Data availability

The matrix and the metadata are available for download at: https://portal.brain-map.org/atlases-and-data/RNA-seq/mouse-whole-cortex-and-hippocampus-10x

# Required packages

In [None]:
# Required libraries
library(vroom)
library(dplyr)
library(Seurat)
library(tidyverse)
library(ggplot2)

# 1. Create a matrix with only cells from the hippocampus

In [None]:
# Read the cell metadata and the expression matrix (CSV files downloaded from
# the Allen Brain Map portal) from the working directory.
# Note: the object named `matrix` shares its name with base::matrix(); calls
# to the function still resolve, but the name is easy to misread.
metadata <- vroom(file = "metadata_10X.csv")
matrix <- vroom(file = "matrix_10X.csv")

### 1.1 Prepare matrix

In [None]:
# Preview the first rows of the first four columns of both tables
head(matrix[1:4])
head(metadata[1:4])

# Drop any existing row names, then use the cell identifiers stored in the
# "sample_name" column as row names
matrix <- column_to_rownames(remove_rownames(matrix), var = "sample_name")

### 1.2 Subset the hippocampal cells from the matrix using metadata

In [None]:
# Names of the cells whose metadata region label is the hippocampus ("HIP").
# `%in%` is used instead of `==` so that a missing region_label gives FALSE
# rather than NA: an NA in the logical index would insert NA into HIP and, in
# turn, all-NA rows into every table subset with HIP.
HIP <- metadata$sample_name[metadata$region_label %in% "HIP"]

# Keep only the hippocampal cells (rows) of the full matrix; drop = FALSE
# guarantees the result stays a table whatever its number of columns
matrix_hip <- matrix[HIP, , drop = FALSE]
dim(matrix_hip)  # Dimensions should be 77434 31053

# Replace underscores with dashes in the cell names (row names)
rownames(matrix_hip) <- gsub("_", "-", rownames(matrix_hip))

# Build the Seurat object. matrix_hip has one row per cell, so it is
# transposed to pass the counts with cells as columns.
hip.seurat <- CreateSeuratObject(counts = t(matrix_hip), min.cells = 1, min.features = 100)

### 1.3 Prepare and subset the metadata before proceeding to AddMetaData

In [None]:
# Move the cell identifiers ("sample_name") into the row names
metadata <- column_to_rownames(metadata, var = "sample_name")

# Keep only the metadata rows of the hippocampal cells listed in HIP
metadata_hip <- metadata[HIP, ]

# Replace underscores with dashes in the cell names (row names), as was done
# for the rows of matrix_hip
rownames(metadata_hip) <- gsub(
  pattern = "_",
  replacement = "-",
  x = rownames(metadata_hip),
  fixed = TRUE
)

### 1.4 AddMetaData

In [None]:
# Each annotation is passed as a vector named by cell (row names of
# metadata_hip) and stored under a short column name in the Seurat object.

# Class -> "class"
classLabels <- setNames(metadata_hip$class_label, row.names(metadata_hip))
hip.seurat <- AddMetaData(
  object = hip.seurat, metadata = classLabels, col.name = 'class'
)

# Subclass -> "subclass"
subclassLabels <- setNames(metadata_hip$subclass_label, row.names(metadata_hip))
hip.seurat <- AddMetaData(
  object = hip.seurat, metadata = subclassLabels, col.name = 'subclass'
)

# Cluster -> "cluster"
clusterLabels <- setNames(metadata_hip$cluster_label, row.names(metadata_hip))
hip.seurat <- AddMetaData(
  object = hip.seurat, metadata = clusterLabels, col.name = 'cluster'
)

# Sex of the donor -> "sex"
sexLabels <- setNames(metadata_hip$donor_sex_label, row.names(metadata_hip))
hip.seurat <- AddMetaData(
  object = hip.seurat, metadata = sexLabels, col.name = 'sex'
)

### 1.5 Save files

In [None]:
# Write the unprocessed Seurat object to an RDS file
saveRDS(object = hip.seurat, file = "/path/file_name.rds")

# Write the hippocampal metadata as a tab-separated table, unquoted, with a
# header line and with the cell names as row names
write.table(
  x = metadata_hip,
  file = "/path/file_name.tsv",
  sep = '\t',
  quote = FALSE,
  row.names = TRUE,
  col.names = TRUE
)

# 2. Quality control, normalization, and dimensional reduction of the hippocampus dataset

### 2.1 Load previously obtained unprocessed seurat object and metadata (see above)

In [None]:
# Reload the unprocessed Seurat object saved in section 1.5
hip.seurat <- readRDS("/path/file_name.rds")

# Reload the hippocampal metadata table from the same placeholder path it was
# written to ("/path/file_name.tsv"; the leading slash was previously missing).
# read.delim() is used instead of read.delim2(): write.table() wrote "." as
# the decimal mark, whereas read.delim2() expects ",".
# The file was written with row names, so its header has one field fewer than
# the data lines and the first column is read back as the row names.
md <- read.delim("/path/file_name.tsv", sep = "\t")

# Check dimensions, rownames (genes) and colnames (cells) of the matrix
dim(hip.seurat)
rownames(hip.seurat)[1:3]
colnames(hip.seurat)[1:3]

### 2.2 Clean hippocampus subset

In [None]:
# Expose the cell identifiers (row names of md) as a regular column so the
# metadata can be used to select cells
md$sample_name <- rownames(md)
head(md)

# Number of cells per subclass before filtering
table(md$subclass_label)

# Subclasses we are NOT interested in (including cells with an empty label)
excluded_subclasses <- c(
  "", "CR", "CT SUB", "L2 IT RHP", "L2/3 IT CTX-1", "L2/3 IT ENTl",
  "L2/3 IT PPP", "L4/5 IT CTX", "L5 IT TPE-ENT", "L5 PT CTX", "L6 CT CTX",
  "L6 IT CTX", "L6b CTX", "NP SUB", "SMC-Peri", "Sst Chodl", "SUB-ProS",
  "V3d", "VLMC"
)

# Keep every cell whose subclass is known and not in the exclusion list.
# A missing label is dropped explicitly: comparing it with `!=` would give NA
# and put NA cell names into the selection.
keep_cell <- !is.na(md$subclass_label) &
  !(md$subclass_label %in% excluded_subclasses)
selected_cells <- md$sample_name[keep_cell]

# Subset the seurat object:
hip.seurat <- subset(hip.seurat, cells = selected_cells)
dim(hip.seurat) # 433 cells were excluded; the final dimensions should be 26139/77001

# What are the new levels of 'subclass':
table(hip.seurat@meta.data$subclass)
# Astro, Oligo, Endo, Micro-PVM, Lamp5, Pvalb, Sncg, Sst, Vip, CA1-ProS, CA2, CA3, DG

### 2.3 QC metrics

In [None]:
# Violin plots of the two QC metrics (features per cell and counts per cell),
# drawn side by side
VlnPlot(
  object = hip.seurat,
  features = c("nFeature_RNA", "nCount_RNA"),
  cols = 'white',
  ncol = 2
)

# Scatter plot of counts per cell against features per cell
FeatureScatter(
  object = hip.seurat,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA",
  cols = 'black'
)

### 2.4 Normalisation

In [None]:
# Normalise the counts with the "LogNormalize" method and a scale factor of
# 10000
hip.seurat <- NormalizeData(
  object = hip.seurat,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)

In [None]:
# Select the 2000 most variable features (genes) with the "vst" method
hip.seurat <- FindVariableFeatures(
  object = hip.seurat,
  selection.method = "vst",
  nfeatures = 2000
)

In [None]:
# First ten entries of the variable-feature list
top10 <- head(VariableFeatures(hip.seurat), n = 10)

# Variable-feature plot without labels (plot1) and with the top ten genes
# labelled (plot2). Both plots are stored; neither is printed here.
plot1 <- VariableFeaturePlot(hip.seurat)
plot2 <- LabelPoints(
  plot = plot1,
  points = top10,
  repel = TRUE,
  xnudge = 0,
  ynudge = 0
)

### 2.5 Scaling and linear dimensional reduction

In [None]:
# Scale the data for every gene in the object, not only the variable features
all.genes <- rownames(x = hip.seurat)
hip.seurat <- ScaleData(object = hip.seurat, features = all.genes)

In [None]:
# Principal component analysis computed on the variable features only
hip.seurat <- RunPCA(
  object = hip.seurat,
  features = VariableFeatures(object = hip.seurat)
)

In [None]:
# Plot of the cells in PCA space, all in black. The plot is stored in `PCA`
# and is not printed here.
PCA <- DimPlot(
  object = hip.seurat,
  reduction = "pca",
  cols = 'black'
)

### 2.6 Clustering

In [None]:
# Build the neighbour graph on PCs 1 to 50, then cluster at resolution 0.5
hip.seurat <- FindNeighbors(object = hip.seurat, dims = 1:50)
hip.seurat <- FindClusters(object = hip.seurat, resolution = 0.5)

# Switch the active identities to the "subclass" metadata column, so that
# cells are labelled by subclass
Idents(object = hip.seurat) <- "subclass"

### 2.7 t-SNE and final object

In [None]:
# t-SNE embedding computed from the PCA reduction (perplexity 30, at most
# 1000 iterations).
# NOTE(review): this call uses PCs 1 to 30, whereas the Description section
# states that the top 50 PCs were used as t-SNE input and FindNeighbors() uses
# dims = 1:50. Confirm which value produced the published object before
# changing either.
hip.seurat <- RunTSNE(
  object = hip.seurat,
  reduction = "pca",
  dims = 1:30,
  perplexity = 30,
  max_iter = 1000
)

In [None]:
# Colour assigned to each of the 13 retained subclasses
subclass_palette <- c(
  'CA1-ProS' = 'skyblue',
  'CA2' = 'lightseagreen',
  'CA3' = 'steelblue',
  'DG' = 'slategray2',
  'Lamp5' = 'violetred4',
  'Pvalb' = 'mediumvioletred',
  'Sncg' = 'palevioletred1',
  'Sst' = 'pink1',
  'Vip' = 'palevioletred3',
  'Endo' = 'forestgreen',
  'Micro-PVM' = 'yellowgreen',
  'Oligo' = 'orange2',
  'Astro' = 'sienna3'
)

# Unlabelled t-SNE plot coloured by subclass, with both axes limited to
# [-40, 40]
DimPlot(
  object = hip.seurat,
  reduction = "tsne",
  label = FALSE,
  pt.size = 0.5,
  cols = subclass_palette
) +
  xlim(-40, 40) +
  ylim(-40, 40)

In [None]:
# Write the processed Seurat object to an RDS file.
# NOTE(review): the placeholder path is identical to the one used for the
# unprocessed object in section 1.5; use a different file name to keep both.
saveRDS(object = hip.seurat, file = "/path/file_name.rds")

The final processed and clustered Seurat object is available for download at: