# Gene co-expression

The present study is based on the 10X scRNA-seq dataset published by the Allen Institute for Brain Science and publicly available at: https://portal.brain-map.org/atlases-and-data/RNA-seq/mouse-whole-cortex-and-hippocampus-10x. The data were clustered, and cluster names were assigned based on the Allen Institute proposal for cell type nomenclature (https://portal.brain-map.org/explore/classes/nomenclature). The topology of the taxonomy made it possible to determine the sex of the mouse from which the cells were isolated, the regions of interest, the cell classes (glutamatergic, GABAergic or non-neuronal) and the subclasses. This information was stored in the metadata table. The metadata was used to subset cells of the hippocampal region from the gene expression matrix. We selected 13 subclasses of hippocampal cells. The hippocampus gene count matrix was pre-processed in R v3.6.1 according to the Seurat v3.1.5 standard pre-processing workflow for quality control, normalization, and analysis of scRNA-seq data (cf. 10XHip2021_Pre.Processing). 

# Description
Here we describe how we obtained the matrices and figures for gene co-expression.

# Data availability

See the README for instructions on downloading the processed and clustered Seurat object '10XHip2021_seurat.object.rds'.

### Load data and required packages

In [None]:
# Required libraries
library(dplyr)
library(Seurat)
library(tidyverse)
library(ggplot2)
library(ComplexHeatmap)
library(circlize)

In [None]:
# Load the processed and clustered Seurat object (download instructions: cf. README)
hip <- readRDS(file = "10XHip2021_seurat.object.rds")

### Description of the protocol

1. Based on the average expression of each gene in one cell population
x = value of the average expression for gene 1 (Gr or Mr)
y = value of the average expression for gene 2 (from genes)

2. Add-One smoothing (additive smoothing or pseudo-count)
X = x + 1
Y = y + 1 
coupling.score = X*Y

3. Log scaling
coupling.score = log10(X*Y)
mat.gr = matrix of all coupling scores for Gr vs. genes
mat.mr = matrix of all coupling scores for Mr vs. genes

4. Normalisation/Scaling -> data scaled from 0 to 1
max.value = max(mat.gr,mat.mr)
min.value = min(mat.gr,mat.mr)
mat.gr <- (mat.gr-min.value)/(max.value-min.value)
mat.mr <- (mat.mr-min.value)/(max.value-min.value)

### Data preparation for co-expression matrices

In [None]:
# Cell populations, in the order used for the heatmap rows
cells <- c(
    "DG", "CA3", "CA2", "CA1-ProS",
    "Lamp5", "Vip", "Pvalb", "Sncg", "Sst",
    "Endo", "Micro-PVM", "Astro", "Oligo"
)

# DotPlot summary tables (one row per cell population) for Nr3c1 (Gr) and Nr3c2 (Mr)
gr <- DotPlot(object = hip, features = "Nr3c1")$data
mr <- DotPlot(object = hip, features = "Nr3c2")$data

In [None]:
# Colour scale of the "Coupling score" legend: six breakpoints spanning the
# normalised score range 0-1, from the lightest to the darkest shade
cols <- colorRamp2(
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
    colors = c('#edf8fb', '#bfd3e6', '#9ebcda', '#8c96c6', '#8856a7', '#810f7c')
)

### 1. Co-expression of Nr3c1 (Gr) and Nr3c2 (Mr)

In [None]:
# Average expression of Nr3c1 (Gr) and Nr3c2 (Mr) for each cell type.
# These vectors are only used for the row annotations below, so they must follow
# the order of `cells`.
# NOTE(review): the values are hard-coded rather than read from `gr` / `mr` —
# confirm they still match the Seurat object in use.
exp.gr <- c(0.8301064,0.4413372,0.2891454,1.7785128,
            0.6756368714889,0.8505117,0.5964793,0.2967290,0.3134967,
            2.6091935,2.0191561,1.8355961,1.5751184)
exp.mr <- c(7.5588002,4.8497023,12.6156684,5.9415883,
            1.7342106,3.0058529,1.2976207,3.6555187,0.6621729,
            2.1395174,1.0176402,2.2710803,1.0245950)

# Annotations average expression Nr3c1 (Gr) and Nr3c2 (Mr)
col_ha_gr <- colorRamp2(c(0, 15), c("white", "#08519c"))
col_ha_mr <- colorRamp2(c(0, 15), c("white", "#08519c"))
expgr <- rowAnnotation(Nr3c1 = exp.gr, col = list(Nr3c1 = col_ha_gr))
expmr <- rowAnnotation(Nr3c2 = exp.mr, col = list(Nr3c2 = col_ha_mr))
ha_exp <- rowAnnotation(
    Nr3c1 = exp.gr, Nr3c2 = exp.mr,
    col = list(Nr3c1 = col_ha_gr, Nr3c2 = col_ha_mr)
)

# Matrix: one column of coupling scores, one row per cell population.
# Score = log10(average expression of Nr3c1 x average expression of Nr3c2).
# NOTE(review): the protocol text describes add-one smoothing, (x + 1) * (y + 1),
# but no pseudo-count is applied here — confirm which of the two is intended.
genes <- c("Nr3c1 x Nr3c2")

# A population missing from the DotPlot data would previously abort the matrix
# filling with "replacement has length zero"; report it and keep NA instead.
absent <- setdiff(cells, intersect(gr$id, mr$id))
if (length(absent) > 0) {
    warning("Cell populations not found in the DotPlot data, left as NA: ",
            paste(absent, collapse = ", "), call. = FALSE)
}

# match() aligns the DotPlot rows with the order of `cells`
mat <- matrix(
    log10(gr$avg.exp[match(cells, gr$id)] * mr$avg.exp[match(cells, mr$id)]),
    nrow = length(cells), ncol = length(genes),
    dimnames = list(cells, genes)
)

# Normalisation: rescale the scores to the 0-1 range
max.value <- max(mat, na.rm = TRUE)
min.value <- min(mat, na.rm = TRUE)
mat <- (mat - min.value) / (max.value - min.value)

# Heatmaps (the object is only built here; it is not drawn by this assignment)
heatmap <- Heatmap(
    mat,
    name = "Coupling score",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    row_names_gp = gpar(fontsize = 20),
    column_names_gp = gpar(fontsize = 40),
    column_title_gp = gpar(fill = "white", col = "black", border = "white"),
    right_annotation = ha_exp
)

### 2. Co-expression of Nr3c1 (Gr) and Nr3c2 (Mr) with other steroid receptors

In [None]:
# Genes of interest: the other steroid receptors
genes <- c(
    "Ar", "Pgr",
    "Esr1", "Esr2"
)

# DotPlot summary table for these genes; an average expression of exactly 0 is
# recoded as NA (presumably so that the downstream log10 score is NA, not -Inf)
data <- DotPlot(object = hip, features = genes)$data
data$avg.exp <- replace(data$avg.exp, data$avg.exp == 0, NA)

In [None]:
# Coupling scores of Nr3c1 (Gr) and Nr3c2 (Mr) with each gene of interest:
# log10(receptor average expression x gene average expression), with one row per
# cell population and one column per gene.
# NOTE(review): the protocol text describes add-one smoothing, (x + 1) * (y + 1),
# but no pseudo-count is applied here (zeros are set to NA when `data` is built) —
# confirm which of the two is intended.

# A gene or population missing from the DotPlot data would previously abort the
# matrix filling with "replacement has length zero"; report it and keep NA instead.
absent <- c(setdiff(genes, data$features.plot), setdiff(cells, data$id))
if (length(absent) > 0) {
    warning("Not found in the DotPlot data, left as NA: ",
            paste(absent, collapse = ", "), call. = FALSE)
}

# Average expression of every gene (columns) in every cell population (rows);
# match() aligns the DotPlot rows with the order of `cells`
gene.exp <- vapply(genes, function(gene) {
    gene.rows <- data[data$features.plot == gene, ]
    gene.rows$avg.exp[match(cells, gene.rows$id)]
}, numeric(length(cells)))
gene.exp <- matrix(gene.exp, nrow = length(cells), dimnames = list(cells, genes))

# Matrix for GR (the receptor vector is recycled down the rows of gene.exp)
mat.gr <- log10(gr$avg.exp[match(cells, gr$id)] * gene.exp)

# Matrix for MR
mat.mr <- log10(mr$avg.exp[match(cells, mr$id)] * gene.exp)

# Normalisation: both matrices share one min/max so their scores stay comparable
max.value <- max(mat.gr, mat.mr, na.rm = TRUE)
min.value <- min(mat.gr, mat.mr, na.rm = TRUE)
mat.gr <- (mat.gr - min.value) / (max.value - min.value)
mat.mr <- (mat.mr - min.value) / (max.value - min.value)

# Heatmaps (the objects are only built here; they are not drawn by these assignments)
heatmap.gr <- Heatmap(
    mat.gr,
    name = "Coupling score", column_title = "Nr3c1",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 40),
    column_names_gp = gpar(fontsize = 40)
)

heatmap.mr <- Heatmap(
    mat.mr,
    name = "Coupling score", column_title = "Nr3c2",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 40),
    column_names_gp = gpar(fontsize = 40)
)

### 3. Co-expression of Nr3c1 (Gr) and Nr3c2 (Mr) with coregulators

#### 3.1 Co-expression of Nr3c1 (Gr) and Nr3c2 (Mr) with all coregulators

In [None]:
# Genes of interest: the full set of coregulators
genes <- c(
    'Brd8', 'Chd9', 'Ep300', 'Jmjd1c', 'Kat2b', 'Kat5',
    'Kmt2d', 'Med1', 'Ncoa1', 'Ncoa2', 'Ncoa3', 'Ncoa4',
    'Ncoa6', 'Ncor1', 'Ncor2', 'Nr0b1', 'Nrip1', 'Pias2',
    'Ppargc1a', 'Prox1', 'Rad9a', 'Rela', 'Tgfb1i1', 'Trim24'
)

# DotPlot summary table for these genes; an average expression of exactly 0 is
# recoded as NA (presumably so that the downstream log10 score is NA, not -Inf)
data <- DotPlot(object = hip, features = genes)$data
data$avg.exp <- replace(data$avg.exp, data$avg.exp == 0, NA)

In [None]:
# Coupling scores of Nr3c1 (Gr) and Nr3c2 (Mr) with each coregulator:
# log10(receptor average expression x gene average expression), with one row per
# cell population and one column per gene.
# NOTE(review): the protocol text describes add-one smoothing, (x + 1) * (y + 1),
# but no pseudo-count is applied here (zeros are set to NA when `data` is built) —
# confirm which of the two is intended.

# A gene or population missing from the DotPlot data would previously abort the
# matrix filling with "replacement has length zero"; report it and keep NA instead.
absent <- c(setdiff(genes, data$features.plot), setdiff(cells, data$id))
if (length(absent) > 0) {
    warning("Not found in the DotPlot data, left as NA: ",
            paste(absent, collapse = ", "), call. = FALSE)
}

# Average expression of every gene (columns) in every cell population (rows);
# match() aligns the DotPlot rows with the order of `cells`
gene.exp <- vapply(genes, function(gene) {
    gene.rows <- data[data$features.plot == gene, ]
    gene.rows$avg.exp[match(cells, gene.rows$id)]
}, numeric(length(cells)))
gene.exp <- matrix(gene.exp, nrow = length(cells), dimnames = list(cells, genes))

# Matrix for GR (the receptor vector is recycled down the rows of gene.exp)
mat.gr <- log10(gr$avg.exp[match(cells, gr$id)] * gene.exp)

# Matrix for MR
mat.mr <- log10(mr$avg.exp[match(cells, mr$id)] * gene.exp)

# Normalisation: both matrices share one min/max so their scores stay comparable
max.value <- max(mat.gr, mat.mr, na.rm = TRUE)
min.value <- min(mat.gr, mat.mr, na.rm = TRUE)
mat.gr <- (mat.gr - min.value) / (max.value - min.value)
mat.mr <- (mat.mr - min.value) / (max.value - min.value)

# Heatmaps (the objects are only built here; they are not drawn by these assignments)
heatmap.gr <- Heatmap(
    mat.gr,
    name = "Coupling score", column_title = "Nr3c1",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 30),
    column_names_gp = gpar(fontsize = 30)
)

heatmap.mr <- Heatmap(
    mat.mr,
    name = "Coupling score", column_title = "Nr3c2",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 30),
    column_names_gp = gpar(fontsize = 30)
)

#### 3.2 Co-expression of Nr3c1 (Gr) and Nr3c2 (Mr) with a subset of coregulators

In [None]:
# Genes of interest: a subset of the coregulators
genes <- c(
    'Med1',
    'Ncoa1', 'Ncoa2', 'Ncoa3', 'Ncoa4', 'Ncoa6',
    'Ncor1', 'Ncor2'
)

# DotPlot summary table for these genes; an average expression of exactly 0 is
# recoded as NA (presumably so that the downstream log10 score is NA, not -Inf)
data <- DotPlot(object = hip, features = genes)$data
data$avg.exp <- replace(data$avg.exp, data$avg.exp == 0, NA)

In [None]:
# Coupling scores of Nr3c1 (Gr) and Nr3c2 (Mr) with each selected coregulator:
# log10(receptor average expression x gene average expression), with one row per
# cell population and one column per gene.
# NOTE(review): the protocol text describes add-one smoothing, (x + 1) * (y + 1),
# but no pseudo-count is applied here (zeros are set to NA when `data` is built) —
# confirm which of the two is intended.

# A gene or population missing from the DotPlot data would previously abort the
# matrix filling with "replacement has length zero"; report it and keep NA instead.
absent <- c(setdiff(genes, data$features.plot), setdiff(cells, data$id))
if (length(absent) > 0) {
    warning("Not found in the DotPlot data, left as NA: ",
            paste(absent, collapse = ", "), call. = FALSE)
}

# Average expression of every gene (columns) in every cell population (rows);
# match() aligns the DotPlot rows with the order of `cells`
gene.exp <- vapply(genes, function(gene) {
    gene.rows <- data[data$features.plot == gene, ]
    gene.rows$avg.exp[match(cells, gene.rows$id)]
}, numeric(length(cells)))
gene.exp <- matrix(gene.exp, nrow = length(cells), dimnames = list(cells, genes))

# Matrix for GR (the receptor vector is recycled down the rows of gene.exp)
mat.gr <- log10(gr$avg.exp[match(cells, gr$id)] * gene.exp)

# Matrix for MR
mat.mr <- log10(mr$avg.exp[match(cells, mr$id)] * gene.exp)

# Normalisation: both matrices share one min/max so their scores stay comparable
max.value <- max(mat.gr, mat.mr, na.rm = TRUE)
min.value <- min(mat.gr, mat.mr, na.rm = TRUE)
mat.gr <- (mat.gr - min.value) / (max.value - min.value)
mat.mr <- (mat.mr - min.value) / (max.value - min.value)

# Heatmaps (the objects are only built here; they are not drawn by these assignments)
heatmap.gr <- Heatmap(
    mat.gr,
    name = "Coupling score", column_title = "Nr3c1",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 40),
    column_names_gp = gpar(fontsize = 40)
)

heatmap.mr <- Heatmap(
    mat.mr,
    name = "Coupling score", column_title = "Nr3c2",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 40),
    column_names_gp = gpar(fontsize = 40)
)

### 4. Co-expression of Nr3c1 (Gr) and Nr3c2 (Mr) with neuropeptides and neurotransmitter receptors 

#### 4.1 Only in glutamatergic neurons

In [None]:
# Restrict the heatmap rows to the glutamatergic populations
cells <- c(
    "DG",
    "CA3",
    "CA2",
    "CA1-ProS"
)

In [None]:
# Genes of interest, grouped by receptor family
receptor.families <- list(
    adrenergic = c("Adra1a", "Adra1b", "Adra1d", "Adra2a", "Adra2c", "Adrb1", "Adrb2", "Adrb3"),
    dopaminergic = c("Drd1", "Drd2", "Drd3", "Drd4", "Drd5"),
    serotonin = c("Htr3a", "Htr3b", "Htr1a", "Htr1b", "Htr1d", "Htr2a", "Htr2b", "Htr2c",
                  "Htr4", "Htr5a", "Htr6", "Htr7"),
    cholinergic = c("Chrna1", "Chrna2", "Chrna3", "Chrna4", "Chrna5", "Chrna7", "Chrna9",
                    "Chrna10", "Chrnb1", "Chrnb2", "Chrnb3", "Chrnb4", "Chrnd", "Chrne",
                    "Chrng", "Chrm1", "Chrm2", "Chrm3", "Chrm4", "Chrm5"),
    npy = c("Npy1r", "Npy2r", "Npy5r", "Npy6r"),
    somatostatin = c("Sstr1", "Sstr2", "Sstr3", "Sstr4", "Sstr5"),
    vip = c("Vipr1", "Vipr2", "Adcyap1r1"),
    tachykinin = c("Tacr1", "Tacr2", "Tacr3"),
    angiotensin = c("Agtr2"),
    vasopressin.oxytocin = c("Avpr1a", "Avpr1b", "Avpr2", "Oxtr"),
    opioid = c("Oprd1", "Oprm1", "Oprk1", "Oprl1"),
    trh = c("Trhr", "Trhr2"),
    relaxin = c("Rxfp1", "Rxfp2", "Rxfp3"),
    neurotensin = c("Ntsr1", "Ntsr2"),
    crh = c("Crhr1", "Crhr2")
)

# Flatten to a plain character vector, keeping the family order above
genes <- unlist(receptor.families, use.names = FALSE)

# DotPlot summary table for these genes; an average expression of exactly 0 is
# recoded as NA (presumably so that the downstream log10 score is NA, not -Inf)
data <- DotPlot(object = hip, features = genes)$data
data$avg.exp <- replace(data$avg.exp, data$avg.exp == 0, NA)

In [None]:
# Coupling scores of Nr3c1 (Gr) and Nr3c2 (Mr) with each receptor gene:
# log10(receptor average expression x gene average expression), with one row per
# cell population and one column per gene.
# NOTE(review): the protocol text describes add-one smoothing, (x + 1) * (y + 1),
# but no pseudo-count is applied here (zeros are set to NA when `data` is built) —
# confirm which of the two is intended.

# A gene or population missing from the DotPlot data would previously abort the
# matrix filling with "replacement has length zero"; report it and keep NA instead.
absent <- c(setdiff(genes, data$features.plot), setdiff(cells, data$id))
if (length(absent) > 0) {
    warning("Not found in the DotPlot data, left as NA: ",
            paste(absent, collapse = ", "), call. = FALSE)
}

# Average expression of every gene (columns) in every cell population (rows);
# match() aligns the DotPlot rows with the order of `cells`
gene.exp <- vapply(genes, function(gene) {
    gene.rows <- data[data$features.plot == gene, ]
    gene.rows$avg.exp[match(cells, gene.rows$id)]
}, numeric(length(cells)))
gene.exp <- matrix(gene.exp, nrow = length(cells), dimnames = list(cells, genes))

# Matrix for GR (the receptor vector is recycled down the rows of gene.exp)
mat.gr <- log10(gr$avg.exp[match(cells, gr$id)] * gene.exp)

# Matrix for MR
mat.mr <- log10(mr$avg.exp[match(cells, mr$id)] * gene.exp)

# Normalisation: both matrices share one min/max so their scores stay comparable
max.value <- max(mat.gr, mat.mr, na.rm = TRUE)
min.value <- min(mat.gr, mat.mr, na.rm = TRUE)
mat.gr <- (mat.gr - min.value) / (max.value - min.value)
mat.mr <- (mat.mr - min.value) / (max.value - min.value)

# Heatmaps (the objects are only built here; they are not drawn by these assignments)
heatmap.gr <- Heatmap(
    mat.gr,
    name = "Coupling score", column_title = "Nr3c1",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 30),
    column_names_gp = gpar(fontsize = 30)
)

heatmap.mr <- Heatmap(
    mat.mr,
    name = "Coupling score", column_title = "Nr3c2",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 30),
    column_names_gp = gpar(fontsize = 30)
)

#### 4.2 In GABAergic neurons and non-neuronal cells

In [None]:
# Restrict the heatmap rows to the GABAergic and non-neuronal populations
cells <- c(
    "Lamp5", "Vip", "Pvalb", "Sncg", "Sst",
    "Endo", "Micro-PVM", "Astro", "Oligo"
)

In [None]:
# Genes of interest, grouped by receptor family
receptor.families <- list(
    adrenergic = c("Adra1a", "Adra1b", "Adra1d", "Adra2a", "Adra2c", "Adrb1", "Adrb2", "Adrb3"),
    dopaminergic = c("Drd1", "Drd2", "Drd3", "Drd4", "Drd5"),
    serotonin = c("Htr3a", "Htr3b", "Htr1a", "Htr1b", "Htr1d", "Htr2a", "Htr2b", "Htr2c",
                  "Htr4", "Htr5a", "Htr6", "Htr7"),
    cholinergic = c("Chrna1", "Chrna2", "Chrna3", "Chrna4", "Chrna5", "Chrna7", "Chrna9",
                    "Chrna10", "Chrnb1", "Chrnb2", "Chrnb3", "Chrnb4", "Chrnd", "Chrne",
                    "Chrng", "Chrm1", "Chrm2", "Chrm3", "Chrm4", "Chrm5"),
    npy = c("Npy1r", "Npy2r", "Npy5r", "Npy6r"),
    somatostatin = c("Sstr1", "Sstr2", "Sstr3", "Sstr4", "Sstr5"),
    vip = c("Vipr1", "Vipr2", "Adcyap1r1"),
    tachykinin = c("Tacr1", "Tacr2", "Tacr3"),
    angiotensin = c("Agtr2"),
    vasopressin.oxytocin = c("Avpr1a", "Avpr1b", "Avpr2", "Oxtr"),
    opioid = c("Oprd1", "Oprm1", "Oprk1", "Oprl1"),
    trh = c("Trhr", "Trhr2"),
    relaxin = c("Rxfp1", "Rxfp2", "Rxfp3"),
    neurotensin = c("Ntsr1", "Ntsr2"),
    crh = c("Crhr1", "Crhr2")
)

# Flatten to a plain character vector, keeping the family order above
genes <- unlist(receptor.families, use.names = FALSE)

# DotPlot summary table for these genes; an average expression of exactly 0 is
# recoded as NA (presumably so that the downstream log10 score is NA, not -Inf)
data <- DotPlot(object = hip, features = genes)$data
data$avg.exp <- replace(data$avg.exp, data$avg.exp == 0, NA)

In [None]:
# Coupling scores of Nr3c1 (Gr) and Nr3c2 (Mr) with each receptor gene:
# log10(receptor average expression x gene average expression), with one row per
# cell population and one column per gene.
# NOTE(review): the protocol text describes add-one smoothing, (x + 1) * (y + 1),
# but no pseudo-count is applied here (zeros are set to NA when `data` is built) —
# confirm which of the two is intended.

# A gene or population missing from the DotPlot data would previously abort the
# matrix filling with "replacement has length zero"; report it and keep NA instead.
absent <- c(setdiff(genes, data$features.plot), setdiff(cells, data$id))
if (length(absent) > 0) {
    warning("Not found in the DotPlot data, left as NA: ",
            paste(absent, collapse = ", "), call. = FALSE)
}

# Average expression of every gene (columns) in every cell population (rows);
# match() aligns the DotPlot rows with the order of `cells`
gene.exp <- vapply(genes, function(gene) {
    gene.rows <- data[data$features.plot == gene, ]
    gene.rows$avg.exp[match(cells, gene.rows$id)]
}, numeric(length(cells)))
gene.exp <- matrix(gene.exp, nrow = length(cells), dimnames = list(cells, genes))

# Matrix for GR (the receptor vector is recycled down the rows of gene.exp)
mat.gr <- log10(gr$avg.exp[match(cells, gr$id)] * gene.exp)

# Matrix for MR
mat.mr <- log10(mr$avg.exp[match(cells, mr$id)] * gene.exp)

# Normalisation: both matrices share one min/max so their scores stay comparable
max.value <- max(mat.gr, mat.mr, na.rm = TRUE)
min.value <- min(mat.gr, mat.mr, na.rm = TRUE)
mat.gr <- (mat.gr - min.value) / (max.value - min.value)
mat.mr <- (mat.mr - min.value) / (max.value - min.value)

# Heatmaps (the objects are only built here; they are not drawn by these assignments)
heatmap.gr <- Heatmap(
    mat.gr,
    name = "Coupling score", column_title = "Nr3c1",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 30),
    column_names_gp = gpar(fontsize = 30)
)

heatmap.mr <- Heatmap(
    mat.mr,
    name = "Coupling score", column_title = "Nr3c2",
    col = cols, na_col = "white",
    rect_gp = gpar(col = "white", lwd = 1),
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_names_side = "left",
    column_title_gp = gpar(fill = "white", col = "black", border = "white", fontsize = 40),
    row_names_gp = gpar(fontsize = 30),
    column_names_gp = gpar(fontsize = 30)
)