# Seurat integration of all samples

In [None]:
library_load <- suppressMessages(
    
    list(
        # Seurat 
        library(Seurat), 
        
        # Data 
        library(tidyverse), 
        
        # Plotting 
        library(ggplot2), 
        library(patchwork), 
        
        # Heat map 
        library(scales), 
        library(viridis), 
        library(ComplexHeatmap)
        
    )
)

In [None]:
# Fixed seed for the session; the same value is passed explicitly to the PCA,
# clustering and UMAP calls further down in this notebook
random_seed <- 42
set.seed(random_seed)

In [None]:
# Pin reticulate to the project's conda Python interpreter
# (the UMAP step below requests umap.method="umap-learn", which runs in Python)
reticulate::use_python("~/bin/miniconda3/envs/r.4.1.0-FD20200109SPLENO/bin/python")

In [None]:
# warn=-1 silences all R warnings for the rest of the session;
# future.globals.maxSize is raised to 10000 MiB (10000 * 1024^2 bytes)
options(warn=-1, future.globals.maxSize=10000*1024^2)

In [None]:
# Set working directory to project root
# NOTE: all relative paths below (source files, data/object/...) resolve against this directory
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# Project helpers that are not defined in this notebook
# (presumably theme_global_set(), color, dplot_1() and fplot_1() come from these - TODO confirm)
source("plotting_global.R")
source("bin/SeuratQC.R")
source("/research/peer/fdeckert/scFacility/script/dirFacility.R")

# Parameter settings

In [None]:
# Input files (relative to the project root): the combined object and the
# two per-condition objects that are read in the import section below
so_file <- "data/object/seurat_sct.rds"
so_nacl_file <- "data/object/seurat_sct_nacl.rds"
so_cpg_file <- "data/object/seurat_sct_cpg.rds"

# Output file: the integrated object is written here in the final save step
so_int_file <- "data/object/seurat_int.rds"

# Plotting theme applied to every ggplot created in this session
ggplot2::theme_set(theme_global_set()) # From project global source()

# Import Seurat object

In [None]:
# Per-condition objects; used below as the source of each condition's variable features
so_nacl <- readRDS(file=so_nacl_file)
so_cpg <- readRDS(file=so_cpg_file)

In [None]:
# Combined object, split by treatment and reduced to the two conditions
# that are integrated (list order: NaCl first, CpG second)
so <- SplitObject(readRDS(file=so_file), split.by="treatment")[c("NaCl", "CpG")]

# Integration features 

In [None]:
# Variable features stored on each per-condition object
hvf_nacl <- VariableFeatures(object=so_nacl)
hvf_cpg <- VariableFeatures(object=so_cpg)

In [None]:
# Anchor features for integration: every gene that is variable in either
# condition, NaCl genes first, duplicates removed
integration_features <- base::union(hvf_nacl, hvf_cpg)

# Control integration features 

In [None]:
#' Row-scaled heatmap of group-averaged expression
#'
#' Averages the "data" slot of the RNA assay per group, keeps the requested
#' features, and draws a row-scaled, row- and column-clustered heatmap.
#' The heatmap is captured as a grid grob so several heatmaps can be arranged
#' side by side.
#'
#' @param so Seurat object. Must contain an RNA assay, the metadata column
#'   named by `group_by`, and a `treatment` metadata column (the first value
#'   is used as the heatmap title).
#' @param features Character vector of features to show. `NULL` (default)
#'   uses `VariableFeatures(so)`.
#' @param show_rownames Whether to print feature names next to the rows.
#' @param cellheight Height of one heatmap cell, passed to `pheatmap`.
#' @param group_by Metadata column used to group cells before averaging.
#'   Defaults to the previously hard-coded "SCT_snn_res.0.8".
#' @return A grid grob containing the drawn heatmap.
#' @details Side effect: sets the ComplexHeatmap option `message` to FALSE
#'   for the rest of the session.
hvf_hm <- function(so, features=NULL, show_rownames=FALSE, cellheight=0.25, group_by="SCT_snn_res.0.8") {

    # Silence ComplexHeatmap's informational messages
    ht_opt$message <- FALSE

    # Average expression per group (features x groups)
    cnt <- AverageExpression(so, assay="RNA", slot="data", group.by=group_by)$RNA

    # Fall back to the object's own variable features
    if (is.null(features)) {
        features <- VariableFeatures(so)
    }

    # Subset to the requested features; drop=FALSE keeps a matrix even when
    # only a single feature matches
    cnt <- cnt[rownames(cnt) %in% features, , drop=FALSE]
    cnt <- na.omit(cnt)

    # Rows that are constant across groups turn into NaN under scale="row"
    # and make the row clustering fail, so they are removed up front
    row_sd <- vapply(seq_len(nrow(cnt)), function(i) stats::sd(cnt[i, ]), numeric(1))
    cnt <- cnt[!is.na(row_sd) & row_sd > 0, , drop=FALSE]

    # Clustering rows and columns needs at least two of each
    if (nrow(cnt) < 2 || ncol(cnt) < 2) {
        stop(
            "hvf_hm(): need at least two non-constant features and two groups to draw a clustered heatmap",
            call.=FALSE
        )
    }

    # Color scale
    color_ramp <- viridis(100, option="magma")

    # Title: treatment of the first cell; as.character() guards against the
    # column being stored as a factor
    title <- as.character(so$treatment[1])

    # Draw off-screen and capture the result as a grob
    grid.grabExpr(
        draw(
            ComplexHeatmap::pheatmap(
                mat=cnt,
                main=title,
                cluster_cols=TRUE,
                cluster_rows=TRUE,
                show_rownames=show_rownames,
                show_colnames=TRUE,
                cellwidth=15,
                cellheight=cellheight,
                treeheight_row=20,
                treeheight_col=20,
                color=color_ramp,
                border_color=NA,
                scale="row"
            )
        )
    )
}

## Variable features of SCTransform for each sample

In [None]:
# One heatmap per condition, each restricted to that condition's variable features
hvf_hm_nacl <- hvf_hm(so=so[["NaCl"]], features=hvf_nacl)
hvf_hm_cpg <- hvf_hm(so=so[["CpG"]], features=hvf_cpg)

In [None]:
# Show both heatmaps side by side
options(repr.plot.width=15, repr.plot.height=15)
grid::grid.draw(
    gridExtra::arrangeGrob(grobs=list(hvf_hm_nacl, hvf_hm_cpg), ncol=2)
)

# Variable features unique to a treatment condition

These are variable features of one condition that are absent from the other condition's object; they are recovered by running SCTransform on the combined data set. 

In [None]:
# Variable features of one condition that have no row in the other condition's object;
# few enough to label every row, hence the larger cell height
hvf_hm_nacl <- hvf_hm(
    so=so_nacl,
    features=hvf_nacl[is.na(match(hvf_nacl, rownames(so_cpg)))],
    show_rownames=TRUE,
    cellheight=10
)
hvf_hm_cpg <- hvf_hm(
    so=so_cpg,
    features=hvf_cpg[is.na(match(hvf_cpg, rownames(so_nacl)))],
    show_rownames=TRUE,
    cellheight=10
)

In [None]:
# Show both heatmaps side by side
options(repr.plot.width=15, repr.plot.height=21)
grid::grid.draw(
    gridExtra::arrangeGrob(grobs=list(hvf_hm_nacl, hvf_hm_cpg), ncol=2)
)

# SCTransform and integration 

In [None]:
# SCT-based integration of the NaCl and CpG objects held in the list `so`.
# All three steps use the same anchor feature set (integration_features,
# the union of both conditions' variable features):
#   1. prepare the SCT assay of every object for the anchor features
#   2. find integration anchors between the objects
#   3. integrate the data along those anchors
# The result is consumed below via assay="integrated".
so <- PrepSCTIntegration(object.list=so, assay="SCT", anchor.features=integration_features, verbose=FALSE)
anchors <- FindIntegrationAnchors(object.list=so, normalization.method="SCT", anchor.features=integration_features, verbose=FALSE)
so_int <- IntegrateData(anchorset=anchors, normalization.method="SCT", verbose=FALSE)

# Dimensional reduction and clustering 

In [None]:
# PCA on the integrated assay; all 100 computed components are used for the
# neighbour graph and for UMAP. random_seed is passed wherever a seed is accepted.
so_int <- RunPCA(so_int, npcs=100, assay="integrated", seed.use=random_seed, verbose=FALSE)
so_int <- FindNeighbors(so_int, dims=1:100, assay="integrated", verbose=FALSE)
# algorithm=4 selects Leiden clustering in Seurat; the resolution is left at its
# default, and the result is read below from the integrated_snn_res.0.8 column
so_int <- FindClusters(so_int, algorithm=4, random.seed=random_seed, verbose=FALSE)
# umap-learn is the Python implementation, run through reticulate
so_int <- RunUMAP(so_int, dims=1:100, umap.method="umap-learn", seed.use=random_seed, verbose=FALSE)

In [None]:
# dplot_1() is not defined in this notebook (presumably a plotting helper from
# the sourced project scripts - TODO confirm); here it is given the cluster column
options(repr.plot.width=15, repr.plot.height=10)
dplot_1(so_int, cluster="integrated_snn_res.0.8")

In [None]:
# fplot_1() is likewise defined outside this notebook - TODO confirm what it shows
options(repr.plot.width=15, repr.plot.height=10)
fplot_1(so_int)

# Cluster composition plots 

In [None]:
# Cell-level metadata of the integrated object, used by the composition plots below
meta <- so_int@meta.data

In [None]:
# Order clusters numerically on the x axis. Levels are taken from the cluster
# IDs that are actually present instead of assuming they are exactly 1..n,
# so 0-based or non-contiguous IDs are not silently turned into NA.
meta$integrated_snn_res.0.8 <- factor(
    as.character(meta$integrated_snn_res.0.8),
    levels=sort(unique(as.integer(as.character(meta$integrated_snn_res.0.8))))
)

## Cell type

In [None]:
# Per-cluster composition by main cell-type label; position="fill" normalises
# every bar to 1, so the bars show fractions rather than counts
options(repr.plot.width=15, repr.plot.height=5)
ggplot(meta, aes(x=integrated_snn_res.0.8, fill=main_labels)) + 
    geom_bar(position="fill") + 
    ggtitle("Ratio of cell types (main labels)") + xlab("") + ylab("") +
    scale_fill_manual(values=color$main_labels_haemosphere, na.value="gray")

## Treatment 

In [None]:
# Per-cluster composition by treatment; every bar is normalised to 1
options(repr.plot.width=15, repr.plot.height=5)
ggplot(data=meta, mapping=aes(x=integrated_snn_res.0.8, fill=treatment)) +
    geom_bar(position="fill") +
    labs(title="Ratio of treatment", x="", y="") +
    scale_fill_manual(values=color$treatment, na.value="gray")

## Tissue 

In [None]:
# Per-cluster composition by tissue of origin; every bar is normalised to 1
options(repr.plot.width=15, repr.plot.height=5)
ggplot(data=meta, mapping=aes(x=integrated_snn_res.0.8, fill=tissue)) +
    geom_bar(position="fill") +
    labs(title="Ratio of tissue source", x="", y="") +
    scale_fill_manual(values=color$tissue, na.value="gray")

# Save results

In [None]:
# Write the integrated object (including reductions and clusters) to so_int_file
saveRDS(object=so_int, file=so_int_file)

In [None]:
# so_int[["RNA"]] <- NULL
# so_int[["SCT"]] <- NULL
# seurat2dir(so=so_int, dir="data/object/seurat_int/", overwrite=TRUE)