# Install Libraries
Seurat installation instructions can be found [here](https://satijalab.org/seurat/articles/install.html).\
The Seurat PBMC 3k tutorial vignette can be found [here](https://satijalab.org/seurat/articles/pbmc3k_tutorial.html).

In [None]:
# Seurat parameters
rna_matrix <- "" # path to the h5 count matrix
genome <- "" # either hg38 or mm10

# Quality-control thresholds
min_features <- 200 # Seurat QC: minimum number of features
# NOTE(review): percent_MT is not referenced by any later step visible in this
# notebook -- confirm whether a filtering step is intended.
percent_MT <- 5 # Seurat QC: maximum % of mt
min_cells <- 3 # Seurat QC: minimum number of cells

# Normalization
normalization_method <- "LogNormalize"
normalization_scale_factor <- 10000

# Variable feature selection
variable_features_method <- "vst"
variable_features_num <- 2000

# PCA loadings: dimensions 1:dim_loadings_dim are displayed
dim_loadings_dim <- 2

# JackStraw
jackstraw_replicates <- 100 # number of replicate samplings to perform
jackstraw_score_dim <- 20 # dimensions to examine: 1:jackstraw_score_dim (default 1:20)
jackstraw_plot_dim <- 15 # dimensions to plot: 1:jackstraw_plot_dim (default 1:15)

# Heatmap
heatmap_dim <- 1 # dimensions to plot
heatmap_cells <- 500 # a list of cells to plot; if numeric, just plots the top cells
heatmap_balanced <- TRUE # plot an equal number of genes with both + and - scores

# UMAP / clustering
umap_dim <- 10 # number of PCs used to create the umap: 1:umap_dim (default 1:10)
umap_resolution <- 0.5 # resolution parameter; use a value below 1.0 to obtain fewer communities

prefix <- "prefix" # project name

# Terra specific parameters
table_name <- "demux_BH3KTLDMXY"
experiment_name <- "gm12878_fresh_RNA"

# Papermill specific parameters
papermill <- TRUE

# Jupyter notebook plot sizes
options(repr.plot.width = 15, repr.plot.height = 15)

In [None]:
# Coerce both flags to logical so that values such as "TRUE"/"FALSE"
# behave like real booleans further down.
heatmap_balanced <- as.logical(heatmap_balanced)
papermill <- as.logical(papermill)

In [None]:
# Install any dependency that is not available yet (same order as before).
invisible(lapply(
    c("Seurat", "hdf5r", "future", "logr"),
    function(pkg) {
        if (!requireNamespace(pkg, quietly = TRUE)) {
            install.packages(pkg)
        }
    }
))

# Attach the packages without their start-up messages.
suppressMessages({
    library(hdf5r)
    library(Seurat)
    library(future)
    library(logr)
})

# NOTE(review): this only creates a variable called `future.seed`; nothing
# visible in this notebook reads it -- confirm whether it was meant to
# configure the future package.
future.seed <- TRUE
# Evaluate futures in background R sessions.
plan("multisession")
options(logr.notes = FALSE)
# Fixed seed for reproducibility.
set.seed(1234)

In [None]:
# Function to save plots
dir.create("plots", showWarnings = FALSE)

#' Display a plot in the notebook and optionally save it as a PNG.
#'
#' The plot is always printed to the active (notebook) device. When
#' `papermill` is true it is additionally rendered to
#' `plots/<prefix>.rna.seurat.<name>.<genome>.png`, where `prefix` and
#' `genome` are read from the notebook-level parameters.
#'
#' @param name Short plot identifier used in the output file name.
#' @param plotObject Object to render with `print()`.
#' @param papermill If true, also write the PNG file.
#' @param wf Width factor applied to the 7 in / 480 px base size.
#' @param hf Height factor applied to the 7 in / 480 px base size.
#' @return `NULL`, invisibly; called for its side effects.
printPNG <- function(name, plotObject, papermill, wf = 1, hf = 1) {
    filename <- paste0(prefix, ".rna.seurat.", name, ".", genome)
    # Resize the inline notebook figure before drawing it.
    options(repr.plot.width = 7 * wf, repr.plot.height = 7 * hf)
    print(plotObject)
    if (papermill) {
        png(sprintf("plots/%s.png", filename), width = 480 * wf, height = 480 * hf)
        png_device <- dev.cur()
        # Always close the PNG device, even when rendering fails; otherwise
        # the stale device would stay current and capture every later plot.
        on.exit(dev.off(which = png_device), add = TRUE)
        print(plotObject)
    }
    invisible(NULL)
}

# Open the run log; its name is derived from the project prefix and genome.
logfile <- paste0(prefix, ".rna.seurat.logfile.", genome, ".txt")
lf <- log_open(logfile)

In [None]:
# Copy `path` into the current working directory with gsutil_cp() and
# return the file's base name (i.e. its local, relative path).
get_file <- function(path) {
    gsutil_cp(path, getwd())
    return(basename(path))
}

# When the notebook is not driven by papermill, look the experiment up in the
# data table named `table_name` and copy its h5 matrix into the working directory.
if (!papermill) {
    table <- avtable(table_name)
    # Extract the id column as a plain vector so the comparison yields a
    # logical vector rather than a one-column table.
    id_column <- sprintf("%s_id", table_name)
    h5_path <- table$h5_matrix[which(table[[id_column]] == experiment_name)]
    # Fail with a clear message instead of passing zero or several paths on.
    if (length(h5_path) != 1) {
        stop(
            "Expected exactly one row of '", table_name, "' with ", id_column,
            " == '", experiment_name, "', found ", length(h5_path),
            call. = FALSE
        )
    }
    rna_matrix <- get_file(h5_path)
}

In [None]:
# Read h5 matrix
#
# `data` holds the count matrix on success and NULL when reading failed.
# Warnings raised while reading are logged and the read carries on, so the
# file is read only once; the warning itself is still reported by R.

data <- tryCatch(
    withCallingHandlers(
        {
            log_print("Reading h5 matrix")

            # Code start to read rna_matrix
            data <- Read10X_h5(rna_matrix)

            # Code end to read rna_matrix

            log_print("SUCCESSFUL: Reading h5 matrix")
            # The block's value is its last expression; return() is only
            # valid inside a function.
            data
        },
        warning = function(cond) {
            log_print("WARNING: Reading h5 matrix")
            log_print(cond)
        }
    ),
    error = function(cond) {
        log_print("ERROR: Reading h5 matrix")
        log_print(cond)
        NULL
    }
)


In [None]:
# Create Seurat Object
#
# `rna` holds the Seurat object on success and NULL when creation failed.

rna <- tryCatch({
        log_print("Create Seurat Object")

        # Code start to create seurat object

        rna <- CreateSeuratObject(counts = data, project = prefix, min.cells = min_cells, min.features = min_features)

        # Code end to create seurat object

        log_print("SUCCESSFUL: Create Seurat Object")
        # The block's value is its last expression; return() is only valid
        # inside a function.
        rna
    },
    error = function(cond) {
        log_print("ERROR: Create Seurat Object")
        log_print(cond)
        NULL
    }
)

In [None]:
# Calculate percent of mitochondrial reads and draw the QC violin plot

tryCatch({
        log_print("Violin Plot")

        # Code start to create violin plot

        # Mitochondrial gene symbols are upper case ("MT-") in human
        # annotations and lower case ("mt-") in mouse ones, so pick the
        # pattern from the genome parameter (mm* genomes -> mouse).
        mt_pattern <- if (grepl("^mm", genome, ignore.case = TRUE)) "^mt-" else "^MT-"
        rna[["percent.mt"]] <- PercentageFeatureSet(rna, pattern = mt_pattern)
        obj <- VlnPlot(rna, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
        printPNG('violin', obj, papermill)

        # Code end to create violin plot

        log_print("SUCCESSFUL: Violin Plot")
    },
    error = function(cond) {
        log_print("ERROR: Violin Plot")
        log_print(cond)
    }
)

In [None]:
# Feature scatter plots: counts vs. percent.mt and counts vs. detected features

tryCatch(
    expr = {
        log_print("Feature Plots")

        # Build the two scatter plots and show them side by side
        count_vs_mt <- FeatureScatter(rna, feature1 = "nCount_RNA", feature2 = "percent.mt")
        count_vs_features <- FeatureScatter(rna, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
        obj <- count_vs_mt + count_vs_features
        printPNG("mitochondria", obj, papermill, wf = 2)

        log_print("SUCCESSFUL: Feature Plots")
    },
    error = function(err) {
        log_print("ERROR: Feature Plots")
        log_print(err)
    }
)

In [None]:
# Normalize the counts with the configured method and scale factor

tryCatch(
    expr = {
        log_print("Normalization")

        rna <- NormalizeData(
            rna,
            normalization.method = normalization_method,
            scale.factor = normalization_scale_factor
        )

        log_print("SUCCESSFUL: Normalization")
    },
    error = function(err) {
        log_print("ERROR: Normalization")
        log_print(err)
    }
)


In [None]:
# Select variable features and plot them

tryCatch(
    expr = {
        log_print("Finding Variable Features")

        rna <- FindVariableFeatures(
            rna,
            selection.method = variable_features_method,
            nfeatures = variable_features_num
        )

        # First 10 entries of the variable-feature list, used as point labels
        top10 <- head(VariableFeatures(rna), 10)

        # Same plot twice: once plain, once with the top genes labelled
        unlabelled <- VariableFeaturePlot(rna)
        labelled <- LabelPoints(plot = unlabelled, points = top10, repel = FALSE)
        obj <- unlabelled + labelled
        printPNG("features", obj, papermill, wf = 2)

        log_print("SUCCESSFUL: Finding Variable Features")
    },
    error = function(err) {
        log_print("ERROR: Finding Variable Features")
        log_print(err)
    }
)

In [None]:
# Scale the data for every gene in the object

tryCatch(
    expr = {
        log_print("Scaling")

        # Use all row names (genes), not only the variable features
        all.genes <- rownames(rna)
        rna <- ScaleData(rna, features = all.genes)

        log_print("SUCCESSFUL: Scaling")
    },
    error = function(err) {
        log_print("ERROR: Scaling")
        log_print(err)
    }
)


In [None]:
# Principal component analysis on the variable features, plus a loadings plot

tryCatch(
    expr = {
        log_print("Principal Component Analysis")

        # Run PCA restricted to the variable features
        pca_features <- VariableFeatures(object = rna)
        rna <- RunPCA(rna, features = pca_features)

        # Show the loadings of the first dim_loadings_dim components
        obj <- VizDimLoadings(rna, dims = 1:dim_loadings_dim, reduction = "pca")
        printPNG("dimLoadings", obj, papermill, wf = 1.5)

        log_print("SUCCESSFUL: Principal Component Analysis")
    },
    error = function(err) {
        log_print("ERROR: Principal Component Analysis")
        log_print(err)
    }
)

In [None]:
# Plot the cells in PCA space

tryCatch(
    expr = {
        log_print("PCA plot")

        obj <- DimPlot(rna, reduction = "pca")
        printPNG("pca", obj, papermill)

        log_print("SUCCESSFUL: PCA plot")
    },
    error = function(err) {
        log_print("ERROR: PCA plot")
        log_print(err)
    }
)

In [None]:
# Heatmap of the selected principal component(s)

tryCatch(
    expr = {
        log_print("Heatmap")

        obj <- DimHeatmap(
            rna,
            dims = heatmap_dim,
            cells = heatmap_cells,
            fast = FALSE,
            balanced = heatmap_balanced
        )
        printPNG("heatmap", obj, papermill)

        log_print("SUCCESSFUL: Heatmap")
    },
    error = function(err) {
        log_print("ERROR: Heatmap")
        log_print(err)
    }
)

In [None]:
# Jackstraw Plot

tryCatch({
        log_print("Jackstraw Plot")

        # Code start to create jackstraw plot

        # JackStraw() only computes its default of 20 dimensions unless told
        # otherwise, so ScoreJackStraw() fails for jackstraw_score_dim > 20.
        # Compute at least 20 (the previous behaviour) and more when the
        # score parameter asks for it.
        rna <- JackStraw(rna, num.replicate = jackstraw_replicates, dims = max(20, jackstraw_score_dim))
        rna <- ScoreJackStraw(rna, dims = 1:jackstraw_score_dim)
        obj <- JackStrawPlot(rna, dims = 1:jackstraw_plot_dim)
        printPNG('jackstraw', obj, papermill)

        # Code end to create jackstraw plot

        log_print("SUCCESSFUL: Jackstraw Plot")
    },
    error = function(cond) {
        log_print("ERROR: Jackstraw Plot")
        log_print(cond)
    }
)

In [None]:
# Elbow plot of the principal components

tryCatch(
    expr = {
        log_print("Elbow Plot")

        obj <- ElbowPlot(rna)
        printPNG("elbow", obj, papermill)

        log_print("SUCCESSFUL: Elbow Plot")
    },
    error = function(err) {
        log_print("ERROR: Elbow Plot")
        log_print(err)
    }
)

In [None]:
# Neighbour graph, clustering and UMAP embedding on the first umap_dim PCs

tryCatch(
    expr = {
        log_print("Run UMAP")

        # Build the neighbour graph, then cluster at the configured resolution
        rna <- FindNeighbors(rna, dims = 1:umap_dim)
        rna <- FindClusters(rna, resolution = umap_resolution)

        # Compute the embedding from the same principal components
        rna <- RunUMAP(rna, dims = 1:umap_dim)

        log_print("SUCCESSFUL: Run UMAP")
    },
    error = function(err) {
        log_print("ERROR: Run UMAP")
        log_print(err)
    }
)

In [None]:
# Plot the cells in UMAP space

tryCatch(
    expr = {
        log_print("UMAP Plot")

        obj <- DimPlot(rna, reduction = "umap")
        printPNG("umap", obj, papermill)

        log_print("SUCCESSFUL: UMAP Plot")
    },
    error = function(err) {
        log_print("ERROR: UMAP Plot")
        log_print(err)
    }
)

In [None]:
# Write the final outputs: zipped plots, the Seurat object, and close the log

# Archive everything that was saved under plots/
files2zip <- list.files("plots/", full.names = TRUE)
zip(zipfile = "plots.zip", files = files2zip)

# Persist the processed object
rds_path <- paste0(prefix, ".rna.seurat.rds.", genome, ".rds")
saveRDS(rna, file = rds_path)

log_close()