# Install Libraries
Installation instructions for Seurat can be found [here](https://satijalab.org/seurat/articles/install.html).\
The Seurat guided clustering vignette is available [here](https://satijalab.org/seurat/articles/pbmc3k_tutorial.html).

In [None]:
# ---- Seurat parameters ----
rna_matrix <- "path-to-matrix.h5"  # h5 count matrix
genome <- "genome-name"            # either hg38 or mm10

# Quality-control thresholds
min_features <- 200  # Seurat QC for number of min features
percent_MT <- 5      # Seurat QC for max % of mt
min_cells <- 3       # Seurat QC for min number of cells

# Normalization
normalization_method <- "LogNormalize"
normalization_scale_factor <- 10000

# Variable feature selection
variable_features_method <- "vst"
variable_features_num <- 2000

# PCA loadings plot
dim_loadings_dim <- 2  # Number of dimensions to display

# JackStraw
jackstraw_replicates <- 100  # Number of replicate samplings to perform
jackstraw_score_dim <- 20    # Dimensions to examine; default gives 1:jackstraw_score_dim = 1:20
jackstraw_plot_dim <- 15     # Dimensions to plot; default gives 1:jackstraw_plot_dim = 1:15

# PCA heatmap
heatmap_dim <- 1          # Dimensions to plot
heatmap_cells <- 500      # A list of cells to plot. If numeric, just plots the top cells.
heatmap_balanced <- TRUE  # Plot an equal number of genes with both + and - scores.

# Clustering / UMAP
umap_dim <- 10          # Number of PCs used to create the UMAP; default gives 1:umap_dim = 1:10
umap_resolution <- 0.5  # Resolution parameter; use a value below 1.0 to obtain fewer communities

prefix <- "prefix"  # project name

# ---- Terra specific parameters ----
table_name <- "demux_BH3KTLDMXY"
experiment_name <- "gm12878_fresh_RNA"

# ---- Papermill specific parameters ----
papermill <- FALSE

# ---- Jupyter notebook plot sizes ----
options(repr.plot.width = 15, repr.plot.height = 15)

In [None]:
# Coerce the flag parameters to logical so they can be used directly in `if`
# conditions and passed as logical arguments further down.
heatmap_balanced <- as.logical(heatmap_balanced)
papermill <- as.logical(papermill)

In [None]:
# Install whichever of the required packages is not available yet.
invisible(lapply(c("Seurat", "hdf5r", "future"), function(pkg_name) {
    if (!requireNamespace(pkg_name, quietly = TRUE)) {
        install.packages(pkg_name)
    }
}))

# Attach the packages without their start-up chatter.
invisible(lapply(c("hdf5r", "Seurat", "future"), function(pkg_name) {
    suppressMessages(library(pkg_name, character.only = TRUE))
}))

# Run future-aware steps in background R sessions and fix the RNG seed.
plan("multisession")
set.seed(1234)

In [None]:
# Function to save plots
dir.create("plots", showWarnings = FALSE)

#' Show a plot in the notebook and, in papermill mode, also save it as a PNG.
#'
#' The output file is written to
#' `plots/<prefix>.rna.seurat.<name>.<genome>.png`; `prefix` and `genome` are
#' read from the enclosing environment rather than passed as arguments.
#'
#' @param name Short label that becomes part of the PNG file name.
#' @param plotObject Object that is rendered with `print()`.
#' @param papermill Logical; when `TRUE` the plot is also written to disk.
#' @param wf Width multiplier applied to the notebook size (7) and the PNG
#'   width (480).
#' @param hf Height multiplier applied to the notebook size (7) and the PNG
#'   height (480).
#' @return `NULL`, invisibly. Called for its side effects.
printPNG <- function(name, plotObject, papermill, wf = 1, hf = 1) {
    filename <- paste0(prefix, ".rna.seurat.", name, ".", genome)
    # The notebook plot size is deliberately left changed after the call so
    # that it still applies when the notebook front end renders the figure.
    options(repr.plot.width = 7 * wf, repr.plot.height = 7 * hf)
    print(plotObject)
    if (papermill) {
        png(sprintf("plots/%s.png", filename), width = 480 * wf, height = 480 * hf)
        png_device <- dev.cur()
        # Close the PNG device even when rendering fails, so a failed plot
        # does not leave an open graphics device behind.
        on.exit(dev.off(png_device), add = TRUE)
        print(plotObject)
    }
    invisible(NULL)
}

In [None]:
#' Copy a file into the current working directory and return its file name.
#'
#' The copy is done with `AnVIL::gsutil_cp()`. The call is namespace-qualified
#' because this notebook never attaches the AnVIL package.
#'
#' @param path A single path/URI of the file to copy.
#' @return The base name of `path`, i.e. the name of the local copy relative
#'   to the working directory.
get_file <- function(path) {
    # Fail early with a clear message instead of handing an empty, missing or
    # multi-element path to the copy command.
    stopifnot(is.character(path), length(path) == 1L, !is.na(path))
    AnVIL::gsutil_cp(path, getwd())
    basename(path)
}

# Outside papermill the count matrix is looked up in the Terra data table:
# the row whose `<table_name>_id` equals `experiment_name` provides the
# location of the h5 matrix, which is then copied locally.
if (!papermill) {
    # Namespace-qualified because the AnVIL package is not attached above.
    terra_table <- AnVIL::avtable(table_name)
    id_column <- sprintf('%s_id', table_name)
    matching_rows <- which(terra_table[[id_column]] == experiment_name)
    # Exactly one row must match; anything else would pass an empty or
    # multi-element path on to the download step.
    if (length(matching_rows) != 1L) {
        stop(
            "Expected exactly one row in table '", table_name, "' with ",
            id_column, " == '", experiment_name, "', found ",
            length(matching_rows), ".",
            call. = FALSE
        )
    }
    rna_matrix <- get_file(terra_table[["h5_matrix"]][matching_rows])
}

In [None]:
# Read the h5 count matrix and build the Seurat object, applying the
# minimum-cells and minimum-features thresholds at creation time.
data <- Read10X_h5(filename = rna_matrix)
rna <- CreateSeuratObject(
    counts = data,
    project = prefix,
    min.cells = min_cells,
    min.features = min_features
)
# Show a summary of the object in the notebook
rna

In [None]:
# Calculate percent of mitochondrial reads.
# Mitochondrial gene symbols are upper-case ("MT-") in human annotations but
# lower-case ("mt-") in mouse annotations, so the pattern has to follow the
# genome; a fixed "^MT-" matches nothing for mouse and yields 0% everywhere.
mt_pattern <- if (startsWith(tolower(genome), "mm")) "^mt-" else "^MT-"
rna[["percent.mt"]] <- PercentageFeatureSet(rna, pattern = mt_pattern)

# Violin plots of the three per-cell QC metrics
obj <- VlnPlot(rna, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
printPNG('violin', obj, papermill)

In [None]:
# QC scatter plots on the unfiltered cells
plot1 <- FeatureScatter(rna, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(rna, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

obj <- plot1 + plot2
printPNG('mitochondria', obj, papermill, wf=2)

# Apply the mitochondrial QC threshold. `percent_MT` is declared as the
# maximum allowed percentage of mitochondrial reads but was never used, so
# no cell was ever removed on that basis.
# Cells are selected by name instead of through a `subset =` expression so
# the threshold variable is evaluated here, in plain R.
mt_threshold <- as.numeric(percent_MT)
keep_cells <- colnames(rna)[which(rna$percent.mt < mt_threshold)]
if (length(keep_cells) == 0) {
    stop("No cells have percent.mt < ", mt_threshold, "; check percent_MT.", call. = FALSE)
}
message(
    "Keeping ", length(keep_cells), " of ", ncol(rna),
    " cells with percent.mt < ", mt_threshold
)
rna <- subset(rna, cells = keep_cells)

In [None]:
# Normalize the counts with the configured method and scale factor
rna <- NormalizeData(
    rna,
    normalization.method = normalization_method,
    scale.factor = normalization_scale_factor
)

In [None]:
# Select the variable features
rna <- FindVariableFeatures(
    rna,
    selection.method = variable_features_method,
    nfeatures = variable_features_num
)

# First 10 entries of the variable-feature list, used as point labels
top10 <- head(VariableFeatures(rna), 10)

# Variable-feature plot, shown side by side without and with gene labels
unlabelled_plot <- VariableFeaturePlot(rna)
labelled_plot <- LabelPoints(plot = unlabelled_plot, points = top10, repel = FALSE)
obj <- unlabelled_plot + labelled_plot
printPNG("features", obj, papermill, wf = 2)

In [None]:
# Scale every feature in the object, not only the variable ones
rna <- ScaleData(rna, features = rownames(rna))

In [None]:
# Principal component analysis restricted to the variable features
pca_features <- VariableFeatures(object = rna)
rna <- RunPCA(rna, features = pca_features)

# Feature loadings for the first `dim_loadings_dim` components
obj <- VizDimLoadings(
    rna,
    dims = 1:dim_loadings_dim,
    reduction = "pca"
)
printPNG("dimLoadings", obj, papermill, wf = 1.5)

In [None]:
# Cells plotted in PCA space
printPNG("pca", DimPlot(rna, reduction = "pca"), papermill)

In [None]:
# Heatmap for the selected principal component(s); `fast = FALSE` is kept so
# the call yields a plot object that printPNG can print.
obj <- DimHeatmap(
    rna,
    dims = heatmap_dim,
    cells = heatmap_cells,
    balanced = heatmap_balanced,
    fast = FALSE
)
printPNG("heatmap", obj, papermill)

In [None]:
# JackStraw significance testing of the principal components.
# JackStraw() only computes p-values for its `dims` argument (20 PCs by
# default). Scoring or plotting more PCs than were computed, or plotting PCs
# that were not scored, indexes p-values that do not exist. The numbers of
# computed and scored PCs are therefore derived from the two parameters; with
# the default parameters (20 and 15) the calls are equivalent to before.
js_scored_dims <- max(jackstraw_score_dim, jackstraw_plot_dim)
js_computed_dims <- max(20, js_scored_dims)

rna <- JackStraw(rna, num.replicate = jackstraw_replicates, dims = js_computed_dims)
rna <- ScoreJackStraw(rna, dims = seq_len(js_scored_dims))
obj <- JackStrawPlot(rna, dims = seq_len(jackstraw_plot_dim))
printPNG('jackstraw', obj, papermill)

In [None]:
# Elbow plot of the principal components
printPNG("elbow", ElbowPlot(rna), papermill)

In [None]:
# The same leading principal components feed both the neighbour graph and
# the UMAP embedding.
embedding_dims <- 1:umap_dim

# Neighbour graph, then clustering at the configured resolution
rna <- FindNeighbors(rna, dims = embedding_dims)
rna <- FindClusters(rna, resolution = umap_resolution)

# UMAP embedding
rna <- RunUMAP(rna, dims = embedding_dims)

In [None]:
# Cells plotted in UMAP space
printPNG("umap", DimPlot(rna, reduction = "umap"), papermill)

In [None]:
# Create final output files

# Bundle every file found in the plots directory into plots.zip. PNGs are
# only written in papermill mode, so the directory can be empty; calling
# zip() with no files makes the external zip command fail, so skip it then.
files2zip <- list.files('plots/', full.names = TRUE)
if (length(files2zip) > 0) {
    zip(zipfile = 'plots.zip', files = files2zip)
} else {
    message("No files found in 'plots/'; skipping creation of plots.zip")
}

# Save the processed Seurat object (output file name kept unchanged)
saveRDS(rna, file = paste0(prefix, ".rna.seurat.rds.", genome, ".rds"))