Skip to content

Commit

Permalink
Merge pull request #5339 from astrovsky01/seurat_update
Browse files Browse the repository at this point in the history
Seurat update
  • Loading branch information
bgruening committed Jul 7, 2023
2 parents 9c2eb2c + 3fe1f25 commit b437a46
Show file tree
Hide file tree
Showing 9 changed files with 2,316 additions and 157 deletions.
178 changes: 122 additions & 56 deletions tools/seurat/Seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,24 @@
#' low_thresholds: ""
#' high_thresholds: ""
#' numPCs: ""
#' cells_use: ""
#' resolution: ""
#' perplexity: ""
#' min_pct: ""
#' logfc_threshold: ""
#' end_step: ""
#' showcode: ""
#' warn: ""
#' varstate: ""
#' vlnfeat: ""
#' featplot: ""
#' PCplots: ""
#' tsne: ""
#' nmds: ""
#' heatmaps: ""
#' norm_out: ""
#' variable_out: ""
#' pca_out : ""
#' clusters_out: ""
#' markers_out: ""
#' ---

# nolint start
Expand All @@ -34,87 +39,148 @@ varstate <- as.logical(params$varstate)
vlnfeat <- as.logical(params$vlnfeat)
featplot <- as.logical(params$featplot)
pc_plots <- as.logical(params$PCplots)
tsne <- as.logical(params$tsne)
nmds <- as.logical(params$nmds)
heatmaps <- as.logical(params$heatmaps)

end_step <- as.integer(params$end_step)
norm_out <- as.logical(params$norm_out)
# we need that to not crash Galaxy with an UTF-8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

#+ echo = F, warning = `warn`, include =`varstate`
min_cells <- as.integer(params$min_cells)
min_genes <- as.integer(params$min_genes)
low_thresholds <- as.integer(params$low_thresholds)
high_thresholds <- as.integer(params$high_thresholds)
num_pcs <- as.integer(params$numPCs)
cells_use <- as.integer(params$cells_use)
resolution <- as.double(params$resolution)
perplexity <- as.integer(params$perplexity)
min_pct <- as.double(params$min_pct)
logfc_threshold <- as.double(params$logfc_thresh)
print(paste0("Minimum cells: ", min_cells))
print(paste0("Minimum features: ", min_genes))
low_thresholds <- as.integer(params$low_thresholds)
high_thresholds <- as.integer(params$high_thresholds)
print(paste0("Umi low threshold: ", low_thresholds))
print(paste0("Umi high threshold: ", high_thresholds))
print(paste0("Number of principal components: ", num_pcs))
print(paste0("Resolution: ", resolution))
print(paste0("Perplexity: ", perplexity))
print(paste0("Minimum percent of cells", min_pct))
print(paste0("Logfold change threshold", logfc_threshold))

#+ echo = FALSE
if (end_step >= 2) {
variable_out <- as.logical(params$variable_out)
}


if (end_step >= 3) {
num_pcs <- as.integer(params$numPCs)
print(paste0("Number of principal components: ", num_pcs))
pca_out <- as.logical(params$pca_out)
}
if (end_step >= 4) {
if (params$perplexity == "") {
perplexity <- -1
print(paste0("Perplexity: ", perplexity))
} else {
perplexity <- as.integer(params$perplexity)
print(paste0("Perplexity: ", perplexity))
}
resolution <- as.double(params$resolution)
print(paste0("Resolution: ", resolution))
clusters_out <- as.logical(params$clusters_out)
}
if (end_step >= 5) {
min_pct <- as.double(params$min_pct)
logfc_threshold <- as.double(params$logfc_thresh)
print(paste0("Minimum percent of cells", min_pct))
print(paste0("Logfold change threshold", logfc_threshold))
markers_out <- as.logical(params$markers_out)
}


if (showcode == TRUE) print("Read in data, generate inital Seurat object")
#+ echo = `showcode`, warning = `warn`, message = F
counts <- read.delim(params$counts, row.names = 1)
seuset <- Seurat::CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_genes)

#+ echo = FALSE
if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization")
#+ echo = `showcode`, warning = `warn`, include=`vlnfeat`
Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA"))
Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
if (vlnfeat == TRUE){
print(Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA")))
print(Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA"))
}

#+ echo = FALSE
if (showcode == TRUE) print("Filter and normalize for UMI counts")
#+ echo = `showcode`, warning = `warn`
seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds)
seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000)
if (norm_out == TRUE) {
saveRDS(seuset, "norm_out.rds")
}

#+ echo = FALSE
if (showcode == TRUE && featplot == TRUE) print("Variable Genes")
#+ echo = `showcode`, warning = `warn`, include = `featplot`
seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp")
Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp")
seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA")
if (end_step >= 2) {
#+ echo = FALSE
if (showcode == TRUE && featplot == TRUE) print("Variable Genes")
#+ echo = `showcode`, warning = `warn`, include = `featplot`
seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp")
if (featplot == TRUE) {
print(Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp"))
}
seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA")
if (variable_out == TRUE) {
saveRDS(seuset, "var_out.rds")
}
}

#+ echo = FALSE
if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization")
#+ echo = `showcode`, warning = `warn`, include = `pc_plots`
seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
Seurat::VizDimLoadings(seuset, dims = 1:2)
Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca")
Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca")
seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
Seurat::JackStrawPlot(seuset, dims = 1:num_pcs)
Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca")
if (end_step >= 3) {
#+ echo = FALSE
if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization")
#+ echo = `showcode`, warning = `warn`, include = `pc_plots`
seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
if (pc_plots == TRUE) {
print(Seurat::VizDimLoadings(seuset, dims = 1:2))
print(Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca"))
print(Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca"))
print(Seurat::JackStrawPlot(seuset, dims = 1:num_pcs))
print(Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca"))
}
if (pca_out == TRUE) {
saveRDS(seuset, "pca_out.rds")
}
}

#+ echo = FALSE
if (showcode == TRUE && tsne == TRUE) print("tSNE")
#+ echo = `showcode`, warning = `warn`, include = `tsne`
seuset <- Seurat::FindNeighbors(object = seuset)
seuset <- Seurat::FindClusters(object = seuset)
if (perplexity == -1) {
seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution);
} else {
seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity);
if (end_step >= 4) {
#+ echo = FALSE
if (showcode == TRUE && nmds == TRUE) print("tSNE and UMAP")
#+ echo = `showcode`, warning = `warn`, include = `nmds`
seuset <- Seurat::FindNeighbors(object = seuset)
seuset <- Seurat::FindClusters(object = seuset)
if (perplexity == -1) {
seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution);
} else {
seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity);
}
if (nmds == TRUE) {
print(Seurat::DimPlot(seuset, reduction = "tsne"))
}
seuset <- Seurat::RunUMAP(seuset, dims = 1:num_pcs)
if (nmds == TRUE) {
print(Seurat::DimPlot(seuset, reduction = "umap"))
}
if (clusters_out == TRUE) {
tsnedata <- Seurat::Embeddings(seuset, reduction="tsne")
saveRDS(seuset, "tsne_out.rds")
umapdata <- Seurat::Embeddings(seuset, reduction="umap")
saveRDS(seuset, "umap_out.rds")
}
}
Seurat::DimPlot(seuset, reduction = "tsne")

#+ echo = FALSE
if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
#+ echo = `showcode`, warning = `warn`, include = `heatmaps`
markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
top10 <- dplyr::group_by(markers, cluster)
top10 <- dplyr::top_n(top10, 10, avg_log2FC)
Seurat::DoHeatmap(seuset, features = top10$gene)

if (end_step == 5) {
#+ echo = FALSE
if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
#+ echo = `showcode`, warning = `warn`, include = `heatmaps`
markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
top10 <- dplyr::group_by(markers, cluster)
top10 <- dplyr::top_n(top10, n = 10, wt = avg_log2FC)
print(top10)
if (heatmaps == TRUE) {
print(Seurat::DoHeatmap(seuset, features = top10$gene))
}
if (markers_out == TRUE) {
saveRDS(seuset, "markers_out.rds")
data.table::fwrite(x = markers, row.names=TRUE, sep="\t", file = "markers_out.tsv")
}
}
# nolint end

0 comments on commit b437a46

Please sign in to comment.