Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
source("renv/activate.R")
57 changes: 57 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# renv files
renv/
!renv/settings.dcf
!renv/activate.R

# R files
.Rproj.user/
.Rhistory
.RData
.Ruserdata
*.Rproj

# Output files
outputs/*.png
outputs/*.pdf
outputs/*.csv
outputs/*.rds
outputs/*.html

# Data files (uncomment as needed)
# data/*.csv
# data/*.tsv
# data/*.h5
# data/*.gz
# data/*.rds

# Temporary files
*.tmp
*.temp
*~

# System files
.DS_Store
Thumbs.db

# IDE files
.vscode/
.idea/

# Log files
*.log

# Compiled R files
*.so
*.dll
*.dylib

# Package build files
*.tar.gz
*.zip

# HTML files (unless specifically needed)
*.html

# Cache directories
cache/
.cache/
109 changes: 109 additions & 0 deletions R/analysis_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Basic single-cell RNA-seq analysis functions
# Author: CC1001-CTRL
# Date: 2024

#' Load and preprocess single-cell data
#'
#' @param data_path Path to the data file
#' @param project_name Name for the Seurat object
#' @return A preprocessed Seurat object
#' @export
load_and_preprocess <- function(data_path, project_name = "scRNA_analysis") {
library(Seurat)
library(dplyr)

# Load data (adjust based on your data format)
# This is a template - modify for your specific data format
if (file.exists(data_path)) {
data <- Read10X(data_path)
seurat_obj <- CreateSeuratObject(counts = data, project = project_name)
} else {
# Use example data if file doesn't exist
data("pbmc_small")
seurat_obj <- pbmc_small
message("Using example pbmc_small dataset")
}

# Basic preprocessing
seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")

# Filter cells and features
seurat_obj <- subset(seurat_obj,
subset = nFeature_RNA > 200 &
nFeature_RNA < 5000 &
percent.mt < 20)

return(seurat_obj)
}

#' Perform standard normalization and scaling
#'
#' @param seurat_obj A Seurat object
#' @param nfeatures Number of variable features to find
#' @return A normalized and scaled Seurat object
#' @export
normalize_and_scale <- function(seurat_obj, nfeatures = 2000) {
library(Seurat)

# Normalize data
seurat_obj <- NormalizeData(seurat_obj, normalization.method = "LogNormalize",
scale.factor = 10000)

# Find variable features
seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst",
nfeatures = nfeatures)

# Scale data
all.genes <- rownames(seurat_obj)
seurat_obj <- ScaleData(seurat_obj, features = all.genes)

return(seurat_obj)
}

#' Perform dimensional reduction
#'
#' @param seurat_obj A Seurat object
#' @param npcs Number of principal components to compute
#' @return A Seurat object with PCA and UMAP
#' @export
run_dimensional_reduction <- function(seurat_obj, npcs = 50) {
library(Seurat)

# Run PCA
seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj),
npcs = npcs)

# Find neighbors and clusters
seurat_obj <- FindNeighbors(seurat_obj, dims = 1:20)
seurat_obj <- FindClusters(seurat_obj, resolution = 0.5)

# Run UMAP
seurat_obj <- RunUMAP(seurat_obj, dims = 1:20)

return(seurat_obj)
}

#' Create quality control plots
#'
#' @param seurat_obj A Seurat object
#' @return A list of ggplot objects
#' @export
create_qc_plots <- function(seurat_obj) {
library(ggplot2)
library(Seurat)

plots <- list()

# Violin plot for QC metrics
plots$violin <- VlnPlot(seurat_obj,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3)

# Feature scatter plots
plots$feature_scatter1 <- FeatureScatter(seurat_obj, feature1 = "nCount_RNA",
feature2 = "percent.mt")
plots$feature_scatter2 <- FeatureScatter(seurat_obj, feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")

return(plots)
}
91 changes: 91 additions & 0 deletions R/example_analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Example single-cell RNA-seq analysis workflow
# This script demonstrates a complete analysis pipeline

# Load required libraries
library(Seurat)
library(ggplot2)
library(dplyr)
library(patchwork)

# Source custom functions
source("R/analysis_functions.R")

# Set seed for reproducibility
set.seed(42)

# 1. Load and preprocess data
message("Loading and preprocessing data...")
seurat_obj <- load_and_preprocess("data/") # Will use example data if path doesn't exist

# 2. Normalize and scale data
message("Normalizing and scaling data...")
seurat_obj <- normalize_and_scale(seurat_obj, nfeatures = 2000)

# 3. Run dimensional reduction
message("Running dimensional reduction...")
seurat_obj <- run_dimensional_reduction(seurat_obj, npcs = 50)

# 4. Create visualizations
message("Creating visualizations...")

# Quality control plots
qc_plots <- create_qc_plots(seurat_obj)

# Save QC plots
ggsave("outputs/qc_violin_plot.png", qc_plots$violin, width = 12, height = 6)
ggsave("outputs/qc_scatter_plot1.png", qc_plots$feature_scatter1, width = 8, height = 6)
ggsave("outputs/qc_scatter_plot2.png", qc_plots$feature_scatter2, width = 8, height = 6)

# PCA plot
pca_plot <- DimPlot(seurat_obj, reduction = "pca", group.by = "seurat_clusters")
ggsave("outputs/pca_plot.png", pca_plot, width = 8, height = 6)

# UMAP plot
umap_plot <- DimPlot(seurat_obj, reduction = "umap", group.by = "seurat_clusters",
label = TRUE, pt.size = 0.5) +
ggtitle("UMAP Clustering")
ggsave("outputs/umap_plot.png", umap_plot, width = 8, height = 6)

# Variable features plot
var_features_plot <- VariableFeaturePlot(seurat_obj)
top10_features <- head(VariableFeatures(seurat_obj), 10)
var_features_plot <- LabelPoints(plot = var_features_plot, points = top10_features, repel = TRUE)
ggsave("outputs/variable_features_plot.png", var_features_plot, width = 10, height = 6)

# 5. Find marker genes for clusters
message("Finding marker genes...")
all_markers <- FindAllMarkers(seurat_obj, only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.25)

# Save marker genes
write.csv(all_markers, "outputs/cluster_markers.csv", row.names = FALSE)

# Plot top markers
top_markers <- all_markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)

if(nrow(top_markers) > 0) {
marker_plot <- FeaturePlot(seurat_obj, features = head(top_markers$gene, 6),
ncol = 3)
ggsave("outputs/top_markers_plot.png", marker_plot, width = 15, height = 10)
}

# 6. Create summary statistics
message("Creating summary statistics...")
summary_stats <- data.frame(
n_cells = ncol(seurat_obj),
n_genes = nrow(seurat_obj),
n_clusters = length(unique(Idents(seurat_obj))),
median_genes_per_cell = median(seurat_obj$nFeature_RNA),
median_UMI_per_cell = median(seurat_obj$nCount_RNA),
median_mito_percent = median(seurat_obj$percent.mt)
)

write.csv(summary_stats, "outputs/analysis_summary.csv", row.names = FALSE)

# Save the final Seurat object
saveRDS(seurat_obj, "outputs/processed_seurat_object.rds")

message("Analysis complete! Check the outputs/ directory for results.")
print(summary_stats)
Loading