cc1001-ctrl · Copilot · Sep 13, 2025 · Sep 13, 2025
diff --git a/.Rprofile b/.Rprofile
@@ -0,0 +1 @@
+source("renv/activate.R")
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,57 @@
+# renv files
+renv/
+!renv/settings.dcf
+!renv/activate.R
+
+# R files
+.Rproj.user/
+.Rhistory
+.RData
+.Ruserdata
+*.Rproj
+
+# Output files
+outputs/*.png
+outputs/*.pdf
+outputs/*.csv
+outputs/*.rds
+outputs/*.html
+
+# Data files (uncomment as needed)
+# data/*.csv
+# data/*.tsv
+# data/*.h5
+# data/*.gz
+# data/*.rds
+
+# Temporary files
+*.tmp
+*.temp
+*~
+
+# System files
+.DS_Store
+Thumbs.db
+
+# IDE files
+.vscode/
+.idea/
+
+# Log files
+*.log
+
+# Compiled R files
+*.so
+*.dll
+*.dylib
+
+# Package build files
+*.tar.gz
+*.zip
+
+# HTML files (unless specifically needed)
+*.html
+
+# Cache directories
+cache/
+.cache/
diff --git a/R/analysis_functions.R b/R/analysis_functions.R
@@ -0,0 +1,109 @@
+# Basic single-cell RNA-seq analysis functions
+# Author: CC1001-CTRL
+# Date: 2024
+
+#' Load and preprocess single-cell data
+#'
+#' @param data_path Path to the data file
+#' @param project_name Name for the Seurat object
+#' @return A preprocessed Seurat object
+#' @export
+load_and_preprocess <- function(data_path, project_name = "scRNA_analysis") {
+  library(Seurat)
+  library(dplyr)
+
+  # Load data (adjust based on your data format)
+  # This is a template - modify for your specific data format
+  if (file.exists(data_path)) {
+    data <- Read10X(data_path)
+    seurat_obj <- CreateSeuratObject(counts = data, project = project_name)
+  } else {
+    # Use example data if file doesn't exist
+    data("pbmc_small")
+    seurat_obj <- pbmc_small
+    message("Using example pbmc_small dataset")
+  }
+
+  # Basic preprocessing
+  seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")
+
+  # Filter cells and features
+  seurat_obj <- subset(seurat_obj, 
+                      subset = nFeature_RNA > 200 & 
+                               nFeature_RNA < 5000 & 
+                               percent.mt < 20)
+
+  return(seurat_obj)
+}
+
+#' Perform standard normalization and scaling
+#'
+#' @param seurat_obj A Seurat object
+#' @param nfeatures Number of variable features to find
+#' @return A normalized and scaled Seurat object
+#' @export
+normalize_and_scale <- function(seurat_obj, nfeatures = 2000) {
+  library(Seurat)
+
+  # Normalize data
+  seurat_obj <- NormalizeData(seurat_obj, normalization.method = "LogNormalize", 
+                             scale.factor = 10000)
+
+  # Find variable features
+  seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", 
+                                    nfeatures = nfeatures)
+
+  # Scale data
+  all.genes <- rownames(seurat_obj)
+  seurat_obj <- ScaleData(seurat_obj, features = all.genes)
+
+  return(seurat_obj)
+}
+
+#' Perform dimensional reduction
+#'
+#' @param seurat_obj A Seurat object
+#' @param npcs Number of principal components to compute
+#' @return A Seurat object with PCA and UMAP
+#' @export
+run_dimensional_reduction <- function(seurat_obj, npcs = 50) {
+  library(Seurat)
+
+  # Run PCA
+  seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj),
+                      npcs = npcs)
+
+  # Find neighbors and clusters
+  seurat_obj <- FindNeighbors(seurat_obj, dims = 1:20)
+  seurat_obj <- FindClusters(seurat_obj, resolution = 0.5)
+
+  # Run UMAP
+  seurat_obj <- RunUMAP(seurat_obj, dims = 1:20)
+
+  return(seurat_obj)
+}
+
+#' Create quality control plots
+#'
+#' @param seurat_obj A Seurat object
+#' @return A list of ggplot objects
+#' @export
+create_qc_plots <- function(seurat_obj) {
+  library(ggplot2)
+  library(Seurat)
+
+  plots <- list()
+
+  # Violin plot for QC metrics
+  plots$violin <- VlnPlot(seurat_obj, 
+                         features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), 
+                         ncol = 3)
+
+  # Feature scatter plots
+  plots$feature_scatter1 <- FeatureScatter(seurat_obj, feature1 = "nCount_RNA", 
+                                          feature2 = "percent.mt")
+  plots$feature_scatter2 <- FeatureScatter(seurat_obj, feature1 = "nCount_RNA", 
+                                          feature2 = "nFeature_RNA")
+
+  return(plots)
+}
diff --git a/R/example_analysis.R b/R/example_analysis.R
@@ -0,0 +1,91 @@
+# Example single-cell RNA-seq analysis workflow
+# This script demonstrates a complete analysis pipeline
+
+# Load required libraries
+library(Seurat)
+library(ggplot2)
+library(dplyr)
+library(patchwork)
+
+# Source custom functions
+source("R/analysis_functions.R")
+
+# Set seed for reproducibility
+set.seed(42)
+
+# 1. Load and preprocess data
+message("Loading and preprocessing data...")
+seurat_obj <- load_and_preprocess("data/")  # Will use example data if path doesn't exist
+
+# 2. Normalize and scale data
+message("Normalizing and scaling data...")
+seurat_obj <- normalize_and_scale(seurat_obj, nfeatures = 2000)
+
+# 3. Run dimensional reduction
+message("Running dimensional reduction...")
+seurat_obj <- run_dimensional_reduction(seurat_obj, npcs = 50)
+
+# 4. Create visualizations
+message("Creating visualizations...")
+
+# Quality control plots
+qc_plots <- create_qc_plots(seurat_obj)
+
+# Save QC plots
+ggsave("outputs/qc_violin_plot.png", qc_plots$violin, width = 12, height = 6)
+ggsave("outputs/qc_scatter_plot1.png", qc_plots$feature_scatter1, width = 8, height = 6)
+ggsave("outputs/qc_scatter_plot2.png", qc_plots$feature_scatter2, width = 8, height = 6)
+
+# PCA plot
+pca_plot <- DimPlot(seurat_obj, reduction = "pca", group.by = "seurat_clusters")
+ggsave("outputs/pca_plot.png", pca_plot, width = 8, height = 6)
+
+# UMAP plot
+umap_plot <- DimPlot(seurat_obj, reduction = "umap", group.by = "seurat_clusters", 
+                     label = TRUE, pt.size = 0.5) + 
+             ggtitle("UMAP Clustering")
+ggsave("outputs/umap_plot.png", umap_plot, width = 8, height = 6)
+
+# Variable features plot
+var_features_plot <- VariableFeaturePlot(seurat_obj)
+top10_features <- head(VariableFeatures(seurat_obj), 10)
+var_features_plot <- LabelPoints(plot = var_features_plot, points = top10_features, repel = TRUE)
+ggsave("outputs/variable_features_plot.png", var_features_plot, width = 10, height = 6)
+
+# 5. Find marker genes for clusters
+message("Finding marker genes...")
+all_markers <- FindAllMarkers(seurat_obj, only.pos = TRUE, min.pct = 0.25, 
+                             logfc.threshold = 0.25)
+
+# Save marker genes
+write.csv(all_markers, "outputs/cluster_markers.csv", row.names = FALSE)
+
+# Plot top markers
+top_markers <- all_markers %>% 
+  group_by(cluster) %>% 
+  slice_max(n = 2, order_by = avg_log2FC)
+
+if(nrow(top_markers) > 0) {
+  marker_plot <- FeaturePlot(seurat_obj, features = head(top_markers$gene, 6), 
+                            ncol = 3)
+  ggsave("outputs/top_markers_plot.png", marker_plot, width = 15, height = 10)
+}
+
+# 6. Create summary statistics
+message("Creating summary statistics...")
+summary_stats <- data.frame(
+  n_cells = ncol(seurat_obj),
+  n_genes = nrow(seurat_obj),
+  n_clusters = length(unique(Idents(seurat_obj))),
+  median_genes_per_cell = median(seurat_obj$nFeature_RNA),
+  median_UMI_per_cell = median(seurat_obj$nCount_RNA),
+  median_mito_percent = median(seurat_obj$percent.mt)
+)
+
+write.csv(summary_stats, "outputs/analysis_summary.csv", row.names = FALSE)
+
+# Save the final Seurat object
+saveRDS(seurat_obj, "outputs/processed_seurat_object.rds")
+
+message("Analysis complete! Check the outputs/ directory for results.")
+print(summary_stats)