## Differential Expression Testing

**Pinned Environment:** [`envs/R-DE.yaml`](../../envs/R-DE.yaml)

In [None]:
suppressPackageStartupMessages({
  library(DESeq2)
  library(EnhancedVolcano)
})

In [None]:
# Locate the data root by importing BASE_DIR from the Python module
# `config.paths`, then derive the DESeq2 input/output directories from it.
repo_root <- normalizePath(file.path(getwd(), ".."))
# NOTE(review): repo_root is the parent of the working directory, while the
# Python snippet below appends the grandparent (parents[1]) to sys.path --
# confirm which of the two actually contains the `config` package.

# shQuote() keeps the shell command intact when the path contains spaces or
# other shell metacharacters.
cmd <- paste0(
  "PYTHONPATH=", shQuote(repo_root),
  " python3 -c 'import sys; from pathlib import Path; sys.path.append(str(Path.cwd().resolve().parents[1])); ",
  "from config.paths import BASE_DIR; print(BASE_DIR)'"
)

base_dir <- system(cmd, intern = TRUE)

# With intern = TRUE a non-zero exit only raises a warning and sets a "status"
# attribute; without this check a failed call would silently yield empty or
# multi-element paths below.
exit_status <- attr(base_dir, "status")
if (!is.null(exit_status) || length(base_dir) != 1L || !nzchar(base_dir)) {
  stop(
    "Could not resolve BASE_DIR from config.paths (exit status: ",
    if (is.null(exit_status)) 0L else exit_status,
    "; output: ", paste(base_dir, collapse = " | "), ")",
    call. = FALSE
  )
}

input_dir <- file.path(base_dir, "deseq2", "epithelial", "prepped-data")
output_dir <- file.path(base_dir, "deseq2", "epithelial", "results")

# write.csv() does not create missing directories, so make sure the results
# folder exists before any cell tries to write into it.
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

cat("Input directory:", input_dir, "\nOutput directory:", output_dir, "\n")

## Run DESeq2

In [None]:
zones <- c("Stem_Progenitor", "Early", "Late")

# For each zone: read the counts and sample metadata, align them on shared
# sample names, fit DESeq2 with design ~ group, extract the
# Trpv1-cre vs Control contrast, write it to CSV and keep a copy in the
# global environment as `res_<zone>`.
for (zone in zones) {
  message("Running DESeq2 for zone: ", zone)

  # File paths
  counts_path <- file.path(input_dir, paste0(zone, "_counts.csv"))
  meta_path <- file.path(input_dir, paste0(zone, "_metadata.csv"))
  output_path <- file.path(output_dir, paste0(zone, "_deseq2_results.csv"))

  # Read data; check.names = FALSE keeps sample names in the count header
  # exactly as written so they can match the metadata row names.
  count_data <- read.csv(counts_path, row.names = 1, check.names = FALSE)
  col_data <- read.csv(meta_path, row.names = 1)

  # Align samples: keep only those present in both tables, in one order.
  sample_names <- intersect(colnames(count_data), rownames(col_data))
  if (length(sample_names) == 0L) {
    stop(
      "No sample names shared between ", counts_path, " and ", meta_path,
      call. = FALSE
    )
  }
  unmatched <- setdiff(
    union(colnames(count_data), rownames(col_data)),
    sample_names
  )
  if (length(unmatched) > 0L) {
    # Surface samples that are about to be excluded instead of dropping them
    # silently.
    warning(
      "Zone ", zone, ": dropping samples not present in both counts and ",
      "metadata: ", paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }
  # drop = FALSE keeps the table two-dimensional even for a single sample.
  count_data <- count_data[, sample_names, drop = FALSE]
  col_data <- col_data[sample_names, , drop = FALSE]

  stopifnot(identical(colnames(count_data), rownames(col_data)))

  # The design and the contrast below both need a `group` column containing
  # the two compared conditions; fail early with a readable message.
  contrast_levels <- c("Trpv1-cre", "Control")
  if (!"group" %in% colnames(col_data) ||
        !all(contrast_levels %in% col_data$group)) {
    stop(
      "Zone ", zone, ": metadata must have a 'group' column containing ",
      paste(contrast_levels, collapse = " and "),
      call. = FALSE
    )
  }
  # Convert explicitly; DESeq2 would otherwise do this itself with a warning.
  col_data$group <- factor(col_data$group)

  # Construct DESeq2 object
  dds <- DESeqDataSetFromMatrix(
    countData = count_data,
    colData = col_data,
    design = ~ group
  )

  # Run DESeq2
  dds <- DESeq(dds)

  # Extract results: numerator Trpv1-cre, denominator Control.
  res <- results(dds, contrast = c("group", contrast_levels))

  # Order by adjusted p-value (NA values sort last) and convert to a data frame
  res_ordered <- as.data.frame(res[order(res$padj), ])

  # Add a new column indicating the zone
  res_ordered$zone <- zone

  # Save to CSV
  write.csv(res_ordered, file = output_path)

  # Later cells read `res_<zone>` from the global environment, so keep
  # publishing one variable per zone ("-" and "/" are replaced by "_").
  varname <- paste0("res_", gsub("[-/]", "_", zone))
  assign(varname, res_ordered, envir = .GlobalEnv)
}

Combine the per-zone results into a single table:

In [None]:
# Add gene column BEFORE binding rows
res_Stem_Progenitor$gene <- rownames(res_Stem_Progenitor)
res_Early$gene <- rownames(res_Early)
res_Late$gene <- rownames(res_Late)

# Combine
combined_results <- rbind(res_Stem_Progenitor, res_Early, res_Late)

rownames(combined_results) <- NULL

In [None]:
# Move `gene` to the front; all other columns keep their relative order.
combined_results <- combined_results[
  ,
  c("gene", setdiff(colnames(combined_results), "gene"))
]

In [None]:
# Preview the first six rows of the combined table.
head(combined_results, n = 6L)

## Export

In [None]:
# Write the combined table to the results directory; row names are omitted
# because the gene identifiers already live in the `gene` column.
output_file <- file.path(output_dir, "combined_deseq2_results.csv")
write.csv(
  x = combined_results,
  file = output_file,
  row.names = FALSE
)