In [None]:
# RNA-seq Differential Expression Analysis

In [None]:
## 1. Load Libraries
R
library(DESeq2)
library(tidyverse)
library(EnhancedVolcano)
library(ggplot2)
library(dplyr)

In [None]:
## 2. Load Input Files
## gene_counts.txt
## runinfo.csv
# If read_tsv() fails, delete the first row of the .txt file and try again.
# Read the tab-separated count table (lines starting with "#" are skipped),
# drop the Chr/Start/End/Strand/Length columns, and move Geneid into the
# row names so that only the per-sample count columns remain.
counts <- read_tsv("gene_counts.txt", comment = "#") %>%
  select(-Chr, -Start, -End, -Strand, -Length) %>%
  column_to_rownames("Geneid")

# Reduce the column names to bare sample names.
# NOTE: both patterns are regular expressions, so every "." matches any
# single character, not just a literal dot.
colnames(counts) <- gsub(".sorted.bam", "", colnames(counts))
colnames(counts) <- gsub("alignments.", "", colnames(counts))

# Load the runinfo.csv file; its first column becomes the row names
coldata <- read.csv("runinfo.csv", row.names = 1)

# Show both sets of sample names for a visual check
colnames(counts)
rownames(coldata)

# Fail early if a counted sample has no metadata row, instead of only
# printing FALSE and carrying on.
missing_samples <- setdiff(colnames(counts), rownames(coldata))
if (length(missing_samples) > 0) {
  stop(
    "Samples in gene_counts.txt without a row in runinfo.csv: ",
    paste(missing_samples, collapse = ", "),
    call. = FALSE
  )
}

# DESeq2 pairs count columns with metadata rows by position, so put the
# rows of coldata into the same order as the columns of counts (metadata rows
# for samples that were not counted are dropped).
coldata <- coldata[colnames(counts), , drop = FALSE]
identical(colnames(counts), rownames(coldata))

# If this returns TRUE, you're ready to create a DESeq2 object.

In [None]:
## 3. Make DESeqDataSet & set reference
# "physiological_state" must be the name of the condition column in your
# metadata. If your column is named differently (e.g., "condition" or
# "treatment"), replace it everywhere below.

dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ physiological_state
)

# The reference level has to be in place BEFORE DESeq() is called
dds$physiological_state <- relevel(dds$physiological_state, ref = "UT2_D0.5")

# Keep only genes whose count summed over all samples is above 10,
# then run the DESeq() pipeline on the filtered object
enough_reads <- rowSums(counts(dds)) > 10
dds <- dds[enough_reads, ]
dds <- DESeq(dds)

# Show the condition levels (the reference level is listed first)
levels(dds$physiological_state)

In [None]:
## 4. Generate volcano plots + CSV files for all the comparisons:
## every condition against the reference, and all the other pairwise
## combinations as well.
# NOTE(review): DESeq() has already been run in step 3; this script presumably
# works on the `dds` object created there -- confirm in generate_volcanos.R.
source("generate_volcanos.R")

In [None]:
## 5. Generate PCA plot
# Variance-stabilize the counts (blind to the design) and fetch the PCA
# coordinates as a data frame instead of a ready-made plot
vsd <- varianceStabilizingTransformation(dds, blind = TRUE)
pcaData <- plotPCA(vsd, intgroup = "physiological_state", returnData = TRUE)

# Percent variance per component, rounded to whole numbers for the axis labels
percentVar <- round(100 * attr(pcaData, "percentVar"))

# Build the plot object first, then render it to the PNG device
pca_plot <- ggplot(
  pcaData,
  aes(x = PC1, y = PC2, colour = physiological_state)
) +
  geom_point(size = 4) +
  labs(
    x = paste0("PC1: ", percentVar[1], "% variance"),
    y = paste0("PC2: ", percentVar[2], "% variance")
  ) +
  coord_fixed() +
  theme_minimal()

# An explicit print() draws the ggplot onto the open device
png(filename = "PCA_plot.png", width = 1200, height = 900)
print(pca_plot)
dev.off()

In [None]:
# Optional: export the normalized count matrix to a CSV file
norm_counts <- counts(dds, normalized = TRUE)
write.csv(x = norm_counts, file = "normalized_counts.csv")

In [None]:
## 6. significant_DE_genes
# Runs the companion script extract_sig_genes.R.
# NOTE(review): presumably it extracts the significantly differentially
# expressed genes from the objects created above -- confirm in the script.
source("extract_sig_genes.R")