In [None]:
# RNA-seq Differential Expression Analysis

## 1. Load Libraries
R
library(DESeq2)
library(tidyverse)
library(EnhancedVolcano)
library(ggplot2)

## 2. Load Input Files
## gene_counts.txt
## runinfo.csv
# If you get an error, delete the first row of the .txt file
# Read the count table (lines starting with "#" are skipped), drop the
# annotation columns and use the gene IDs as row names so that only the
# per-sample count columns remain.
counts <- read_tsv("gene_counts.txt", comment = "#") %>%
  select(-c(Chr, Start, End, Strand, Length)) %>%
  column_to_rownames(var = "Geneid")

# Reduce the column names to bare sample names: first remove ".sorted.bam",
# then remove "alignments.".
# NOTE: both patterns are regular expressions, so each "." matches ANY single
# character (e.g. "alignments." also matches "alignments/").
colnames(counts) <- gsub(
  "alignments.", "",
  gsub(".sorted.bam", "", colnames(counts))
)

# Load the sample metadata; the first column of runinfo.csv becomes the
# row names.
coldata <- read.csv("runinfo.csv", row.names = 1)

# Print both sets of sample names for a visual check
colnames(counts)
rownames(coldata)

# Every count column needs a metadata row. Stop here and name the offending
# samples instead of continuing with an unusable sample table.
missing_samples <- setdiff(colnames(counts), rownames(coldata))
if (length(missing_samples) > 0) {
  stop(
    "Samples in gene_counts.txt with no row in runinfo.csv: ",
    paste(missing_samples, collapse = ", "),
    call. = FALSE
  )
}

# DESeqDataSetFromMatrix() pairs count columns with metadata rows by
# position, so membership alone is not enough: put the metadata rows in
# count-column order (rows for samples without counts are dropped).
coldata <- coldata[colnames(counts), , drop = FALSE]

# If this prints TRUE, the names match one-to-one and in the same order,
# and you're ready to create a DESeq2 object.
identical(colnames(counts), rownames(coldata))


##3. Make DESeqDataSet & set reference
# Replace "physiological_state" with your actual column name (e.g., "condition" or "treatment"), i.e., if your metadata column has a different name, update it accordingly.

# Build the DESeq2 dataset from the count table and the sample metadata,
# modelling expression as a function of physiological_state.
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ physiological_state
)

# The reference (baseline) level has to be set BEFORE DESeq() is called
dds$physiological_state <- relevel(
  dds$physiological_state,
  ref = "UT2_D0.5"
)

# Keep only genes whose counts summed over all samples exceed 10, then run
# the DESeq2 normalization and model fitting on the filtered dataset.
dds <- DESeq(dds[rowSums(counts(dds)) > 10, ])

# Show the condition levels (the reference level is listed first)
levels(dds$physiological_state)

In [None]:
# 4. Extract results for a specific comparison from the fitted dds object
# Example: UT2_D1nonSS vs UT2_D0.5
# The contrast vector is c(design column, level of interest, reference level).
res_UT2_D1nonSS <- results(
  dds,
  contrast = c("physiological_state", "UT2_D1nonSS", "UT2_D0.5")
)

# Print the summary of this comparison
summary(res_UT2_D1nonSS)


#You can repeat this for other conditions by changing "UT2_D1nonSS" to the condition of interest.