✅ Step-by-step: Load & Normalize Count Data in R
1. Start R or RStudio
In terminal:

In [None]:
Some R tips/commands:
# Interactive cheat sheet: these commands are meant to be typed one at a time
# at the R prompt, not executed top to bottom as a script (the last one exits R).

# Check current working directory
getwd()

# Change directory 
# Use forward slashes (/) even on Windows
# The path below is a placeholder: replace it with a folder on your machine.
setwd("/path/to/your/folder")

# For example:
# NOTE(review): the relative paths used later in this file
# ("statisticalandfunctionalanalysis/...") presumably assume this project
# folder is the working directory - confirm for your setup.
setwd("/home/hingelman/BifrostOmics")

# List files in current directory
list.files()

# Go up one directory level
# (".." is resolved relative to the current working directory)
setwd("..")

# Exit R
quit()  # or just q()

# When exiting, R will ask if you want to save workspace
# Usually type 'n' to not save unless you specifically need to save

In [None]:
R

# Install the required packages only when they are missing, so re-running this
# cell does not download and reinstall them every time.

# BiocManager is the installer for Bioconductor packages.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# DESeq2 is installed through BiocManager.
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  BiocManager::install("DESeq2")
}

# tidyverse is installed with the regular package installer.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

# Attach the packages used by the rest of the analysis.
library(DESeq2)
library(tidyverse)

Input files from previous work (week 1). They are expected in the folder
BifrostOmics/statisticalandfunctionalanalysis/fromweek1
The following files are needed:
gene_counts.txt
gene_counts.txt.summary
runinfo.csv

In [None]:
# Load the full count table.
# NOTE(review): the file names suggest featureCounts output (Geneid, Chr,
# Start, End, Strand, Length, then one column per BAM file) - confirm. If the
# file still starts with a "# Program: ..." comment line, read.delim() needs
# comment.char = "#" to skip it.
counts_full <- read.delim(
  "statisticalandfunctionalanalysis/fromweek1/gene_counts.txt"
)

# Keep Geneid (column 1) and the count columns (columns 7 to 21).
# NOTE(review): the upper bound 21 is hard-coded; adjust it if the table has a
# different number of sample columns.
counts <- counts_full[, c(1, 7:21)]

# Use Geneid as row names, then drop the Geneid column so only counts remain.
rownames(counts) <- counts$Geneid
counts$Geneid <- NULL

# Load the sample metadata; the first CSV column becomes the row names.
coldata <- read.csv(
  "statisticalandfunctionalanalysis/fromweek1/runinfo.csv",
  row.names = 1
)

# Inspect both sets of sample names before cleaning them up.
colnames(counts)
rownames(coldata)

# Strip the alignment prefix/suffix from the count column names.
# fixed = TRUE treats the patterns as literal text; without it every "." is a
# regex wildcard and could match unintended characters.
colnames(counts) <- gsub(".sorted.bam", "", colnames(counts), fixed = TRUE)
colnames(counts) <- gsub("alignments.", "", colnames(counts), fixed = TRUE)

# Check that every count column has a matching metadata row.
samples_match <- all(colnames(counts) %in% rownames(coldata))
samples_match

if (!samples_match) {
  stop(
    "Samples in the count table without metadata: ",
    paste(setdiff(colnames(counts), rownames(coldata)), collapse = ", "),
    call. = FALSE
  )
}

# %in% only tests membership. DESeq2 needs the rows of coldata in the same
# order as the columns of counts, so reorder the metadata explicitly.
# drop = FALSE keeps coldata a data frame even if it has a single column.
coldata <- coldata[colnames(counts), , drop = FALSE]

# If no error was raised, counts and coldata are aligned and you are ready to
# create a DESeq2 object.



In [None]:
# 4. Build the DESeqDataSet and set the reference level
# The design formula uses the metadata column "physiological_state". If your
# metadata column has a different name (e.g. "condition" or "treatment"),
# change it in the formula and in the relevel() call below.
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ physiological_state
)

# The reference level has to be set BEFORE DESeq() is called.
dds$physiological_state <- relevel(
  dds$physiological_state,
  ref = "UT2_D0.5"
)

# 5. Drop low-count genes
# Keep only genes whose counts summed over all samples exceed 10.
dds <- dds[rowSums(counts(dds)) > 10, ]

# 6. Run DESeq2
# DESeq() runs the whole DESeq2 pipeline; the normalized counts extracted
# below come from the fitted object.
dds <- DESeq(dds)

# Optional: export the normalized counts as CSV.
norm_counts <- counts(dds, normalized = TRUE)
write.csv(
  norm_counts,
  "statisticalandfunctionalanalysis/normalized_counts.csv"
)
