# Making Counts Matrix For Genes In ZFR Samples

This notebook (`.ipynb`) builds a single combined counts matrix from the individual counts files generated by HTSeq.

In [None]:
# Load DESeq2 library
library(DESeq2)

# Folder that holds the gene-level counts files read by the cells below.
# The value is a placeholder path: replace it with the real location
# before running the notebook.
folder_path <- "/path/to/folder/containing/gene/counts"

In [None]:
# List all counts files in the folder.
# `pattern` is a regular expression, not a shell glob, so the extension is
# escaped and anchored: "\\.txt$" matches only names that end in ".txt"
# (the glob-style "*.txt" would also match names such as "mytxt_notes.csv").
# list.files() returns the paths sorted alphabetically; later cells rename
# columns by position, so that order matters.
count_files <- list.files(path = folder_path, pattern = "\\.txt$", full.names = TRUE)

# Show just the file names so the selection can be checked by eye.
basename(count_files)

In [None]:
# Fail early with a clear message: with no input files,
# do.call(cbind, list()) returns NULL and the later column renaming
# would fail with an unrelated-looking error.
if (length(count_files) == 0L) {
  stop("No count files found; check folder_path and the file pattern.",
       call. = FALSE)
}

# Preallocate one slot per file, keyed by file path, instead of growing
# the list inside the loop.
count_matrices <- vector("list", length(count_files))
names(count_matrices) <- count_files

# Row identifiers of the first file; every other file must match them.
reference_ids <- NULL

# Loop through count files, read each file, and store it in the list
for (count_file in count_files) {
  # Print the name of the count file being read
  cat("Reading count file:", count_file, "\n")

  # Read the count file (tab-separated, first column used as row names).
  # NOTE(review): header = TRUE consumes the first line of each file as
  # column names. Standard htseq-count output has no header line, in which
  # case the first feature would be dropped -- confirm against the files.
  count_matrix <- read.table(count_file, header = TRUE, row.names = 1, sep = "\t")

  # cbind() joins data frames row-by-row by position, not by row name, so
  # files whose features differ or are ordered differently would be combined
  # into silently misaligned counts. Require identical row identifiers.
  if (is.null(reference_ids)) {
    reference_ids <- rownames(count_matrix)
  } else if (!identical(rownames(count_matrix), reference_ids)) {
    stop("Row identifiers in ", count_file,
         " do not match those in ", count_files[[1L]],
         "; the files cannot be combined column-wise.",
         call. = FALSE)
  }

  # Store the count matrix in the list
  count_matrices[[count_file]] <- count_matrix
}

# Combine the per-file tables into one count matrix using cbind
combined_count_matrix <- do.call(cbind, count_matrices)

head(combined_count_matrix)

In [None]:
# Name each column after its source file (file name without directory or
# extension). fixed = TRUE makes ".txt" a literal string: as a regular
# expression the dot matches any character, so a name such as
# "ctxt_sample" would have been mangled to "_sample".
sample_names <- gsub(".txt", "",
                     tools::file_path_sans_ext(basename(count_files)),
                     fixed = TRUE)

# Names are assigned by position, one per input file. Assigning a vector of
# the wrong length to a data frame can leave NA column names, so check first.
if (length(sample_names) != ncol(combined_count_matrix)) {
  stop("Expected one column per count file (", length(sample_names),
       ") but the combined matrix has ", ncol(combined_count_matrix),
       " columns.", call. = FALSE)
}

colnames(combined_count_matrix) <- sample_names

In [None]:
# Preview the first six rows to confirm the new column names.
head(combined_count_matrix, n = 6L)


To save the counts matrix, we first need to specify the output file path:

In [None]:
# Destination of the TSV file that the next cell writes with write.table().
# The value is a placeholder path: replace it before running.
output_file <- "/path/to/my/output/tsv/my_tsv.tsv"

In [None]:
# Save the combined count matrix to `output_file` as tab-separated text:
# values are left unquoted and the row names are written as the first
# field of every data row.
write.table(
  x = combined_count_matrix,
  file = output_file,
  quote = FALSE,
  sep = "\t",
  row.names = TRUE
)

## Rename The Column Names

In [None]:
# Keep a copy of the current column names and print them, so the
# replacement names defined next can be matched against them in order.
existing_colnames <- colnames(x = combined_count_matrix)
print(x = existing_colnames)

In [None]:
# Replacement column names (edit these to your desired names).
# They are applied by position, so list them in the same order as the
# existing column names printed above.
new_colnames <- c(
  # Control samples
  "Control-01__Control",
  "Control-02__Control",
  "Control-03__Control",
  "Control-04__Control",
  "Control-05__Control",
  "Control-06__Control",
  # Experimental samples
  "Experimental-01__Experimental",
  "Experimental-02__Experimental",
  "Experimental-03__Experimental",
  "Experimental-04__Experimental",
  "Experimental-05__Experimental",
  "Experimental-06__Experimental"
)

In [None]:
# Rename the columns by assigning the new names.
# The assignment is positional, and giving a data frame a names vector of
# the wrong length can leave NA column names instead of failing, so verify
# that there is exactly one new name per column first.
if (length(new_colnames) != ncol(combined_count_matrix)) {
  stop("new_colnames has ", length(new_colnames),
       " entries but the count matrix has ", ncol(combined_count_matrix),
       " columns.", call. = FALSE)
}

colnames(combined_count_matrix) <- new_colnames

In [None]:
# Preview the first six rows to confirm the renamed columns.
head(combined_count_matrix, n = 6L)

In [None]:
# Destination of the TSV file for the matrix with the renamed columns.
# This reassigns `output_file`, so the next write goes to this path.
# The value is a placeholder path: replace it before running.
output_file <- "/my/path/to/my/tsv/file.tsv"

In [None]:
# Save the renamed count matrix to `output_file` as tab-separated text:
# values are left unquoted and the row names are written as the first
# field of every data row.
write.table(
  x = combined_count_matrix,
  file = output_file,
  quote = FALSE,
  sep = "\t",
  row.names = TRUE
)

## Session Information

In [None]:
# Record the R version and the loaded/attached packages for reproducibility.
sessionInfo()

In [None]:
# Print a marker showing that the notebook ran through to the last cell.
print("Done")