# Pivoting Tables Per Sample TSV

This required step pivots each per-sample table into the long format that REDITs accepts. An example of the target format is shown below:

```
  Region_Position___Count_Type SampleName-#Edited_And_NonEdited__Counts_Counts
  <chr>                                                            <int>
1 1_2468___Edited                                                      5
2 1_2468___Non_Edited                                                 50
```

Each output table must follow the format above, where `SampleName` is the name of the sample and `#` stands for the one or more digits of that sample's id; both vary from sample to sample.

## Input Directory Path (Manual Input):

Set `directory_path` to the full path of the `Part_3___Per_Sample_TSV` directory.

In [None]:

# Full path to the `Part_3___Per_Sample_TSV` directory holding the per-sample
# TSV files to pivot. Replace the placeholder below with the real location;
# the output folder is later created next to this directory (in its parent).
directory_path <- "/path/to/Step_04___FilterationViaProportion___BasicPreFiltrationDirectory/Part_3___Per_Sample_TSV"


## Required Libraries

In [None]:
library(tidyr)
library(dplyr)
library(readr)
library(stringr)

## Output Directory Path

In [None]:
# Parent folder that contains the `Part_3___Per_Sample_TSV` input directory
parent_directory_tsv <- dirname(directory_path)

# The pivoted tables go into a sibling folder of the input directory
output_directory <- file.path(parent_directory_tsv, "Part_4___PivotedTablesPerSampleTSV")

# Show both locations so the user can confirm them before any file is written
cat("\n*Parent Directory:*", parent_directory_tsv, "\n\n")
cat("*Output Directory:*", output_directory, "\n\n")

# Create the output folder on first run. dir.create() only signals failure
# through its return value, so check it and stop early instead of reporting
# success and failing later on the first write.
if (!dir.exists(output_directory)) {
  if (!dir.create(output_directory)) {
    stop("Could not create output directory: ", output_directory, call. = FALSE)
  }
  # sep = "" keeps the quotes tight around the path
  cat("*Folder:* '", output_directory, "' has been created.\n", sep = "")
}


## Main:

In [None]:
# Pivot one per-sample TSV into the long layout shown at the top of this
# notebook and write it to `output_directory`.
#
# The input must be a tab-separated table with a header and exactly three
# columns, one of which is named `Region_Position`. The two count columns are
# labelled purely by position: after the reorder below, the first remaining
# column becomes "Edited" and the second "Non_Edited". With `Region_Position`
# as the first column this means the file's third column is treated as the
# edited counts and its second column as the non-edited counts.
#
# Arguments:
#   file_path        Path to the per-sample TSV file.
#   output_directory Existing directory that receives `<prefix>_stacked.tsv`.
#
# Returns (invisibly) the path of the file that was written. Stops with an
# error when the table does not have the expected three-column shape.
process_file <- function(file_path, output_directory) {
  # Sample label used for the counts column and the output file name: the file
  # name without extension and with its first underscore removed (the lazy
  # regex matches up to the first "_" and puts back everything before it).
  prefix <- sub("^(.*?)_", "\\1", tools::file_path_sans_ext(basename(file_path)))

  # Read the TSV file
  df <- read.table(file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  # The positional Edited/Non_Edited labelling below is only valid for exactly
  # two count columns; any other shape would silently produce mislabelled or
  # duplicated keys, so reject it up front.
  if (ncol(df) != 3L || !("Region_Position" %in% colnames(df))) {
    stop(
      "Expected exactly 3 columns including 'Region_Position' in: ", file_path,
      call. = FALSE
    )
  }

  # Swap the second and third columns so the pivot emits them in
  # Edited, Non_Edited order
  df <- df %>%
    select(1, 3, 2)

  cat("Head of the input file", prefix, ":\n")
  print(head(df))

  # Stack the two count columns into one row per (region, count type)
  stacked_df <- df %>%
    pivot_longer(
      cols = -Region_Position,
      names_to = "Count_Type",
      values_to = "Counts"
    ) %>%
    mutate(
      # pivot_longer() emits the two count columns row by row, so the labels
      # simply alternate; the original column names are discarded
      Count_Type = rep(c("Edited", "Non_Edited"), length.out = n()),
      # Factor levels in order of first appearance keep the input row order
      # when sorting below
      Region_Position = factor(Region_Position, levels = unique(Region_Position))
    ) %>%
    arrange(Region_Position, Count_Type)

  # Rename the "Counts" column to "<prefix>_Counts"
  colnames(stacked_df)[colnames(stacked_df) == "Counts"] <- paste(prefix, "Counts", sep = "_")

  # Merge region and count type into the single key column and drop the parts
  stacked_df <- stacked_df %>%
    mutate(Region_Position___Count_Type = paste(Region_Position, Count_Type, sep = "___")) %>%
    select(Region_Position___Count_Type, everything()) %>%
    select(-c(Region_Position, Count_Type))

  cat("Head of the stacked data frame final", prefix, ":\n")
  print(head(stacked_df))

  # Write the result to a TSV file in the output directory
  output_file_path <- file.path(output_directory, paste(prefix, "stacked.tsv", sep = "_"))
  write.table(stacked_df, file = output_file_path, sep = "\t", quote = FALSE, row.names = FALSE)

  # Print a success message
  cat("*Table written successfully to:*", output_file_path, "\n")

  # Return the path invisibly so callers can collect it without extra output
  invisible(output_file_path)
}

In [None]:
# Collect the full paths of every file in `directory_path` whose name ends in
# ".tsv"; these are the per-sample tables to pivot.
file_list <- list.files(directory_path, pattern = "\\.tsv$", full.names = TRUE)

# list.files() returns an empty vector both for a missing directory and for a
# directory without TSV files, so flag that case instead of silently producing
# no output later on.
if (length(file_list) == 0) {
  warning("No .tsv files found in: ", directory_path, call. = FALSE)
}

print(file_list)

In [None]:
# Pivot every TSV in turn and write the results to the output directory.
# A plain loop is used because process_file() is called only for its side
# effects (console output and the written file), so there is no result list
# worth collecting or printing.
for (file_path in file_list) {
  process_file(file_path, output_directory)
}


## Session Information:

In [None]:
# Record the R version, platform and loaded packages used for this run so the
# results can be reproduced later
cat("\n\nSession Information:\n\n", sep = "")
print(utils::sessionInfo())