# Batch Process CSV Files for sRg Calculation

This notebook processes multiple CSV files in a specified directory, calculating sRg and classification values for trajectory data.

In [16]:
# Load required libraries
library(tools)

# Source the file containing trajectory feature calculation functions
source('featuresCalcsNew.R')

In [18]:
# Set parameters (globals used inside process_csv_file)
cutoffLen <- 3  # minimum trajectory length: tracks with fewer rows get no sRg
cutoff <- 2.22236433588659  # immobile/mobile threshold: sRg < cutoff -> 'immobile', otherwise 'mobile'

In [20]:
# the csv file is assumed to have the following columns:
#,track_number,frame,x,y,intensity,id,x [nm],y [nm]
# the header is assumed to be present
# NOTE (GD): track_number and frame must both use R-style numbering (starting from 1), and track numbers must be sequential with no gaps; otherwise the results will be wrong.

In [22]:
# Function to process a single CSV file
#
# Reads a track table, computes sRg for every track that has at least
# `min_len` rows, labels each of those tracks 'immobile' (sRg < threshold) or
# 'mobile', and writes the input rows plus two new columns (`sRg`,
# `classification`) to "<input name without extension>_sRg.csv".
#
# Arguments:
#   file_path  path to a CSV file with a header row that contains at least
#              the columns track_number, frame, x and y
#   min_len    minimum number of rows a track needs to be analysed
#              (defaults to the global `cutoffLen`)
#   threshold  sRg value separating 'immobile' from 'mobile'
#              (defaults to the global `cutoff`)
#   srg_fun    function that receives a 2-column matrix (x, y; one row per
#              frame, NA rows for frames missing from the track) and returns
#              a single number (defaults to `getsRg`, presumably defined in
#              featuresCalcsNew.R)
#
# Returns the path of the written file, invisibly. Rows belonging to tracks
# shorter than `min_len` carry NA in both new columns.
process_csv_file <- function(file_path, min_len = cutoffLen,
                             threshold = cutoff, srg_fun = getsRg) {
  # Read the CSV file
  df <- read.csv(file_path, header = TRUE)

  # One (frame, x, y) data frame per track, named by its track_number
  tracks <- split(df[, c("frame", "x", "y")], df$track_number)

  # Select tracks above the length cutoff
  track_lens <- vapply(tracks, nrow, integer(1))
  keep <- which(track_lens >= min_len)
  n_keep <- length(keep)
  cat(paste("Processing", file_path, "-", n_keep,
            "tracks extracted of length >=", min_len, "\n"))

  # `keep` holds positions in `tracks`, not track ids. Look the real ids up
  # through the list names so that numbering with gaps, or numbering that
  # does not start at 1, is still merged back onto the right rows.
  keep_ids <- df$track_number[
    match(names(tracks)[keep], as.character(df$track_number))
  ]

  # Compute sRg for selected tracks
  srg_vals <- vapply(tracks[keep], function(trk) {
    # Sort by frame so the first row really is the earliest frame
    trk <- trk[order(trk$frame), ]
    # Row of the frame-indexed matrix that each observation belongs to
    row_idx <- trk$frame - trk$frame[1] + 1
    # Frames absent from the track stay NA
    tmat <- matrix(NA_real_, nrow = max(row_idx), ncol = 2)
    tmat[row_idx, 1] <- trk$x
    tmat[row_idx, 2] <- trk$y
    srg_fun(tmat)
  }, numeric(1), USE.NAMES = FALSE)

  # Threshold classification (as.character keeps the column type stable even
  # when no track qualifies)
  track_class <- as.character(
    ifelse(srg_vals < threshold, "immobile", "mobile")
  )

  # One row per analysed track: id, sRg value and classification
  srg_data <- data.frame(track_number = keep_ids, sRg = srg_vals,
                         classification = track_class)

  # Attach the per-track values to every original row; all.x keeps the rows
  # of tracks that were too short (their new columns are NA)
  merged_df <- merge(df, srg_data, by = "track_number", all.x = TRUE)

  # merge() reorders rows, so sort by track and frame
  merged_df <- merged_df[order(merged_df$track_number, merged_df$frame), ]

  # Create the new filename with '_sRg' tag
  new_filename <- paste0(tools::file_path_sans_ext(file_path), "_sRg.csv")

  # Save the new CSV file
  write.csv(merged_df, file = new_filename, row.names = FALSE)

  cat(paste("New file saved as:", new_filename, "\n"))
  invisible(new_filename)
}

In [24]:
# Function to process all CSV files in a directory
#
# Calls process_csv_file() on every file in `directory_path` whose name ends
# in ".csv". Files ending in "_sRg.csv" are skipped: that is the name
# process_csv_file() gives its own output, so without the filter a second run
# over the same directory would reprocess earlier results.
#
# An error in one file is reported with cat() and does not stop the batch.
#
# Arguments:
#   directory_path  directory to scan (not recursive)
#
# Returns the character vector of input files that were attempted, invisibly.
process_directory <- function(directory_path) {
  # List all CSV files in the directory
  csv_files <- list.files(directory_path, pattern = "\\.csv$",
                          full.names = TRUE)

  # Drop outputs left behind by a previous run
  is_output <- grepl("_sRg\\.csv$", csv_files)
  if (any(is_output)) {
    cat(paste("Skipping", sum(is_output),
              "previously generated _sRg.csv file(s)\n"))
  }
  csv_files <- csv_files[!is_output]

  # Process each CSV file
  for (file in csv_files) {
    tryCatch(
      process_csv_file(file),
      error = function(e) {
        cat(paste("Error processing file:", file, "\n"))
        cat(paste("Error message:", conditionMessage(e), "\n"))
      }
    )
  }

  invisible(csv_files)
}

In [32]:
# Main execution
# Directory holding the CSV files to analyse (hard-coded; edit before running)
directory_path <- "/Users/george/Desktop/tdt_analysis/for_Alan/differntRecordingLengths/2s/for_sRg"

# Abort early when that directory is missing
if (!dir.exists(directory_path)) {
  stop("The specified directory does not exist.")
}

# Run the batch over every CSV file found there
process_directory(directory_path)

cat("Batch processing complete.\n")

Processing /Users/george/Desktop/tdt_analysis/for_Alan/differntRecordingLengths/2s/for_sRg/AL_55_2020-06-29-TIRFM_Diff_tdt-MEFs_C_2_MMStack_Pos0_crop20_locsID_tracksRG_SVMPredicted_NN_forR.csv - 5167 tracks extracted of length >= 3 
New file saved as: /Users/george/Desktop/tdt_analysis/for_Alan/differntRecordingLengths/2s/for_sRg/AL_55_2020-06-29-TIRFM_Diff_tdt-MEFs_C_2_MMStack_Pos0_crop20_locsID_tracksRG_SVMPredicted_NN_forR_sRg.csv 
Processing /Users/george/Desktop/tdt_analysis/for_Alan/differntRecordingLengths/2s/for_sRg/AL_56_2020-07-01-TIRFM_Diff_tdt-MEFs_A_4_MMStack_Pos0_crop20_locsID_tracksRG_SVMPredicted_NN_forR.csv - 4577 tracks extracted of length >= 3 
New file saved as: /Users/george/Desktop/tdt_analysis/for_Alan/differntRecordingLengths/2s/for_sRg/AL_56_2020-07-01-TIRFM_Diff_tdt-MEFs_A_4_MMStack_Pos0_crop20_locsID_tracksRG_SVMPredicted_NN_forR_sRg.csv 
Processing /Users/george/Desktop/tdt_analysis/for_Alan/differntRecordingLengths/2s/for_sRg/AL_66_2020-07-28-TIRFM_Diff_tdt