# Installing and setting up libraries and environment

In [None]:
# install.packages("readr")
# install.packages("data.table")

In [None]:
# library(readr)
# library(data.table)

# Setup directories

In [44]:
# Folder that is searched for the raw "*_SimLRs.txt" simulation files.
input_dir <- "SimLRs/TRIO/familias-raw-LR"
# Folder that receives the cleaned "*_cleanLR.txt" files.
output_dir <- "SimLRs/TRIO/familias-cleaned-LR"

# Get all *_SimLRs.txt files

In [45]:
# Collect the full path of every raw simulation file found in input_dir.
sim_files <- list.files(
    input_dir,
    full.names = TRUE,
    pattern = "_SimLRs\\.txt$"
)

# Nothing to clean: abort before any later cell runs on an empty list.
if (identical(length(sim_files), 0L)) {
    stop("No *_SimLRs.txt files found in input_dir.")
}

# Report what was discovered.
cat("Found", length(sim_files), "files:\n")
print(sim_files)

Found 7 files:
[1] "SimLRs/TRIO/familias-raw-LR/Trio_23aSTR_SimLRs.txt"        
[2] "SimLRs/TRIO/familias-raw-LR/Trio_23astr-90iisnp_SimLRs.txt"
[3] "SimLRs/TRIO/familias-raw-LR/Trio_23astr-94iisnp_SimLRs.txt"
[4] "SimLRs/TRIO/familias-raw-LR/Trio_27aSTR_SimLRs.txt"        
[5] "SimLRs/TRIO/familias-raw-LR/Trio_27astr-94iisnp_SimLRs.txt"
[6] "SimLRs/TRIO/familias-raw-LR/Trio_90iisnp_SimLRs.txt"       
[7] "SimLRs/TRIO/familias-raw-LR/Trio_94iisnp_SimLRs.txt"       


# Create function to clean one file

In [46]:
#' Extract the first two LR columns from one raw simulation file.
#'
#' Reads `input_file` as a whitespace-delimited table while skipping its first
#' five lines, keeps columns 1 and 2, labels them "True_FS" and "True_Unrel",
#' and writes them as a tab-separated file into `output_dir`. The output file
#' name is the input base name with "SimLRs" replaced by "cleanLR". A short
#' confirmation and the first three rows are printed to the console.
#'
#' @param input_file Path to one raw `*_SimLRs.txt` file.
#' @param output_dir Directory for the cleaned file; created if it is missing.
#' @return Invisibly, the path of the file that was written.
clean_simLR_file <- function(input_file, output_dir) {
    if (!file.exists(input_file)) {
        stop("Input file not found: ", input_file, call. = FALSE)
    }

    # write.table() cannot create missing directories, so make sure the
    # destination exists before any work is done.
    if (!dir.exists(output_dir) &&
        !dir.create(output_dir, recursive = TRUE)) {
        stop("Could not create output directory: ", output_dir, call. = FALSE)
    }

    # Read the file starting at row 6 (the first five lines are skipped).
    lr_table <- read.table(
        input_file,
        header = FALSE,
        skip = 5
    )

    if (ncol(lr_table) < 2) {
        stop(
            "Expected at least 2 columns in ", input_file,
            " but found ", ncol(lr_table), ".",
            call. = FALSE
        )
    }

    # Extract only the first two columns (the two LR series that are kept).
    Raw_LR <- lr_table[, 1:2]

    # Label columns.
    # NOTE(review): the labels say "FS" although the files processed in this
    # notebook are Trio_* files - confirm the labels are the intended ones.
    colnames(Raw_LR) <- c("True_FS", "True_Unrel")

    # Build the new file name, e.g. "X_SimLRs.txt" -> "X_cleanLR.txt".
    new_name <- sub("SimLRs", "cleanLR", basename(input_file), fixed = TRUE)

    # Save file.
    output_file <- file.path(output_dir, new_name)
    write.table(
        Raw_LR,
        file = output_file,
        sep = "\t",
        quote = FALSE,
        row.names = FALSE
    )

    cat("Cleaned LR file saved successfully.\n")
    cat("Filename:", new_name, "\n")
    print(head(Raw_LR, 3))
    cat("\n", "\n")

    invisible(output_file)
}

# Loop to clean all files

In [47]:
# Run the cleaner once per discovered file; only its console output and the
# files it writes matter, so the collected return values are discarded.
invisible(lapply(sim_files, clean_simLR_file, output_dir = output_dir))

# Final confirmation with the resolved output location.
cat("Done. Clean files saved to:", normalizePath(output_dir), "\n")

Cleaned LR file saved successfully.
Filename: Trio_23aSTR_cleanLR.txt 
      True_FS  True_Unrel
1 4.15132e+11 1.02927e-39
2 3.73301e+09 6.76348e-40
3 2.83703e+09 5.35803e-33

 
Cleaned LR file saved successfully.
Filename: Trio_23astr-90iisnp_cleanLR.txt 
      True_FS   True_Unrel
1 4.25022e+20 4.60869e-126
2 2.15481e+18 1.86885e-114
3 2.58358e+17 3.14394e-135

 
Cleaned LR file saved successfully.
Filename: Trio_23astr-94iisnp_cleanLR.txt 
      True_FS   True_Unrel
1 5.11696e+21 4.27045e-166
2 1.00687e+26 2.15142e-156
3 2.95272e+22 1.36766e-116

 
Cleaned LR file saved successfully.
Filename: Trio_27aSTR_cleanLR.txt 
      True_FS  True_Unrel
1 4.23261e+13 1.82599e-52
2 1.42979e+12 1.69563e-51
3 6.33532e+11 3.45232e-54

 
Cleaned LR file saved successfully.
Filename: Trio_27astr-94iisnp_cleanLR.txt 
      True_FS   True_Unrel
1 1.92967e+23 7.31262e-205
2 2.36302e+23 1.87028e-189
3 1.20916e+23 1.15641e-117

 
Cleaned LR file saved successfully.
Filename: Trio_90iisnp_cleanLR.txt 
  