# Data wrangling to extract LRs for analysis

# Installing and setting up libraries and environment

In [7]:
# install.packages("readr")
# install.packages("data.table")

In [8]:
library(readr)
library(data.table)

# Setup directories

In [9]:
# Folder that is searched for the simulation LR files.    (CHANGE THIS)
input_dir <- "../7_FC"
# Folder that receives the cleaned LR tables.             (CHANGE THIS)
output_dir <- "../8_ROC/7-FC/FC_familias-cleaned-LR"

# Get all *_SimLRs.txt files

In [None]:
# Recursively collect every file under input_dir whose name ends in
# '_SimLRs.txt', keeping the full path to each match.
sim_files <- list.files(
    input_dir,
    pattern = "_SimLRs\\.txt$",
    recursive = TRUE,
    full.names = TRUE
)

# Abort early when nothing matched, so later cells never run on no input.
if (length(sim_files) < 1) {
    stop("No *_SimLRs.txt files found in input_dir.")
}

# Report how many files were found and list their paths.
cat("Found", length(sim_files), "files:\n")
print(sim_files)

Found 7 files:
[1] "../7_FC/0_23aSTR/FC_23aSTR_SimLRs.txt"                
[2] "../7_FC/1_94iiSNP/FC_94iisnp_SimLRs.txt"              
[3] "../7_FC/2_90iiSNP/FC_90iisnp_SimLRs.txt"              
[4] "../7_FC/3_23aSTR-94iiSNP/FC_23astr-94iisnp_SimLRs.txt"
[5] "../7_FC/4_23aSTR-90iiSNP/FC_23astr-90iisnp_SimLRs.txt"
[6] "../7_FC/5_27aSTR-94iiSNP/FC_27astr-94iisnp_SimLRs.txt"
[7] "../7_FC/6_27aSTR/FC_27astr_SimLRs.txt"                


# Create function to clean one file

In [None]:
#' Extract the first two LR columns from one simulation file and save them.
#'
#' Reads `input_file` as a whitespace-delimited table without a header row
#' (after skipping the first `skip` lines), keeps columns 1 and 2, names them
#' `True_FS` and `True_Unrel`, and writes them as a tab-separated file with a
#' header into `output_dir`. The output file name is the input base name with
#' "SimLRs" replaced by "cleanLR". A short confirmation and the first three
#' rows are printed to the console.
#'
#' @param input_file Path to a single simulation LR text file.
#' @param output_dir Directory for the cleaned file; created (recursively)
#'   when it does not exist yet.
#' @param skip Number of leading lines to drop before the data rows start.
#'   Defaults to 5 -- presumably the preamble of the simulation export;
#'   TODO confirm against the file format.
#' @return The path of the written file, invisibly.
clean_simLR_file <- function(input_file, output_dir, skip = 5) {

    if (!file.exists(input_file)) {
        stop("Input file not found: ", input_file, call. = FALSE)
    }

    # write.table() cannot create missing folders, so make sure the target
    # directory exists before any work is done.
    if (!dir.exists(output_dir) &&
        !dir.create(output_dir, recursive = TRUE)) {
        stop("Could not create output directory: ", output_dir, call. = FALSE)
    }

    lr_table <- read.table(
        input_file,
        header = FALSE,
        skip = skip
    )

    # Fail with a clear message instead of an "undefined columns" error.
    if (ncol(lr_table) < 2) {
        stop(
            "Expected at least 2 columns in ", input_file,
            " but found ", ncol(lr_table), ".",
            call. = FALSE
        )
    }

    # Only the first two columns are kept; any further columns are dropped.
    Raw_LR <- lr_table[, 1:2]
    colnames(Raw_LR) <- c("True_FS", "True_Unrel")

    new_name <- sub("SimLRs", "cleanLR", basename(input_file))
    output_file <- file.path(output_dir, new_name)

    write.table(
        Raw_LR,
        file = output_file,
        sep = "\t",
        quote = FALSE,
        row.names = FALSE
    )

    cat("Cleaned LR file saved successfully.\n")
    cat("Filename:", new_name, "\n")
    print(head(Raw_LR, 3))
    cat("\n", "\n")

    invisible(output_file)
}

# Loop to clean all files

In [12]:
# Clean each discovered file in turn, writing the results to output_dir.
for (f in sim_files) {
    clean_simLR_file(input_file = f, output_dir = output_dir)
}

# Report the normalized location of the cleaned files.
cat(
    "Done. Clean files saved to:",
    normalizePath(output_dir),
    "\n"
)

Cleaned LR file saved successfully.
Filename: FC_23aSTR_cleanLR.txt 
    True_FS True_Unrel
1 11.907100  0.3901080
2 74.150700  0.0886055
3  0.594697  0.3451670

 
Cleaned LR file saved successfully.
Filename: FC_94iisnp_cleanLR.txt 
    True_FS True_Unrel
1  0.588026  0.6786350
2  1.134610  0.4869070
3 10.752900  0.0784437

 
Cleaned LR file saved successfully.
Filename: FC_90iisnp_cleanLR.txt 
  True_FS True_Unrel
1 3.81880   0.156673
2 2.97301   0.417133
3 5.45770   0.167268

 
Cleaned LR file saved successfully.
Filename: FC_23astr-94iisnp_cleanLR.txt 
  True_FS True_Unrel
1 8.78568  0.3030440
2 0.42574  0.0216652
3 5.13955  0.1097330

 
Cleaned LR file saved successfully.
Filename: FC_23astr-90iisnp_cleanLR.txt 
   True_FS True_Unrel
1  60.4625  0.0691959
2 475.3650  0.2634500
3  17.1510  4.4419300

 
Cleaned LR file saved successfully.
Filename: FC_27astr-94iisnp_cleanLR.txt 
     True_FS True_Unrel
1   0.272185  0.1255430
2 100.471000  1.9761800
3   3.976740  0.0718924

 
Cleane