In [None]:
# Shared project helpers. NOTE(review): fread_and_bind_files(), used further
# down, is not defined in this notebook and presumably comes from here.
source("../../BrusselSprouts/scripts/functions.R")

# Directory holding the pipeline scripts.
scripts_dir <- "/home/jnrunge/data/trd/mapped_reads/scripts/"

# Wall-clock time at which this notebook run started.
initial_timedate <- Sys.time()

library(tidytable)

In [None]:
# Width and height used for plots rendered in the notebook.
options(
  repr.plot.width = 10,
  repr.plot.height = 5
)

In [None]:
# Similarity threshold; not referenced again in the visible part of the notebook.
selectSimilarity <- 0.7

In [None]:
# Strain table read from CSV; not referenced again in the visible part of the
# notebook.
df_Strains <- fread("../Shiny/data/Victor/operationalTable_Full2543Sace_Clades.csv")

In [None]:
# Read the sample list that accompanies TRD.vcf.gz (one entry per line).
crosses <- readLines("~/data/trd/mapped_reads/TRD.vcf.gz.samples")

# Keep only entries whose name begins with "YJNRC" or "Chris".
crosses <- crosses[grepl("^(YJNRC|Chris)", crosses)]

# Echo the selection in the notebook output.
crosses

In [None]:
#' Check that file `a` exists and was modified more recently than file `b`.
#'
#' @param a Path of the file that must exist and be the newer of the two.
#' @param b Path of the file whose modification time `a` is compared against.
#' @return `TRUE` only when `a` exists and its modification time is strictly
#'   later than that of `b`. `FALSE` in every other case, including when `b`
#'   does not exist (its modification time is then `NA`, so the ordering
#'   cannot be established).
file_a_exists_and_is_newer_than_file_b <- function(a, b) {
  if (!file.exists(a)) {
    return(FALSE)
  }
  # file.mtime() yields NA for a missing file; isTRUE() maps the resulting NA
  # comparison to FALSE instead of letting `if (NA)` abort with an error.
  isTRUE(file.mtime(a) > file.mtime(b))
}

In [None]:
# Main per-cross loop. A cross is processed only when its AF table, TRD regions
# table and allele-sharing table exist and are newer than their respective
# inputs, and at least one pixy output file is present; otherwise it is skipped
# with a message. Every row of the TRD regions table is then handled in turn.
# NOTE(review): the loop variable `c` shares its name with base::c(). Calls such
# as c("a", "b") still work because R ignores non-function bindings when
# resolving a call, but a different name would be less surprising.
for(c in crosses){

    print(c)


    # Define file paths
    TRD_regions_file = paste0("/home/jnrunge/data/TRD/results/shiny/", c, "-TRD_regions.csv.gz")
    AS_file = paste0("/home/jnrunge/data/TRD/results/shiny/", c, "-AF.csv.gz.allelesharing.csv.gz")
    TRD_file = paste0("/home/jnrunge/data/TRD/results/shiny/", c, "-AF.csv.gz")

    # Check if files exist and are updated: the AF table must be newer than the
    # VCF, and both derived tables must be newer than the AF table.
    if (!file_a_exists_and_is_newer_than_file_b(TRD_file, "~/data/trd/mapped_reads/TRD.vcf.gz")) {
        print(paste0(c, " TRD file does not exist or is not newer than TRD.vcf.gz"))
        next
    }

    if (!file_a_exists_and_is_newer_than_file_b(TRD_regions_file, TRD_file)) {
        print(paste0(c, " has no TRD regions or no up-to-date ones"))
        next
    }

    if (!file_a_exists_and_is_newer_than_file_b(AS_file, TRD_file)) {
        print(paste0(c, " has no or no updated allele sharing file"))
        next
    }

    # Get all pixy files of this cross; only used to detect the "none at all" case.
    pixy_files_all = list.files(path = "~/data/trd/mapped_reads", pattern = paste0(c, "_.*pixy.*txt.gz$"), full.names = TRUE)

    if (length(pixy_files_all) == 0) {
        # paste0() keeps the message format in line with the other skip messages
        # (paste() inserted a second space before "has").
        print(paste0(c, " has no pixy files."))
        next
    }

    # Load files
    AS = fread(AS_file)
    TRD = fread(TRD_file)
    TRD_loci = fread(TRD_regions_file)

    # seq_len() gives an empty sequence for a regions table without rows, whereas
    # 1:nrow() would iterate over c(1, 0).
    for(i in seq_len(nrow(TRD_loci))){

        # Select the pixy files of locus i.
        # NOTE(review): the pattern is a regex without a delimiter after `i`, so
        # locus 1 also matches files of loci 10, 11, ...; with ten or more loci the
        # count check below would then skip locus 1. Confirm against the actual
        # file naming before tightening the pattern.
        pixy_files = list.files(path = "~/data/trd/mapped_reads", pattern = paste0(c, "_", i, ".*pixy.*txt.gz$"), full.names = TRUE)

        # Exactly 48 files are required (presumably 16 chromosomes x 3 statistics
        # -- TODO confirm).
        if (length(pixy_files) != (16 * 3)) {
            print(paste0(c, " with TRD ", i, " has no or not enough or not updated pixy files :("))
            next
        }

        # A region spanning two chromosomes is not supported and aborts the run.
        if (TRD_loci$chr_start[i] != TRD_loci$chr_end[i]) {
            stop("chr overlapping TRD")
        }

        # Rows of the AF table that fall inside the region (by global position).
        TRD_subset = filter(TRD, chr == TRD_loci$chr_start[i] & global_pos >= TRD_loci$global_start[i] & global_pos <= TRD_loci$global_end[i])
        # Allele-sharing rows at those positions; not referenced again inside this loop.
        df_AS_filtered = filter(AS, `#CHROM` == TRD_loci$chr_start[i], POS %in% TRD_subset$pos)

        # Read the pixy output, one combined table per statistic.
        # fread_and_bind_files() is not defined in this notebook.
        pixy_pi = fread_and_bind_files(pixy_files[grepl("_pi.txt", pixy_files, fixed = TRUE)])
        setDT(pixy_pi)
        pixy_dxy = fread_and_bind_files(pixy_files[grepl("_dxy.txt", pixy_files, fixed = TRUE)])
        setDT(pixy_dxy)
        pixy_fst = fread_and_bind_files(pixy_files[grepl("_fst.txt", pixy_files, fixed = TRUE)])
        setDT(pixy_fst)

        # Sort by population and attach a factor copy plus one colour per population.
        pixy_pi = arrange(pixy_pi, pop)
        pixy_pi$pop_fct = as.factor(pixy_pi$pop)
        pixy_pi$color = NA
        pixy_pi$color[pixy_pi$pop == "distorter-like"] = colorRampPalette(c("#FFC4CB", "#8B0000"))(1)
        pixy_pi$color[pixy_pi$pop == "other"] = colorRampPalette(c("#ADD8E6", "#00008B"))(1)

        # Generate plot
        options(repr.plot.width = 10, repr.plot.height = 5)

        # Mean pi per population over the windows that lie inside the region or
        # straddle either of its boundaries. na.rm = TRUE matches the Fst/Dxy
        # summary computed later in this loop, so a single window with a missing
        # avg_pi no longer turns the whole region mean into NA.
        pi_summary <- filter(
            pixy_pi,
            chromosome == TRD_loci$chr_start[i],
            (window_pos_1 >= min(TRD_subset$pos) & window_pos_2 <= max(TRD_subset$pos)) |
                (window_pos_1 <= min(TRD_subset$pos) & window_pos_2 >= min(TRD_subset$pos)) |
                (window_pos_1 <= max(TRD_subset$pos) & window_pos_2 >= max(TRD_subset$pos))
        ) %>%
            group_by(pop) %>%
            summarise(mean_pi = mean(avg_pi, na.rm = TRUE))

        # Relate the region mean pi of one population to the distribution of that
        # population's per-window values in `df`.
        #
        # Args:
        #   df:         table with a `pop` column and the column named by `value`.
        #   value:      name (string) of the column holding the per-window statistic.
        #   population: value of `pop` selecting the population of interest.
        #   pi_by_pop:  table with `pop` and `mean_pi` columns; defaults to the
        #               `pi_summary` of the enclosing scope, as before.
        # Returns:
        #   list with
        #     pi_pop               - region mean pi of `population`,
        #     ecdf_value           - empirical CDF of the population's values at pi_pop,
        #     standardized_measure - (pi_pop - mean) / sd of the population's values.
        calculate_measures <- function(df, value, population, pi_by_pop = pi_summary) {

          # Region mean for the requested population; which() drops NA matches.
          pi_pop <- pi_by_pop[["mean_pi"]][which(pi_by_pop[["pop"]] == population)]

          # Extract the population's values once. `[[` always addresses the column
          # named by the string in `value`, even if `df` happens to contain a
          # column that is itself called "value".
          pop_values <- df[[value]][which(df[["pop"]] == population)]

          # Position of the region mean within the empirical distribution.
          ecdf_value <- ecdf(pop_values)(pi_pop)

          # Distance of the region mean from the distribution mean in sd units.
          standardized_measure <- (pi_pop - mean(pop_values, na.rm = TRUE)) /
                                    sd(pop_values, na.rm = TRUE)

          list("pi_pop" = pi_pop,
               "ecdf_value" = ecdf_value,
               "standardized_measure" = standardized_measure)
        }

        # Relative pi measures of both populations, based on the avg_pi column.
        distorter_like_measures <- calculate_measures(pixy_pi, "avg_pi", "distorter-like")
        other_measures <- calculate_measures(pixy_pi, "avg_pi", "other")

        # Histogram of per-window pi (multiplied by 100, log10 x axis), filled by
        # population. The breaks are scaled by 100 to match the x values while the
        # labels show the unscaled numbers. The region means from pi_summary are
        # drawn as thick vertical segments below the baseline (y from -1 to -30).
        p <- ggplot(data = pixy_pi, mapping = aes(x = avg_pi * 100, fill = pop)) +
            geom_histogram(binwidth = 0.05, alpha = 1) +
            scale_x_log10(
                breaks = c(0.0001, 0.001, 0.01) * 100,
                labels = c(0.0001, 0.001, 0.01),
                minor_breaks = seq(from = 0, to = 1 * 100, by = 0.001 * 100)
            ) +
            scale_color_manual(values = c("distorter-like" = "red", "other" = "blue")) +
            scale_fill_manual(values = c("distorter-like" = "red", "other" = "blue")) +
            geom_segment(
                data = pi_summary,
                mapping = aes(x = mean_pi * 100, xend = mean_pi * 100, y = -1, yend = -30, color = pop),
                linewidth = 3
            ) +
            theme_bw(18) +
            xlab("Nucleotide diversity") +
            ylab(NULL)

        # Store the plot object so it can be re-rendered elsewhere.
        saveRDS(p, file = paste0("/home/jnrunge/data/trd/pop_genomics/", c, ".", i, "-pi-plot.RDS"))

        # NOTE(review): these attach calls run inside the loop, after ggplot() has
        # already been used above. Attaching dplyr here can also change which
        # filter()/summarise()/pull() the following code and later iterations
        # resolve to. Consider loading both once at the top of the notebook.
        library(dplyr)
        library(ggplot2)

        # Compare the mean of a pairwise pixy statistic inside the TRD region with
        # the distribution of that statistic over all rows of `df`, and save a
        # histogram of the distribution with the region mean(s) marked in red.
        #
        # Args:
        #   df:         table with columns chromosome, window_pos_1, window_pos_2,
        #               pop1, pop2 and the column named by `value`.
        #   df_name:    unused; kept so existing calls keep working.
        #   TRD_loci:   regions table; chr_start[i] selects the chromosome.
        #   TRD_subset: table whose `pos` range defines the region.
        #   i:          row index into TRD_loci; also part of the output file name.
        #   value:      name (string) of the statistic column.
        #   xlab_text:  x-axis title; also part of the output file name.
        #   x_scale:    optional ggplot2 scale added to the plot.
        # Returns:
        #   list with
        #     sum_value            - region mean of `value` per pop1/pop2 pair,
        #     ecdf_value           - empirical CDF of all values at sum_value,
        #     standardized_measure - (sum_value - mean) / sd of all values.
        # Side effect:
        #   writes the plot as RDS. The path uses the cross name `c` taken from
        #   the enclosing loop rather than from an argument.
        calculate_summarize_plot <- function(df, df_name, TRD_loci, TRD_subset, i, value, xlab_text, x_scale = NULL) {

          # Region bounds, computed once for the filter below.
          region_start <- min(TRD_subset$pos)
          region_end <- max(TRD_subset$pos)

          # Windows inside the region or straddling either of its boundaries.
          in_region <- filter(df, chromosome == TRD_loci$chr_start[i],
                              (window_pos_1 >= region_start & window_pos_2 <= region_end) |
                              (window_pos_1 <= region_start & window_pos_2 >= region_start) |
                              (window_pos_1 <= region_end & window_pos_2 >= region_end))

          # Region mean of the statistic for every population pair.
          sum_value <- summarise(
            group_by(in_region, pop1, pop2),
            mean_val = mean(get(value), na.rm = TRUE)
          ) %>% pull(mean_val)

          # Histogram of all values; aes(.data[[value]]) is the supported
          # replacement for the deprecated aes_string().
          p <- ggplot(df, aes(x = .data[[value]])) +
            geom_histogram() +
            geom_vline(xintercept = sum_value, color = "red") +
            theme_bw(18) + ylab(NULL) + xlab(xlab_text)

          # If x_scale is provided, apply it to the plot
          if (!is.null(x_scale)) {
            p <- p + x_scale
          }

          saveRDS(p, paste0("/home/jnrunge/data/trd/pop_genomics/",c,".",i,"-",xlab_text,"-plot.RDS"))

          # `[[` always addresses the column named by the string in `value`, even
          # if `df` happens to contain a column that is itself called "value".
          all_values <- df[[value]]

          # Position of the region mean(s) within the empirical distribution.
          ecdf_value <- ecdf(all_values)(sum_value)

          # Distance of the region mean(s) from the overall mean in sd units.
          standardized_measure <- (sum_value - mean(all_values, na.rm = TRUE)) /
                                    sd(all_values, na.rm = TRUE)

          list("sum_value" = sum_value,
               "ecdf_value" = ecdf_value,
               "standardized_measure" = standardized_measure)
        }


        # Fst: region mean against the distribution of all windows (linear x axis).
        fst_results <- calculate_summarize_plot(pixy_fst, "pixy_fst", TRD_loci, TRD_subset, i, "avg_wc_fst", "Fst")

        # Dxy: same comparison on a log10 x axis. calculate_summarize_plot() plots
        # the statistic unscaled, so the breaks must be in the same units as their
        # labels. The previous scale was copied from the pi plot, where x is
        # multiplied by 100, and therefore labelled every Dxy tick 100x too small.
        dxy_scale <- scale_x_log10(
            breaks = c(0.0001, 0.001, 0.01),
            labels = c(0.0001, 0.001, 0.01),
            minor_breaks = seq(from = 0, to = 1, by = 0.001)
        )
        dxy_results <- calculate_summarize_plot(pixy_dxy, "pixy_dxy", TRD_loci, TRD_subset, i, "avg_dxy", "Dxy", dxy_scale)

        # Bundle the relative measures of this locus and store them next to the plots.
        results_values <- list(
            "pi_distorter" = distorter_like_measures,
            "pi_other" = other_measures,
            "fst" = fst_results,
            "dxy" = dxy_results
        )

        saveRDS(results_values, paste0("/home/jnrunge/data/trd/pop_genomics/",c,".",i,"-relative-measures.RDS"))

        }
    }


