In [None]:
# options(repos = c(CRAN = "https://cloud.r-project.org"))

# install.packages("Matrix")
# install.packages("lme4")
# install.packages("TMB")
# install.packages("glmmTMB")
# install.packages("dplyr")

# FA08

**Input**: `FA08_SM1_t1.txt`, `FA08_SM1_t2.txt`, `FA08_SM2_t1.txt`, `FA08_SM2_t2.txt`, `FA08_SM3_t1.txt`, `FA08_SM3_t2.txt`, `FA08_SM4_t1.txt`, `FA08_SM4_t2.txt`, `FA08_meta_data.txt`

**Output**: `fa08_all_F1_final.csv`, `fa08_all_Parents_final.csv`

In [None]:
suppressPackageStartupMessages({
  library(dplyr)
  library(glmmTMB)
})

# ---------- helpers ----------
# Resolve `fname` to an existing path.
# The name is tried as given first, then under "/mnt/data"; the first
# location that exists is returned. Stops with an error if neither exists.
find_file <- function(fname) {
  candidates <- c(fname, file.path("/mnt/data", fname))
  present <- candidates[file.exists(candidates)]
  if (length(present) == 0) {
    stop("Missing file: ", fname)
  }
  present[1]
}

# Log-odds of `x`: log(x / (1 - x)), applied elementwise.
logit <- function(x) {
  odds <- x / (1 - x)
  log(odds)
}

# Difference of logits of two proportions, scaled by 1 / t:
#   (1 / t) * (logit(f1) - logit(f0))
# Both proportions are first clamped into [1e-9, 1 - 1e-9] so the logit
# stays finite when a proportion is exactly 0 or 1.
fit_est <- function(f1, f0, t) {
  tol <- 1e-9
  clamp <- function(p) pmin(pmax(p, tol), 1 - tol)
  start_logit <- logit(clamp(f0))
  end_logit <- logit(clamp(f1))
  (1 / t) * (end_logit - start_logit)
}

# Pull the well ID out of a single sample file name.
# Strips a leading "Specimen_001_" and a trailing ".fcs", splits what is
# left on "_" and returns the second token (NA if there is none).
# Only the first element of `x` is used.
extract_well_id <- function(x) {
  without_prefix <- gsub("^Specimen_001_", "", x)
  stem <- gsub("\\.fcs$", "", without_prefix)
  tokens <- strsplit(stem, "_")[[1]]
  tokens[2]
}

# ---------- metadata ----------
# Tab-separated metadata table; strings are read as factors and the header
# names are kept verbatim (check.names = FALSE).
meta <- read.table(
  file = find_file("FA08_meta_data.txt"),
  sep = "\t",
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = TRUE
)

# One metadata subset per BLOCK_ID, each labelled with its environment.

# Blocks 1 and 2: every plate is YPD.
B1 <- meta[meta$BLOCK_ID == 1, ]
B1$env <- "YPD"

B2 <- meta[meta$BLOCK_ID == 2, ]
B2$env <- "YPD"

# Block 3: YPD, except plate3/plate4 which are SC_37C.
B3 <- meta[meta$BLOCK_ID == 3, ]
B3$env <- "YPD"
B3[B3$plate_id %in% c("plate3", "plate4"), "env"] <- "SC_37C"

# Block 4: SC_pH7.3, except plate3/plate4 which are SC_02M_NaCl.
B4 <- meta[meta$BLOCK_ID == 4, ]
B4$env <- "SC_pH7.3"
B4[B4$plate_id %in% c("plate3", "plate4"), "env"] <- "SC_02M_NaCl"

# ---------- read plate function ----------
# Read the two time-point files of one plate, pair them up by well and
# compute s_hat for every well.
#
# Arguments:
#   t1, t2  File names of the first / second time point; resolved with
#           find_file() and read as header-less, whitespace-separated tables
#           (columns V1, V2, ...). V1 holds the sample name from which the
#           well ID is extracted.
#   mode    One of "SM1", "SM2", "SM3", "SM4"; selects which columns supply
#           the totals and reference counts:
#             SM1/SM2: tot = V2,      ref = V3 (both time points); wells with
#                      tot_1 or tot_2 below 15000 are dropped.
#             SM3:     t1: tot = V2 - V3, ref = V4;  t2: tot = V2, ref = V3.
#             SM4:     tot = V3,      ref = V4 (both time points).
#
# Returns a data.frame with columns well_id, tot_1, ref_1, tot_2, ref_2 and
# s_hat = fit_est((tot_2 - ref_2) / tot_2, (tot_1 - ref_1) / tot_1, 10).
#
# Stops with an explicit error when `mode` is not one of the four values
# above (previously this surfaced as "object 'm' not found").
read_plate <- function(t1, t2, mode) {
  valid_modes <- c("SM1", "SM2", "SM3", "SM4")
  if (length(mode) != 1 || !mode %in% valid_modes) {
    stop(
      "Unknown mode: ", paste(mode, collapse = ", "),
      "; expected one of ", paste(valid_modes, collapse = ", "),
      call. = FALSE
    )
  }

  t1 <- read.table(find_file(t1), header = FALSE, stringsAsFactors = FALSE)
  t2 <- read.table(find_file(t2), header = FALSE, stringsAsFactors = FALSE)

  # vapply pins the result to an unnamed character vector, including for
  # empty input where sapply would return a list.
  t1$well_id <- vapply(t1$V1, extract_well_id, character(1), USE.NAMES = FALSE)
  t2$well_id <- vapply(t2$V1, extract_well_id, character(1), USE.NAMES = FALSE)

  # Columns are selected by name so an extra column in the input cannot
  # silently shift which column is picked.
  if (mode %in% c("SM1", "SM2")) {
    m <- merge(t1[, c("well_id", "V2", "V3")],
               t2[, c("well_id", "V2", "V3")],
               by = "well_id", sort = FALSE)
  } else if (mode == "SM3") {
    t1$tot_1 <- t1$V2 - t1$V3
    m <- merge(t1[, c("well_id", "tot_1", "V4")],
               t2[, c("well_id", "V2", "V3")],
               by = "well_id", sort = FALSE)
  } else {
    # mode == "SM4"
    m <- merge(t1[, c("well_id", "V3", "V4")],
               t2[, c("well_id", "V3", "V4")],
               by = "well_id", sort = FALSE)
  }
  names(m) <- c("well_id", "tot_1", "ref_1", "tot_2", "ref_2")

  m$s_hat <- fit_est(
    (m$tot_2 - m$ref_2) / m$tot_2,
    (m$tot_1 - m$ref_1) / m$tot_1,
    10
  )

  # Only SM1/SM2 apply the minimum-total filter.
  if (mode %in% c("SM1", "SM2")) {
    min_total <- 15000
    m <- m[!(m$tot_1 < min_total | m$tot_2 < min_total), ]
  }
  m
}

# ---------- read all plates ----------
# One read_plate() call per SM data set (two time-point files each).
SM1 <- read_plate(t1 = "FA08_SM1_t1.txt", t2 = "FA08_SM1_t2.txt", mode = "SM1")
SM2 <- read_plate(t1 = "FA08_SM2_t1.txt", t2 = "FA08_SM2_t2.txt", mode = "SM2")
SM3 <- read_plate(t1 = "FA08_SM3_t1.txt", t2 = "FA08_SM3_t2.txt", mode = "SM3")
SM4 <- read_plate(t1 = "FA08_SM4_t1.txt", t2 = "FA08_SM4_t2.txt", mode = "SM4")

# Attach each block's metadata to its plate measurements by well_id, then
# stack the four blocks into one table.
B <- bind_rows(Map(
  function(block_meta, plate) {
    merge(block_meta, plate, by = "well_id", sort = FALSE)
  },
  list(B1, B2, B3, B4),
  list(SM1, SM2, SM3, SM4)
))

# ---------- cleanup ----------
FA08 <- B

# Lower-case the "_A1.1_" and "_A3_" tags inside the strain IDs
# (literal, non-regex matches).
FA08$AP1_ID <- gsub(
  "_A3_", "_a3_",
  gsub("_A1.1_", "_a1.1_", FA08$AP1_ID, fixed = TRUE),
  fixed = TRUE
)

# Drop the "plate" text from the plate IDs.
FA08$plate_id <- gsub("plate", "", FA08$plate_id, fixed = TRUE)

# Column renames: AP1_ID -> kk_strain_id, SEX -> regime.
names(FA08) <- replace(names(FA08), names(FA08) == "AP1_ID", "kk_strain_id")
names(FA08) <- replace(names(FA08), names(FA08) == "SEX", "regime")

# Relabel the regime levels in their sorted order.
# NOTE(review): this assumes the first sorted SEX value means "asexual" and
# the second "sexual" -- confirm against the metadata coding.
FA08$regime <- factor(FA08$regime)
levels(FA08$regime) <- c("asexual", "sexual")

# Rename two environment levels in place (level order is unchanged).
FA08$env <- factor(FA08$env)
levels(FA08$env) <- replace(
  levels(FA08$env), levels(FA08$env) == "SC_37C", "SC37C"
)
levels(FA08$env) <- replace(
  levels(FA08$env), levels(FA08$env) == "SC_02M_NaCl", "SC_0.2M_NaCl"
)

# Key identifying a plate within a block: "<plate_id>_<BLOCK_ID>".
FA08$plate_block <- paste(FA08$plate_id, FA08$BLOCK_ID, sep = "_")

# ---------- plate effects (YPD parents only) ----------
# Rows used to fit the model: IS_ANC missing, F1 == 0, neither F1_AP nor
# PAR_ID equal to "BLANK", and env == "YPD".
FA08p <- FA08 %>%
  filter(is.na(IS_ANC)) %>%
  filter(F1 == 0) %>%
  filter(F1_AP != "BLANK", PAR_ID != "BLANK") %>%
  filter(env == "YPD")

# Gaussian mixed model: regime as fixed effect, random intercepts for
# strain and for plate_block.
pm <- glmmTMB(
  formula = s_hat ~ regime + (1 | kk_strain_id) + (1 | plate_block),
  data = FA08p,
  family = gaussian
)

# Per-plate_block random-effect estimates, reshaped into a lookup table.
pe <- ranef(pm)[[1]][["plate_block"]]
pe <- data.frame(plate_block = rownames(pe), plate_means = pe[[1]])

# Subtract the plate effect from s_hat; where the difference is missing
# (e.g. a plate_block with no estimate) fall back to the raw s_hat.
FA08b <- merge(x = FA08, y = pe, by = "plate_block", all.x = TRUE)
FA08b$fitness_gain <- FA08b$s_hat - FA08b$plate_means
FA08b$fitness_gain <- ifelse(
  is.na(FA08b$fitness_gain), FA08b$s_hat, FA08b$fitness_gain
)

# ---------- ALL parents ----------
# Per (kk_strain_id, regime, env) summary of the non-F1, non-BLANK rows:
#   fitness_gain  group mean of fitness_gain
#   sd_s          group standard deviation of fitness_gain
#   n_s           number of rows in the group
FA08_parents <- FA08b %>%
  filter(F1 == 0, F1_AP != "BLANK", PAR_ID != "BLANK", !is.na(kk_strain_id)) %>%
  group_by(kk_strain_id, regime, env) %>%
  summarise(
    # summarise() evaluates its arguments in order and later ones see the
    # columns created by earlier ones. sd_s must therefore be computed
    # before fitness_gain is overwritten by its mean; otherwise sd() is
    # taken of a single value and is always NA.
    sd_s = sd(fitness_gain),
    n_s = n(),
    fitness_gain = mean(fitness_gain),
    .groups = "drop"
  ) %>%
  # Restore the original column order of the output table.
  select(kk_strain_id, regime, env, fitness_gain, sd_s, n_s)

# ---------- ALL F1 ----------
# Keep the F1 == 1 rows that are not BLANK and give each one an ID built
# from its strain ID and well_id_96.
FA08_f1 <- FA08b %>%
  filter(F1 == 1) %>%
  filter(F1_AP != "BLANK", PAR_ID != "BLANK") %>%
  mutate(f1_unique_id = paste(kk_strain_id, well_id_96, sep = "_"))

# ---------- env mean-centering ----------
# Mean fitness_gain per environment, pooled over the parent summary rows
# and the individual F1 rows.
env_means <- bind_rows(
  select(FA08_parents, env, fitness_gain),
  select(FA08_f1, env, fitness_gain)
) %>%
  group_by(env) %>%
  summarise(env_means = mean(fitness_gain), .groups = "drop")

# Attach the environment mean to both tables and subtract it.
FA08_parents <- merge(FA08_parents, env_means, by = "env") %>%
  mutate(fitness_gain_adj = fitness_gain - env_means)

FA08_f1 <- merge(FA08_f1, env_means, by = "env") %>%
  mutate(fitness_gain_adj = fitness_gain - env_means)

# ---------- write outputs ----------
# Both tables are written to the working directory without row names.
write.csv(x = FA08_f1, file = "fa08_all_F1_final.csv", row.names = FALSE)
write.csv(
  x = FA08_parents,
  file = "fa08_all_Parents_final.csv",
  row.names = FALSE
)

#message("Wrote: fa08_all_F1_final.csv, fa08_all_Parents_final.csv")


Wrote: fa08_all_F1_final.csv, fa08_all_Parents_final.csv



# FA03-FA08

**Input**: `parsed_clone_fitness_data.csv` (output of `clone_fitness_processing.ipynb`), `fa08_all_Parents_final.csv` (output of the FA08 section above)

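**Output**: `fa03_mean_centered_fitnesses_clones.csv` (intermediate file, written by the first cell and read by the second), `parental_mc_fitnesses_fa03_fa08.csv`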

In [None]:
suppressPackageStartupMessages(library(dplyr))

# load data
CD4 <- read.csv("parsed_clone_fitness_data.csv", stringsAsFactors = FALSE)

# define ancestor flag: 1 when kk_pop_id contains "MJM", otherwise 0
CD4$anc <- 0
CD4$anc[grep("MJM", CD4$kk_pop_id)] <- 1

# remove extreme outliers (|fitness_gain_avg| >= 0.2).
# which() is used so that rows with a missing fitness_gain_avg are dropped;
# a bare logical index would turn each of them into an all-NA row that then
# ends up in the written CSV.
CD4 <- CD4[which(abs(CD4$fitness_gain_avg) < 0.2), ]

# compute environment means
env_means <- CD4 %>%
  group_by(env) %>%
  summarise(env_mean = mean(fitness_gain_avg), .groups = "drop")

# merge and mean-center
CD4b <- merge(CD4, env_means, by = "env")
CD4b$mc_avg <- CD4b$fitness_gain_avg - CD4b$env_mean

# write ancestor-excluded, mean-centered data
write.csv(
  CD4b[CD4b$anc == 0, ],
  file = "fa03_mean_centered_fitnesses_clones.csv",
  row.names = FALSE
)

In [None]:
suppressPackageStartupMessages(library(dplyr))

fa03 <- read.csv(
  file = "fa03_mean_centered_fitnesses_clones.csv",
  stringsAsFactors = FALSE
)
fa08 <- read.csv(file = "fa08_all_Parents_final.csv", stringsAsFactors = FALSE)

# FA03 side (already mean-centered as mc_avg).
# sample_id is used as the strain key for the join below.
fa03_use <- fa03 %>%
  mutate(
    env = as.character(env),
    kk_strain_id = as.character(sample_id),
    regime = as.character(regime)
  )
# Optional filters: applied only when the flag column is present.
if ("ref" %in% names(fa03_use)) {
  fa03_use <- filter(fa03_use, ref == 0)
}
if ("anc" %in% names(fa03_use)) {
  fa03_use <- filter(fa03_use, anc == 0)
}
fa03_use <- fa03_use %>%
  filter(!is.na(env), !is.na(kk_strain_id)) %>%
  select(env, kk_strain_id, regime, mc_avg)

# FA08 side: coerce key columns, then re-center fitness_gain_adj on the
# per-environment mean of the parent rows (missing values ignored).
fa08_use <- fa08 %>%
  mutate(
    env = as.character(env),
    kk_strain_id = as.character(kk_strain_id),
    regime = if ("regime" %in% names(fa08)) {
      as.character(regime)
    } else {
      NA_character_
    },
    fitness_gain_adj = as.numeric(fitness_gain_adj)
  ) %>%
  filter(!is.na(env), !is.na(kk_strain_id)) %>%
  group_by(env) %>%
  mutate(
    fitness_gain_adj = fitness_gain_adj - mean(fitness_gain_adj, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  select(env, kk_strain_id, regime, fitness_gain_adj)

# Keep only strain / regime / env combinations present in both data sets.
out <- fa08_use %>%
  inner_join(fa03_use, by = c("kk_strain_id", "regime", "env")) %>%
  select(env, kk_strain_id, regime, fitness_gain_adj, mc_avg) %>%
  arrange(env, kk_strain_id)

write.csv(x = out, file = "parental_mc_fitnesses_fa03_fa08.csv", row.names = FALSE)

Wrote: parental_mc_fitnesses_fa03_fa08.csv
