In [None]:
# options(repos = c(CRAN = "https://cloud.r-project.org"))

# install.packages("Matrix")
# install.packages("lme4")
# install.packages("TMB")
# install.packages("glmmTMB")
# install.packages("dplyr")

# FA07

**Input**: `FA07_YPD_counts.csv`, `FA07_SC37C_counts.csv`, `FA07_env3_counts.csv`, `FA07_env4_counts.csv`

**Output**: `fa07_F1_final.csv`, `fa07_Parents_final.csv`

In [None]:
suppressPackageStartupMessages({
  library(dplyr)
  library(glmmTMB)
})

# Fitness estimate from two frequencies: the change in log-odds between the
# first (f0) and second (f1) time point, divided by t.
#
# f0, f1: numeric frequencies (vectorised); clamped to [1e-9, 1 - 1e-9] so the
#         log-odds stay finite when a frequency is exactly 0 or 1.
# t:      divisor applied to the log-odds difference.
# Returns a numeric vector; NA/NaN inputs propagate to the result.
fit_est <- function(f0, f1, t) {
  tol <- 1e-9
  clamp <- function(p) pmin(pmax(p, tol), 1 - tol)
  logit <- function(p) log(p / (1 - p))

  freq_start <- clamp(f0)
  freq_end <- clamp(f1)

  (1 / t) * (logit(freq_end) - logit(freq_start))
}

# ---------- Load ----------
# Read every counts file matching the FA07 pattern in the working directory
# and stack them row-wise into one data frame.
FA07_files <- Sys.glob("FA07*_counts.csv")
if (length(FA07_files) == 0) {
  stop("No files matching FA07*_counts.csv found in current directory.")
}
FA07_dat <- lapply(FA07_files, read.csv)
FA07 <- do.call(rbind, FA07_dat)

# ---------- Regime ----------
# Classify each population by the tag embedded in its id. Tags are matched
# literally (fixed = TRUE): read as a regex, the "." in "a1.1_" would match
# any character and could mislabel ids such as "a1x1_". Ids carrying neither
# tag keep a missing regime.
FA07$regime <- NA_character_
FA07$regime[grepl("a3_", FA07$kk_pop_id, fixed = TRUE)] <- "sexual"
FA07$regime[grepl("a1.1_", FA07$kk_pop_id, fixed = TRUE)] <- "asexual"

# ---------- Plate label ----------
# Plate-by-environment key with the "plate" prefix removed,
# e.g. plate_id "plate4" in env "YPD" becomes "4_YPD".
FA07$plate <- gsub("plate", "", paste0(FA07$plate_id, "_", FA07$env))

# ---------- s_hat ----------
# Default for every row: follow the non-test fraction of cells,
# (cells - test) / cells, from time point 1 to time point 2 with t = 10.
FA07$s_hat <- fit_est(
  f0 = (FA07$cells_t1 - FA07$test_t1) / FA07$cells_t1,
  f1 = (FA07$cells_t2 - FA07$test_t2) / FA07$cells_t2,
  t  = 10
)

# YPD rows are then overwritten using the test fraction itself, test / cells.
# NOTE(review): presumably the marker orientation is reversed in YPD -- confirm
# against the experimental design.
FA07_ypd <- FA07[FA07$env == "YPD", ]
FA07$s_hat[FA07$env == "YPD"] <- fit_est(
  f0 = FA07_ypd$test_t1 / FA07_ypd$cells_t1,
  f1 = FA07_ypd$test_t2 / FA07_ypd$cells_t2,
  t  = 10
)

# ---------- QC flag (kept, but NOT used for filtering) ----------
# exclude = 1 for wells with fewer than 1000 cells at either time point,
# 0 otherwise (including wells whose counts are missing).
low_count <- FA07$cells_t1 < 1000 | FA07$cells_t2 < 1000
FA07$exclude <- 0
FA07$exclude[low_count] <- 1

# ---------- Parent flags ----------
# Blank wells and any strain whose id contains "MAT" are never parents.
not_parent <- FA07$kk_strain_id == "BLANK" | grepl("MAT", FA07$kk_strain_id)
FA07$parent[not_parent] <- 0

# ---------- Keep only columns used downstream ----------
FA07b <- FA07[c(
  "env", "plate", "kk_strain_id", "kk_pop_id", "kk_well_id", "well_id_96",
  "plate_id", "F1", "parent", "anc", "regime", "s_hat", "exclude"
)]

# ---------- Env relabeling ----------
# Give the two coded environments descriptive level names; other levels are
# left untouched.
FA07b$env <- as.factor(FA07b$env)
levels(FA07b$env) <- replace(
  levels(FA07b$env), levels(FA07b$env) == "env4", "SC_pH7.3"
)
levels(FA07b$env) <- replace(
  levels(FA07b$env), levels(FA07b$env) == "env3", "SC_0.2M_NaCl"
)

# Drop outlier SC37C parent wells
# Two specific non-F1 wells on plate4 in SC37C are removed; each is pinned by
# strain id and 96-well position so nothing else can match.
FA07b <- dplyr::filter(
  FA07b,
  !(F1 == 0 & env == "SC37C" & plate_id == "plate4" &
      kk_strain_id == "B04_a1.1_A02_1" & well_id_96 == "G11"),
  !(F1 == 0 & env == "SC37C" & plate_id == "plate4" &
      kk_strain_id == "G03_a1.1_B12_1" & well_id_96 == "F12")
)

# ---------- Parents subset for plate-effect model ----------
# Non-F1 wells that are not blanks, have s_hat above -0.25 (rows with a
# missing s_hat are dropped by filter as well), and are not MAT controls
# (strain id starting with "MAT", case-insensitive).
FA07p <- FA07b %>%
  dplyr::filter(
    F1 == 0,
    kk_strain_id != "BLANK",
    s_hat > -0.25,
    !grepl("^MAT", kk_strain_id, ignore.case = TRUE)
  )

# ---------- Plate-effect model ----------
# Gaussian mixed model fitted to the parent wells: fixed regime-by-environment
# effects, random intercepts for strain and for plate, and a residual
# dispersion that is allowed to differ between environments.
FA07_pm1 <- glmmTMB(
  s_hat ~ regime * env + (1 | kk_strain_id) + (1 | plate),
  dispformula = ~ env,
  data = FA07p,
  family = gaussian
)

# Random intercepts of the conditional model, one row per plate label.
plate_ranef <- ranef(FA07_pm1)$cond$plate
plate_effect <- data.frame(
  plate = row.names(plate_ranef),
  plate_means = plate_ranef[, 1]
)

# The merges below are inner joins, so every well on a plate without an
# estimated effect (no parent well from that plate entered the model) is
# dropped. Report such plates instead of letting the rows vanish silently.
plates_without_effect <- setdiff(unique(FA07b$plate), plate_effect$plate)
if (length(plates_without_effect) > 0) {
  warning(
    "No plate effect estimated for plate(s): ",
    paste(plates_without_effect, collapse = ", "),
    "; wells on these plates are dropped.",
    call. = FALSE
  )
}

# ---------- Apply plate effects ----------
# fitness_gain is s_hat with the plate's random intercept subtracted.
FA07c <- merge(FA07b, plate_effect, by = "plate", sort = FALSE)
FA07c$fitness_gain <- FA07c$s_hat - FA07c$plate_means

FA07p2 <- merge(FA07p, plate_effect, by = "plate", sort = FALSE)
FA07p2$fitness_gain <- FA07p2$s_hat - FA07p2$plate_means

# ---------- Parent summaries ----------
# Mean, standard deviation and replicate count of the plate-corrected fitness
# for each parent strain within a regime and environment.
FA07p3 <- FA07p2 %>%
  dplyr::group_by(regime, kk_strain_id, env) %>%
  dplyr::summarise(
    mu_s = mean(fitness_gain),
    sd_s = sd(fitness_gain),
    n_s  = dplyr::n(),
    .groups = "drop"
  )

# ---------- Env means (same structure as FA07_parsing.R) ----------
# F1 wells (blanks removed) enter the environment mean individually; parents
# enter once per strain through their per-strain mean.
FA07_f1s <- dplyr::filter(FA07c, F1 == 1, kk_strain_id != "BLANK")

for_means <- dplyr::bind_rows(
  FA07_f1s[, c("env", "fitness_gain", "kk_strain_id")],
  FA07p3[, c("env", "mu_s", "kk_strain_id")] %>%
    dplyr::rename(fitness_gain = mu_s)
)

# F1 wells are never filtered on s_hat, so a well with an undefined estimate
# (e.g. a zero cell count giving 0/0) can reach this point. Without
# na.rm = TRUE one such well would turn the whole environment mean, and with
# it every adjusted value in that environment, into NaN.
env_means <- for_means %>%
  dplyr::group_by(env) %>%
  dplyr::summarise(
    env_means = mean(fitness_gain, na.rm = TRUE),
    .groups = "drop"
  )

FA07_f1sb <- merge(FA07_f1s, env_means, by = "env")
FA07p4    <- merge(FA07p3,  env_means, by = "env")

# Centre each value on its environment mean.
FA07_f1sb$fitness_gain_adj <- FA07_f1sb$fitness_gain - FA07_f1sb$env_means
FA07p4$fitness_gain_adj    <- FA07p4$mu_s - FA07p4$env_means

# ---------- IDs / final filters ----------
# Remove the "plate" prefix, then build a per-well id of the form
# <strain id>_<plate id>_<96-well position>.
FA07_f1sb$plate_id <- gsub("plate", "", FA07_f1sb$plate_id)
FA07_f1sb$f1_unique_id <- paste(
  FA07_f1sb$kk_strain_id,
  FA07_f1sb$plate_id,
  FA07_f1sb$well_id_96,
  sep = "_"
)

# Only rows with anc == 0 are written to the F1 output.
FA07_f1sb <- FA07_f1sb %>%
  dplyr::filter(anc == 0)

# Ensure final Parents output has no MAT controls (should already be true from FA07p filtering)
FA07p4 <- FA07p4 %>%
  dplyr::filter(!grepl("^MAT", kk_strain_id, ignore.case = TRUE))

# ---------- Write outputs ----------
write.csv(FA07_f1sb, "fa07_F1_final.csv", row.names = FALSE)
write.csv(FA07p4, "fa07_Parents_final.csv", row.names = FALSE)

#message("Wrote: fa07_F1_final.csv and fa07_Parents_final.csv")