In [None]:
# Per-gene weighted regression of log(effect_size) on prob_0 and
# am_pathogenicity, followed by Fisher's method to combine the per-gene
# p-values of each predictor into one p-value per predictor.

# Combine p-values with Fisher's method.
#
# The statistic -2 * sum(log(p)) is compared against a chi-squared
# distribution with 2 * k degrees of freedom, where k is the number of
# non-missing p-values. The method assumes the p-values are independent.
# (stats::fisher.test() is Fisher's exact test for contingency tables and
# is not the right tool here.)
#
# p_values: numeric vector of p-values; NA/NaN entries are ignored.
# Returns the combined p-value, or NA_real_ when nothing is left to combine.
combine_p_values_fisher <- function(p_values) {
  p_values <- p_values[!is.na(p_values)]
  if (length(p_values) == 0) {
    return(NA_real_)
  }
  statistic <- -2 * sum(log(p_values))
  pchisq(statistic, df = 2 * length(p_values), lower.tail = FALSE)
}

# Fit the weighted regression for one gene and extract predictor p-values.
#
# gene_data: data frame holding the rows of a single gene, with columns
#   effect_size, var_effect_size, prob_0 and am_pathogenicity.
# Returns a named numeric vector c(prob_0 = , am_pathogenicity = ); an
# entry is NA when the model cannot provide a p-value for that predictor.
fit_gene_p_values <- function(gene_data) {
  predictors <- c("prob_0", "am_pathogenicity")
  p_values <- setNames(rep(NA_real_, length(predictors)), predictors)

  # log() needs a positive response and the weights need a positive,
  # finite variance; rows failing either cannot enter the fit.
  usable <- is.finite(gene_data$effect_size) &
    gene_data$effect_size > 0 &
    is.finite(gene_data$var_effect_size) &
    gene_data$var_effect_size > 0 &
    complete.cases(gene_data[predictors])
  gene_data <- gene_data[usable, , drop = FALSE]

  # Intercept + two slopes: at least one residual degree of freedom is
  # required for the t-tests to be defined.
  if (nrow(gene_data) < length(predictors) + 2) {
    return(p_values)
  }

  # lm() expects weights inversely proportional to the variances, i.e.
  # 1 / variance rather than 1 / standard deviation.
  # NOTE(review): assumes var_effect_size is the sampling variance on the
  # scale of the modelled response -- TODO confirm.
  model <- lm(
    log(effect_size) ~ prob_0 + am_pathogenicity,
    data = gene_data,
    weights = 1 / var_effect_size
  )

  # Aliased (non-estimable) predictors are absent from the coefficient
  # table, so only look up the rows that actually exist.
  coefficient_table <- summary(model)$coefficients
  estimated <- intersect(predictors, rownames(coefficient_table))
  p_values[estimated] <- coefficient_table[estimated, "Pr(>|t|)"]
  p_values
}

# Read the data from the file
data <- read.table(
  "constraint_am_epi25_chr2_results.tsv.gz",
  header = TRUE,
  sep = "\t"
)

# Get unique values in the 'gene_id' column
unique_genes <- unique(data$gene_id)

# Preallocate one p-value slot per gene (NA marks a gene without a usable
# fit), keeping both vectors aligned with unique_genes.
p_values_prob_0 <- rep(NA_real_, length(unique_genes))
p_values_am_pathogenicity <- rep(NA_real_, length(unique_genes))

# Loop over each gene and build a meta-regression model
for (gene_index in seq_along(unique_genes)) {
  current_gene <- unique_genes[gene_index]

  # which() drops NA comparisons, so rows with a missing gene_id are never
  # selected.
  gene_rows <- which(data$gene_id == current_gene)
  gene_data <- data[gene_rows, , drop = FALSE]

  gene_p_values <- fit_gene_p_values(gene_data)
  p_values_prob_0[gene_index] <- gene_p_values[["prob_0"]]
  p_values_am_pathogenicity[gene_index] <-
    gene_p_values[["am_pathogenicity"]]
}

# Report once how many genes contribute nothing to the combined p-values.
genes_without_p_value <- sum(
  is.na(p_values_prob_0) & is.na(p_values_am_pathogenicity)
)
if (genes_without_p_value > 0) {
  message(
    genes_without_p_value, " of ", length(unique_genes),
    " genes had no estimable p-value and were left out of the combination."
  )
}

# Combine p-values using Fisher's method
fisher_combined_p_prob_0 <- combine_p_values_fisher(p_values_prob_0)
fisher_combined_p_am_pathogenicity <-
  combine_p_values_fisher(p_values_am_pathogenicity)

# Print combined p-values
cat("Combined p-value for prob_0:", fisher_combined_p_prob_0, "\n")
cat("Combined p-value for am_pathogenicity:", fisher_combined_p_am_pathogenicity, "\n")
