# Compare HSP algorithms

In [None]:
library(ape)
library(castor)
library(dplyr)
library(ggtree)
library(ggplot2)
library(stringr)

In [None]:
# Reference phylogeny, parsed by ape::read.tree.
tree_path <- "../../data/ref_bac2feature/phyogeny/phylogeny.tre"
tree <- ape::read.tree(file = tree_path)

# Trait table: tab-separated with a header row. Quote and comment handling are
# switched off so that quote characters and "#" inside fields are read
# literally.
trait_path <- "../../data/ref_bac2feature/trait_bac2feature.tsv"
trait <- read.table(
  file = trait_path,
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = ""
)

# Key the rows by taxonomy ID (stored as text) so rows can be selected by name.
trait$species_tax_id <- as.character(trait$species_tax_id)
rownames(trait) <- trait$species_tax_id

In [None]:
tip_label <- tree$tip.label
Ntips <- length(tree$tip.label)

# Hold-out design: shuffle the tips once (fixed seed for reproducibility) and
# cut the shuffled vector into 10 consecutive, non-overlapping folds.
# Note that the fold size is derived from the number of internal nodes
# (tree$Nnode), not from the number of tips.
set.seed(10)
test_frac <- 0.02
test_Nnode <- as.integer(tree$Nnode * test_frac)
shuffled_tips <- sample(x = tree$tip.label)
# seq_len() keeps a fold empty when test_Nnode is 0 (a `from:to` range would
# count backwards as 1:0 in that case).
l_test_nodes <- lapply(seq_len(10), function(fold) {
  shuffled_tips[test_Nnode * (fold - 1L) + seq_len(test_Nnode)]
})

# Tip-to-tip phylogenetic distances. Restricting the computation to the tips
# (clade indices 1..Ntips) avoids building the full
# (Ntips + Nnodes) x (Ntips + Nnodes) matrix only to discard the node rows and
# columns afterwards.
distance_matrix <- castor::get_all_pairwise_distances(
  tree = tree,
  only_clades = seq_len(Ntips)
)
colnames(distance_matrix) <- tree$tip.label
rownames(distance_matrix) <- tree$tip.label

## Continuous traits
- castor::hsp_independent_contrasts(weighted=TRUE)
- castor::hsp_squared_change_parsimony(weighted=TRUE)
- castor::hsp_subtree_averaging

In [None]:
# Benchmark: hidden state prediction with phylogenetic independent contrasts.
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# numeric traits are predicted, and the predictions for the held-out tips are
# collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
n_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA

    # Independent contrasts on every numeric column, rows in tip order.
    # drop = FALSE keeps a data frame even if only one numeric column exists.
    n_ref_trait <- Filter(is.numeric, ref_trait)
    n_predict_out <- lapply(
      n_ref_trait[tree$tip.label, , drop = FALSE],
      castor::hsp_independent_contrasts,
      tree = tree,
      weighted = TRUE,
      check_input = TRUE
    )

    # Keep only the predictions for the held-out tips.
    n_hsp_res <- data.frame(
      lapply(n_predict_out, function(x) x$states[test_nodes_index])
    )
    n_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    n_hsp_res$iteration <- i
    n_hsp_res$threshold <- threshold

    k <- k + 1L
    n_hsp_res_list[[k]] <- n_hsp_res
  }
}
n_hsp_res_all <- do.call(rbind, n_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_pic.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = n_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Benchmark: hidden state prediction with weighted squared-change parsimony.
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# numeric traits are predicted, and the predictions for the held-out tips are
# collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
n_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA

    # Squared-change parsimony (weighted = TRUE) on every numeric column,
    # rows in tip order. drop = FALSE keeps a data frame even if only one
    # numeric column exists.
    n_ref_trait <- Filter(is.numeric, ref_trait)
    n_predict_out <- lapply(
      n_ref_trait[tree$tip.label, , drop = FALSE],
      castor::hsp_squared_change_parsimony,
      tree = tree,
      weighted = TRUE,
      check_input = TRUE
    )

    # Keep only the predictions for the held-out tips.
    n_hsp_res <- data.frame(
      lapply(n_predict_out, function(x) x$states[test_nodes_index])
    )
    n_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    n_hsp_res$iteration <- i
    n_hsp_res$threshold <- threshold

    k <- k + 1L
    n_hsp_res_list[[k]] <- n_hsp_res
  }
}
n_hsp_res_all <- do.call(rbind, n_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_wscp.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = n_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Benchmark: hidden state prediction with subtree averaging.
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# numeric traits are predicted, and the predictions for the held-out tips are
# collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
n_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    # Set the traits of the masked tips to NA.
    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA

    # Subtree averaging on every numeric column, rows in tip order.
    # drop = FALSE keeps a data frame even if only one numeric column exists.
    n_ref_trait <- Filter(is.numeric, ref_trait)
    n_predict_out <- lapply(
      n_ref_trait[tree$tip.label, , drop = FALSE],
      castor::hsp_subtree_averaging,
      tree = tree,
      check_input = TRUE
    )

    # Keep only the predictions for the held-out tips.
    n_hsp_res <- data.frame(
      lapply(n_predict_out, function(x) x$states[test_nodes_index])
    )
    n_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    n_hsp_res$iteration <- i
    n_hsp_res$threshold <- threshold

    k <- k + 1L
    n_hsp_res_list[[k]] <- n_hsp_res
  }
}
n_hsp_res_all <- do.call(rbind, n_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_sa.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = n_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

## Categorical traits
- castor::hsp_empirical_probabilities
- castor::hsp_max_parsimony
- castor::hsp_mk_model

In [None]:
# Trait names; presumably the categorical trait columns of the trait table
# (TODO confirm). Only referenced by the commented-out transition-matrix
# export in the Mk-model cells.
ct <- c(
  "gram_stain",
  "sporulation",
  "motility",
  "range_salinity",
  "facultative_respiration",
  "anaerobic_respiration",
  "aerobic_respiration",
  "mesophilic_range_tmp",
  "thermophilic_range_tmp",
  "psychrophilic_range_tmp",
  "bacillus_cell_shape",
  "coccus_cell_shape",
  "filament_cell_shape",
  "coccobacillus_cell_shape",
  "vibrio_cell_shape",
  "spiral_cell_shape"
)

# Extract the state-likelihood rows of the study (held-out) tips.
#
# in_likelihood: numeric matrix with one row per tip and one column per state.
# study_tips_i:  row indices of the tips to keep.
# tree_tips:     tip labels in the same order as the rows of `in_likelihood`.
#
# Returns the selected rows as a matrix whose row names are the tip labels and
# whose column names are the 1-based state indices ("1", "2", ...). Columns
# that sum to exactly 0 over the selected tips are dropped; the remaining
# columns keep their original state index as name.
get_sorted_prob <- function(in_likelihood, study_tips_i, tree_tips) {

  # Subset to the study tips only and label the rows.
  probs <- in_likelihood[study_tips_i, , drop = FALSE]
  rownames(probs) <- tree_tips[study_tips_i]

  # Label columns 1..K. seq_len() also copes with a zero-column matrix,
  # where 1:ncol(x) would expand to c(1, 0) and make the assignment fail.
  colnames(probs) <- seq_len(ncol(probs))

  # Drop states with zero likelihood across all study tips. `%in% 0` never
  # yields NA, so columns whose sum is NA/NaN are kept.
  has_support <- !(colSums(probs) %in% 0)
  probs[, has_support, drop = FALSE]
}

In [None]:
# Maximum-parsimony hidden state prediction with edge_exponent = 0.
# The transition cost is fixed to "all_equal"; only edge_exponent (0 or 1)
# differs between the two parsimony variants in this notebook.
#
# in_trait:    tip states, passed to castor as `tip_states`.
# in_tree:     the phylogeny; its tip labels become the row names of the result.
# unknown_i:   indices of the tips whose likelihood rows are returned.
# check_input: forwarded to castor::hsp_max_parsimony.
#
# Returns the get_sorted_prob() matrix of state likelihoods for `unknown_i`.
mp_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
  parsimony_fit <- castor::hsp_max_parsimony(
    tree = in_tree,
    tip_states = in_trait,
    transition_costs = "all_equal",
    edge_exponent = 0.0,
    weight_by_scenarios = TRUE,
    check_input = check_input
  )

  get_sorted_prob(
    parsimony_fit$likelihoods,
    study_tips_i = unknown_i,
    tree_tips = in_tree$tip.label
  )
}

# Benchmark: maximum parsimony (mp_study_probs as defined in this cell).
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# integer-coded traits are predicted, and the most likely state of each
# held-out tip is collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
c_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA

    # Categorical traits are the integer columns; blank entries become NA.
    c_ref_trait <- Filter(is.integer, ref_trait)
    c_ref_trait[c_ref_trait == ""] <- NA
    # Shift the 0-based state codes to 1-based, as castor expects states
    # numbered from 1.
    c_ref_trait <- c_ref_trait + 1

    # State likelihoods of the held-out tips, one matrix per trait.
    # drop = FALSE keeps a data frame even if only one integer column exists.
    c_predict_out_lik <- lapply(
      c_ref_trait[tree$tip.label, , drop = FALSE],
      function(x) {
        mp_study_probs(
          in_trait = x,
          in_tree = tree,
          unknown_i = test_nodes_index,
          check_input = TRUE
        )
      }
    )

    # Most likely state per tip, shifted back to 0-based codes.
    # NOTE: max.col() breaks ties at random by default, so tied predictions
    # depend on the RNG state.
    c_predict_out <- lapply(
      c_predict_out_lik,
      function(x) as.numeric(colnames(x)[max.col(x)]) - 1
    )

    c_hsp_res <- as.data.frame(c_predict_out)
    c_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    c_hsp_res$iteration <- i
    c_hsp_res$threshold <- threshold

    k <- k + 1L
    c_hsp_res_list[[k]] <- c_hsp_res
  }
}
c_hsp_res_all <- do.call(rbind, c_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_mp_no_edge.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = c_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Maximum-parsimony hidden state prediction with edge_exponent = 1.
# Identical to the edge_exponent = 0 variant except for the exponent; the
# transition cost stays "all_equal".
#
# in_trait:    tip states, passed to castor as `tip_states`.
# in_tree:     the phylogeny; its tip labels become the row names of the result.
# unknown_i:   indices of the tips whose likelihood rows are returned.
# check_input: forwarded to castor::hsp_max_parsimony.
#
# Returns the get_sorted_prob() matrix of state likelihoods for `unknown_i`.
mp_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
  parsimony_fit <- castor::hsp_max_parsimony(
    tree = in_tree,
    tip_states = in_trait,
    transition_costs = "all_equal",
    edge_exponent = 1.0, # the only difference from the edge_exponent = 0 run
    weight_by_scenarios = TRUE,
    check_input = check_input
  )

  get_sorted_prob(
    parsimony_fit$likelihoods,
    study_tips_i = unknown_i,
    tree_tips = in_tree$tip.label
  )
}

# Benchmark: maximum parsimony (mp_study_probs as defined in this cell).
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# integer-coded traits are predicted, and the most likely state of each
# held-out tip is collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
c_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA

    # Categorical traits are the integer columns; blank entries become NA.
    c_ref_trait <- Filter(is.integer, ref_trait)
    c_ref_trait[c_ref_trait == ""] <- NA
    # Shift the 0-based state codes to 1-based, as castor expects states
    # numbered from 1.
    c_ref_trait <- c_ref_trait + 1

    # State likelihoods of the held-out tips, one matrix per trait.
    # drop = FALSE keeps a data frame even if only one integer column exists.
    c_predict_out_lik <- lapply(
      c_ref_trait[tree$tip.label, , drop = FALSE],
      function(x) {
        mp_study_probs(
          in_trait = x,
          in_tree = tree,
          unknown_i = test_nodes_index,
          check_input = TRUE
        )
      }
    )

    # Most likely state per tip, shifted back to 0-based codes.
    # NOTE: max.col() breaks ties at random by default, so tied predictions
    # depend on the RNG state.
    c_predict_out <- lapply(
      c_predict_out_lik,
      function(x) as.numeric(colnames(x)[max.col(x)]) - 1
    )

    c_hsp_res <- as.data.frame(c_predict_out)
    c_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    c_hsp_res$iteration <- i
    c_hsp_res$threshold <- threshold

    k <- k + 1L
    c_hsp_res_list[[k]] <- c_hsp_res
  }
}
c_hsp_res_all <- do.call(rbind, c_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_mp_inversed_edge.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = c_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Hidden state prediction with empirical probabilities.
#
# in_trait:    tip states, passed to castor as `tip_states`.
# in_tree:     the phylogeny; its tip labels become the row names of the result.
# unknown_i:   indices of the tips whose likelihood rows are returned.
# check_input: forwarded to castor::hsp_empirical_probabilities.
#
# Returns the get_sorted_prob() matrix of state likelihoods for `unknown_i`.
hsp_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
  emp_fit <- castor::hsp_empirical_probabilities(
    tree = in_tree,
    tip_states = in_trait,
    check_input = check_input
  )

  get_sorted_prob(
    emp_fit$likelihoods,
    study_tips_i = unknown_i,
    tree_tips = in_tree$tip.label
  )
}

# Benchmark: empirical probabilities (hsp_study_probs as defined in this cell).
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# integer-coded traits are predicted, and the most likely state of each
# held-out tip is collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
c_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA
    # Blank entries become NA.
    ref_trait[ref_trait == ""] <- NA

    # Categorical traits are the integer columns. Shift the 0-based state
    # codes to 1-based, as castor expects states numbered from 1.
    c_ref_trait <- Filter(is.integer, ref_trait)
    c_ref_trait <- c_ref_trait + 1

    # State likelihoods of the held-out tips, one matrix per trait.
    # drop = FALSE keeps a data frame even if only one integer column exists.
    c_predict_out_lik <- lapply(
      c_ref_trait[tree$tip.label, , drop = FALSE],
      function(x) {
        hsp_study_probs(
          in_trait = x,
          in_tree = tree,
          unknown_i = test_nodes_index,
          check_input = TRUE
        )
      }
    )

    # Most likely state per tip, shifted back to 0-based codes.
    # NOTE: max.col() breaks ties at random by default, so tied predictions
    # depend on the RNG state.
    c_predict_out <- lapply(
      c_predict_out_lik,
      function(x) as.numeric(colnames(x)[max.col(x)]) - 1
    )

    c_hsp_res <- as.data.frame(c_predict_out)
    c_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    c_hsp_res$iteration <- i
    c_hsp_res$threshold <- threshold

    k <- k + 1L
    c_hsp_res_list[[k]] <- c_hsp_res
  }
}
c_hsp_res_all <- do.call(rbind, c_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_emp.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = c_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Markov (Mk) model, rate model: ER
#
# in_trait:    tip states coded 1..K (NA allowed); K is taken as the largest
#              observed state.
# in_tree:     the phylogeny; its tip labels become the row names of the result.
# unknown_i:   indices of the tips whose likelihood rows are returned.
# check_input: forwarded to castor::hsp_mk_model.
#
# Returns an unnamed list: [[1]] the get_sorted_prob() matrix of state
# likelihoods for `unknown_i`, [[2]] the fitted transition matrix.
hsp_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
  n_states <- max(in_trait, na.rm = TRUE)

  mk_fit <- castor::hsp_mk_model(
    tree = in_tree,
    tip_states = in_trait,
    Nstates = n_states,
    rate_model = "ER",
    include_likelihoods = TRUE,
    root_prior = "empirical",
    Ntrials = 1,
    check_input = check_input
  )

  list(
    get_sorted_prob(
      mk_fit$likelihoods,
      study_tips_i = unknown_i,
      tree_tips = in_tree$tip.label
    ),
    mk_fit$transition_matrix
  )
}

# Build the output path "<prefix>/<trait>_<threshold>_<iteration>.tsv".
#
# prefix:    output directory (no trailing slash).
# trait:     trait name used as the file-name stem.
# threshold: distance threshold of the run.
# iteration: fold number of the run.
#
# The path is built from the `trait` argument. Previously the body read the
# global `c` instead, which is either a leftover loop variable or the base
# function c() (the latter makes paste() fail).
make_path <- function(prefix, trait, threshold, iteration) {
  paste0(prefix, "/", trait, "_", threshold, "_", iteration, ".tsv")
}

# Benchmark: Mk model (hsp_study_probs as defined in this cell).
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# integer-coded traits are predicted, and the most likely state of each
# held-out tip is collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
c_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA
    # Blank entries become NA.
    ref_trait[ref_trait == ""] <- NA

    # Categorical traits are the integer columns. Shift the 0-based state
    # codes to 1-based, as castor expects states numbered from 1.
    c_ref_trait <- Filter(is.integer, ref_trait)
    c_ref_trait <- c_ref_trait + 1

    # Per trait: list(likelihood matrix of held-out tips, transition matrix).
    # drop = FALSE keeps a data frame even if only one integer column exists.
    c_predict_out_lik <- lapply(
      c_ref_trait[tree$tip.label, , drop = FALSE],
      function(x) {
        hsp_study_probs(
          in_trait = x,
          in_tree = tree,
          unknown_i = test_nodes_index,
          check_input = TRUE
        )
      }
    )

    # Most likely state per tip, shifted back to 0-based codes.
    # NOTE: max.col() breaks ties at random by default, so tied predictions
    # depend on the RNG state.
    c_predict_out <- lapply(
      c_predict_out_lik,
      function(x) as.numeric(colnames(x[[1]])[max.col(x[[1]])]) - 1
    )
    transition_matrix <- lapply(c_predict_out_lik, function(x) x[[2]])
    # Record the transition matrix
    # for (c in ct) {
    #   tm <- transition_matrix[[c]]
    #   out_path <- make_path(
    #     prefix = "../../data/2025-03-24/transition_matrix_ER",
    #     trait = c,
    #     threshold = threshold,
    #     iteration = i)
    #   write.table(tm, file=out_path, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t")
    # }

    c_hsp_res <- as.data.frame(c_predict_out)
    c_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    c_hsp_res$iteration <- i
    c_hsp_res$threshold <- threshold

    k <- k + 1L
    c_hsp_res_list[[k]] <- c_hsp_res
  }
}
c_hsp_res_all <- do.call(rbind, c_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_mk_ER.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = c_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Markov (Mk) model, rate model: SYM. It took 2 hours.
#
# in_trait:    tip states coded 1..K (NA allowed); K is taken as the largest
#              observed state.
# in_tree:     the phylogeny; its tip labels become the row names of the result.
# unknown_i:   indices of the tips whose likelihood rows are returned.
# check_input: forwarded to castor::hsp_mk_model.
#
# Returns an unnamed list: [[1]] the get_sorted_prob() matrix of state
# likelihoods for `unknown_i`, [[2]] the fitted transition matrix.
hsp_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
  n_states <- max(in_trait, na.rm = TRUE)

  mk_fit <- castor::hsp_mk_model(
    tree = in_tree,
    tip_states = in_trait,
    Nstates = n_states,
    rate_model = "SYM",
    include_likelihoods = TRUE,
    root_prior = "empirical",
    Ntrials = 1,
    check_input = check_input
  )

  list(
    get_sorted_prob(
      mk_fit$likelihoods,
      study_tips_i = unknown_i,
      tree_tips = in_tree$tip.label
    ),
    mk_fit$transition_matrix
  )
}

# Build the output path "<prefix>/<trait>_<threshold>_<iteration>.tsv".
#
# prefix:    output directory (no trailing slash).
# trait:     trait name used as the file-name stem.
# threshold: distance threshold of the run.
# iteration: fold number of the run.
#
# The path is built from the `trait` argument. Previously the body read the
# global `c` instead, which is either a leftover loop variable or the base
# function c() (the latter makes paste() fail).
make_path <- function(prefix, trait, threshold, iteration) {
  paste0(prefix, "/", trait, "_", threshold, "_", iteration, ".tsv")
}

# Benchmark: Mk model (hsp_study_probs as defined in this cell).
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# integer-coded traits are predicted, and the most likely state of each
# held-out tip is collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
c_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA
    # Blank entries become NA.
    ref_trait[ref_trait == ""] <- NA

    # Categorical traits are the integer columns. Shift the 0-based state
    # codes to 1-based, as castor expects states numbered from 1.
    c_ref_trait <- Filter(is.integer, ref_trait)
    c_ref_trait <- c_ref_trait + 1

    # Per trait: list(likelihood matrix of held-out tips, transition matrix).
    # drop = FALSE keeps a data frame even if only one integer column exists.
    c_predict_out_lik <- lapply(
      c_ref_trait[tree$tip.label, , drop = FALSE],
      function(x) {
        hsp_study_probs(
          in_trait = x,
          in_tree = tree,
          unknown_i = test_nodes_index,
          check_input = TRUE
        )
      }
    )

    # Most likely state per tip, shifted back to 0-based codes.
    # NOTE: max.col() breaks ties at random by default, so tied predictions
    # depend on the RNG state.
    c_predict_out <- lapply(
      c_predict_out_lik,
      function(x) as.numeric(colnames(x[[1]])[max.col(x[[1]])]) - 1
    )
    transition_matrix <- lapply(c_predict_out_lik, function(x) x[[2]])
    # # Record the transition matrix
    # for (c in ct) {
    #   tm <- transition_matrix[[c]]
    #   out_path <- make_path(
    #     prefix = "../../data/2025-03-24/transition_matrix_SYM",
    #     trait = c,
    #     threshold = threshold,
    #     iteration = i)
    #   write.table(tm, file=out_path, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t")
    # }

    c_hsp_res <- as.data.frame(c_predict_out)
    c_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    c_hsp_res$iteration <- i
    c_hsp_res$threshold <- threshold

    k <- k + 1L
    c_hsp_res_list[[k]] <- c_hsp_res
  }
}
c_hsp_res_all <- do.call(rbind, c_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_mk_SYM.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = c_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)

In [None]:
# Markov (Mk) model, rate model: ARD
#
# in_trait:    tip states coded 1..K (NA allowed); K is taken as the largest
#              observed state.
# in_tree:     the phylogeny; its tip labels become the row names of the result.
# unknown_i:   indices of the tips whose likelihood rows are returned.
# check_input: forwarded to castor::hsp_mk_model.
#
# Returns an unnamed list: [[1]] the get_sorted_prob() matrix of state
# likelihoods for `unknown_i`, [[2]] the fitted transition matrix.
hsp_study_probs <- function(in_trait, in_tree, unknown_i, check_input) {
  n_states <- max(in_trait, na.rm = TRUE)

  mk_fit <- castor::hsp_mk_model(
    tree = in_tree,
    tip_states = in_trait,
    Nstates = n_states,
    rate_model = "ARD",
    include_likelihoods = TRUE,
    root_prior = "empirical",
    Ntrials = 1,
    check_input = check_input
  )

  list(
    get_sorted_prob(
      mk_fit$likelihoods,
      study_tips_i = unknown_i,
      tree_tips = in_tree$tip.label
    ),
    mk_fit$transition_matrix
  )
}

# Build the output path "<prefix>/<trait>_<threshold>_<iteration>.tsv".
#
# prefix:    output directory (no trailing slash).
# trait:     trait name used as the file-name stem.
# threshold: distance threshold of the run.
# iteration: fold number of the run.
#
# The path is built from the `trait` argument. Previously the body read the
# global `c` instead, which is either a leftover loop variable or the base
# function c() (the latter makes paste() fail).
make_path <- function(prefix, trait, threshold, iteration) {
  paste0(prefix, "/", trait, "_", threshold, "_", iteration, ".tsv")
}

# Benchmark: Mk model (hsp_study_probs as defined in this cell).
# For every distance threshold and every hold-out fold, the traits of all tips
# closer than `threshold` to any held-out tip are masked (set to NA), the
# integer-coded traits are predicted, and the most likely state of each
# held-out tip is collected.
l_threshold <- c(0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# One result frame per (threshold, fold); bound once at the end instead of
# growing a data frame with rbind() inside the loop.
c_hsp_res_list <- vector("list", length(l_threshold) * length(l_test_nodes))
k <- 0L
for (threshold in l_threshold) {
  for (i in seq_along(l_test_nodes)) {
    # Held-out tips of this fold and their positions in the tree.
    test_nodes <- l_test_nodes[[i]]
    test_nodes_index <- which(tree$tip.label %in% test_nodes)

    # Tips to mask: everything closer than `threshold` to at least one
    # held-out tip (the held-out tips themselves are at distance 0).
    is_near <- distance_matrix[test_nodes, , drop = FALSE] < threshold
    rm_tips <- tree$tip.label[colSums(is_near, na.rm = TRUE) > 0]

    ref_trait <- data.frame(trait)
    ref_trait[rm_tips, ] <- NA
    # Blank entries become NA.
    ref_trait[ref_trait == ""] <- NA

    # Categorical traits are the integer columns. Shift the 0-based state
    # codes to 1-based, as castor expects states numbered from 1.
    c_ref_trait <- Filter(is.integer, ref_trait)
    c_ref_trait <- c_ref_trait + 1

    # Per trait: list(likelihood matrix of held-out tips, transition matrix).
    # drop = FALSE keeps a data frame even if only one integer column exists.
    c_predict_out_lik <- lapply(
      c_ref_trait[tree$tip.label, , drop = FALSE],
      function(x) {
        hsp_study_probs(
          in_trait = x,
          in_tree = tree,
          unknown_i = test_nodes_index,
          check_input = TRUE
        )
      }
    )

    # Most likely state per tip, shifted back to 0-based codes.
    # NOTE: max.col() breaks ties at random by default, so tied predictions
    # depend on the RNG state.
    c_predict_out <- lapply(
      c_predict_out_lik,
      function(x) as.numeric(colnames(x[[1]])[max.col(x[[1]])]) - 1
    )
    transition_matrix <- lapply(c_predict_out_lik, function(x) x[[2]])
    # # Record the transition matrix
    # for (c in ct) {
    #   tm <- transition_matrix[[c]]
    #   out_path <- make_path(
    #     prefix = "../../data/2025-03-24/transition_matrix_ARD",
    #     trait = c,
    #     threshold = threshold,
    #     iteration = i)
    #   write.table(tm, file=out_path, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t")
    # }

    c_hsp_res <- as.data.frame(c_predict_out)
    c_hsp_res$species_tax_id <- tree$tip.label[test_nodes_index]
    c_hsp_res$iteration <- i
    c_hsp_res$threshold <- threshold

    k <- k + 1L
    c_hsp_res_list[[k]] <- c_hsp_res
  }
}
c_hsp_res_all <- do.call(rbind, c_hsp_res_list)

# Output
res_path <- "hsp_results/hsp_result_mk_ARD.tsv"
# Make sure the output directory exists so the write cannot fail after the
# whole benchmark has run.
dir.create(dirname(res_path), showWarnings = FALSE, recursive = TRUE)
write.table(
  x = c_hsp_res_all, file = res_path, sep = "\t", na = "",
  row.names = FALSE, quote = FALSE
)