In [28]:
import os
import pickle
import numpy as np
import torch
import heracles
import gurobi

from heracles.main2 import main
from heracles.metrics import cas_triplets_correct
from simulate_data import simulate_data
from cassiopeia.data import CassiopeiaTree
from cassiopeia.solver import VanillaGreedySolver, ILPSolver
from cassiopeia.critique import triplets_correct

# automatically reload modules
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
# simulate data
num_states=40
num_sites=40 # target sites aka characters
mutation_rate=0.025
deletion_rate=9e-4    
transition_prob = {i: 1/num_states for i in range(num_states)}
missing_data = 0.2
exp_time = 11 # experiment time aka depth of tree
path = '../heracles/data'

simulate_data(transition_prob, num_sites, num_states, mutation_rate, deletion_rate,
              missing_data, exp_time, path)

In [30]:
# load data
fname = os.path.join(path, 'true_tree')
with open(fname, 'rb') as file:
    true_tree = pickle.load(file)
    
fname = os.path.join(path, 'params')
with open(fname, 'rb') as file:
    params = pickle.load(file)

In [31]:
print('Num nodes: ', len(true_tree.nodes))
print('Max Depth: ', true_tree.get_max_depth_of_tree())
print('Avg Depth: ', true_tree.get_mean_depth_of_tree())

Num nodes:  1000
Max Depth:  4.974889468915673
Avg Depth:  4.974889468915673


In [32]:
cas_tree = CassiopeiaTree(character_matrix=true_tree.character_matrix, priors=None)
# vanilla_greedy = VanillaGreedySolver()
# vanilla_greedy.solve(cas_tree, collapse_mutationless_edges=True)
ilp = ILPSolver()
ilp.solve(cas_tree)
triplets = triplets_correct(true_tree, cas_tree)
cas_ans = np.mean(list(triplets[0].values()))

[2023-04-27 22:40:04,632]    INFO [ILPSolver] Solving tree with the following parameters.
[2023-04-27 22:40:04,633]    INFO [ILPSolver] Convergence time limit: 12600
[2023-04-27 22:40:04,633]    INFO [ILPSolver] Convergence iteration limit: 0
[2023-04-27 22:40:04,634]    INFO [ILPSolver] Max potential graph layer size: 10000
[2023-04-27 22:40:04,634]    INFO [ILPSolver] Max potential graph lca distance: None
[2023-04-27 22:40:04,634]    INFO [ILPSolver] MIP gap: 0.01
[2023-04-27 22:40:04,641]    INFO [ILPSolver] Phylogenetic root: (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
[2023-04-27 22:40:04,645]    INFO [ILPSolver] (Process: 32b24e9ec792094647f6080001ac26d5) Estimating a potential graph with a maximum layer size of 10000 and a maximum LCA distance of 11.
[2023-04-27 22:40:17,027]    INFO [ILPSolver] (Process: 32b24e9ec792094647f6080001ac26d5) LCA distance 0 completed with a neighborhood size of 274.
[2023-

Set parameter Username
Academic license - for non-commercial use only - expires 2024-04-26


[2023-04-28 02:47:26,696]    INFO [ILPSolver] (Process 32b24e9ec792094647f6080001ac26d5) Steiner tree solving tool 0 days, 3 hours, 219 minutes, and 33 seconds.


RecursionError: maximum recursion depth exceeded while calling a Python object

In [9]:
char_matrix = true_tree.character_matrix
mutation_rate = params['mutation_rate']
deletion_rate = params['deletion_rate']
transition_prob = params['transition_prob']
seed = 0
num_epochs = 30
lr = 5e-2
embedding_dim = 3
rho = 2
stabilize = 1
est_tree_method = 'neighbor-joining'
true_tree = true_tree

best_embeddings = main(char_matrix, mutation_rate, deletion_rate, transition_prob,
                       seed, num_epochs, lr, embedding_dim, rho, stabilize, est_tree_method, true_tree)
heracles_ans = cas_triplets_correct(true_tree, best_embeddings, rho)

In [10]:
print('Cassiopeia: ', cas_ans)
print('Heracles: ', heracles_ans)

Cassiopeia:  0.45799999999999996
Heracles:  0.4651666666666667
