# Simulate Tree, Transition Matrix $P$

In [61]:
import os
import pickle
import numpy as np
import cassiopeia as cas

from cassiopeia.sim import BirthDeathFitnessSimulator
from cassiopeia.sim import Cas9LineageTracingDataSimulator

# Reproducibility: the same seed initialises NumPy's generator here and is
# passed as `random_seed` / `random_state` to the Cassiopeia calls below.
seed = 10
rng = np.random.default_rng(seed)

# relative path to data directory
path = '../data'

def simulate_evolutionary_tree():
    """Simulate a tree with Cassiopeia's ``BirthDeathFitnessSimulator``.

    Every random draw made by the sampling callbacks comes from the
    module-level ``rng``; the simulator itself is seeded with ``seed``.

    Returns:
        The object returned by ``BirthDeathFitnessSimulator.simulate_tree()``.
    """
    def sample_birth_wait(scale):
        # exponential waiting time with the scale supplied by the simulator
        return rng.exponential(scale)

    def sample_death_wait():
        return rng.exponential(1.5)

    def sample_mutation_count():
        # 1 with probability 0.5, otherwise 0
        return 1 if rng.uniform() < 0.5 else 0

    def sample_fitness():
        return rng.normal(0, .5)

    simulator = cas.sim.BirthDeathFitnessSimulator(
        birth_waiting_distribution=sample_birth_wait,
        initial_birth_scale=0.5,
        death_waiting_distribution=sample_death_wait,
        mutation_distribution=sample_mutation_count,
        fitness_distribution=sample_fitness,
        fitness_base=1.3,
        num_extant=40,
        random_seed=seed,
    )
    return simulator.simulate_tree()

def lineage_tracing(tree, params, num_sites, num_states):
    """Overlay simulated Cas9 lineage-tracing data onto ``tree`` in place.

    Args:
        tree: tree handed to ``Cas9LineageTracingDataSimulator.overlay_data``.
        params: dict providing 'mutation_rate', 'transition_prob' (must itself
            be a dict) and 'deletion_rate'.
        num_sites: number of cassettes; each cassette has size 1.
        num_states: value forwarded as ``number_of_states``.

    Returns:
        None. The result of the simulation is written onto ``tree``.
    """
    mutation_rate = params['mutation_rate']
    state_priors = params['transition_prob']  # must be dict
    silencing_rate = params['deletion_rate']

    # Both kinds of missing data share the same indicator value (-1).
    missing_state = -1

    cas9_simulator = cas.sim.Cas9LineageTracingDataSimulator(
        number_of_cassettes=num_sites,
        size_of_cassette=1,
        mutation_rate=mutation_rate,
        state_generating_distribution=None,
        number_of_states=num_states,
        state_priors=state_priors,
        heritable_silencing_rate=silencing_rate,
        stochastic_silencing_rate=0.1,
        heritable_missing_data_state=missing_state,
        stochastic_missing_data_state=missing_state,
        random_seed=seed,
    )
    cas9_simulator.overlay_data(tree)

In [62]:
def get_tree(num_sites=3, num_states=20):
    """Simulate a tree and overlay CRISPR-Cas9 lineage-tracing data on it.

    Args:
        num_sites: number of target sites (default 3).
        num_states: number of mutated states, each assigned the same
            transition probability ``1 / num_states`` (default 20).

    Returns:
        The tree from ``simulate_evolutionary_tree()`` after
        ``lineage_tracing`` has been applied to it.
    """
    params = {
        # mutation rates [λM_1, λM_2, ..., λM_NumSites]
        'mutation_rate': np.repeat(0.1, num_sites),
        # deletion rate λD
        'deletion_rate': np.array([9e-4]),
        # simplex P = [p_1 ... p_NumStates]: probability p_i of transitioning
        # from the unedited state to mutated state i
        # NOTE(review): keys start at 0; confirm that state 0 is not also the
        # simulator's "unedited" state.
        'transition_prob': {i: 1/num_states for i in range(num_states)},
    }

    # simulate evolutionary tree and overlay CRISPR-Cas9 data on top of it
    tree = simulate_evolutionary_tree()
    lineage_tracing(tree, params, num_sites, num_states)
    return tree

In [63]:
def get_transition_matricies(num_sites, num_states):
    """Draw one random square matrix per site as a stand-in transition matrix.

    Each matrix has side ``num_sites + num_states + 1`` and is filled with
    standard-normal samples from the module-level ``rng``; the entries are
    not normalised into probabilities.

    Returns:
        A list of ``num_sites`` NumPy arrays.
    """
    side = num_sites + num_states + 1
    matrices = []
    for _ in range(num_sites):
        matrices.append(rng.normal(size=(side, side)))
    return matrices

# Felsenstein's Pruning Algorithm

In [64]:
# Artificial input parameters for Felsenstein's algorithm.
# These two values must match the num_sites / num_states used by get_tree().
num_sites, num_states = 3, 20

# The tree is simulated before the matrices are drawn: both consume `rng`,
# so this order determines the random values each one receives.
tree = get_tree()
transition_matricies = get_transition_matricies(num_sites, num_states)

# draw the simulated tree
fig = cas.pl.plot_plotly(tree, random_state=seed)
fig.show()

In [49]:
# Initialise the per-node likelihoods for Felsenstein's algorithm.
#
# Each leaf gets a (num_states, num_sites) matrix stored under the
# 'felsenstein_likelihood' attribute: column j is a one-hot indicator of the
# state observed at site j.
missing_state = -1  # same indicator passed to the simulator in lineage_tracing
site_index = np.arange(num_sites)

# postorder traversal of nodes
for node in tree.depth_first_traverse_nodes(postorder=True):
    if not tree.is_leaf(node):
        # TODO: combine the children's likelihoods here.
        # Until that is implemented, store nothing for internal nodes: falling
        # through would attach the previously visited leaf's matrix to them.
        continue

    # character state observed at this leaf, one entry per site
    state = np.asarray(tree.get_character_states(node))
    observed = state != missing_state

    likelihood = np.zeros((num_states, num_sites))  # zero everywhere
    # ... except for a 1 at the observed state of each site
    likelihood[state[observed], site_index[observed]] = 1
    # Indexing with -1 would wrap around to the last state's row, so missing
    # sites are handled separately: every state is treated as compatible.
    # NOTE(review): confirm this is the intended treatment of silenced sites.
    likelihood[:, ~observed] = 1

    tree.set_attribute(node, 'felsenstein_likelihood', likelihood)  # store likelihood

[12, 0, 0]
[[0. 1. 1.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
