In [None]:
import os
import sys

import msprime
import numpy as np

# Make the local phylokit package importable: put the parent of the current
# working directory on sys.path (assumes the notebook is run from a directory
# directly below the phylokit checkout — TODO confirm).
phylokit_path = os.path.abspath(os.pardir)
if phylokit_path not in sys.path:
    # Guard keeps re-running this cell from adding duplicate entries.
    sys.path.append(phylokit_path)

import phylokit as pk

# 1. Set Up the Client

### Option 1 -> Using dask distributed

In [None]:
# from dask.distributed import Client
# client = Client("127.0.0.1:8786")  # address (host:port) of the running scheduler

### Option 2 -> Using a local cluster

In [None]:
from dask.distributed import Client, LocalCluster

# Start a cluster of five workers with one thread each, then connect a client
# to it. Both objects stay bound to notebook-level names for the later cells.
localcluster = LocalCluster(n_workers=5, threads_per_worker=1)
client = Client(localcluster)
# Trailing bare expression: the client is shown as the cell output.
client

### Option 3 -> Using coiled.io

In [None]:
# from coiled import Cluster
# from dask.distributed import Client

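# NOTE(review): leaving the `with` block is expected to shut the cluster down, so
# run the computation inside it (or create the Cluster without `with` and close
# it manually) — confirm against the Coiled docs. A bare `client` expression
# nested in the block is also not displayed as cell output.
# with Cluster(name="test", n_workers=1, package_sync=True) as cluster:
#     client = Client(cluster)
#     client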

# 2. Load Your Tree Sequence

### Option 1 -> Simulate a tree sequence

In [None]:
def simulate_ts(num_samples, num_sites, seed=1234, mutation_rate=0.01):
    """Simulate a haploid tree sequence with mutations using msprime.

    Parameters
    ----------
    num_samples
        Sample count handed to ``msprime.sim_ancestry`` (with ``ploidy=1``).
    num_sites
        Passed to ``msprime.sim_ancestry`` as ``sequence_length``.
    seed : int, optional
        Random seed; the same value is used for both the ancestry and the
        mutation simulation.
    mutation_rate : float, optional
        Rate passed to ``msprime.sim_mutations``. Defaults to ``0.01``, the
        value that used to be hard-coded, so existing callers are unaffected.

    Returns
    -------
    The object returned by ``msprime.sim_mutations`` for the simulated ancestry.
    """
    ancestry = msprime.sim_ancestry(
        num_samples, sequence_length=num_sites, ploidy=1, random_seed=seed
    )
    return msprime.sim_mutations(ancestry, rate=mutation_rate, random_seed=seed)

def create_mutation_tree(num_samples, sequence_length, chunk_size, seed=1234):
    """Simulate a tree sequence and combine its phylokit datasets into one.

    The tree sequence comes from ``simulate_ts``. Two datasets are derived
    from it and merged:

    * ``pk.parsimony.hartigan.ts_to_dataset`` on the whole tree sequence,
      called with ``chunks=chunk_size``;
    * ``pk.from_tskit`` on the first tree only (``.first()``).

    Parameters
    ----------
    num_samples, sequence_length, seed
        Forwarded to ``simulate_ts``.
    chunk_size
        Forwarded to ``ts_to_dataset`` as ``chunks``.

    Returns
    -------
    The result of merging the ``ts_to_dataset`` output into the
    ``from_tskit`` dataset.
    """
    simulated = simulate_ts(num_samples, sequence_length, seed=seed)
    mutation_ds = pk.parsimony.hartigan.ts_to_dataset(simulated, chunks=chunk_size)
    # Only the first tree of the sequence is converted here.
    first_tree_ds = pk.from_tskit(simulated.first())
    return first_tree_ds.merge(mutation_ds)

# Build the demo dataset used by the calculation cells below.
pk_mts = create_mutation_tree(
    num_samples=10000,
    sequence_length=1000,
    chunk_size=100,
)
# Trailing bare expression: show the dataset as the cell output.
pk_mts

### Option 2 -> Open a TreeSequence dataset from storage (e.g. local disk, S3)

In [None]:
# pk_mts = pk.open_dataset("/YOUR/PATH/HERE/dataset/tree.zarr")
# pk_mts

# 3. Calculate the Parsimony Score

In [None]:
# Request the Hartigan parsimony score for the dataset. The returned object is
# evaluated with .compute() below, so it is presumably lazy (dask-backed) —
# TODO confirm.
x = pk.get_hartigan_parsimony_score(pk_mts)
# Evaluate it and keep the concrete value.
result = x.compute()
# Trailing bare expression: show the value as the cell output.
result

In [None]:
# Call append_parsimony_score on the same dataset. Judging by its name it
# presumably returns the dataset with the score attached — TODO confirm.
y = pk.append_parsimony_score(pk_mts)
# Evaluate it. NOTE: this rebinds `result`, replacing the value stored by the
# previous cell.
result = y.compute()
# Trailing bare expression: show the value as the cell output.
result