<a href="https://colab.research.google.com/github/emilydolson/alife-phylogeny-tutorial/blob/main/hstrat_ping_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
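# set up environment:
# clear the working directory, fetch the tutorial repository into it,
# and install the Python dependencies listed in requirements.txt
# WARNING: the `find ... -exec rm -rf` line below deletes every entry
# in the current working directory before the repository is pulled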
!find . -name . -o -prune -exec rm -rf -- {} +
!git init
!git remote add origin https://github.com/mmore500/alife-phylogeny-tutorial.git
!git pull origin main
!python3 -m pip install -r requirements.txt

In [None]:
import random
import string
import typing

import alifedata_phyloinformatics_convert as apc
from hstrat import hstrat
import pandas as pd
import typing_extensions
from tqdm import tqdm

import pylib  # local Python library @ ./pylib/

# Configure Parameters

In [None]:
# how many characters can genomes' domain string be?
# (also the number of leading packet bytes that hold the domain string
# when a genome is serialized/deserialized)
TARGET_DOMAIN_LEN: int = 4
# per-character probability that a domain character is re-drawn at random
# when a genome mutates
CHAR_MUTATE_RATE: float = 0.1
# number of genomes in the population
N_POP: int = 8
# number of generations to simulate
N_GEN: int = 10

# how many copies can each genome make of itself
# per reproduction event
# i.e., how many outgoing pings to send at once
PING_COPY_COUNT: int = 2

# use 1 byte differentia values for hstrat instrumentation
DIFFERENTIA_BIT_WIDTH: int = 8

# Choose Retention Policy

In [None]:
# suppose a 32 byte size budget for ping payload...
# how big can our hstrat column be?
#
# with 4 bytes for target domain string
# and 4 bytes for generation counter
#
# 24 bytes left...
# so @ 1 byte per differentia,
# we can accommodate up to 24 differentia

# curbed recency-proportional resolution
# stratum retention algorithm
# is a good go-to choice for space-constrained
# evolutionary applications of hstrat
#
# this single policy object is used both when new columns are created
# and when columns are deserialized from ping payloads
ping_stratum_retention_policy = (
    hstrat.recency_proportional_resolution_curbed_algo.Policy(
        size_curb=24,  # max num differentia retained at any one time
    )
)

# Define Genome


In [None]:
class PingGenome:
    """Genome that evolves a ping target and carries hstrat instrumentation.

    A genome consists of a target domain string (where its ping payload is
    sent) and a hereditary stratigraphic column that is extended on every
    reproduction event so that phylogenetic history can be estimated later.
    """

    # where to ping this genome against
    target_domain: str

    # instrumentation to facilitate phylogenetic inference
    hstrat_column: hstrat.HereditaryStratigraphicColumn

    def __init__(
        self: "PingGenome",
        target_domain: typing.Optional[str] = None,
        hstrat_column: typing.Optional[
            hstrat.HereditaryStratigraphicColumn
        ] = None,
    ):
        """Create a genome, filling in any component that is not supplied.

        Parameters
        ----------
        target_domain : str, optional
            Domain string to ping. If None, a random string of
            TARGET_DOMAIN_LEN lowercase ASCII letters is generated.
        hstrat_column : hstrat.HereditaryStratigraphicColumn, optional
            Existing instrumentation to adopt. If None, a fresh column is
            created using the shared retention policy and
            DIFFERENTIA_BIT_WIDTH.
        """
        if target_domain is None:
            # create random target domain
            target_domain = "".join(
                random.choice(string.ascii_lowercase)
                for __ in range(TARGET_DOMAIN_LEN)
            )
        self.target_domain = target_domain

        if hstrat_column is None:
            self.hstrat_column = hstrat.HereditaryStratigraphicColumn(
                # stratum_retention_policy: typing.Any
                # Policy struct that specifies the set of strata ranks
                # that should be pruned from a hereditary
                # stratigraphic column when the nth stratum is deposited.
                stratum_retention_policy=ping_stratum_retention_policy,
                # always_store_rank_in_stratum : bool, optional
                # Should the deposition rank be stored as a data member of generated
                # strata, even if not strictly necessary?
                always_store_rank_in_stratum=False,
                # stratum_differentia_bit_width : int, optional
                # The bit width of the generated differentia. Default 64, allowing
                # for 2^64 distinct values.
                stratum_differentia_bit_width=DIFFERENTIA_BIT_WIDTH,
            )
        else:
            self.hstrat_column = hstrat_column

    def mutate(self: "PingGenome") -> None:
        """Randomly scramble characters of target_domain in place.

        Each character is independently replaced by a random lowercase
        ASCII letter with probability CHAR_MUTATE_RATE; the replacement
        may coincide with the original character.
        """
        # for each target_domain character,
        # apply a scramble event with CHAR_MUTATE_RATE probability
        self.target_domain = "".join(
            random.choice(string.ascii_lowercase)
            if random.random() < CHAR_MUTATE_RATE
            else char
            for char in self.target_domain
        )

    def create_offspring(self: "PingGenome") -> "PingGenome":
        """Return a mutated child genome with descendant instrumentation.

        The child inherits this genome's target_domain (then mutates it)
        and receives a CloneDescendant of this genome's hstrat column.
        """
        offspring = PingGenome(
            target_domain=self.target_domain,  # inherit target_domain
            hstrat_column=(
                # register elapsed generation w/ hstrat instrumentation,
                # then pass instrumentation along to offspring
                self.hstrat_column.CloneDescendant()
            ),
        )
        offspring.mutate()  # mutate target_domain
        return offspring

    def to_packet(self: "PingGenome") -> typing_extensions.Buffer:
        """Serialize genome to a binary string for a ping payload.

        Layout: encoded target_domain followed by the hstrat column packet.
        """
        # serialize genome to a binary string
        # that can be transmitted within ping payload
        annotation_packet_bytes = hstrat.col_to_packet(self.hstrat_column)
        return self.target_domain.encode() + annotation_packet_bytes

    @staticmethod
    def from_packet(data: typing_extensions.Buffer) -> "PingGenome":
        """Deserialize a genome from a binary string (e.g., a ping payload).

        Parameters
        ----------
        data : typing_extensions.Buffer
            Packet laid out as produced by `to_packet`: the first
            TARGET_DOMAIN_LEN bytes are the target domain string and the
            remainder is the hstrat column packet. Must support slicing.

        Returns
        -------
        PingGenome
            Genome reconstructed from the packet contents.
        """
        # deserialize genome from a binary string
        # i.e., extracted from a ping payload

        # first TARGET_DOMAIN_LEN bytes are target_domain string
        # (coerce slice to bytes so buffers lacking .decode(),
        # such as memoryview, are handled too)
        target_domain = bytes(data[:TARGET_DOMAIN_LEN]).decode()

        # all the rest is the hstrat instrumentation
        hstrat_column = hstrat.col_from_packet_buffer(
            packet_buffer=data[TARGET_DOMAIN_LEN:],
            differentia_bit_width=DIFFERENTIA_BIT_WIDTH,
            stratum_retention_policy=ping_stratum_retention_policy,
        )

        # put deserialized components together into a genome object
        return PingGenome(
            target_domain=target_domain,
            hstrat_column=hstrat_column,
        )

# Define Selection

In [None]:
# process one generation of evolution
# on a population of PingGenome's
# and return "winning" offspring who made it back first
# as the next population
def elapse_generation(
    population: typing.List[PingGenome],
) -> typing.List[PingGenome]:
    """Advance the population by one generation via ping round trips.

    Offspring of randomly chosen parents are serialized and dispatched as
    ping payloads. Payloads that come back are collected until there are
    as many as there are genomes in `population`; those are deserialized
    and returned as the next population. Each send and each return is
    logged with `print`.
    """
    target_size = len(population)

    # manages socket resources, etc.
    pinger = pylib.PayloadPinger()

    # payloads that have made it back so far
    returned_packets: typing.List[typing_extensions.Buffer] = []

    # keep dispatching rounds of pings until the next population is full
    while len(returned_packets) < target_size:

        # one reproduction event per slot that is still unfilled
        shortfall = target_size - len(returned_packets)
        for _slot in range(shortfall):

            # parents are drawn uniformly at random from current population
            parent = random.choice(population)

            # each reproduction event sends out several offspring
            for _copy in range(PING_COPY_COUNT):
                # copy genome, mutate it, and register the elapsed
                # generation w/ hstrat instrumentation
                child = parent.create_offspring()

                # the child's own domain decides where its payload goes
                target_url = child.target_domain + ".com"
                pinger.send(target_url, child.to_packet())
                # log request event
                print(f"---> packet sent to {target_url}")

        # drain available ping responses,
        # stopping early once the next population is full
        while len(returned_packets) < target_size:
            response = pinger.read()
            if response is None:
                # no more ping responses to read right now
                break
            returned_packets.append(response)

            # log response event
            packet_domain = response[:TARGET_DOMAIN_LEN].decode()
            print(f" <=== packet returned from {packet_domain}")

    # deserialize packets back into genome objects
    return [PingGenome.from_packet(packet) for packet in returned_packets]

# Do Evolution

In [None]:
# create a common ancestor
# (no arguments given, so it gets a random target domain
# and a freshly created hstrat column)
common_ancestor = PingGenome()

# initialize population with offspring of common ancestor
# (each founder's hstrat column is a CloneDescendant of the ancestor's)
population = [common_ancestor.create_offspring() for __ in range(N_POP)]

# update population N_GEN times
# NOTE: every elapse_generation call dispatches ping requests
# and prints one log line per packet sent and per packet returned
for __ in tqdm(range(N_GEN)):
    population = elapse_generation(population)

# Extract Annotations and Build Tree

In [None]:
# hstrat instrumentation from population at end of simulation
extant_annotations = [
    # extract hstrat columns from genomes
    # & freeze dynamic instrumentation as "specimens,"
    # which are optimized for postprocessing analysis
    hstrat.col_to_specimen(genome.hstrat_column)
    for genome in population
]

# estimated_phylogeny is stored in alife data standards format
# https://alife-data-standards.github.io/alife-data-standards/phylogeny.html
estimated_phylogeny: pd.DataFrame = hstrat.build_tree(
    population=extant_annotations,
    # label each taxon with its genome's target domain;
    # labels are listed in the same order as extant_annotations
    # NOTE(review): genomes may share a target domain,
    # so labels are not guaranteed to be unique
    taxon_labels=[genome.target_domain for genome in population],
    # the `build_tree` function tracks the current best-known general
    # purpose reconstruction algorithm
    # pin to the current version (e.g., "1.7.2") for long-term stability
    # or pin to hstrat.__version__ to track latest algorithm updates
    version_pin=hstrat.__version__,
)

# Visualize Phylogeny

In [None]:
# translate to dendropy (which provides lots of phylogenetics tools)
# via alifedata phyloinformatics conversion tool
# (input is the estimated_phylogeny dataframe built from the hstrat annotations)
dendropy_tree = apc.alife_dataframe_to_dendropy_tree(
    estimated_phylogeny,
    setup_edge_lengths=True,
)

# draw the reconstruction!
# (printed to the cell output as an ASCII plot)
print(dendropy_tree.as_ascii_plot(plot_metric="age"))