## Dependencies

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Data Loading

In [None]:
def load_variants_and_summaries(folder: os.PathLike) -> dict:
    """Load every ``.evo`` summary below *folder* into a dict of DataFrames.

    The directory tree is walked recursively. Each ``.evo`` file is read as a
    tab-separated table and indexed by its ``generation_id`` column.

    Args:
        folder: Root directory to search. May be a ``str`` or any
            ``os.PathLike`` and may carry a trailing path separator.

    Returns:
        A dict mapping the variant name to its DataFrame. The variant name is
        the file's directory relative to *folder*, with ``/`` as separator
        (``''`` for files that sit directly in *folder*). If one directory
        holds several ``.evo`` files, the last one in sorted filename order
        wins.
    """
    base = os.fspath(folder)
    summaries = {}
    for root, _, filenames in os.walk(base):
        # Sorted so the "last file wins" rule does not depend on OS listing order.
        for filename in sorted(filenames):
            if not filename.endswith('.evo'):
                continue
            # relpath is robust to trailing separators, unlike slicing by len(folder).
            variant = os.path.relpath(root, base)
            if variant == os.curdir:
                variant = ''
            # Keep keys portable: lookups elsewhere use '/'-separated names.
            variant = variant.replace(os.sep, '/')
            df = pd.read_csv(os.path.join(root, filename), sep='\t')
            summaries[variant] = df.set_index('generation_id')
    return summaries

In [None]:
# Load every '.evo' summary found under '../evolution', keyed by variant name.
summary_per_variant = load_variants_and_summaries(folder='../evolution')

In [None]:
# Show each loaded variant key together with the number of rows in its summary.
[(k, len(v)) for (k, v) in summary_per_variant.items()]

# Probability of Leap

In [None]:
def pool_series(ticks, values, fn):
    """Reduce consecutive slices of *values* to one value per tick.

    The slice for a tick runs from the previous tick (0 for the first one)
    up to, but excluding, the tick itself. Each slice is handed to *fn* as a
    plain list. Values at or after the last tick are not pooled.

    Returns:
        A list with one ``fn`` result per tick.
    """
    boundaries = [0, *ticks]
    return [
        fn(list(values[start:stop]))
        for start, stop in zip(boundaries, boundaries[1:])
    ]

def max_pool_series(ticks, values):
    """Pool *values* between consecutive ticks, keeping the maximum of each slice."""
    return pool_series(ticks, values, fn=np.max)

def mean_pool_series(ticks, values):
    """Pool *values* between consecutive ticks, keeping the mean of each slice."""
    return pool_series(ticks, values, fn=np.mean)

## Genetic Material over Generations

In [None]:
def plot_genetic_material_over_generations(df: pd.DataFrame, figlen: int = 8):
    """Plot pooled DNA-pool entropy and the gene-count range per generation.

    Reads the ``entropy_dna_pool``, ``genes_max`` and ``genes_min`` columns of
    *df*. The rows are mean-pooled into roughly 15 buckets; entropy is drawn
    as a filled area and the min/max gene counts as labelled bars.

    Args:
        df: Summary table with one row per generation.
        figlen: Figure width; the height is half of it.
    """
    # Rounded white box drawn behind every value label.
    label_box = {'boxstyle': "round", 'fc': "w", 'ec': "#ccc", 'lw': 1}

    def draw_area(axes, xs, series: pd.Series, color: str, label: str):
        pooled = mean_pool_series(xs, series)
        axes.set_xticks(xs)
        axes.fill_between(xs, pooled, color=color, edgecolor='#fff', label=label)
        axes.plot(xs, pooled, color="#ffffff", marker='o')

    def draw_candles(axes, xs, upper: pd.Series, lower: pd.Series, color: str, label: str):
        tops = mean_pool_series(xs, upper)
        bottoms = mean_pool_series(xs, lower)
        spans = [top - bottom for top, bottom in zip(tops, bottoms)]
        axes.set_xticks(xs)
        axes.bar(
            xs,
            height=spans,
            bottom=bottoms,
            width=2.5,
            color=color + 'cc',
            edgecolor=color,
            label=label,
        )
        for x, top, bottom in zip(xs, tops, bottoms):
            # Value labels sit slightly above the top and below the bottom of each bar.
            axes.text(x, top + 0.15, str(round(top, 2)), ha="center", va="center", bbox=dict(label_box))
            axes.text(x, bottom - 0.15, str(round(bottom, 2)), ha="center", va="center", bbox=dict(label_box))

    n_rows = len(df.index)
    step = max(1, n_rows // 15)
    ticks = np.arange(1, n_rows + 1, step)

    fig, ax = plt.subplots(figsize=(figlen, figlen/2))
    draw_area(ax, ticks, df['entropy_dna_pool'], color='#003f5c44', label='entropy (dna)')
    draw_candles(ax, ticks, df['genes_max'], df['genes_min'], color='#58508d', label='genes')

    ax.set_facecolor("#eee")
    ax.legend(loc='lower center', bbox_to_anchor=(0.5, -0.3), ncol=4)
    ax.grid(True)
    ax.set_xlabel('generation')
    ax.set_ylabel('gene counts & entropy score')

    fig.subplots_adjust(bottom=0.25)
    fig.tight_layout()

#plot_genetic_material_over_generations(df=summary_per_variant.get('tests'))

In [None]:
# Genetic-material plot for variant atavism=0.0 / pmr=0.1 / pma=-0.15.
# NOTE(review): .get() yields None for a missing key, which the plot function does not handle.
plot_genetic_material_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=-0.15'))

In [None]:
# Genetic-material plot for variant atavism=0.0 / pmr=0.1 / pma=-0.05.
plot_genetic_material_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=-0.05'))

In [None]:
# Genetic-material plot for variant atavism=0.0 / pmr=0.1 / pma=0.05.
plot_genetic_material_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=0.05'))

In [None]:
# Genetic-material plot for variant atavism=0.0 / pmr=0.1 / pma=0.15.
plot_genetic_material_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=0.15'))

## Fitness over Generations

In [None]:
def calculate_leap_series(highs: pd.Series) -> pd.Series:
    """Flag every position whose value exceeds all values before it.

    A position is marked ``1.0`` when its value is strictly greater than the
    maximum of all earlier values and that earlier maximum is itself greater
    than zero. Every other position, including the first, is ``0.0``.

    Args:
        highs: Sequence of scores in chronological order.

    Returns:
        A float Series of 0/1 flags with one entry per input value. When
        *highs* is a Series the result carries the same index, so it aligns
        row by row when assigned back to the originating DataFrame.
    """
    leaps = []
    best_so_far = None  # maximum of all values seen before the current one
    for high in highs:
        leap = 0.
        if best_so_far is None:
            # First value: nothing to compare against yet.
            best_so_far = high
        else:
            if best_so_far > 0 and high > best_so_far:
                leap = 1.
            # Single-pass running maximum instead of re-scanning the prefix.
            if high > best_so_far:
                best_so_far = high
        leaps.append(leap)
    index = highs.index if isinstance(highs, pd.Series) else None
    return pd.Series(leaps, index=index, dtype=float)

In [None]:
def plot_fitness_over_generations(df: pd.DataFrame, figlen: int = 8, meanfit_yadjust: float = 0):
    """Plot pooled fitness curves and leap frequency per generation.

    Reads the ``fitness_mean``, ``fitness_highest``, ``entropy_dna_pool`` and
    ``entropy_fitness_scores`` columns of *df*, prints a few summary
    statistics and draws:

    * bars with the mean-pooled leap flags (left axis),
    * the max-pooled highest fitness and the mean-pooled mean fitness
      (right axis).

    Side effect: a ``leaps`` column is added to (or overwritten in) *df*.

    Args:
        df: Summary table with one row per generation.
        figlen: Width and height of the square figure.
        meanfit_yadjust: Added to the 0.45 factor that offsets the value
            labels of the mean-fitness line, to avoid overlapping labels.
    """
    # NumPy 2.0 renamed trapz to trapezoid; use whichever this install provides.
    trapezoid = getattr(np, 'trapezoid', None) or getattr(np, 'trapz')
    label_box = dict(boxstyle="round", fc="w", ec="#ccc", lw=1)

    def annotated_line(ax, ticks, values: pd.Series, color: str, label: str, ydist: float = 0., pool_type: str = 'mean'):
        # Compare against the name: any non-empty string is truthy, so a plain
        # truthiness test would always select mean pooling.
        pool = max_pool_series if pool_type == 'max' else mean_pool_series
        values = pool(ticks, values)
        ax.set_xticks(ticks)
        ax.plot(ticks, values, marker='o', color=color, label=label)
        # Label offset is relative to the tallest pooled value.
        offset = max(values) * ydist
        for i, v in zip(ticks, values):
            ax.text(i, v + offset, str(round(v, 2)), ha="center", va="center", bbox=dict(label_box))

    def annotated_bar(ax, ticks, values: pd.Series, color: str, label: str, ydist: float):
        values = mean_pool_series(ticks, values)
        ax.set_xticks(ticks)
        ax.bar(ticks, height=values, color=color, edgecolor='#fff', width=5., label=label)
        for i, v in zip(ticks, values):
            ax.text(i, v + ydist, str(round(v, 2)), ha="center", va="center", bbox=dict(boxstyle="round", fc="#ffac00", ec="#ccc", lw=1))

    tickslen = len(df.index)
    ticksstep = max(1, tickslen // 15)
    ticks = np.arange(1, tickslen + 1, ticksstep)

    # Assign positionally: the leap series may carry a different index than df,
    # and index-aligned assignment would then shift rows or introduce NaN.
    df['leaps'] = calculate_leap_series(df.fitness_mean).to_numpy()

    normalised_leaps = mean_pool_series(ticks, df['leaps'])
    print(f"Total Leaps: {int((df['leaps'] > 0).sum())}")
    print(f"AUC of Lel(k): {trapezoid(normalised_leaps)}")
    print(f"Highest Fitness Scores: {float(df.fitness_highest.max())}")
    print(f"Mean Entropy of DNA Pool: {float(df.entropy_dna_pool.mean())}")
    print(f"Mean Entropy of Fitness Scores: {float(df.entropy_fitness_scores.mean())}")

    fig, ax = plt.subplots(figsize=(figlen, figlen))
    ax.set_facecolor("#eee")
    ax.grid(True)
    ax.set_xlabel('generation')
    ax.set_ylabel('lel(k)')
    annotated_bar(ax, ticks, df['leaps'], color='#bc509044', label='lel(k)', ydist=0.025)
    ax.legend(loc='lower right', bbox_to_anchor=(0.5, -0.125), ncol=4)

    # Fitness curves share the x-axis but get their own y-axis on the right.
    ax2 = ax.twinx()
    ax2.set_ylabel('fitness score & entropy score')
    annotated_line(ax2, ticks, df.fitness_highest, color='#003f5c', label='fitness (highest)', ydist=0.025, pool_type='max')
    annotated_line(ax2, ticks, df.fitness_mean, color='#58508d', label='fitness (mean)', ydist=0.45 + meanfit_yadjust)
    ax2.legend(loc='lower left', bbox_to_anchor=(0.5, -0.125), ncol=4)

    fig.subplots_adjust(bottom=0.25)
    fig.tight_layout()

#plot_fitness_over_generations(df=summary_per_variant.get('tests'))

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.1 / pma=-0.15.
# meanfit_yadjust is added to the label-offset factor of the mean-fitness line.
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=-0.15'), meanfit_yadjust=0.2)

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.1 / pma=-0.05 (default label offset).
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=-0.05'))

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.1 / pma=0.05.
# The negative meanfit_yadjust reduces the label-offset factor of the mean-fitness line.
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=0.05'), meanfit_yadjust=-0.35)

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.1 / pma=0.15 (default label offset).
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.1/pma=0.15'))

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.25 / pma=-0.15.
# meanfit_yadjust is added to the label-offset factor of the mean-fitness line.
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.25/pma=-0.15'), meanfit_yadjust=0.2)

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.25 / pma=-0.05 (default label offset).
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.25/pma=-0.05'))

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.25 / pma=0.05.
# The negative meanfit_yadjust reduces the label-offset factor of the mean-fitness line.
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.25/pma=0.05'), meanfit_yadjust=-0.375)

In [None]:
# Fitness plot for variant atavism=0.0 / pmr=0.25 / pma=0.15.
# The negative meanfit_yadjust reduces the label-offset factor of the mean-fitness line.
plot_fitness_over_generations(df=summary_per_variant.get('atavism=0.0/pmr=0.25/pma=0.15'), meanfit_yadjust=-0.375)