In [1]:
import black
import jupyter_black

jupyter_black.load(
    lab=True,
    line_length=110,
    target_version=black.TargetVersion.PY310,
)

In [2]:
import os
import re

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd

from matplotlib.colors import BoundaryNorm, ListedColormap

import lysis

pd.reset_option("display.precision")
pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

In [3]:
experiment_type = np.dtype(
    [
        ("descriptor", np.str_, 40),
        ("file_code", np.str_, 40),
        ("forced_unbind", np.float_),
        ("average_bind_time", np.float_),
    ]
)
code_type = np.dtype([("file_code", np.str_, 40), ("descriptor", np.str_, 40), ("executable", np.str_, 40)])
run_type = np.dtype(
    [
        ("exp_code", np.str_, 15),
        ("experiment", np.str_, 40),
        ("code", np.str_, 40),
        ("seed", int),
        ("running_time", int),
    ]
)

In [4]:
experiments = np.array(
    [
        ("Physiological Kd", "", 8.52e-2, 27.8),
        ("10x bigger", "_Kd0236", 5.4e-3, 2.78),
        ("10x smaller", "_Kd00020036", 0.5143, 277.8),
    ],
    dtype=experiment_type,
)
programs = np.array(
    [
        ("_always", "Always bind", "macro_Q2_always_rebind"),
        ("_along", "Diffuse along clot", "macro_Q2_diffuse_along"),
        ("_into", "Diffuse into clot", "macro_Q2_diffuse_into"),
        (
            "_into_and_along",
            "Diffuse into and along clot - BUGGED",
            "macro_Q2_diffuse_into_and_along",
        ),
        (
            "_into_and_along_fixed",
            "Diffuse into and along clot - FIXED",
            "macro_Q2_diffuse_into_and_along_fixed",
        ),
    ],
    dtype=code_type,
)
runs = np.empty(15, dtype=run_type)

In [5]:
in_file_code = "_PLG2_tPA01{data_code}_Q2.dat"
out_file_code = "_PLG2_tPA01{data_code}{program_code}_Q2.dat"

slope_tolerance = 1e-3

In [6]:
runs = np.array(
    [
        ("2023-02-01-2000", "Physiological Kd", "Diffuse along clot", 17109424, 1200),
        ("2023-02-01-2001", "Physiological Kd", "Always bind", 9965734, 1800),
        ("2023-02-01-2002", "Physiological Kd", "Diffuse into and along clot - BUGGED", -2137354075, 1200),
        ("2023-02-01-2003", "Physiological Kd", "Diffuse into and along clot - FIXED", -2137354075, 1800),
        ("2023-02-01-2004", "Physiological Kd", "Diffuse into clot", -2135977853, 1200),
        ("2023-02-01-2005", "10x smaller", "Diffuse along clot", -848304637, 1200),
        ("2023-02-01-2006", "10x smaller", "Always bind", 1299539472, 1800),
        ("2023-02-01-2007", "10x smaller", "Diffuse into and along clot - BUGGED", -854989241, 1200),
        ("2023-02-01-2008", "10x smaller", "Diffuse into and along clot - FIXED", -854989241, 1800),
        ("2023-02-01-2009", "10x smaller", "Diffuse into clot", -850336215, 1200),
        ("2023-02-01-2010", "10x bigger", "Diffuse along clot", -1216563743, 1200),
        ("2023-02-01-2011", "10x bigger", "Always bind", 669985532, 900),
        ("2023-02-01-2012", "10x bigger", "Diffuse into and along clot - BUGGED", -1212172957, 1200),
        ("2023-02-01-2013", "10x bigger", "Diffuse into and along clot - FIXED", -1212172957, 1800),
        ("2023-02-01-2014", "10x bigger", "Diffuse into clot", -1213352577, 1200),
    ],
    dtype=run_type,
)

In [7]:
index = pd.MultiIndex.from_product(
    [experiments["descriptor"], programs["descriptor"]], names=["data", "program"]
)
# index = [run['experiment'] + " - " + run['code'] for run in runs]
statistics = ["Mean front velocity", "Mean of Standard Deviation of front velocity"]
results = pd.DataFrame(index=index, columns=statistics)
#front_velocity_table = pd.DataFrame(index=programs["descriptor"], columns=experiments["descriptor"])

In [8]:
def load_files(exp, file_code):
    deg = np.fromfile(os.path.join(e.os_path, "deg" + file_code))
    tsave = np.fromfile(os.path.join(e.os_path, "tsave" + file_code))
    mfpt = np.fromfile(os.path.join(e.os_path, "mfpt" + file_code))
    deg = deg.reshape(
        exp.macro_params.total_trials, exp.macro_params.number_of_saves, exp.macro_params.total_edges
    )
    tsave = tsave.reshape(exp.macro_params.total_trials, exp.macro_params.number_of_saves)
    return deg, tsave, mfpt


def map_fortran_deg(exp, deg):
    mapped_deg = -deg
    mapped_deg[deg == 0] = exp.macro_params.total_time + 1  # float("inf")
    mapped_deg[deg == -1] = 0
    return mapped_deg


def calculate_time_row_exposed(exp, deg):
    exposed_time = np.empty(
        (exp.macro_params.total_trials, exp.macro_params.rows - 1, exp.macro_params.cols), dtype=np.float_
    )
    for run in range(exp.macro_params.total_trials):
        for j in range(exp.macro_params.cols):
            for i in range(exp.macro_params.rows - 1):
                if i == 0:
                    exposed_time[run, i, j] = 0
                else:
                    k = lysis.to_fortran_edge_index(i, j, exp.macro_params.rows, exp.macro_params.cols)
                    exposed_time[run, i, j] = max(
                        exposed_time[run, i - 1, j], deg[run, exp.macro_params.number_of_saves - 1, k]
                    )
    # exposed_time = 10* np.ceil(exposed_time / 10)
    return exposed_time / 60


def find_degradation_fronts(exp, exposed_time, y_distance):
    deg_fronts = []
    for r in range(exp.macro_params.total_trials):
        run_deg_fronts = []
        for j in range(exp.macro_params.cols):
            col_deg_front = []
            for i in range(1, exp.macro_params.rows - 1):
                if exposed_time[r, i - 1, j] < exposed_time[r, i, j] < exp.macro_params.total_time + 1:
                    col_deg_front.append([exposed_time[r, i, j], y_distance[i]])
            run_deg_fronts.append(np.array(col_deg_front).T)
        deg_fronts.append(run_deg_fronts)
    return deg_fronts


# TODO(bpaynter): Change this later to do mean and std of all columns across all runs
def mean_front_velocity(exp, deg_fronts):
    run_mean_velocity = np.empty(exp.macro_params.total_trials, dtype=np.float_)
    run_std_velocity = np.empty(exp.macro_params.total_trials, dtype=np.float_)
    for run in range(exp.macro_params.total_trials):
        front_velocity = np.empty(exp.macro_params.cols, dtype=np.float_)
        for j in range(exp.macro_params.cols):
            b, m = np.polynomial.polynomial.polyfit(deg_fronts[run][j][0], deg_fronts[run][j][1], 1)
            front_velocity[j] = m
        run_mean_velocity[run] = np.mean(front_velocity)
        run_std_velocity[run] = np.std(front_velocity)
    return np.mean(run_mean_velocity), np.mean(run_std_velocity)


def plot_front_degradation(exp, file_code, deg_fronts, deg):
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_axes([0, 0, 1, 1])
    ax.set_axis_on()
    ax.set_xlim(0, (np.max(deg[:, -1, :]) // 60) + 1)
    ax.set_ylim(
        (exp.macro_params.empty_rows - 1) * e.macro_params.grid_node_distance,
        (exp.macro_params.rows - 1) * exp.macro_params.grid_node_distance,
    )
    for run in range(exp.macro_params.total_trials):
        for j in range(exp.macro_params.cols):
            plt.plot(deg_fronts[run][j][0], deg_fronts[run][j][1], linewidth=1)
    fig.savefig(os.path.join(exp.os_path, "deg_fronts" + file_code[:-4] + ".png"), bbox_inches="tight")
    plt.close()


def find_degraded_percent(exp, deg, tsave):
    degraded_percent = np.empty(
        (exp.macro_params.total_trials, exp.macro_params.number_of_saves), dtype=np.float_
    )
    for r in range(exp.macro_params.total_trials):
        for t in range(exp.macro_params.number_of_saves):
            degraded_percent[r, t] = np.count_nonzero(deg[r, t] <= tsave[r, t])
    degraded_percent -= exp.macro_params.empty_rows * exp.macro_params.full_row
    return degraded_percent / exp.macro_params.total_fibers


def mean_degradation_rate(exp, degraded_percent, tsave):
    slope = np.empty((exp.macro_params.total_trials, exp.macro_params.number_of_saves), dtype=np.float_)
    for r in range(exp.macro_params.total_trials):
        slope[r, 0] = degraded_percent[r, 0]
        for t in range(1, exp.macro_params.number_of_saves):
            slope[r, t] = degraded_percent[r, t] - degraded_percent[r, t - 1]
    degradation_happening = slope_tolerance <= slope
    degradation_rate = np.empty(exp.macro_params.total_trials, dtype=np.float_)
    offset = np.empty(exp.macro_params.total_trials, dtype=np.float_)
    for r in range(exp.macro_params.total_trials):
        b, m = np.polynomial.polynomial.polyfit(
            tsave[r][degradation_happening[r]] / 60, degraded_percent[r][degradation_happening[r]], 1
        )
        degradation_rate[r] = m
        offset[r] = b
    return degradation_rate, offset


def plot_degradation_percent(exp, degraded_percent, tsave, degradation_rate, offset):
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_axes([0, 0, 1, 1])
    ax.set_xlim(0, exp.macro_params.total_time / 60)
    ax.set_ylim(-0.1, 1.1)
    for r in range(exp.macro_params.total_trials):
        plt.plot(tsave[r] / 60, degraded_percent[r])
        plt.plot(
            np.arange(exp.macro_params.total_time / 60 + 1) * degradation_rate[r] + offset[r],
            color="b",
            alpha=0.5,
            zorder=0.1,
        )
    fig.savefig(os.path.join(exp.os_path, "deg_rate" + file_code[:-4] + ".png"), bbox_inches="tight")
    plt.close()


def get_unbind_amounts(exp):
    macro_unbind_pattern = re.compile(r"countmacrounbd=\s*(\d+)")
    micro_unbind_pattern = re.compile(r"countmicrounbd=\s*(\d+)")
    log_file_name = os.path.join(exp.os_path, "macro" + file_code[:-4] + ".txt")
    with open(log_file_name, "r") as file:
        log_text = file.read()
    macro_unbinds = re.findall(macro_unbind_pattern, log_text)
    micro_unbinds = re.findall(micro_unbind_pattern, log_text)
    return np.array(macro_unbinds, dtype=int), np.array(micro_unbinds, dtype=int)

In [9]:
for run in runs:
    prog = programs[programs["descriptor"] == run["code"]]
    exper = experiments[experiments["descriptor"] == run["experiment"]]
    e = lysis.util.Experiment(os.path.join("..", "..", "data"), experiment_code=run["exp_code"])
    e.read_file()
    y_distance = np.arange(e.macro_params.rows - 1) * e.macro_params.grid_node_distance
    file_code = out_file_code.format(data_code=exper["file_code"][0], program_code=prog["file_code"][0])
    print(run["exp_code"], file_code)
    deg, tsave, mfpt = load_files(e, file_code)
    deg = map_fortran_deg(e, deg)
    exposed_time = calculate_time_row_exposed(e, deg)
    deg_fronts = find_degradation_fronts(e, exposed_time, y_distance)
    plot_front_degradation(e, file_code, deg_fronts, deg)
    m, sd = mean_front_velocity(e, deg_fronts)
    results.loc[(run["experiment"], run["code"]), "Mean front velocity"] = m
    results.loc[(run["experiment"], run["code"]), "Mean of Standard Deviation of front velocity"] = sd

    deg_percent = find_degraded_percent(e, deg, tsave)
    results.loc[(run["experiment"], run["code"]), "Mean degradation percent"] = (
        np.mean(deg_percent[:, -1]) * 100
    )
    deg_rate, offset = mean_degradation_rate(e, deg_percent, tsave)
    results.loc[(run["experiment"], run["code"]), "Mean degradation rate"] = np.mean(deg_rate) * 100
    results.loc[(run["experiment"], run["code"]), "Standard deviation of degradation rate"] = (
        np.std(deg_rate) * 100
    )
    plot_degradation_percent(e, deg_percent, tsave, deg_rate, offset)

    results.loc[
        (run["experiment"], run["code"]), "Number of molecules that reached the back row"
    ] = np.count_nonzero(mfpt > 0)
    results.loc[(run["experiment"], run["code"]), "Percent of molecules that reached the back row"] = (
        np.count_nonzero(mfpt > 0) / e.macro_params.total_molecules * 100
    )
    results.loc[(run["experiment"], run["code"]), "Mean first passage time (min)"] = np.mean(
        mfpt[mfpt > 0] / 60
    )
    results.loc[(run["experiment"], run["code"]), "Standard deviation of first passage time"] = np.std(
        mfpt[mfpt > 0] / 60
    )
    macro_unbinds, micro_unbinds = get_unbind_amounts(e)
    if macro_unbinds.size > 0:
        results.loc[(run["experiment"], run["code"]), "Mean number of macroscale unbinds"] = np.mean(
            macro_unbinds
        )
        results.loc[
            (run["experiment"], run["code"]), "Standard deviation in number of macroscale unbinds"
        ] = np.std(macro_unbinds)
    if micro_unbinds.size > 0:
        results.loc[(run["experiment"], run["code"]), "Mean number of microscale unbinds"] = np.mean(
            micro_unbinds
        )
        results.loc[
            (run["experiment"], run["code"]), "Standard deviation in number of microscale unbinds"
        ] = np.std(micro_unbinds)

2023-02-01-2000 _PLG2_tPA01_along_Q2.dat
2023-02-01-2001 _PLG2_tPA01_always_Q2.dat
2023-02-01-2002 _PLG2_tPA01_into_and_along_Q2.dat
2023-02-01-2003 _PLG2_tPA01_into_and_along_fixed_Q2.dat
2023-02-01-2004 _PLG2_tPA01_into_Q2.dat
2023-02-01-2005 _PLG2_tPA01_Kd00020036_along_Q2.dat


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


2023-02-01-2006 _PLG2_tPA01_Kd00020036_always_Q2.dat
2023-02-01-2007 _PLG2_tPA01_Kd00020036_into_and_along_Q2.dat
2023-02-01-2008 _PLG2_tPA01_Kd00020036_into_and_along_fixed_Q2.dat
2023-02-01-2009 _PLG2_tPA01_Kd00020036_into_Q2.dat
2023-02-01-2010 _PLG2_tPA01_Kd0236_along_Q2.dat
2023-02-01-2011 _PLG2_tPA01_Kd0236_always_Q2.dat
2023-02-01-2012 _PLG2_tPA01_Kd0236_into_and_along_Q2.dat
2023-02-01-2013 _PLG2_tPA01_Kd0236_into_and_along_fixed_Q2.dat
2023-02-01-2014 _PLG2_tPA01_Kd0236_into_Q2.dat


In [27]:
compilations = {
    "Front Velocity": [
        "Mean front velocity",
        "Mean of Standard Deviation of front velocity",
    ],
    "Degradation Rate": ["Mean degradation rate", "Standard deviation of degradation rate"],
    "Mean First Passage Time": ["Mean first passage time (min)", "Standard deviation of first passage time"],
    "Macroscale Unbinds": [
        "Mean number of macroscale unbinds",
        "Standard deviation in number of macroscale unbinds",
    ],
    "Microscale Unbinds": [
        "Mean number of microscale unbinds",
        "Standard deviation in number of microscale unbinds",
    ],
}

In [46]:
which_display = "Macroscale Unbinds"
display = pd.DataFrame(index=results.index)
display = results[compilations[which_display]].apply(lambda x: f"{x[0]:.2f} \u00B1 {x[1]:.2f}", axis=1)
display.unstack(0).reindex(results.index.get_level_values(1).unique())[
    results.index.get_level_values(0).unique()
]

data,Physiological Kd,10x bigger,10x smaller
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Always bind,nan ± nan,nan ± nan,nan ± nan
Diffuse along clot,594867.50 ± 608.71,571004.10 ± 1204.67,154365.30 ± 169.41
Diffuse into clot,205717.10 ± 727.29,369550.00 ± 562.57,108361.40 ± 404.93
Diffuse into and along clot - BUGGED,205944.10 ± 722.66,369901.00 ± 657.76,108462.60 ± 414.49
Diffuse into and along clot - FIXED,434408.90 ± 2741.68,533335.00 ± 1322.27,213134.20 ± 289.18


Index(['Physiological Kd', '10x bigger', '10x smaller'], dtype='object', name='data')

In [11]:
results = results.astype({"Number of molecules that reached the back row": int})
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean front velocity,Mean of Standard Deviation of front velocity,Mean degradation percent,Mean degradation rate,Standard deviation of degradation rate,Number of molecules that reached the back row,Percent of molecules that reached the back row,Mean first passage time (min),Standard deviation of first passage time,Mean number of macroscale unbinds,Standard deviation in number of macroscale unbinds,Mean number of microscale unbinds,Standard deviation in number of microscale unbinds
data,program,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Physiological Kd,Always bind,6.8,0.05,100.0,6.75,0.01,43068,99.99,15.11,1.15,,,,
Physiological Kd,Diffuse along clot,4.35,0.04,85.92,4.34,0.01,2,0.0,19.49,0.25,594867.5,608.71,48267.6,273.64
Physiological Kd,Diffuse into clot,10.93,1.78,100.0,17.97,0.16,43066,99.98,5.0,2.69,205717.1,727.29,30342.5,161.28
Physiological Kd,Diffuse into and along clot - BUGGED,10.93,1.76,100.0,18.01,0.18,43066,99.98,4.99,2.67,205944.1,722.66,30390.2,175.36
Physiological Kd,Diffuse into and along clot - FIXED,6.23,0.26,100.0,7.93,0.07,43065,99.98,12.72,3.49,434408.9,2741.68,40807.4,111.22
10x bigger,Always bind,10.64,0.16,100.0,10.57,0.01,42400,98.44,9.75,1.04,,,,
10x bigger,Diffuse along clot,9.44,0.15,100.0,9.4,0.02,42930,99.67,11.24,1.62,571004.1,1204.67,29781.2,66.56
10x bigger,Diffuse into clot,15.43,0.6,100.0,16.12,0.03,43060,99.97,6.68,1.7,369550.0,562.57,22862.3,135.25
10x bigger,Diffuse into and along clot - BUGGED,15.46,0.62,100.0,16.09,0.08,43053,99.95,6.73,1.72,369901.0,657.76,22815.8,77.69
10x bigger,Diffuse into and along clot - FIXED,10.11,0.18,100.0,10.14,0.04,43073,100.0,10.56,1.83,533335.0,1322.27,28150.1,253.08


In [13]:
f"{m:.2f} \u00B1 {sd:.2f}"

'15.43 ± 0.60'

In [15]:
results["Mean number of macroscale unbinds"].unstack(0)

data,10x bigger,10x smaller,Physiological Kd
program,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Always bind,,,
Diffuse along clot,571004.1,154365.3,594867.5
Diffuse into and along clot - BUGGED,369901.0,108462.6,205944.1
Diffuse into and along clot - FIXED,533335.0,213134.2,434408.9
Diffuse into clot,369550.0,108361.4,205717.1


In [47]:
num = [56341.23545, 24535]
formats = [",.2f", ","]
f"First {num[0]:{formats[0]}} then {num[1]:{formats[1]}}"

'First 56,341.24 then 24,535'