In [None]:
import pandas as pd
import re
# Load the gathered BindFlow statistics; the first CSV column becomes the row index.
stats = pd.read_csv("../data/simulation/bindflow/gather/BindFlow-stats.csv", index_col=0)
# Keep only the rows whose 'pearson' text does not begin with 'nan'.
# NOTE(review): assumes every 'pearson' entry is a string - a missing value would
# make the mask contain NaN and break the negation; confirm against the CSV.
pearson_is_nan = stats['pearson'].str.startswith('nan')
stats = stats.loc[~pearson_is_nan]

# Lookup tables that translate raw identifiers into the display names used in
# the final table. Insertion order matters: the values of each dict are reused
# as the ordered categories when the matching column is turned into a
# pd.Categorical, which in turn fixes the row order after sorting.

# Raw value of the 'source' column -> system display name.
SYSTEM_NAME = {
    "p38": "P38",
    "A2A": "A2A",
    "ptp1b": "PTP1B",
    "tyk2": "TYK2",
    "thrombin": "Thrombin",
    "mcl1": "MCL1",
    "CyclophilinD": "CyclophilinD",
    "SAMPL6-OA": "SAMPL6-OA",
}

# Force-field token (third "_"-separated field of the row label) -> display name.
FORCE_FIELD_NAME = {
    'espaloma-0.3.1': 'Espaloma',
    'gaff-2.11': 'GAFF',
    'openff-2.0.0': 'OpenFF',
}

# Calculation-type token (second "_"-separated field of the row label) -> display name.
CALC_TYPE_NAME = {
    "mbar": "FEP",
    "dh-gb": "MMGBSA",
    "dg-c2-gb": "MMGBSA-C2",
    "dg-ie-gb": "MMGBSA-IE",
    "dh-pb": "MMPBSA",
    "dg-c2-pb": "MMPBSA-C2",
    "dg-ie-pb": "MMPBSA-IE",
}


# Every row label is split on "_" into exactly three fields; the first is
# discarded, the second is the calculation type and the third the force field.
# Unpacking into three names raises ValueError for a label of any other shape.
calc_types = []
ffs = []
for _, calc_type, ff in (label.split("_") for label in stats.index):
    calc_types.append(calc_type)
    ffs.append(ff)

stats["CalcType"] = calc_types
stats["ff"] = ffs
stats = stats.reset_index(drop=True)

# Translate the raw identifiers to display names once per column and make each
# column an ordered categorical whose category order is the dict's insertion
# order. Values that are not among the display names become NaN in the
# categorical. The categories are passed as a concrete list rather than a
# dict view.
for column_name, display_names in (
    ("source", SYSTEM_NAME),
    ("CalcType", CALC_TYPE_NAME),
    ("ff", FORCE_FIELD_NAME),
):
    stats[column_name] = pd.Categorical(
        stats[column_name].replace(display_names),
        categories=list(display_names.values()), ordered=True)


# Sorting categoricals follows the category order defined above, not the alphabet.
stats = stats.sort_values(by=["source", "CalcType", "ff"]).reset_index(drop=True)
# Last expression of the cell: shows the table indexed by the three keys
# without modifying `stats` itself.
stats.set_index(["source", "CalcType", "ff"])


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Colormap used to shade the table cells: matplotlib's registered 'RdYlBu' map.
# It is passed to rank_to_color() in the loop over the metric columns further down.
cmap = plt.get_cmap('RdYlBu')

def get_colors(N, cmap):
    """Sample ``N`` colors from ``cmap`` as integer triples.

    Parameters
    ----------
    N : int
        Number of colors; ``cmap`` is evaluated at ``N`` evenly spaced
        positions from 0 to 1 (both ends included).
    cmap : callable
        Called with one position; only the first three components of what it
        returns are used (presumably RGB fractions in [0, 1] - any further
        component such as alpha is dropped).

    Returns
    -------
    list of tuple of int
        One ``(r, g, b)`` tuple per position, each component multiplied by 255
        and truncated with ``int``.
    """
    palette = []
    for position in np.linspace(0, 1, N):
        red, green, blue = cmap(position)[:3]
        # int() truncates toward zero rather than rounding.
        palette.append((int(red * 255), int(green * 255), int(blue * 255)))
    return palette


def get_numerical_values(text):
    """Extract ``(value, lower, upper)`` from text like ``"0.85 [95%: 0.70, 0.93]"``.

    The first occurrence of the pattern anywhere in ``text`` is used. Each of
    the three numbers must be written with a decimal point and may carry a
    leading minus sign.

    Returns
    -------
    tuple of float or None
        ``(value, interval_min, interval_max)``, or ``None`` when ``text``
        does not contain the pattern.
    """
    found = re.search(r"(-?\d+\.\d+)\s+\[\d+%:\s+(-?\d+\.\d+),\s+(-?\d+\.\d+)\]", text)
    if found is None:
        return None
    # Groups are, in order: nominal value, interval minimum, interval maximum.
    return tuple(float(token) for token in found.groups())

# Render a column as LaTeX cells whose background color encodes the value's rank
def rank_to_color(column, colormap, ascending=False):
    """Format every cell of ``column`` as a rank-colored LaTeX string.

    Each cell is parsed with ``get_numerical_values`` into
    ``(value, lower, upper)``. The nominal values are ranked, the rank selects
    one of ``len(column)`` colors sampled from ``colormap`` by ``get_colors``,
    and the cell is rendered as
    ``\\cellcolor[RGB]{r,g,b}${value}^{upper}_{lower}$`` with two decimals.

    Parameters
    ----------
    column : iterable of str
        Cells such as ``"0.85 [95%: 0.70, 0.93]"`` (a pandas Series in the
        notebook).
    colormap : callable
        Forwarded unchanged to ``get_colors``.
    ascending : bool, default False
        Rank direction passed to ``Series.rank``. With the default, the
        largest nominal value receives the first color of the colormap; pass
        ``True`` to give the first color to the smallest value instead
        (possibly what is wanted for error metrics where lower is better -
        to be confirmed by the author).

    Returns
    -------
    list of str
        One LaTeX string per cell, in the order of ``column``.

    Raises
    ------
    ValueError
        If any cell does not match the pattern expected by
        ``get_numerical_values``.
    """
    parsed = [get_numerical_values(cell) for cell in column]

    # Fail with a readable message instead of "'NoneType' object is not
    # subscriptable" when a cell cannot be parsed (for instance because the
    # column already holds the LaTeX strings produced by an earlier call).
    unparsed = [cell for cell, triple in zip(column, parsed) if triple is None]
    if unparsed:
        raise ValueError(
            f"rank_to_color: {len(unparsed)} cell(s) do not look like "
            f"'value [NN%: min, max]'; first offending cell: {unparsed[0]!r}"
        )

    nominal = pd.Series([triple[0] for triple in parsed])
    # method='max' gives tied values the highest rank of their group; the -1
    # turns the 1-based ranks into indices into the palette.
    ranks = nominal.rank(method='max', ascending=ascending).astype(int) - 1
    palette = get_colors(len(parsed), colormap)

    cells = []
    for rank, (value, lower, upper) in zip(ranks, parsed):
        red, green, blue = palette[rank]
        # Upper bound goes in the superscript, lower bound in the subscript.
        cells.append(
            f"\\cellcolor[RGB]{{{red},{green},{blue}}}${{{value:.2f}}}^{{{upper:.2f}}}_{{{lower:.2f}}}$"
        )
    return cells

# Swap every metric column for its rank-colored LaTeX rendering.
# NOTE(review): the metric columns of `stats` are overwritten in place, so
# running this cell a second time without re-running the loading cell passes
# the already-formatted LaTeX strings back into rank_to_color.
columns = ["pearson", "kendall", "spearman", "rmse", "mse", "mue"]
for metric in columns:
    stats[metric] = rank_to_color(stats[metric], cmap)

# Build the LaTeX table indexed by system, calculation type and force field.
# escape=False leaves the \cellcolor commands and math delimiters untouched.
indexed_stats = stats.set_index(["source", "CalcType", "ff"])
latex_table = indexed_stats.to_latex(escape=False)

# To write the table to disk instead of printing it, enable:
# with open("rank_based_color_coded_table.tex", "w") as f:
#     f.write(latex_table)

print(latex_table)