In [1]:
import pandas as pd
import numpy as np
import os, re
from typing import Union
from typing import Optional, Tuple

In [2]:
# Load the Fast Downward results summary (columns include metric, avg, std,
# prob_name, domain, planner and config).
df_fd = pd.read_csv("ignore/fd_summary.csv")
# Preview the first rows.
df_fd.head()

Unnamed: 0,metric,avg,std,cv,notes,prob_name,domain,planner,config
0,translate,0.3410626,0.016125,0.047279,,prob10:,zenotravel,fd,ms
1,scoping,2.384334,0.03588,0.015048,,prob10:,zenotravel,fd,ms
2,translate_and_scope,2.725396,0.048151,0.017668,,prob10:,zenotravel,fd,ms
3,plan_unscoped_time,22.13183,0.4199,0.018973,,prob10:,zenotravel,fd,ms
4,plan_scoped_time,16.55866,0.339623,0.02051,,prob10:,zenotravel,fd,ms


In [3]:
def clean_col_name(s: str) -> str:
    """Turn a snake_case metric name into a space-separated, title-cased label."""
    words = s.split("_")
    return " ".join(words).title()

# Hand-written display labels for the metrics that need a custom name.
fd_colmap_manual = dict(
    plan_unscoped_generated_nodes="Eval. (Unscoped)",
    plan_unscoped_node_expansions="Exp. (Unscoped)",
    plan_scoped_generated_nodes="Eval. (Scoped)",
    plan_scoped_node_expansions="Exp. (Scoped)",
    plan_unscoped_time="Planning (Unscoped)",
    plan_scoped_time="Planning (Scoped)",
    encoding_size="Encoding Size",
    total_unscoped_time="Total (Unscoped)",
    total_scoped_time="Total (Scoped)",
)

# Every metric found in the data gets an auto-generated label first; the
# hand-written labels then override the auto-generated ones.
fd_colmap = {
    **{raw_name: clean_col_name(raw_name) for raw_name in df_fd["metric"].unique()},
    **fd_colmap_manual,
}

# Rename the metric values (not the column headers), then keep only the
# rows recorded for the "lmcut" configuration.
df_fd["metric"] = df_fd["metric"].replace(fd_colmap)
df_fd = df_fd.loc[df_fd["config"] == "lmcut"]
df_fd

Unnamed: 0,metric,avg,std,cv,notes,prob_name,domain,planner,config
72,Translate,0.291956,0.064680,0.221540,,prob15:,logistics,fd,lmcut
73,Scoping,0.655775,0.088893,0.135554,,prob15:,logistics,fd,lmcut
74,Translate And Scope,0.947732,0.064741,0.068312,,prob15:,logistics,fd,lmcut
75,Planning (Unscoped),11.632316,0.182590,0.015697,,prob15:,logistics,fd,lmcut
76,Planning (Scoped),3.287064,0.071655,0.021799,,prob15:,logistics,fd,lmcut
...,...,...,...,...,...,...,...,...,...
283,Eval. (Unscoped),1.049809e+07,0.000000,0.000000,,prob07:,satellite,fd,lmcut
284,Exp. (Unscoped),1.247030e+05,0.000000,0.000000,,prob07:,satellite,fd,lmcut
285,Eval. (Scoped),4.915020e+06,0.000000,0.000000,,prob07:,satellite,fd,lmcut
286,Exp. (Scoped),9.612500e+04,0.000000,0.000000,,prob07:,satellite,fd,lmcut


In [4]:
def format_float(a: float, n_dec: Optional[int]) -> str:
    """Format a number with thousands separators.

    Args:
        a: Value to format.
        n_dec: Number of decimal places to keep. ``None`` rounds to the
            nearest integer and prints no decimal point.

    Returns:
        The formatted string, e.g. ``"1,234.6"`` for ``n_dec=1`` or
        ``"1,235"`` for ``n_dec=None``.
    """
    if n_dec is None:
        return f"{int(round(a)):,}"
    # Use n_dec for the printed precision as well as the rounding; previously
    # the output always had exactly one decimal regardless of n_dec.
    # A negative n_dec still rounds to tens/hundreds but prints no decimals.
    return f"{round(a, n_dec):,.{max(n_dec, 0)}f}"

def get_table_str(df: pd.DataFrame, metric: str, use_sd: bool = False) -> str:
    """Render one metric of a per-problem frame as a table-cell string.

    Args:
        df: Frame indexed by metric label, with "avg" and "std" columns.
        metric: Index label of the row to render.
        use_sd: When true, append the standard deviation after a LaTeX
            plus-minus command; otherwise only the average is returned.

    Returns:
        The formatted average, optionally followed by the formatted
        standard deviation. Values that are not floats (or numeric strings)
        are rendered with ``str`` unchanged.
    """
    # Metrics whose name contains one of these substrings keep one decimal;
    # every other metric is printed as an integer.
    long_dec_substrings = ["translate", "scoping", "plan", "total"]
    metric_lower = metric.lower()
    n_dec = 1 if any(sub in metric_lower for sub in long_dec_substrings) else None

    a = _format_table_value(df.loc[metric, "avg"], n_dec)
    if not use_sd:
        return str(a)
    sd = _format_table_value(df.loc[metric, "std"], n_dec)
    # Escaped backslash: "\p" is not a valid Python escape sequence.
    return f"{a} \\pm {sd}"


def _format_table_value(value, n_dec: Optional[int]):
    """Format floats and numeric strings via format_float; pass anything else through."""
    if isinstance(value, str):
        try:
            value = float(value)
        except ValueError:
            # Non-numeric text (for example a value prefixed with ">") is kept verbatim.
            return value
    if isinstance(value, float):
        return format_float(value, n_dec=n_dec)
    return value
    
def get_scoped_unscoped_names(s: str):
    """Return the "(Scoped)" and "(Unscoped)" labels for a base metric name, in that order."""
    return tuple(s + suffix for suffix in (" (Scoped)", " (Unscoped)"))

def remove_gt(s: Union[str, float]) -> Tuple[Union[str, float], bool]:
    """Parse a value that may carry a leading ">" marker.

    Args:
        s: A number, or a string holding a number optionally prefixed
            with ">".

    Returns:
        ``(value, had_gt)``: the numeric value, and True only when ``s`` was
        a string starting with ">".

    Raises:
        ValueError: If a string does not hold a parseable number.
        TypeError: If ``s`` is neither a string nor a real number.
    """
    if isinstance(s, float):
        return s, False
    # Integer-typed values (Python or numpy) previously fell through and the
    # function returned None, which broke tuple unpacking in callers.
    if isinstance(s, (int, np.integer, np.floating)):
        return float(s), False
    if isinstance(s, str):
        text = s.strip()
        if text.startswith(">"):
            return float(text[1:]), True
        return float(text), False
    raise TypeError(f"remove_gt expects a str or a number, got {type(s).__name__}")

def get_better_col(df: pd.DataFrame, metric: str) -> Optional[str]:
    """Pick which of a metric's scoped/unscoped rows has the better (lower) average.

    Returns the row label of the winner, or None when the two are tied or
    both averages are NaN. A NaN average always loses to a number, and a
    value marked with ">" always loses to an unmarked one.
    """
    name_scoped, name_unscoped = get_scoped_unscoped_names(metric)
    raw_scoped = df.loc[name_scoped, "avg"]
    raw_unscoped = df.loc[name_unscoped, "avg"]
    val_scoped, capped_scoped = remove_gt(raw_scoped)
    val_unscoped, capped_unscoped = remove_gt(raw_unscoped)

    # Missing values: the side that has a number wins; no winner if both are missing.
    nan_scoped = np.isnan(val_scoped)
    nan_unscoped = np.isnan(val_unscoped)
    if nan_scoped or nan_unscoped:
        if nan_scoped and nan_unscoped:
            return None
        return name_unscoped if nan_scoped else name_scoped

    # Exactly one side carries the ">" marker: the unmarked side wins.
    if capped_scoped != capped_unscoped:
        return name_unscoped if capped_scoped else name_scoped

    # Plain numeric comparison, smaller is better.
    if val_scoped > val_unscoped:
        return name_unscoped
    if val_scoped < val_unscoped:
        return name_scoped
    return None

In [5]:

# Metrics reported as a scoped/unscoped pair, and metrics reported once per problem.
paired_metrics = ["Eval.", "Exp.", "Total", "Planning"]
solo_metrics = ["Translate", "Scoping"]
# Metrics in this list are printed with their standard deviation.
time_metrics = ["Total", "Planning", "Translate", "Scoping"]

# Every value column of the output table; each one is wrapped in "$...$".
# Built from a copy so that solo_metrics itself is not extended.
metrics_all = list(solo_metrics)
for c in paired_metrics:
    metrics_all.extend(get_scoped_unscoped_names(c))

dfs = []
for (domain, prob_name, planner, config), df in df_fd.groupby(["domain", "prob_name", "planner", "config"]):
    df = df.copy()
    # The metric values were already renamed when fd_colmap was built, so this
    # is a no-op unless raw names are still present.
    df["metric"] = df["metric"].replace(fd_colmap)
    df = df.set_index("metric")

    # Row label: "<domain> <digits taken from prob_name>".
    prob_num = re.search(r"\d+", prob_name).group()
    dom_prob = domain + " " + prob_num

    # One-row table: each key is a column, each value a single-element list.
    d = {"Problem": [dom_prob]}

    for metric in paired_metrics:
        metric_scoped, metric_unscoped = get_scoped_unscoped_names(metric)
        use_sd = metric in time_metrics
        d[metric_scoped] = [get_table_str(df, metric_scoped, use_sd=use_sd)]
        d[metric_unscoped] = [get_table_str(df, metric_unscoped, use_sd=use_sd)]
        # Bold the better value of the pair; leave both plain when there is no winner.
        better_column = get_better_col(df, metric)
        if better_column is not None:
            d[better_column] = ["\\mathbf{" + d[better_column][0] + "}"]

    for metric in solo_metrics:
        use_sd = metric in time_metrics
        d[metric] = [get_table_str(df, metric, use_sd=use_sd)]

    # Wrap every value cell in LaTeX math mode.
    for c in metrics_all:
        d[c][0] = "$" + d[c][0] + "$"

    df_new = pd.DataFrame(data=d)
    dfs.append(df_new)

df_out = pd.concat(dfs)
display(df_out)

Unnamed: 0,Problem,Eval. (Scoped),Eval. (Unscoped),Exp. (Scoped),Exp. (Unscoped),Total (Scoped),Total (Unscoped),Planning (Scoped),Planning (Unscoped),Translate,Scoping
0,driverlog 15,"$\mathbf{21,186}$","$22,980$","$\mathbf{1,379}$","$1,392$",$7.2 \pm 0.2$,$\mathbf{4.9 \pm 0.2}$,$\mathbf{3.1 \pm 0.1}$,$4.4 \pm 0.1$,$0.5 \pm 0.0$,$3.6 \pm 0.2$
0,driverlog 16,"$\mathbf{60,306}$","$87,465$","$\mathbf{3,087}$","$3,618$",$\mathbf{17.1 \pm 0.5}$,$20.5 \pm 0.8$,$\mathbf{8.1 \pm 0.2}$,$19.8 \pm 0.8$,$0.7 \pm 0.0$,$8.3 \pm 0.3$
0,driverlog 17,"$\mathbf{21,030}$","$28,926$",$\mathbf{985}$,"$1,058$",$\mathbf{18.3 \pm 0.5}$,$22.8 \pm 1.0$,$\mathbf{7.9 \pm 0.2}$,$22.0 \pm 0.9$,$0.8 \pm 0.0$,$9.6 \pm 0.3$
0,logistics 15,"$\mathbf{117,643}$","$153,488$","$6,395$","$6,395$",$\mathbf{4.2 \pm 0.1}$,$11.9 \pm 0.2$,$\mathbf{3.3 \pm 0.1}$,$11.6 \pm 0.2$,$0.3 \pm 0.1$,$0.7 \pm 0.1$
0,logistics 20,"$\mathbf{260,336}$","$381,235$","$\mathbf{14,390}$","$14,798$",$\mathbf{6.9 \pm 0.2}$,$27.2 \pm 0.3$,$\mathbf{5.9 \pm 0.1}$,$26.9 \pm 0.3$,$0.3 \pm 0.0$,$0.7 \pm 0.0$
0,logistics 25,"$\mathbf{1,281,693}$","$1,701,778$","$\mathbf{66,683}$","$67,931$",$\mathbf{35.8 \pm 0.4}$,$128.0 \pm 1.4$,$\mathbf{34.8 \pm 0.4}$,$127.7 \pm 1.4$,$0.3 \pm 0.0$,$0.8 \pm 0.0$
0,satellite 05,"$\mathbf{35,231}$","$63,122$","$1,034$","$1,034$",$\mathbf{1.5 \pm 0.0}$,$2.3 \pm 0.1$,$\mathbf{0.6 \pm 0.0}$,$2.0 \pm 0.0$,$0.3 \pm 0.1$,$0.6 \pm 0.1$
0,satellite 06,"$\mathbf{166,415}$","$311,695$","$\mathbf{4,886}$","$5,766$",$\mathbf{2.5 \pm 0.0}$,$6.6 \pm 0.1$,$\mathbf{1.7 \pm 0.0}$,$6.3 \pm 0.1$,$0.3 \pm 0.1$,$0.5 \pm 0.1$
0,satellite 07,"$\mathbf{4,915,020}$","$10,498,090$","$\mathbf{96,125}$","$124,703$",$\mathbf{52.2 \pm 0.3}$,$333.4 \pm 1.8$,$\mathbf{50.9 \pm 0.3}$,$333.1 \pm 1.8$,$0.4 \pm 0.1$,$0.9 \pm 0.1$
0,zenotravel 10,"$\mathbf{654,968}$","$675,945$","$\mathbf{23,649}$","$23,661$",$\mathbf{35.9 \pm 0.5}$,$38.1 \pm 0.7$,$\mathbf{33.2 \pm 0.4}$,$37.7 \pm 0.7$,$0.3 \pm 0.0$,$2.4 \pm 0.1$


In [6]:
# Preview the per-problem size table (state_size and operators for the
# scoped and unscoped variant of each problem).
pd.read_csv("domain_sizes.csv")

Unnamed: 0,domain,problem,scoped,state_size,operators
0,driverlog,prob17,scoped,869072546008039365599290829542785024,3770
1,driverlog,prob17,unscoped,869072546008039365599290829542785024,6170
2,driverlog,prob15,scoped,2971250135632661446656,2112
3,driverlog,prob15,unscoped,2971250135632661446656,2592
4,driverlog,prob16,scoped,2853418839091200000000000000,3540
5,driverlog,prob16,unscoped,2853418839091200000000000000,4890
6,zenotravel,prob10,scoped,719323136000,1095
7,zenotravel,prob10,unscoped,719323136000,1155
8,zenotravel,prob12,scoped,15482251172118528,3159
9,zenotravel,prob12,unscoped,15482251172118528,3375


In [7]:
df_size = pd.read_csv("domain_sizes.csv")
df_out = df_out.set_index("Problem")

# Build the same "<domain> <number>" key that df_out uses, e.g.
# ("driverlog", "prob17") -> "driverlog 17".
df_size["Problem"] = df_size["domain"] + " " + df_size["problem"].str.split("prob").str[1]
df_size = df_size.drop(columns=["domain", "problem"])

# Only operator counts are added to the table; state_size is read but not used.
paired_metrics = ["Operators"]

for problem, df in df_size.groupby("Problem"):
    # Skip problems that have no row in df_out: assigning through .loc with an
    # unknown label would append a row that is NaN in every other column.
    if problem not in df_out.index:
        continue

    df = df.set_index(["Problem", "scoped"])
    d = {
        "Operators (Scoped)": df.loc[(problem, "scoped"), "operators"],
        "Operators (Unscoped)": df.loc[(problem, "unscoped"), "operators"],
    }

    for metric in paired_metrics:
        metric_scoped, metric_unscoped = get_scoped_unscoped_names(metric)
        n_scoped, n_unscoped = d[metric_scoped], d[metric_unscoped]
        # Convert both sides to text up front. Previously a tie left the
        # unscoped value numeric, and the "$" concatenation below raised TypeError.
        d[metric_scoped] = str(n_scoped)
        d[metric_unscoped] = str(n_unscoped)
        # Bold the smaller operator count; leave both plain on a tie.
        if n_scoped > n_unscoped:
            d[metric_unscoped] = "\\mathbf{" + d[metric_unscoped] + "}"
        elif n_scoped < n_unscoped:
            d[metric_scoped] = "\\mathbf{" + d[metric_scoped] + "}"

    # Wrap in LaTeX math mode and write into the results table.
    for k, v in d.items():
        df_out.loc[problem, k] = "$" + v + "$"

cols = ["Problem", "Operators (Unscoped)", "Operators (Scoped)", "Exp. (Unscoped)", "Exp. (Scoped)", "Eval. (Unscoped)", "Eval. (Scoped)", "Translate", "Scoping", "Planning (Unscoped)", "Planning (Scoped)", "Total (Unscoped)", "Total (Scoped)"]
df_out = df_out.reset_index(drop=False)[cols]
df_out

Unnamed: 0,Problem,Operators (Unscoped),Operators (Scoped),Exp. (Unscoped),Exp. (Scoped),Eval. (Unscoped),Eval. (Scoped),Translate,Scoping,Planning (Unscoped),Planning (Scoped),Total (Unscoped),Total (Scoped)
0,driverlog 15,$2592$,$\mathbf{2112}$,"$1,392$","$\mathbf{1,379}$","$22,980$","$\mathbf{21,186}$",$0.5 \pm 0.0$,$3.6 \pm 0.2$,$4.4 \pm 0.1$,$\mathbf{3.1 \pm 0.1}$,$\mathbf{4.9 \pm 0.2}$,$7.2 \pm 0.2$
1,driverlog 16,$4890$,$\mathbf{3540}$,"$3,618$","$\mathbf{3,087}$","$87,465$","$\mathbf{60,306}$",$0.7 \pm 0.0$,$8.3 \pm 0.3$,$19.8 \pm 0.8$,$\mathbf{8.1 \pm 0.2}$,$20.5 \pm 0.8$,$\mathbf{17.1 \pm 0.5}$
2,driverlog 17,$6170$,$\mathbf{3770}$,"$1,058$",$\mathbf{985}$,"$28,926$","$\mathbf{21,030}$",$0.8 \pm 0.0$,$9.6 \pm 0.3$,$22.0 \pm 0.9$,$\mathbf{7.9 \pm 0.2}$,$22.8 \pm 1.0$,$\mathbf{18.3 \pm 0.5}$
3,logistics 15,$650$,$\mathbf{250}$,"$6,395$","$6,395$","$153,488$","$\mathbf{117,643}$",$0.3 \pm 0.1$,$0.7 \pm 0.1$,$11.6 \pm 0.2$,$\mathbf{3.3 \pm 0.1}$,$11.9 \pm 0.2$,$\mathbf{4.2 \pm 0.1}$
4,logistics 20,$650$,$\mathbf{250}$,"$14,798$","$\mathbf{14,390}$","$381,235$","$\mathbf{260,336}$",$0.3 \pm 0.0$,$0.7 \pm 0.0$,$26.9 \pm 0.3$,$\mathbf{5.9 \pm 0.1}$,$27.2 \pm 0.3$,$\mathbf{6.9 \pm 0.2}$
5,logistics 25,$650$,$\mathbf{290}$,"$67,931$","$\mathbf{66,683}$","$1,701,778$","$\mathbf{1,281,693}$",$0.3 \pm 0.0$,$0.8 \pm 0.0$,$127.7 \pm 1.4$,$\mathbf{34.8 \pm 0.4}$,$128.0 \pm 1.4$,$\mathbf{35.8 \pm 0.4}$
6,satellite 05,$609$,$\mathbf{339}$,"$1,034$","$1,034$","$63,122$","$\mathbf{35,231}$",$0.3 \pm 0.1$,$0.6 \pm 0.1$,$2.0 \pm 0.0$,$\mathbf{0.6 \pm 0.0}$,$2.3 \pm 0.1$,$\mathbf{1.5 \pm 0.0}$
7,satellite 06,$582$,$\mathbf{362}$,"$5,766$","$\mathbf{4,886}$","$311,695$","$\mathbf{166,415}$",$0.3 \pm 0.1$,$0.5 \pm 0.1$,$6.3 \pm 0.1$,$\mathbf{1.7 \pm 0.0}$,$6.6 \pm 0.1$,$\mathbf{2.5 \pm 0.0}$
8,satellite 07,$983$,$\mathbf{587}$,"$124,703$","$\mathbf{96,125}$","$10,498,090$","$\mathbf{4,915,020}$",$0.4 \pm 0.1$,$0.9 \pm 0.1$,$333.1 \pm 1.8$,$\mathbf{50.9 \pm 0.3}$,$333.4 \pm 1.8$,$\mathbf{52.2 \pm 0.3}$
9,zenotravel 10,$1155$,$\mathbf{1095}$,"$23,661$","$\mathbf{23,649}$","$675,945$","$\mathbf{654,968}$",$0.3 \pm 0.0$,$2.4 \pm 0.1$,$37.7 \pm 0.7$,$\mathbf{33.2 \pm 0.4}$,$38.1 \pm 0.7$,$\mathbf{35.9 \pm 0.5}$


In [8]:
def df2tex(df: pd.DataFrame, label: str, caption: str) -> str:
    """Render a results frame as a LaTeX table* environment.

    The two-line header is fixed (Problem, Operators, Expansions,
    Evaluations, Translate, Scoping, Planning Time, Total Time), so the
    columns of ``df`` are expected to be in that order.

    Args:
        df: Table to render; one LaTeX row is emitted per frame row.
        label: Text placed inside the label command.
        caption: Text placed inside the caption command.

    Returns:
        The complete LaTeX source as a single newline-joined string.
    """
    # NOTE(review): the column spec below declares 14 columns (l + 13 r) while
    # the header rows define 13 -- confirm whether the extra "r" is intended.
    header = r"""\begin{table*}[t]
\resizebox{\textwidth}{!}{%
\begin{tabular}{lrrrrrrrrrrrrr}
\toprule
\textbf{Problem} & \multicolumn{2}{c}{\textbf{Operators}} & \multicolumn{2}{c}{\textbf{Expansions}} & \multicolumn{2}{c}{\textbf{Evaluations}} & \textbf{Translate} & \textbf{Scoping} & \multicolumn{2}{c}{\textbf{Planning Time (s)}} & \multicolumn{2}{c}{\textbf{Total Time (s)}}\\
  & Unscoped & Scoped & Unscoped & Scoped & Unscoped & Scoped &  &  & Unscoped & Scoped & Unscoped & Scoped\\
\midrule"""

    # str() each cell so a numeric or missing value does not make join() raise.
    body = [
        " & ".join(map(str, row)) + "\\\\"
        for row in df.itertuples(index=False, name=None)
    ]

    footer = [
        "\\bottomrule",
        "\\end{tabular}",
        "}",  # closes the resizebox group opened in the header
        "\\caption{" + caption + "}",
        "\\label{" + label + "}",
        "\\end{table*}",
    ]
    return "\n".join([header, *body, *footer])

In [9]:
# Render the final table as LaTeX and write it to tmp.tex in the working directory.
s_out = df2tex(df_out, label="table:fd_experiments", caption="Results for our Fast Downward experiments.")
with open("tmp.tex", "w") as f:
    f.write(s_out)

Only include the lmcut configuration in the main paper; remove the ms results from the main table (they can go in the appendix).
Remove Config column
Add some sort of size column(s)
Rename "Generated Nodes (...)" -> "Eval. (...)"
Rename "Node Expansions (...)" -> "Exp. (...)"
Remove decimal point and ± std from all Eval./Exp. columns
If it's still too wide, use a 2-line header
Column order:
cols = ["Problem", "Operators (Scoped)", "Operators (Unscoped)", "Exp. (Scoped)", "Exp. (Unscoped)", "Eval. (Scoped)", "Eval. (Unscoped)", "Translate", "Scoping", "Planning (Scoped)", "Planning (Unscoped)", "Total (Scoped)", "Total (Unscoped)"]