In [None]:
%load_ext autoreload
%autoreload 2

import sys
import pandas as pd
sys.path.append('..')

from rxnframe import RXN_frame
import json

In [None]:
# Multi step reaction indexes from https://www.science.org/doi/10.1126/science.abo0058
#
# Each inner list holds run-ID suffixes (matched against "CHEMIFY-<suffix>"
# in the summary); list position becomes Step 1 / Step 2 / Step 3 when the
# table is built. Dictionary keys are the 1-based position of each procedure
# in the lists below.

double_step_procs = dict(enumerate(
    [
        ["0034", "0010"],
        ["0035", "0010"],
        ["0078", "0037"],
        ["0037", "0012"],
        ["0075", "0004"],
        ["0004", "0005"],
        ["0056", "0022"],
        ["0056", "0023"],
        ["0038", "0079"],
        ["0040", "0042"],
        ["0043", "0044"],
        ["0044", "0045"],
        ["0046", "0047"],
    ],
    start=1,
))

triple_step_procs = dict(enumerate(
    [
        ["0034", "0035", "0010"],
        ["0078", "0037", "0012"],
        ["0075", "0004", "0005"],
        ["0043", "0044", "0045"],
    ],
    start=1,
))

In [None]:
# Load the per-reaction summary (a JSON list of dict entries, iterated and
# sorted by "RXN_ID" further down). The encoding is given explicitly because
# open() otherwise falls back to the platform locale, which can mis-decode
# non-ASCII characters in the file on some systems.
with open("../data/reaction_summary.json", "r", encoding="utf-8") as f:
    summary = json.load(f)

In [None]:
def get_rxn_details(run_id_suffix, summary_data):
    """Look up a single reaction in the summary by its run-ID suffix.

    Args:
        run_id_suffix: Numeric part of the run ID as a string, e.g. ``"0034"``.
            It is matched against entries whose ``"RXN_ID"`` equals
            ``"CHEMIFY-<run_id_suffix>"`` (e.g. ``"CHEMIFY-0034"``).
        summary_data: Iterable of dict entries; the keys read here are
            ``"RXN_ID"``, ``"REACTION"``, ``"yield"`` and ``"SCALE"``.

    Returns:
        A ``(reaction, yield, scale)`` tuple taken from the first matching
        entry. A stored yield of ``-1`` is reported as ``None``. When no
        entry matches, ``(None, None, None)`` is returned so callers can
        always unpack three values.
    """
    target_id = f"CHEMIFY-{run_id_suffix}"
    for entry in summary_data:
        if entry.get("RXN_ID") != target_id:
            continue
        rxn_yield = entry.get("yield")
        if rxn_yield == -1:
            # -1 is treated as "no yield value" and surfaced as None.
            rxn_yield = None
        return entry.get("REACTION"), rxn_yield, entry.get("SCALE")
    # Same arity as the success path; a 2-tuple here would break
    # three-way unpacking at the call sites.
    return None, None, None

# --- Single reactions -------------------------------------------------------
# One row per summary entry, ordered by RXN_ID; the Step 2 / Step 3 columns
# stay empty. A yield of -1 is mapped to None (presumably a "not recorded"
# marker -- confirm against the data source).
sorted_summary = sorted(summary, key=lambda rec: rec.get('RXN_ID', ''))

rows = [
    {
        "ID": rec.get('RXN_ID'),
        "Step 1": rec.get("REACTION"),
        "Step 2": None,
        "Step 3": None,
        "Yield": None if rec.get("yield") == -1 else rec.get("yield"),
        "Scale": rec.get("SCALE"),
    }
    for rec in sorted_summary
]

# --- Two-step procedures ----------------------------------------------------
# Yield and Scale are shown as comma-separated strings, one value per step.
for proc_no, run_ids in double_step_procs.items():
    rxn_a, yield_a, scale_a = get_rxn_details(run_ids[0], summary)
    rxn_b, yield_b, scale_b = get_rxn_details(run_ids[1], summary)

    two_step_row = {
        "ID": f"TwoStep-{proc_no} ({run_ids[0]}+{run_ids[1]})",
        "Step 1": rxn_a,
        "Step 2": rxn_b,
        "Step 3": None,
        "Yield": f"{yield_a}, {yield_b}",
        "Scale": f"{scale_a}, {scale_b}",
    }
    rows.append(two_step_row)

# --- Three-step procedures --------------------------------------------------
for proc_no, run_ids in triple_step_procs.items():
    rxn_a, yield_a, scale_a = get_rxn_details(run_ids[0], summary)
    rxn_b, yield_b, scale_b = get_rxn_details(run_ids[1], summary)
    rxn_c, yield_c, scale_c = get_rxn_details(run_ids[2], summary)

    three_step_row = {
        "ID": f"ThreeStep-{proc_no} ({run_ids[0]}+{run_ids[1]}+{run_ids[2]})",
        "Step 1": rxn_a,
        "Step 2": rxn_b,
        "Step 3": rxn_c,
        "Yield": f"{yield_a}, {yield_b}, {yield_c}",
        "Scale": f"{scale_a}, {scale_b}, {scale_c}",
    }
    rows.append(three_step_row)

# Wide table: one row per single reaction or multi-step procedure.
df_all = pd.DataFrame(rows)


In [None]:
# Reshape: one reaction step per row, keeping metadata for grouping.
# Each wide row contributes one long row per non-missing step column; the
# ID / Yield / Scale values are repeated on every step row of that entry.
step_cols = ["Step 1", "Step 2", "Step 3"]
long_cols = ["ID", "Step", "Reaction", "Yield", "Scale"]

rows_long = [
    {
        "ID":       row["ID"],
        "Step":     step_name,
        "Reaction": row[step_name],
        "Yield":    row["Yield"],
        "Scale":    row["Scale"],
    }
    for _, row in df_all.iterrows()
    for step_name in step_cols
    # pd.notna is False for both None and NaN, so one check covers both.
    if pd.notna(row[step_name])
]

# Pin the columns so the frame keeps its schema even when no step rows exist.
df_long = pd.DataFrame(rows_long, columns=long_cols)
print(f"Reshaped: {len(df_all)} entries → {len(df_long)} step rows")
df_long.head(6)


In [None]:
# Wrap the long-format table for rendering: "Reaction" is passed as the only
# reaction column and no molecule columns are given.
# NOTE(review): size=(700, 180) is presumably the per-reaction image size in
# pixels -- confirm against rxnframe.
frame = RXN_frame(
    df_long,
    mol_cols=[],
    rxn_cols=["Reaction"],
    size=(700, 180),
)

# Group rows by "ID" and pass ID / Yield / Scale as the row-spanning columns.
# NOTE(review): pdf_path presumably also writes the rendering to that file,
# relative to the notebook's working directory -- confirm against rxnframe.
frame.display_rxns_grouped(
    group_col="ID",
    rowspan_cols=["ID", "Yield", "Scale"],
    pdf_path="reaction_display.pdf",
)
