In [1]:
import os, sys, re
import pandas as pd
import numpy as np
import plotly.express as px
from rdkit import Chem
from rdkit.Chem import Draw

# Display the updated DataFrame in the notebook
from IPython.display import display, HTML

In [2]:
file_path = "Summary_Properties_all_species_published.xlsx"

In [3]:
# Load sheets "Select_Properties" and "Yields"
select_properties_df = pd.read_excel(file_path, sheet_name="Selected_Properties")
yields_df = df = pd.read_excel(file_path, sheet_name="Yields")

In [4]:
# Display a preview of both DataFrames
display(select_properties_df.head())
display(yields_df.head())

Unnamed: 0,Compound_Name,BDE (kcal/mol),BDFE (kcal/mol),E_spc (Hartree),E_spc (Hartree)_anion,E_spc (Hartree)_openshell,H_spc(Hartree),H_spc(Hartree)_anion,H_spc(Hartree)_openshell,qh_G(T)_spc(Hartree),...,C1_Electro-Valency_Boltz,C1_Electro-Valency_Boltz_anion,C1_FormalCharge_Boltz,C1_FormalCharge_Boltz_anion,C2_Co-Valency_Boltz,C2_Co-Valency_Boltz_anion,C2_Electro-Valency_Boltz,C2_Electro-Valency_Boltz_anion,C2_FormalCharge_Boltz,C2_FormalCharge_Boltz_anion
0,Het001,90.96998,83.154355,-441.215801,-440.620459,-440.561375,-441.043098,-440.463017,-440.40235,-441.085759,...,0.6784,0.5491,0.0006,-0.2896,3.5377,3.4047,0.4177,0.5367,-0.0308,-0.0333
1,Het002,89.224877,81.40235,-441.212895,-440.61349,-440.561089,-441.040035,-440.456235,-440.402068,-441.082728,...,0.689,0.6075,-0.0078,-0.2527,3.7837,3.5178,0.1419,0.3552,-0.0467,-0.0673
2,Het003,90.373846,82.194266,-441.213933,-440.625214,-440.560232,-441.04084,-440.467396,-440.401042,-441.082982,...,0.6948,0.5667,0.0212,-0.2805,3.8102,3.5553,0.1638,0.3499,-0.0203,-0.0444
3,Het004,91.911871,84.077421,-457.262567,-456.671952,-456.60665,-457.101761,-456.526108,-456.459512,-457.144213,...,0.6842,0.5547,0.0059,-0.2657,3.2567,3.1798,0.7021,0.712,-0.0412,-0.0943
4,Het005,91.984662,83.904856,-457.26207,-456.611366,-456.605897,-457.100938,-456.468473,-456.458573,-457.143085,...,0.7,0.5196,0.0189,-0.8775,3.4655,3.1272,0.4795,0.799,-0.0465,-0.0738


Unnamed: 0,SMILES,id,Heterobenzylic_Cl_Pdt,Britton,Maity2AP Stahl,Maity4AP Stahl,Golden Stahl,DeLuca,Fujisaki,Newkome,Xu Zhang,Schreiner,Chen,Wu,Lopez Stahl,Ariarfard,Kanai
0,CC1=NC2=C(C=CC=C2)C=C1,Het001,1b,37.0,0.0,39.0,50.0,83.0,6.0,33.0,46.0,50.0,21.0,8.9,3.0,24.0,49.0
1,CC1=CC(C=CC=C2)=C2N=C1,Het002,2b,0.5,0.0,0.0,48.0,11.0,8.1,11.5,0.0,2.8,2.7,6.7,0.0,3.1,5.4
2,CC1=CC=NC2=C1C=CC=C2,Het003,3b,13.0,12.0,22.0,6.0,28.0,6.1,2.9,18.0,12.0,5.2,8.5,0.0,14.0,22.0
3,CC1=NC2=C(C=CC=C2)C=N1,Het004,4b,0.0,0.0,0.0,0.0,0.0,0.0,2.9,2.41,1.28,1.0,0.0,0.0,0.0,5.45
4,CC1=NC=NC2=C1C=CC=C2,Het005,5b,18.0,0.0,35.0,25.0,51.0,17.9,0.0,33.0,36.0,26.0,0.0,0.0,0.0,40.0


In [5]:
# Prepare data for the grid plot
# Extract yield columns (ignoring the first few metadata columns like id, SMILES, and captions)
yield_columns = yields_df.columns[3:]  # Exclude 'SMILES', 'id', 'Heterobenzylic_Cl_Pdt'
yield_data = yields_df[yield_columns]

In [6]:
import re

# Extract headers
headers = select_properties_df.columns

# Find headers with special characters
special_chars = {header: re.findall(r"[^\w\s]", header) for header in headers if re.search(r"[^A-Za-z0-9\s]", header)}

special_chars

{'Compound_Name': [],
 'BDE (kcal/mol)': ['(', '/', ')'],
 'BDFE (kcal/mol)': ['(', '/', ')'],
 'E_spc (Hartree)': ['(', ')'],
 'E_spc (Hartree)_anion': ['(', ')'],
 'E_spc (Hartree)_openshell': ['(', ')'],
 'H_spc(Hartree)': ['(', ')'],
 'H_spc(Hartree)_anion': ['(', ')'],
 'H_spc(Hartree)_openshell': ['(', ')'],
 'qh_G(T)_spc(Hartree)': ['(', ')', '(', ')'],
 'qh_G(T)_spc(Hartree)_anion': ['(', ')', '(', ')'],
 'qh_G(T)_spc(Hartree)_openshell': ['(', ')', '(', ')'],
 'HOMO_Boltz': [],
 'HOMO_Boltz_anion': [],
 'HOMO_Boltz_openshell': [],
 'LUMO_Boltz': [],
 'LUMO_Boltz_anion': [],
 'LUMO_Boltz_openshell': [],
 'μ_Boltz': [],
 'μ_Boltz_anion': [],
 'μ_Boltz_openshell': [],
 'η_Boltz': [],
 'η_Boltz_anion': [],
 'η_Boltz_openshell': [],
 'ω_Boltz': [],
 'ω_Boltz_anion': [],
 'ω_Boltz_openshell': [],
 'polar_iso(Debye)_Boltz': ['(', ')'],
 'polar_iso(Debye)_Boltz_anion': ['(', ')'],
 'polar_iso(Debye)_Boltz_openshell': ['(', ')'],
 'polar_aniso(Debye)_Boltz': ['(', ')'],
 'polar_aniso(D

In [7]:
import plotly.express as px

# Prepare data for Plotly
heatmap_data = yields_df.melt(
    id_vars=["id"], value_vars=yield_columns, var_name="Method", value_name="Yield"
)

# Create the heatmap with controlled subblock ratios and text labels
fig = px.imshow(
    yields_df[yield_columns].T.to_numpy(),
    labels={"x": "Compound ID", "y": "Method", "color": "Yield"},
    x=yields_df["id"],
    y=yield_columns,
    text_auto=".2f",  # Add text labels automatically
    color_continuous_scale="Cividis",  # Set the color scale
    title="Yields Across Different Methods",
)

fig.update_layout(
    xaxis_title="Compound ID",
    yaxis_title="Method",
    height=len(yield_columns) * 50,
    width=len(yields_df) * 50,
    xaxis=dict(tickangle=30),
    template="plotly",
)

# Display the figure
fig.show()

In [8]:
# create a temp folder to store the images
os.makedirs("images", exist_ok=True)
# Create a dictionary of Compound_Name and RDKit molecule objects
compound_to_mol = {
    row["id"]: Chem.MolFromSmiles(row["SMILES"])
    for _, row in yields_df.iterrows()
}
# Function to draw and save molecule images
def draw_and_save_molecule(mol, mol_id, output_dir):
    drawer = Draw.rdMolDraw2D.MolDraw2DCairo(200, 200)  # Adjust dimensions if needed
    drawer.drawOptions().bondLineWidth = 3
    drawer.drawOptions().baseFontSize = 0.5
    drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    img_data = drawer.GetDrawingText()
    img_path = os.path.join(output_dir, f"{mol_id}.png")
    with open(img_path, "wb") as f:
        f.write(img_data)
    return img_path

In [9]:
# Sort compounds based on the "BDE (kcal/mol)" column
sorted_compounds = select_properties_df.sort_values("BDE (kcal/mol)")

# Generate molecule images and save the mapping into a dictionary with keys as compound names values as image paths
mol_image_paths = {
    name: draw_and_save_molecule(compound_to_mol[name], name, "images")
    for name in sorted_compounds["Compound_Name"]
}

In [10]:
# Add the new column with image paths as HTML <img> tags
sorted_compounds.insert(
    1,  # Insert as the second column
    "Molecule_Image",
    sorted_compounds["Compound_Name"].map(
        lambda name: f'<img src="{mol_image_paths.get(name)}" width="100"/>'
    )
)

In [11]:
# Render as HTML with the image paths
display(HTML(sorted_compounds.head().to_html(escape=False)))

Unnamed: 0,Compound_Name,Molecule_Image,BDE (kcal/mol),BDFE (kcal/mol),E_spc (Hartree),E_spc (Hartree)_anion,E_spc (Hartree)_openshell,H_spc(Hartree),H_spc(Hartree)_anion,H_spc(Hartree)_openshell,qh_G(T)_spc(Hartree),qh_G(T)_spc(Hartree)_anion,qh_G(T)_spc(Hartree)_openshell,HOMO_Boltz,HOMO_Boltz_anion,HOMO_Boltz_openshell,LUMO_Boltz,LUMO_Boltz_anion,LUMO_Boltz_openshell,μ_Boltz,μ_Boltz_anion,μ_Boltz_openshell,η_Boltz,η_Boltz_anion,η_Boltz_openshell,ω_Boltz,ω_Boltz_anion,ω_Boltz_openshell,polar_iso(Debye)_Boltz,polar_iso(Debye)_Boltz_anion,polar_iso(Debye)_Boltz_openshell,polar_aniso(Debye)_Boltz,polar_aniso(Debye)_Boltz_anion,polar_aniso(Debye)_Boltz_openshell,dipole(Debye)_Boltz,dipole(Debye)_Boltz_anion,dipole(Debye)_Boltz_openshell,volume(Bohr_radius³/mol)_Boltz,volume(Bohr_radius³/mol)_Boltz_anion,volume(Bohr_radius³/mol)_Boltz_openshell,SASA_surface_area(Å²)_Boltz,NBO_charge_C1_Boltz,NBO_charge_C1_Boltz_anion,NBO_charge_C1_Boltz_openshell,NBO_charge_C2_Boltz,NBO_charge_C2_Boltz_anion,NBO_charge_C2_Boltz_openshell,NMR_shift_C1_Boltz,NMR_shift_C2_Boltz,distance_C1_C2(Å)_Boltz,distance_C1_C2(Å)_Boltz_anion,distance_C1_C2(Å)_Boltz_openshell,%Vbur_C1_2.0Å_Boltz,%Vbur_C1_2.0Å_Boltz_anion,%Vbur_C1_2.0Å_Boltz_openshell,%Vbur_C2_2.0Å_Boltz,%Vbur_C2_2.0Å_Boltz_anion,%Vbur_C2_2.0Å_Boltz_openshell,Sterimol_L_C1_C2(Å)_morfeus_Boltz,Sterimol_B1_C1_C2(Å)_morfeus_Boltz,Sterimol_B1_C1_C2(Å)_morfeus_Boltz_anion,Sterimol_B1_C1_C2(Å)_morfeus_Boltz_openshell,Sterimol_B5_C1_C2(Å)_morfeus_Boltz,Sterimol_B5_C1_C2(Å)_morfeus_Boltz_anion,Sterimol_B5_C1_C2(Å)_morfeus_Boltz_openshell,pyramidalization_Gavrish_C1(°)_Boltz,pyramidalization_Agranat-Radhakrishnan_C1_Boltz,C1_C2_bond_order_total_Boltz,C1_C2_bond_order_total_Boltz_anion,C1_C2_bond_order_covalent_Boltz,C1_C2_bond_order_covalent_Boltz_anion,C1_C2_bond_order_ionic_Boltz,C1_C2_bond_order_ionic_Boltz_anion,C1_Co-Valency_Boltz,C1_Co-Valency_Boltz_anion,C1_Electro-Valency_Boltz,C1_Electro-Valency_Boltz_anion,C1_FormalCharge_Boltz,C1_FormalCharge_Boltz_anion,C2_Co-Valency_Boltz,C2_Co-Valency_Boltz_anion,C2_Electro-Valency_Boltz,C2_Electro-Valency_Boltz_anion,C2_FormalCharge_Boltz,C2_FormalCharge_Boltz_anion
14,Het015,,85.515044,77.070655,-496.562505,-495.980539,-495.916763,-496.371904,-495.805226,-495.739849,-496.417254,-495.850595,-495.785641,-0.297673,-0.04244,-0.24152,-0.043345,0.13617,-0.03164,-0.170509,0.046865,-0.13658,0.254328,0.17861,0.20988,0.057166,0.00615,0.04444,131.476736,161.827,143.213,104.19527,172.572,142.661,0.507764,1.2186,1.1798,1363.199758,1620.16,1441.282,348.692273,-0.434699,-0.36404,-0.04118,0.194263,0.08456,0.04065,152.219207,1.433237,1.50829,1.37416,1.40527,97.203147,92.235925,92.12939,96.149473,95.806431,95.861312,8.895244,1.798447,1.835675,1.836373,4.366671,4.229679,4.332991,5.976737,0.80606,0.950676,1.5444,0.937913,1.395,0.012731,0.1493,3.486878,3.4114,0.474071,0.4374,-0.005502,-0.1284,3.534259,3.4107,0.359241,0.471,-0.069887,-0.098
10,Het011,,85.648076,76.889933,-480.519611,-479.922161,-479.873287,-480.316628,-479.735423,-479.684361,-480.361978,-479.781087,-479.730653,-0.28284,-0.02678,-0.22683,-0.026556,0.14047,-0.02062,-0.154698,0.056845,-0.123725,0.256284,0.16725,0.20621,0.046691,0.00966,0.03712,135.932938,173.472,146.41,107.03585,189.788,141.713,2.293969,1.7727,2.5203,1434.203245,1327.7,1468.267,351.809626,-0.418269,-0.40168,-0.06647,-0.060009,-0.09125,-0.17461,156.085154,26.328096,1.510176,1.37935,1.40546,97.344645,92.145532,92.026085,97.044109,96.787836,96.823347,9.059194,1.838787,1.809395,1.811731,4.295046,4.228558,4.28314,5.99651,0.807894,0.967793,1.5115,0.954617,1.3522,0.013134,0.1593,3.515356,3.3616,0.456059,0.4547,-0.016586,-0.1825,3.785961,3.5615,0.115078,0.3116,-0.042989,-0.055
16,Het017,,86.280605,77.664906,-480.521016,-479.918131,-479.873736,-480.318012,-479.731272,-479.684737,-480.36328,-479.776797,-479.73072,-0.27694,-0.02401,-0.21939,-0.02718,0.13546,-0.02446,-0.15206,0.055725,-0.121925,0.24976,0.15947,0.19493,0.04629,0.00974,0.03813,135.994,173.499,148.509,104.598,194.532,149.928,1.9903,2.9366,1.5505,1534.96,1454.634,1365.426,353.312395,-0.42344,-0.38461,-0.06556,0.19758,0.12798,0.0626,149.3872,0.4796,1.50789,1.37521,1.40648,97.010589,92.242381,92.193957,96.242252,95.764463,95.858084,9.049785,1.881622,1.878454,1.880343,4.340606,4.40547,4.429667,5.892927,0.797979,0.9534,1.5811,0.9451,1.4252,0.0083,0.1559,3.5095,3.2887,0.4532,0.4133,-0.0119,-0.2043,3.5656,3.4442,0.3685,0.4737,-0.0512,-0.0759
9,Het010,,86.46823,77.996231,-480.522848,-479.929078,-479.875564,-480.320158,-479.74206,-479.686584,-480.365665,-479.787629,-479.732577,-0.284079,-0.03386,-0.23189,-0.026684,0.14462,-0.01792,-0.155381,0.05538,-0.124905,0.257395,0.17848,0.21397,0.046903,0.00859,0.03646,135.856641,167.232,146.859,103.622841,172.558,138.589,1.712249,2.7262,1.4327,1351.219233,1484.938,1429.909,354.453348,-0.432212,-0.37646,-0.04447,0.240715,0.12913,0.09014,149.023338,-4.950573,1.511166,1.3757,1.41226,97.20221,92.242381,92.029313,96.17097,95.767691,95.822572,8.879114,1.793625,1.837292,1.836186,4.601195,4.384873,4.555269,5.967972,0.805168,0.948311,1.5291,0.941528,1.3787,0.006784,0.1504,3.495996,3.3886,0.457833,0.4567,-0.009383,-0.1547,3.515217,3.4101,0.421527,0.4699,-0.041499,-0.083
15,Het016,,86.679701,77.887672,-480.520348,-479.925812,-479.872517,-480.317349,-479.73865,-479.683438,-480.362509,-479.78442,-479.729594,-0.277699,-0.03228,-0.22966,-0.027422,0.14866,-0.01353,-0.15256,0.05819,-0.121595,0.250278,0.18094,0.21613,0.046496,0.00936,0.0342,132.911596,159.005,142.225,89.12933,125.566,111.128,2.129056,3.3417,1.7279,1307.336428,1399.991,1526.816,346.06762,-0.44125,-0.35701,-0.05043,0.273424,0.14833,0.10865,153.663831,-3.518626,1.514353,1.37196,1.40336,97.748687,92.407025,92.284349,95.775521,95.6547,95.761235,7.00773,1.898872,1.996449,1.906471,5.731875,5.774241,5.744031,6.062294,0.814374,0.949043,1.5971,0.942382,1.4732,0.006631,0.1239,3.491843,3.3459,0.463187,0.3619,-0.010951,-0.2179,3.476245,3.5003,0.43956,0.4603,-0.045478,-0.0359


In [12]:
import plotly.express as px
import base64

# Extract data
x = sorted_compounds["Compound_Name"]
y = sorted_compounds["BDE (kcal/mol)"]

# Create a bar chart
fig = px.bar(x=x, y=y, labels={"x": "Molecule", "y": "BDE (kcal/mol)"})
fig.update_xaxes(tickvals=[], title_text="Molecule")
fig.update_layout(
    yaxis_range=[y.min() - 10, y.max() + 10],
    title="BDE (kcal/mol) by Compound",
    height=len(y) * 50,
    width=len(x) * 50,
    template="plotly_white",
)

# Add molecule images to the x-axis
for compound_name, bde in zip(sorted_compounds["Compound_Name"], y):
    img_path = mol_image_paths[compound_name]
    logo = base64.b64encode(open(img_path, "rb").read())
    fig.add_layout_image(
        source=f'data:image/png;base64,{logo.decode()}',
        xref="x",
        yref="paper",
        x=compound_name,
        y=0,
        xanchor="center",
        yanchor="bottom",
        sizex=1,  # Adjust size of the image
        sizey=10,   # Adjust size of the image
    )
    fig.add_layout_image(
        source=f'data:image/png;base64,{logo.decode()}',
        xref="x",
        yref="y",
        x=compound_name,
        y=bde,
        xanchor="center",
        yanchor="bottom",
        sizex=1,  # Adjust size of the image
        sizey=10,   # Adjust size of the image
    )

# Show the chart
fig.show()

In [13]:
# Select numerical columns for correlation
properties_columns = select_properties_df.select_dtypes(include="number").columns
yield_columns = yields_df.select_dtypes(include="number").columns  # Exclude non-yield columns

In [14]:
# Calculate correlation
correlation_results = pd.DataFrame(index=properties_columns, columns=yield_columns)
for property in properties_columns:
    for method in yield_columns:
        correlation_results.loc[property, method] = select_properties_df[property].corr(yields_df[method])
correlation_results = correlation_results.astype(float)

In [15]:
correlation_results

Unnamed: 0,Britton,Maity2AP Stahl,Maity4AP Stahl,Golden Stahl,DeLuca,Fujisaki,Newkome,Xu Zhang,Schreiner,Chen,Wu,Lopez Stahl,Ariarfard,Kanai
BDE (kcal/mol),0.516042,-0.220016,-0.330671,-0.448525,0.338140,-0.046801,-0.164182,-0.314159,0.252607,0.091495,-0.222955,-0.471337,-0.582487,0.040445
BDFE (kcal/mol),0.515380,-0.223493,-0.352171,-0.449100,0.320155,-0.058375,-0.152216,-0.327739,0.237275,0.091686,-0.217958,-0.450331,-0.606315,0.019419
E_spc (Hartree),0.119660,-0.157289,-0.555508,-0.078058,-0.013765,0.065980,-0.077700,-0.471281,-0.115254,0.090693,0.234323,-0.133786,-0.469489,0.032579
E_spc (Hartree)_anion,0.119649,-0.157374,-0.555553,-0.078033,-0.013717,0.066036,-0.077761,-0.471342,-0.115241,0.090653,0.234280,-0.133817,-0.469553,0.032582
E_spc (Hartree)_openshell,0.119679,-0.157297,-0.555514,-0.078076,-0.013751,0.065977,-0.077706,-0.471287,-0.115242,0.090695,0.234309,-0.133804,-0.469506,0.032580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C2_Co-Valency_Boltz_anion,-0.362789,0.045125,0.104313,0.314148,-0.051636,0.177241,0.169858,0.033936,-0.210820,-0.023505,0.066336,0.123296,0.321108,-0.167300
C2_Electro-Valency_Boltz,0.270751,0.189855,0.134546,-0.333013,-0.061360,-0.333716,0.026603,0.209635,0.189483,0.098081,-0.024245,-0.078872,-0.030615,0.128637
C2_Electro-Valency_Boltz_anion,0.370589,-0.000236,0.018456,-0.336695,0.128409,-0.169960,-0.136262,0.077895,0.266619,-0.007020,-0.110315,-0.146226,-0.242892,0.188391
C2_FormalCharge_Boltz,0.200074,0.089700,-0.011216,0.042615,0.129785,0.112627,-0.068389,0.075714,0.037320,-0.151810,0.006359,-0.139058,0.146676,0.201589


In [16]:
# Identify the top 5 important factors (positive or negative) for each method
top_factors_dict = {}

for method in correlation_results.columns:
    # Sort factors by absolute correlation values and select top 5
    top_factors = correlation_results[method].abs().sort_values(ascending=False).head(5).index.tolist()
    top_factors_dict[method] = top_factors

top_factors_dict

{'Britton': ['BDE (kcal/mol)',
  'BDFE (kcal/mol)',
  'NBO_charge_C1_Boltz',
  '%Vbur_C1_2.0Å_Boltz',
  '%Vbur_C1_2.0Å_Boltz_openshell'],
 'Maity2AP Stahl': ['C1_C2_bond_order_ionic_Boltz_anion',
  'NMR_shift_C2_Boltz',
  'C1_C2_bond_order_ionic_Boltz',
  'HOMO_Boltz_anion',
  'Sterimol_B1_C1_C2(Å)_morfeus_Boltz_openshell'],
 'Maity4AP Stahl': ['Sterimol_B5_C1_C2(Å)_morfeus_Boltz',
  'Sterimol_B5_C1_C2(Å)_morfeus_Boltz_openshell',
  'Sterimol_B5_C1_C2(Å)_morfeus_Boltz_anion',
  'E_spc (Hartree)_anion',
  'qh_G(T)_spc(Hartree)_anion'],
 'Golden Stahl': ['C1_FormalCharge_Boltz',
  'HOMO_Boltz_openshell',
  'μ_Boltz_openshell',
  'distance_C1_C2(Å)_Boltz',
  'BDFE (kcal/mol)'],
 'DeLuca': ['C1_Co-Valency_Boltz_anion',
  '%Vbur_C1_2.0Å_Boltz_anion',
  'NBO_charge_C1_Boltz',
  '%Vbur_C1_2.0Å_Boltz_openshell',
  'NBO_charge_C1_Boltz_openshell'],
 'Fujisaki': ['dipole(Debye)_Boltz_openshell',
  'dipole(Debye)_Boltz',
  'C2_Co-Valency_Boltz',
  'C2_Electro-Valency_Boltz',
  '%Vbur_C2_2.0Å_Bolt

In [17]:
import plotly.express as px

# Create bar charts for each method
for method, factors in top_factors_dict.items():
    # Prepare data for the chart
    data = correlation_results.loc[factors, method].reset_index()
    data.columns = ["Factor", "Correlation"]
    data["Absolute Correlation"] = data["Correlation"].abs()

    # Create the bar chart
    fig = px.bar(
        data,
        x="Factor",
        y="Absolute Correlation",
        text="Correlation",  # Add original correlation values as text
        title=f"Top 5 Factors for {method}",
        labels={"Absolute Correlation": "Correlation Value (|r|)", "Factor": "Property"},
    )

    # Customize layout
    fig.update_traces(texttemplate="%{text:.2f}", textposition="outside")  # Format text
    fig.update_layout(
        height=600,
        width=800,
        template="plotly_white",
        xaxis_title="Factors",
        yaxis_title="Absolute Correlation",
    )

    # Show the chart
    fig.show()

In [22]:
filtered_matrix

Unnamed: 0,Britton,Maity2AP Stahl,Maity4AP Stahl,Golden Stahl,DeLuca,Fujisaki,Newkome,Xu Zhang,Schreiner,Chen,Wu,Lopez Stahl,Ariarfard,Kanai
BDE (kcal/mol),0.516042,0.0,0.000000,0.0000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,-0.471337,0.000000,0.0
BDFE (kcal/mol),0.515380,0.0,0.000000,-0.4491,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,-0.450331,-0.606315,0.0
E_spc (Hartree),0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0
E_spc (Hartree)_anion,0.000000,0.0,-0.555553,0.0000,0.0,0.000000,0.0,-0.471342,0.0,0.0,0.0,0.000000,0.000000,0.0
E_spc (Hartree)_openshell,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C2_Co-Valency_Boltz_anion,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0
C2_Electro-Valency_Boltz,0.000000,0.0,0.000000,0.0000,0.0,-0.333716,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0
C2_Electro-Valency_Boltz_anion,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0
C2_FormalCharge_Boltz,0.000000,0.0,0.000000,0.0000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0


In [23]:
filtered_matrix

Index(['Britton', 'Maity2AP Stahl', 'Maity4AP Stahl', 'Golden Stahl', 'DeLuca',
       'Fujisaki', 'Newkome', 'Xu Zhang', 'Schreiner', 'Chen', 'Wu',
       'Lopez Stahl', 'Ariarfard', 'Kanai'],
      dtype='object')

In [24]:
import plotly.express as px

# Prepare a matrix with only top 5 factors for each method
filtered_matrix = correlation_results.copy()
filtered_matrix[:] = 0  # Initialize with zeros

for method, factors in top_factors_dict.items():
    # Retain only the top 5 factors
    filtered_matrix.loc[factors, method] = correlation_results.loc[factors, method]

# Plot the 2D matrix
fig = px.imshow(
    filtered_matrix.T,
    labels={"x": "Property", "y": "Method", "color": "Correlation"},
    color_continuous_scale="Cividis",  # Diverging color scale
    title="Top 5 Factors for Each Method",
)

# Customize layout
fig.update_layout(
    height=len(filtered_matrix.columns) * 50,
    width=len(filtered_matrix) * 20,
    xaxis_title="Property",
    yaxis_title="Method Yields",
    template="plotly_white",
)

# Show the plot
fig.show()