# Imports

In [None]:
from pathlib import Path
from tqdm import tqdm

In [None]:
from asapdiscovery.data.schema.ligand import Ligand
from asapdiscovery.data.backend.openeye import load_openeye_sdfs

# Define Paths

In [None]:
# Directory that is searched below for per-ligand docking result SDF files.
data_path = Path(
    "/lila/data/chodera/asap-datasets/retro_docking/sars_fragalysis_retrospective/"
    "20240403_multi_pose_docking_self_docking"
)

# Combine Results

In [None]:
sdf_paths = list(data_path.glob("*.sdf/docking_results.sdf"))

In [None]:
multipose_ligs = [Ligand.from_sdf(sdf_path) for sdf_path in sdf_paths]

In [None]:
# Exploration of the first loaded ligand: how many poses does it report?
multipose_ligs[0].num_poses

In [None]:
# Length of the "docking-confidence-POSIT" tag list, to compare with num_poses above
# (presumably one entry per pose -- confirm against the Ligand schema).
len(multipose_ligs[0].conf_tags["docking-confidence-POSIT"])

In [None]:
# test_lig is the same object as multipose_ligs[0], not a copy.
test_lig = multipose_ligs[0]

In [None]:
# NOTE: pop() removes the "Alternate Pose" key from the tag mapping, so (assuming conf_tags
# is the ligand's own dict) multipose_ligs[0] is modified by this cell as well.
alt_poses = test_lig.conf_tags.pop("Alternate Pose")

In [None]:
len(alt_poses)

In [None]:
# Experiment: put the tag back with one extra leading entry and see how the ligand reacts.
test_lig.conf_tags.update({"Alternate Pose": [1] + alt_poses})

In [None]:
# Re-check the pose count after lengthening the "Alternate Pose" list.
test_lig.num_poses

In [None]:
test_lig.conf_tags

## For this to work, I need to check where the alternate-pose numbering starts

In [None]:
# Gather each ligand's "Alternate Pose" tag value (None where the key is absent)
# so the numbering convention can be inspected in the next cell.
alt_pose_possibilities = [lig.conf_tags.get("Alternate Pose") for lig in multipose_ligs]

In [None]:
alt_pose_possibilities

## The numbering starts at 1, so I'll prepend 0 to each ligand's list

In [None]:
sdf_paths = list(data_path.glob("*.sdf/docking_results.sdf"))

In [None]:
multipose_ligs = [Ligand.from_sdf(sdf_path) for sdf_path in sdf_paths]

In [None]:
# Prepend 0 to every ligand's "Alternate Pose" list and set the ligand-level tag to 0.
for lig in multipose_ligs:
    # A missing or empty "Alternate Pose" entry is treated as "no alternate poses yet".
    existing_alt_poses = lig.conf_tags.get("Alternate Pose") or []
    lig.conf_tags.update({"Alternate Pose": [0] + existing_alt_poses})
    lig.tags.update({"Alternate Pose": 0})

In [None]:
# Pose count reported by each ligand, in multipose_ligs order.
n_poses = [ligand.num_poses for ligand in multipose_ligs]

# Save Combined Results

In [None]:
out_dir = data_path / "processed_sdfs"

In [None]:
out_dir.mkdir()

In [None]:
# Write one SDF per ligand, named after its compound_name.
# Ligands that share a compound_name target the same path.
for lig in multipose_ligs:
    processed_sdf_path = out_dir / f"{lig.compound_name}.sdf"
    lig.to_sdf(processed_sdf_path)

In [None]:
from asapdiscovery.docking.analysis import calculate_rmsd_openeye

In [None]:
# Reference ligands are read from combined_3d.sdf and indexed by compound name;
# if two entries share a name, the later one replaces the earlier one in the dict.
reference_sdf_path = Path("/data/chodera/asap-datasets/mpro_fragalysis-04-01-24_curated_cache/combined_3d.sdf")
orig_ligs = [Ligand.from_oemol(oemol) for oemol in load_openeye_sdfs(reference_sdf_path)]
orig_lig_dict = {orig_lig.compound_name: orig_lig for orig_lig in orig_ligs}

In [None]:
pose_rmsds = []
for pose in lig.to_single_conformers():
    pose_rmsds.append(calculate_rmsd_openeye(orig_lig_dict[pose.compound_name].to_oemol(), pose.to_oemol()))

In [None]:
pose_rmsds

In [None]:
# Three parallel lists, one entry per (ligand, pose): the RMSD against the reference
# ligand, the compound name, and the pose's position within its ligand.
rmsds = []
lig_name = []
pose_num = []
for lig in tqdm(multipose_ligs):
    for pose_index, pose in enumerate(lig.to_single_conformers()):
        # The reference is looked up by the pose's own compound_name.
        reference_mol = orig_lig_dict[pose.compound_name].to_oemol()
        docked_mol = pose.to_oemol()
        rmsds.append(calculate_rmsd_openeye(reference_mol, docked_mol))
        lig_name.append(pose.compound_name)
        pose_num.append(pose_index)

In [None]:
import pandas as pd

In [None]:
# Long-format table: one row per (compound, pose) with its RMSD.
df = pd.DataFrame(
    {
        "RMSD": rmsds,
        "Compound_Name": lig_name,
        "Pose_ID": pose_num,
    }
)

In [None]:
import plotly.express as px

In [None]:
# One line per compound: RMSD plotted against pose index.
fig = px.line(
    df,
    x="Pose_ID",
    y="RMSD",
    color="Compound_Name",
)

In [None]:
# Export the figure as PNG and then SVG; the relative paths resolve against the working directory.
for image_name in ("self_docked_rmsds.png", "self_docked_rmsds.svg"):
    fig.write_image(image_name)

In [None]:
# Save the per-pose RMSD table as CSV; the relative path resolves against the working directory.
rmsd_csv_name = "self_docked_rmsds.csv"
df.to_csv(rmsd_csv_name)