# The goal of this notebook is to identify docked poses at fixed RMSD values away from the crystal pose.

# Imports

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the combined docking results table; the first CSV column becomes the index.
results_csv = '/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/20240424_multi_pose_docking_cross_docking/results_csvs/20240503_combined_results_with_data.csv'
df = pd.read_csv(results_csv, index_col=0)

In [None]:
# Compound to inspect first (a plain string, not a tuple).
cmpd_id = 'EDJ-MED-705e09b8-1'

In [None]:
# For the selected compound, keep the first row (in DataFrame order) of every
# distinct RMSD value.
is_selected = df['Query_Ligand'] == cmpd_id
examples = df.loc[is_selected].groupby("RMSD").head(1)

In [None]:
# Bin every pose's RMSD to one decimal place, then keep the first pose (in
# DataFrame row order) for each (Query_Ligand, RMSD_Rounded) pair, sorted by
# the rounded RMSD.
# Series.round is the vectorised equivalent of applying np.round element-wise,
# so the whole column is rounded in one call instead of a per-row lambda.
df["RMSD_Rounded"] = df["RMSD"].round(1)
rounded_rmsds = (
    df.groupby(["Query_Ligand", "RMSD_Rounded"])
    .head(1)
    .sort_values("RMSD_Rounded")
)

In [None]:
# Switch to a different compound and keep only its rows from the per-bin table.
cmpd_id = 'MAT-POS-a54ce14d-2'
example = rounded_rmsds.loc[rounded_rmsds['Query_Ligand'].eq(cmpd_id)]

In [None]:
# Pick the rows whose rounded RMSD is one of the target values, keeping only
# the columns needed to locate each pose later on.
rmsds = [0.1, 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 7.9]
pose_columns = ['Query_Ligand', 'Reference_Structure', 'Pose_ID', 'RMSD_Rounded']
in_target_bins = example['RMSD_Rounded'].isin(rmsds)
final_df = example.loc[in_target_bins, pose_columns]

In [None]:
# Show the selected rows as this cell's output.
final_df

In [None]:
# Directory searched below for the per-complex docking result folders.
# Adjacent string literals are concatenated into one path string.
local_path = (
    '/Users/alexpayne/Scientific_Projects/mers-drug-discovery'
    '/sars2-retrospective-analysis'
    '/20240424_multi_pose_docking_cross_docking'
    '/20240424_multi_pose_docking_cross_docking89'
    '/docking_results'
)

# write a script to load in the sdfs and extract the relevant poses

In [None]:
# For each row in final_df: locate the matching docking-result SDF, pick the
# pose whose 'Pose_ID' tag equals the row's Pose_ID, and write that pose to a
# new SDF file in the current working directory.
from pathlib import Path
from asapdiscovery.data.schema.ligand import Ligand

results_dir = Path(local_path)  # loop-invariant, so built once
final_poses = []  # collects every extracted pose, in final_df row order
for _, row in final_df.iterrows():
    rmsd = row["RMSD_Rounded"]
    ref_structure = row["Reference_Structure"]
    # Take the ligand name from the row itself rather than the global cmpd_id,
    # so the lookup stays correct if cmpd_id is reassigned in another cell.
    query_ligand = row["Query_Ligand"]
    # Tags are compared as strings, so convert the table value once per row.
    pose_id = str(row["Pose_ID"])

    # Locate the SDF: take the first match, but fail with a clear message
    # instead of a bare IndexError when nothing matches.
    sdf_pattern = f'./*{ref_structure}*{query_ligand}*/*.sdf'
    sdf_path = next(results_dir.glob(sdf_pattern), None)
    if sdf_path is None:
        raise FileNotFoundError(
            f"No SDF matching {sdf_pattern!r} found under {results_dir}"
        )

    # Get the correct pose.
    # NOTE(review): presumably to_single_conformers() yields one Ligand per
    # docked pose -- confirm against the asapdiscovery documentation.
    lig = Ligand.from_sdf(sdf_path)
    poses = lig.to_single_conformers()
    pose = next(
        (candidate for candidate in poses if candidate.tags['Pose_ID'] == pose_id),
        None,
    )
    if pose is None:
        raise LookupError(f"No pose with Pose_ID {pose_id!r} in {sdf_path}")

    pose.to_sdf(f'RMSD{rmsd}_{pose.compound_name}_{ref_structure}.sdf')
    final_poses.append(pose)