# Imports

In [None]:
from pathlib import Path
import pandas as pd
from asapdiscovery.docking.analysis import calculate_rmsd_openeye
from asapdiscovery.data.openeye import load_openeye_sdf

In [None]:
# Local directory that contains the docking result folders read by the cells below.
local_analysis = Path(
    "/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/"
)

In [None]:
# Collect the per-run result CSVs from every "20231016_self_docked*" folder.
# The glob is materialised into a sorted list so that (a) the concatenation
# cell can be re-run -- a bare glob generator is exhausted after one pass and a
# second run would fail with "No objects to concatenate" -- and (b) the row
# order of the combined frame does not depend on filesystem listing order.
csvs = sorted(local_analysis.glob("20231016_self_docked*/run_docking_oe.*-results.csv"))

In [None]:
# Read every results CSV and stack them into a single frame with a fresh index.
df = pd.concat([pd.read_csv(csv_path) for csv_path in csvs], ignore_index=True)

In [None]:
# Number of distinct values in each column, per value of "POSIT_method".
df.groupby("POSIT_method").nunique()

In [None]:
# Display the column labels of the combined frame as loaded from the CSVs.
df.columns

# Cleanup

In [None]:
# Replace the column labels positionally: the list needs exactly one entry per
# existing column, given in the frame's current column order.
df.columns = [
    "Compound_ID",
    "Structure_Source",
    "Docked_File",
    "Pose_ID",
    "RMSD",
    "POSIT",
    "POSIT_Method",
    "Chemgauss4",
    "Clash",
    "SMILES",
    "GAT_Score",
    "SCHNET_score",
]

In [None]:
# POSIT_R is the complement of the POSIT column (1 - POSIT).
df["POSIT_R"] = 1 - df["POSIT"]
# Complex_ID is the stringified compound ID joined to the structure source by "_".
df["Complex_ID"] = df["Compound_ID"].apply(str) + "_" + df["Structure_Source"]

## Remove non-P-series structures (keep only sources containing "Mpro-P")

In [None]:
# Keep only the rows whose Structure_Source contains the literal text "Mpro-P".
# regex=False makes this a plain substring test rather than a regex search, and
# na=False treats a missing Structure_Source as "no match" so the mask stays
# strictly boolean (a mask containing NaN is rejected by boolean indexing).
p_only = df[df["Structure_Source"].str.contains("Mpro-P", regex=False, na=False)]

In [None]:
# Non-null row counts per column for each POSIT method in the filtered frame.
p_only.groupby("POSIT_Method").count()

## Inspect per-compound, per-method row counts

In [None]:
# Non-null row counts for every (compound, POSIT method) combination.
p_only.groupby(by=["Compound_ID", "POSIT_Method"]).count()

In [None]:
# Show all rows for one specific compound.
p_only[p_only["Compound_ID"] == "ALP-POS-133e7cd9-2"]

In [None]:
# Print the docked-file paths recorded for that same compound as a plain list.
print(list(p_only.loc[p_only["Compound_ID"] == "ALP-POS-133e7cd9-2", "Docked_File"]))

# Get intersection: keep only compounds that have results from each required POSIT method

In [None]:
# Compound IDs of every row whose POSIT method is SHAPEFIT.
cmpds = p_only.loc[p_only["POSIT_Method"] == "SHAPEFIT", "Compound_ID"]

In [None]:
# Restrict the filtered frame to compounds present in `cmpds`.
intersection = p_only[p_only["Compound_ID"].isin(cmpds)]

In [None]:
# Row counts per POSIT method after the first filtering step.
intersection.groupby("POSIT_Method").count()

In [None]:
# NOTE(review): the FRED-based compound filter below is commented out, so
# `cmpds` is not reassigned in this cell -- confirm that skipping FRED is intended.
#cmpds = intersection[intersection.POSIT_Method == "FRED"]["Compound_ID"]

In [None]:
# Keep only the rows whose compound appears in the current `cmpds` selection.
intersection = intersection[intersection["Compound_ID"].isin(cmpds)]

In [None]:
# Row counts per POSIT method after re-applying the compound filter.
intersection.groupby("POSIT_Method").count()

In [None]:
# Compound IDs of the remaining rows whose POSIT method is HYBRID.
cmpds = intersection.loc[intersection["POSIT_Method"] == "HYBRID", "Compound_ID"]

In [None]:
# Narrow the frame to compounds that also have a HYBRID row.
intersection = intersection[intersection["Compound_ID"].isin(cmpds)]

In [None]:
# Final row counts per POSIT method for the intersected compound set.
intersection.groupby("POSIT_Method").count()

# save csv

In [None]:
# Write the intersected frame to a CSV inside the analysis directory.
intersection.to_csv(local_analysis.joinpath("20231016_combined.csv"))

In [None]:
# Display the intersected frame.
intersection

# Calculate RMSD

In [None]:
# Load each docked pose. Only the last three components of the recorded path
# are kept, and they are re-rooted under `local_analysis`.
docked_mols = [
    load_openeye_sdf(str(local_analysis.joinpath(*Path(docked_file).parts[-3:])))
    for docked_file in intersection["Docked_File"]
]

In [None]:
# Load one reference molecule per row, looked up by whitespace-stripped
# compound ID in the "sdf_lsf_array_p_only_by_name" folder.
ref_mols = [
    load_openeye_sdf(
        str(local_analysis / "sdf_lsf_array_p_only_by_name" / f"{cmpd_name.strip()}.sdf")
    )
    for cmpd_name in intersection["Compound_ID"]
]

In [None]:
# Number of docked molecules loaded (one per row of `intersection`).
len(docked_mols)

In [None]:
# Number of reference molecules loaded (one per row of `intersection`).
len(ref_mols)

In [None]:
# Pair each docked molecule with its reference, in order, and compute the RMSD.
rmsds = list(map(calculate_rmsd_openeye, docked_mols, ref_mols))

In [None]:
# Store the freshly computed RMSDs in the existing "RMSD" column.
# `intersection` was produced by boolean-filtering other frames, so writing
# into it in place (and through attribute syntax) can raise pandas'
# SettingWithCopyWarning. assign() instead returns a new frame with the column
# replaced, keeping the column order and leaving the parent frames untouched.
intersection = intersection.assign(RMSD=rmsds)

In [None]:
# Write the frame, now carrying the recomputed RMSD values, to the same CSV path.
intersection.to_csv(local_analysis.joinpath("20231016_combined.csv"))