# Imports

In [None]:
from asapdiscovery.data.schema_v2.ligand import Ligand
from asapdiscovery.docking.openeye import POSITDockingResults
from asapdiscovery.docking import DockingResults
from pathlib import Path
import pandas as pd
from tqdm import tqdm

## Load as CSV

In [None]:
# Collect every docking_results_final.csv located exactly one directory below
# the cross-docking output folder.
results_files = [
    *Path(
        "/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/20231116_cross_docking_p_only_2d_multi_reference/"
    ).glob("*/docking_results_final.csv")
]

In [None]:
# Sanity check: number of result CSVs matched by the glob above.
len(results_files)

In [None]:
# Parse each results CSV into its own DataFrame; tqdm reports loading progress.
dfs = list(map(pd.read_csv, tqdm(results_files)))

In [None]:
# Stack the per-file tables into a single frame.
# ignore_index=True relabels the rows 0..N-1; without it every file keeps its
# own 0-based index, so labels repeat across files and label-based lookups
# (and the index column written by to_csv) become ambiguous.
df = pd.concat(dfs, ignore_index=True)

In [None]:
# Persist the concatenated results table as combined.csv in the current working directory.
df.to_csv("combined.csv")

## Load as Docking Results Object

In [None]:
# results_files = list(Path("/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/20231116_cross_docking_p_only_2d_multi_reference/").glob("*/*/*/*.json"))

In [None]:
# len(results_files)

In [None]:
# import json

In [None]:
# with open(results_files[0]) as f:
#     loaded = json.load(f)
#    # pdr = POSITDockingResults.parse_obj() 

In [None]:
# results = [POSITDockingResults.from_json_file(json_file) for json_file in results_files]

## Unfortunately, the `POSITDockingResults` object doesn't contain the Chemgauss results

# Load reference SDFs

In [None]:
# Folder holding the reference ligand SDF files.
sdf_dir = Path(
    "/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/full_frag_prepped_mpro_20230603/sdf_lsf_array_p_only_by_name/"
)

# Parse every *.sdf in that folder into a Ligand.
ligs = list(map(Ligand.from_sdf, sdf_dir.glob("*.sdf")))

# Index the reference ligands by compound name; if two files share a name the
# later one replaces the earlier entry.
lig_dict = dict((lig.compound_name, lig) for lig in ligs)

# Load docked SDFs

In [None]:
# Locate the docked-pose SDF files sitting one directory below the
# cross-docking output folder.
docked_sdfs = list(
    Path(
        "/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/20231116_cross_docking_p_only_2d_multi_reference/"
    ).glob("*/*.sdf")
)

In [None]:
# Parse each docked SDF into a Ligand, keeping docked_sdfs order; tqdm shows progress.
docked_ligs = list(map(Ligand.from_sdf, tqdm(docked_sdfs)))

# Calculate RMSD

In [None]:
# Preview the first rows of the concatenated results table.
df.head()

In [None]:
# Display the first docked ligand to inspect its fields.
docked_ligs[0]

In [None]:
from asapdiscovery.docking.analysis import calculate_rmsd_openeye

# Parallel per-pose columns; entry i of each list describes docked_ligs[i].
compound_ids = []
rmsds = []
smiles = []
posit_method = []
posit_score = []

for lig in tqdm(docked_ligs):
    # Resolve every value that can raise (missing reference ligand, missing
    # tag, RMSD failure) BEFORE appending anything, so an error part-way
    # through a ligand can never leave the lists with different lengths.
    ref_lig = lig_dict[lig.compound_name]
    pose_smiles = lig.smiles
    pose_method = lig.tags["_POSIT_method"]
    pose_confidence = lig.tags["docking-confidence-POSIT"]
    # RMSD of the docked pose against the reference ligand with the same compound name.
    pose_rmsd = calculate_rmsd_openeye(ref_lig.to_oemol(), lig.to_oemol())

    compound_ids.append(lig.compound_name)
    smiles.append(pose_smiles)
    posit_method.append(pose_method)
    posit_score.append(pose_confidence)
    rmsds.append(pose_rmsd)

# Make Complete File

In [None]:
# Assemble the per-pose lists into one table (one row per docked ligand) and
# write it to rmsd_data_with_all_info.csv.
data_df = pd.DataFrame(
    dict(
        POSIT_Method=posit_method,
        POSIT=posit_score,
        Compound_ID=compound_ids,
        RMSD=rmsds,
    )
)
data_df.to_csv("rmsd_data_with_all_info.csv")

In [None]:
# Give the docking-results key column the same name as in data_df so the two
# tables can be joined on it.
df = df.rename({"ligand_id": "Compound_ID"}, axis="columns")

In [None]:
# Attach the per-pose RMSD/POSIT columns to the docking results by compound.
# NOTE(review): the join key is Compound_ID only; if a compound occurs more
# than once on both sides, the inner merge emits every pairing of those rows.
# Confirm the IDs are unique on at least one side (pd.merge's validate=
# argument can enforce this).
combined = df.merge(data_df, on="Compound_ID")

In [None]:
# Dataset label = the first two underscore-separated fields of the
# docking-structure name, re-joined with "_".
combined["Dataset"] = combined[
    "in_silico_SARS_CoV_2_Mpro_docking_structure_POSIT_msk"
].map(lambda structure_name: "_".join(structure_name.split("_")[:2]))

In [None]:
# Complex identifier: "<Compound_ID>_<Dataset>".
combined["Complex_ID"] = combined["Compound_ID"] + "_" + combined["Dataset"]

# Save combined

In [None]:
# Write the merged table to rmsd_calculated_combined.csv in the current working directory.
combined.to_csv("rmsd_calculated_combined.csv")

# Add Tanimoto analysis

In [None]:
# Display the first docked ligand again for reference before the Tanimoto step.
docked_ligs[0]

In [None]:
# Reference ligand = the third underscore-separated field of the
# docking-structure name.
combined["Reference_Ligand"] = combined[
    "in_silico_SARS_CoV_2_Mpro_docking_structure_POSIT_msk"
].map(lambda structure_name: structure_name.split("_")[2])

In [None]:
from importlib import reload
from asapdiscovery.docking import analysis as a

In [None]:
# Re-execute the analysis module so edits made to it on disk are picked up
# without restarting the kernel.
reload(a)

In [None]:
# Smoke-test the Tanimoto calculation on the first docked ligand.
# Look its reference up by compound name: the leftover `ref_lig` loop variable
# from the RMSD cell belongs to the LAST docked ligand, not to docked_ligs[0].
a.calculate_tanimoto_oe(docked_ligs[0], lig_dict[docked_ligs[0].compound_name])

In [None]:
# Score every docked pose against the reference ligand of the same compound
# (one value per entry of docked_ligs, in that order).
tanimoto_scores = [
    a.calculate_tanimoto_oe(docked_lig, lig_dict[docked_lig.compound_name])
    for docked_lig in tqdm(docked_ligs)
]
docked_names = [docked_lig.compound_name for docked_lig in docked_ligs]

# `combined` is the output of a merge, so its rows are not guaranteed to match
# docked_ligs in length or order. Assign positionally only when they provably
# line up; otherwise align on the compound ID instead of silently mislabelling
# rows (or failing with an opaque length-mismatch error).
if combined["Compound_ID"].tolist() == docked_names:
    combined["TanimotoCombo"] = tanimoto_scores
elif len(set(docked_names)) == len(docked_names):
    # Exactly one pose per compound: look the score up by compound ID.
    combined["TanimotoCombo"] = combined["Compound_ID"].map(
        dict(zip(docked_names, tanimoto_scores))
    )
else:
    raise ValueError(
        "Cannot align TanimotoCombo with `combined`: its rows do not follow "
        "docked_ligs order and compound names repeat among the docked ligands."
    )

In [None]:
# Re-save the merged table, overwriting the rmsd_calculated_combined.csv
# written earlier so it now includes the Reference_Ligand and TanimotoCombo columns.
combined.to_csv("rmsd_calculated_combined.csv")