# Imports

In [None]:
import pandas as pd, numpy as np
import plotly.express as px
from plotly.graph_objs import Figure
from pathlib import Path
from asapdiscovery.docking.analysis import get_df_subset, calc_perc_good, calculate_perc_good
from importlib import reload
import asapdiscovery.docking.analysis as a
from functools import reduce
import operator
import shutil

# Load Paths

In [None]:
import sys
# Put the directory three levels above the current working directory on
# sys.path so that the `software` package imported below can be resolved.
sys.path.append(str(Path("../../../").resolve()))
from software.paths import paths

In [None]:
# Local working directory for this analysis: the combined CSV is read from it
# and the selected SDF files are copied into it.
# NOTE(review): hard-coded, user-specific absolute path — adjust before
# running on another machine.
local_analysis = Path("/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/")

In [None]:
# Load the combined results table; the first CSV column becomes the row index.
df = pd.read_csv(local_analysis / "20230611-combined.csv", index_col=0)

## a bit of fixing

In [None]:
# Derived column: 2 minus TanimotoCombo.
# NOTE(review): presumably TanimotoCombo ranges over [0, 2], which would make
# this a reversed score where lower is better — confirm against the data.
df["TanimotoCombo_R"] = 2-df.TanimotoCombo

# Functions

# Find Compound_IDs with RMSD values close to the desired targets

In [None]:
def get_cmpds_at_rmsds(df, rmsds, tolerance=0.1):
    """Select one row per target RMSD for compounds that match every target.

    A compound is selected only if, for each value in ``rmsds``, it has at
    least one row whose ``RMSD`` is close to that value. For every selected
    compound the first matching row (in ``df`` order) is taken for each
    target, in the order the targets are given.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least ``Compound_ID`` and ``RMSD`` columns.
    rmsds : iterable of float
        Target RMSD values.
    tolerance : float, default 0.1
        Absolute tolerance passed to ``numpy.isclose`` as ``atol``; numpy's
        default relative tolerance still applies on top of it.

    Returns
    -------
    pandas.DataFrame or None
        The picked rows, grouped by compound (in order of first appearance
        in ``df``) and then by target. ``None`` if ``rmsds`` is empty or no
        compound matches every target.
    """
    # One boolean row mask per target RMSD.
    row_masks = [np.isclose(df.RMSD, target, atol=tolerance) for target in rmsds]
    if not row_masks:
        # Without targets there is nothing to pick, and reduce() below would
        # raise TypeError on an empty list.
        return None

    all_cmpds = df.Compound_ID.unique()
    # For each target, flag the compounds that have at least one matching row.
    has_match = [
        np.isin(all_cmpds, df[mask]["Compound_ID"].unique()) for mask in row_masks
    ]
    # Keep only the compounds flagged for every target.
    selected_cmpds = all_cmpds[reduce(operator.and_, has_match)]
    if len(selected_cmpds) == 0:
        return None

    # First matching row per (compound, target), compound-major order.
    picked = [
        df[(df.Compound_ID == cmpd) & mask].head(1)
        for cmpd in selected_cmpds
        for mask in row_masks
    ]
    return pd.concat(picked)

In [None]:
# Pick compounds that have a pose with RMSD within 0.05 of 0.1 and another
# within 0.05 of 2; the result is None when no compound qualifies.
selected_df = get_cmpds_at_rmsds(df, (0.1,2), tolerance=0.05)

In [None]:
# Show which compounds were selected. Raises AttributeError if the selection
# returned None (no compound matched).
selected_df.Compound_ID.unique()

In [None]:
# NOTE(review): these two lists are never filled in the visible code; they are
# kept because later cells may reference them.
sdf_paths = []
rmsds = []
# Copy the docked file of every selected row into the local analysis directory,
# named "<Compound_ID>_<RMSD to one decimal>.sdf".
for record in selected_df.to_dict(orient="records"):
    lilac_path = Path(record["Docked_File"])
    # Re-root the recorded path under the local mount point: keep only the
    # part of the path below its fifth-from-last ancestor.
    # NOTE(review): presumably Docked_File is an absolute path on the remote
    # machine, so this drops the root plus four leading directories — confirm.
    remote_root = list(lilac_path.parents)[-5]
    mounted_path = paths.ap_local / lilac_path.relative_to(remote_root)
    # The original evaluated mounted_path.exists() and discarded the result.
    # Fail early with the compound and path named; shutil.copy would raise
    # the same exception type for a missing source.
    if not mounted_path.exists():
        raise FileNotFoundError(
            f"Docked file for {record['Compound_ID']} not found at {mounted_path}"
        )
    new_path = local_analysis / f"{record['Compound_ID']}_{record['RMSD']:.1f}.sdf"
    shutil.copy(mounted_path, new_path)
    
    

$$Tanimoto(f,g) = \frac{Intersection(f,g)}{Union(f,g)}$$

$$Intersection(f,g)=\int f(x,y,z)g(x,y,z)dV$$

$$I(f,g) = I(g,f)$$

$$ShapeTanimoto(f,g) = \frac{I(f,g)}{I(f,f) + I(g,g) - I(f,g)}$$