# Imports

In [None]:
from openeye import oechem, oedepict, oeshape
from sklearn.cluster import KMeans
import numpy as np
import importlib
from pathlib import Path
from asapdiscovery.docking import docking as d
from asapdiscovery.data import openeye as oe, schema, utils
from asapdiscovery.data.fragalysis import parse_xtal
from tqdm import tqdm

# paths

In [None]:
# Hard-coded, machine-specific location of the combined Mpro Fragalysis SDF
# (a mounted remote path, judging by its name); edit this to run elsewhere.
combined_sdf = Path("/Users/alexpayne/lilac-mount-point/asap-datasets/mpro_fragalysis_2022_10_12/Mpro_combined.sdf")

In [None]:
# Display the absolute, symlink-resolved form of the path.
combined_sdf.resolve()

# load molecules

In [None]:
# Load the SDF into `mols`. Later cells index it and take len() of it, so it
# is used as a sequence of molecules (presumably one OpenEye molecule per
# SDF record -- TODO confirm against load_openeye_sdfs).
mols = oe.load_openeye_sdfs(str(combined_sdf))

In [None]:
# Number of molecules loaded.
len(mols)

In [None]:
# Inspect the first loaded molecule.
mols[0]

In [None]:
# Create the overlap-prep object and apply it to the first molecule; this
# same `prep` instance is reused by every scoring loop in the notebook.
# NOTE(review): Prep() appears to modify the molecule in place (its return
# value is never used here) -- confirm against the OpenEye Shape TK docs.
prep = oeshape.OEOverlapPrep()
prep.Prep(mols[0])

In [None]:
# Exact-shape overlap function with the first molecule set as its reference.
shapeFunc = oeshape.OEExactShapeFunc()
shapeFunc.SetupRef(mols[0])

In [None]:
# Reusable results object: Overlap() calls are handed this object and the
# scores are then read back from it via its Get*() methods.
results = oeshape.OEOverlapResults()

In [None]:
# Prep the second molecule and score it against shapeFunc's reference
# (mols[0]); the scores are written into `results`.
prep.Prep(mols[1])
shapeFunc.Overlap(mols[1], results)

In [None]:
# Tanimoto score currently stored in `results`.
results.GetTanimoto()

In [None]:
# TanimotoCombo score currently stored in `results`.
results.GetTanimotoCombo()

In [None]:
# Overlap function used by the pairwise scoring loops in this notebook.
func = oeshape.OEOverlapFunc()

In [None]:
# Use the first molecule as the reference for `func`.
func.SetupRef(mols[0])

In [None]:
# Score every molecule against the reference currently set on `func`.
# Overlap() has to run for each fit molecule before its scores are read:
# reading `results` without it would simply repeat whatever the last
# Overlap() call left there, paired with each molecule's title.
overlaps = []
for fitmol in mols:
    prep.Prep(fitmol)  # same preparation the other scoring loops apply
    func.Overlap(fitmol, results)
    overlaps.append((fitmol.GetTitle(), results.GetTanimotoCombo(),
                     results.GetTanimoto(), results.GetColorTanimoto()))

In [None]:
# One (title, TanimotoCombo, Tanimoto, ColorTanimoto) record per molecule,
# each scored against the reference currently set on `func`.
results_tuples = []
for fitmol in mols:
    prep.Prep(fitmol)
    func.Overlap(fitmol, results)  # scores land in the shared `results`
    record = (
        fitmol.GetTitle(),
        results.GetTanimotoCombo(),
        results.GetTanimoto(),
        results.GetColorTanimoto(),
    )
    results_tuples.append(record)

In [None]:
# Show the (title, TanimotoCombo, Tanimoto, ColorTanimoto) tuples collected
# by the preceding loop.
results_tuples

In [None]:
# Keep a handle on the third molecule for inspection; `trouble` is not
# referenced again in the visible cells.
trouble = mols[2]

In [None]:
# All-against-all TanimotoCombo scores, stored as a flat list in
# reference-major order: the len(mols) scores for mols[0] as reference come
# first, then those for mols[1], and so on.
results_tuples = []
for reference in tqdm(mols):
    prep.Prep(reference)
    func.SetupRef(reference)

    # Collect this reference's scores, then append them to the flat list.
    row_scores = []
    for fitmol in mols:
        prep.Prep(fitmol)
        func.Overlap(fitmol, results)
        row_scores.append(results.GetTanimotoCombo())
    results_tuples.extend(row_scores)

In [None]:
# Wrap the flat score list in a 1 x N np.matrix (np.matrix is always 2-D).
# NOTE: NumPy's documentation discourages np.matrix in favour of regular
# arrays; later cells rely on its always-2-D behaviour (e.g. `n_gt1.T`),
# so switching would need those cells checked too.
matrix = np.matrix(results_tuples)

In [None]:
# Reshape to a square matrix. Scores were appended reference-major, so row i
# holds the scores with mols[i] as reference and column j is fit mols[j].
mtx = matrix.reshape(len(mols), len(mols))

In [None]:
# Confirm the matrix is len(mols) x len(mols).
np.shape(mtx)

In [None]:
def check_symmetric(a, tol=1e-8):
    """Return True if ``a`` is a square 2-D array equal to its transpose.

    Parameters
    ----------
    a : array-like
        Matrix to test; ``np.ndarray`` and ``np.matrix`` are both accepted.
    tol : float, optional
        Two mirrored entries count as equal when their absolute difference
        is strictly less than ``tol``.

    Returns
    -------
    bool
        ``True`` when every ``a[i, j]`` matches ``a[j, i]`` within ``tol``.
        ``False`` for anything that is not a square 2-D array.
    """
    a = np.asanyarray(a)  # asanyarray keeps np.matrix instances as-is
    # Without this guard a non-square input is silently broadcast against its
    # transpose (e.g. a 1 x N row of equal values would report as symmetric).
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        return False
    return bool(np.all(np.abs(a - a.T) < tol))

In [None]:
# Does score(ref=i, fit=j) equal score(ref=j, fit=i) within the default
# tolerance for every pair?
check_symmetric(mtx)

In [None]:
# Display the pairwise score matrix.
mtx

In [None]:
# Signed asymmetry: entry (i, j) is score(ref=i, fit=j) - score(ref=j, fit=i).
diff = mtx - mtx.T

In [None]:
# Largest asymmetry. `diff` is antisymmetric (diff == -diff.T), so its
# maximum is also the largest absolute difference between mirrored entries.
diff.max()

In [None]:
# Column-wise minimum: for each fit molecule, its lowest score over all
# references.
mtx.min(axis=0)

In [None]:
import plotly.express as px
import pandas as pd

In [None]:
# DataFrame copy of the score matrix; the default integer row and column
# labels are positions in `mols`.
df = pd.DataFrame(mtx)

In [None]:
# Display the score table.
df

In [None]:
# Scatter of column 0: the score of each reference molecule (x = row index)
# with mols[0] as the fit molecule.
px.scatter(df[0])

In [None]:
# Boolean mask of scores above 1, then the per-column count: for each fit
# molecule, how many references score it above 1. With np.matrix the
# result stays 2-D (shape 1 x N).
tc_gt1 = mtx > 1
n_gt1 = tc_gt1.sum(axis=0)

In [None]:
# Display the per-molecule counts of scores above 1.
n_gt1

In [None]:
# Number of molecules whose column holds more than one score above 1.
# NOTE(review): presumably each molecule's self-comparison scores above 1,
# in which case this counts molecules matching at least one *other*
# molecule above 1 -- confirm the diagonal of mtx.
np.matrix(n_gt1 > 1).sum()

In [None]:
# One row per molecule (n_gt1.T is N x 1) holding its count minus one.
# NOTE(review): the "- 1" presumably removes the self-comparison, which
# assumes every diagonal score is above 1 -- TODO confirm.
n_gt1_df = pd.DataFrame(n_gt1.T - 1)

In [None]:
# Give the single column a readable name; plotly uses it as the series label.
n_gt1_df.columns = ['# Greater than 1']

In [None]:
# Display the count table.
n_gt1_df

In [None]:
# Scatter of the counts, coloured by the count itself.
# NOTE(review): 'value' is not a column of n_gt1_df; this relies on plotly
# express's wide-form handling, which names the melted data column
# "value" -- verify against the installed plotly version.
px.scatter(n_gt1_df, color='value')

In [None]:
# Histogram of how many scores above 1 each ligand has, shown inline and
# also saved as a PNG in the working directory. To plot only ligands with
# at least one such score, filter first:
#     n_gt1_df[n_gt1_df["# Greater than 1"] > 0]
# The update_* methods return the figure, so the styling calls are chained.
fig = (
    px.histogram(n_gt1_df, nbins=100)
    .update_xaxes(title="Number of TanimotoCombo Scores Greater than 1 for Each Ligand")
    .update_yaxes(title="# Ligands")
    .update_layout(title="Pairwise Tanimoto Combo Scores for 803 Fragalysis Structures")
)
fig.show()
fig.write_image("n-to-n-tc-histogram.png")

In [None]:
# Mask of ligands with more than one score above 1 in their column. The
# original line had lost its left operand (a syntax error); `n_gt1 > 1` is
# the same comparison the notebook already counts with
# `np.matrix(n_gt1 > 1).sum()`.
at_least_1 = n_gt1 > 1

In [None]:
# Number of ligands flagged in `at_least_1`.
at_least_1.sum()

In [None]:
# Display the boolean mask of scores above 1.
tc_gt1

In [None]:
# Same all-against-all loop as the OEOverlapFunc scan, but scoring with
# OEFlexiOverlay instead.
# NOTE(review): this cell rebinds two names other cells depend on:
#   * `results` becomes whatever overlay.Overlay() returns, while the
#     func.Overlap(fitmol, results) cells expect the OEOverlapResults object;
#   * `results_tuples` is overwritten, and no visible cell consumes the
#     scores collected here.
# NOTE(review): confirm that OEFlexiOverlay exposes Overlay(fitmol) and that
# its return value has GetTanimotoCombo() -- not verifiable from this file.
overlay = oeshape.OEFlexiOverlay()
results_tuples = []
for reference in tqdm(mols):
    prep.Prep(reference)
    overlay.SetupRef(reference)
    for fitmol in mols:
        prep.Prep(fitmol)
        results = overlay.Overlay(fitmol)
        results_tuples.append(results.GetTanimotoCombo())

In [None]:
# Check the type of the molecule most recently bound to the loop variable
# `fitmol`.
type(fitmol)

In [None]:
# Keep only molecules whose title contains neither "_0B" nor "_1A".
# NOTE(review): presumably these suffixes mark alternate chains/sites in the
# Fragalysis naming -- TODO confirm why they are excluded.
good_mols = [
    mol
    for mol in mols
    if "_0B" not in mol.GetTitle() and "_1A" not in mol.GetTitle()
]

In [None]:
# Number of molecules left after the title filter.
len(good_mols)

In [None]:
def _combo_score(candidate):
    """Prep ``candidate``, overlap it on func's current reference and return
    the TanimotoCombo score read from the shared ``results`` object."""
    prep.Prep(candidate)
    func.Overlap(candidate, results)
    return results.GetTanimotoCombo()


# All-against-all TanimotoCombo scores for the filtered molecules, stored
# flat in reference-major order.
good_results_tuples = []
for reference in tqdm(good_mols):
    prep.Prep(reference)
    func.SetupRef(reference)
    for fitmol in good_mols:
        good_results_tuples.append(_combo_score(fitmol))

In [None]:
# Rebuild the score matrix and the "> 1" count table for the filtered
# molecule set, overwriting matrix / mtx / tc_gt1 / n_gt1 / n_gt1_df.
matrix = np.matrix(good_results_tuples)
mtx = np.reshape(matrix, (len(good_mols), len(good_mols)))

# Per fit molecule (column): how many references score it above 1.
tc_gt1 = mtx > 1
n_gt1 = tc_gt1.sum(axis=0)

# One row per molecule, count minus one, with the column named at creation.
n_gt1_df = pd.DataFrame(n_gt1.T - 1, columns=['# Greater than 1'])

In [None]:
# Histogram of the per-ligand "> 1" counts currently in n_gt1_df (the
# filtered set when the cells are run in order).
fig = px.histogram(n_gt1_df, nbins=100)
fig.show()

In [None]:
# Number of molecules whose column holds more than one score above 1,
# using the current n_gt1 (the filtered set when run in order).
np.matrix(n_gt1 > 1).sum()