Commit
Isim (#107)
* integrate isim_comp.py module from: https://doi.org/10.26434/chemrxiv-2023-fxlxg
eloyfelix committed Feb 15, 2024
1 parent ae02e00 commit 5f62f01
Showing 6 changed files with 181 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
@@ -120,7 +120,7 @@ jobs:

steps:
- uses: actions/checkout@v3
-      - uses: pypa/cibuildwheel@v2.16.2
+      - uses: pypa/cibuildwheel@v2.16.5
env:
CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }}
CIBW_ARCHS_LINUX: x86_64 aarch64
2 changes: 1 addition & 1 deletion FPSim2/__init__.py
@@ -5,4 +5,4 @@
except Exception as e:
pass

__version__ = "0.5.0"
__version__ = "0.5.1"
Empty file added FPSim2/contrib/__init__.py
Empty file.
Empty file added FPSim2/contrib/isim/__init__.py
Empty file.
165 changes: 165 additions & 0 deletions FPSim2/contrib/isim/isim_comp.py
@@ -0,0 +1,165 @@
from .utils import unpack_fpe, get_c_total_from_fpe
import numpy as np

""" iSIM_MODULES
----------------------------------------------------------------------
Miranda-Quintana Group, Department of Chemistry, University of Florida
----------------------------------------------------------------------
Please, cite the original paper on iSIM:
https://doi.org/10.26434/chemrxiv-2023-fxlxg
"""


def _calculate_counters(data, n_objects, k=1):
"""Calculate 1-similarity, 0-similarity, and dissimilarity counters
Arguments
---------
data : np.ndarray
Array of arrays, each sub-array contains the binary object
OR Array with the columnwise sum, if so specify n_objects
n_objects : int
Number of objects.
k : int
Integer indicating the 1/k power used to approximate the average of the
similarity values elevated to 1/k.
Returns
-------
counters : dict
        Dictionary with the similarity and dissimilarity counters.
"""
if data.ndim == 1:
c_total = data
else:
c_total = np.sum(data, axis=0)

# Calculate a, d, b + c
a_array = c_total * (c_total - 1) / 2
off_coincidences = n_objects - c_total
d_array = off_coincidences * (off_coincidences - 1) / 2
dis_array = off_coincidences * c_total

a = np.sum(np.power(a_array, 1 / k))
d = np.sum(np.power(d_array, 1 / k))
total_dis = np.sum(np.power(dis_array, 1 / k))

total_sim = a + d
p = total_sim + total_dis

counters = {"a": a, "d": d, "total_sim": total_sim, "total_dis": total_dis, "p": p}
return counters


def get_sim_dict(fpe, k=1):
"""Calculate a dictionary containing all the available similarity indexes
Arguments
---------
fpe : FPSim2.FPSim2Engine
FPSim2 engine
k : int
Integer indicating the 1/k power used to approximate the average of the
similarity values elevated to 1/k.
Returns
-------
sim_dict : dict
        Dictionary with the JT, RR and SM similarity indexes."""
data = get_c_total_from_fpe(fpe)
n_objects = fpe.fps.shape[0]
return _gen_sim_dict(data, n_objects, k=k)


def _gen_sim_dict(data, n_objects, k=1):
"""Calculate a dictionary containing all the available similarity indexes
Arguments
---------
data : np.ndarray
Array of arrays, each sub-array contains the binary object
OR Array with the columnwise sum, if so specify n_objects
n_objects : int
Number of objects.
k : int
Integer indicating the 1/k power used to approximate the average of the
similarity values elevated to 1/k.
Returns
-------
sim_dict : dict
        Dictionary with the JT, RR and SM similarity indexes."""

# Calculate the similarity and dissimilarity counters
counters = _calculate_counters(data=data, n_objects=n_objects, k=k)
jt = (counters["a"]) / (counters["a"] + counters["total_dis"])
rr = (counters["a"]) / (counters["p"])
sm = (counters["total_sim"]) / (counters["p"])

# Dictionary with all the results
Indices = {
"JT": jt, # JT: Jaccard-Tanimoto
"RR": rr, # RR: Russel-Rao
"SM": sm, # SM: Sokal-Michener
}
return Indices


def calculate_medoid(fpe, n_ary="RR"):
    # Medoid: molecule with the lowest complementary similarity, i.e. the most
    # representative molecule of the set. Returns its mol_id.
    index = np.argmin(calculate_comp_sim(fpe, n_ary=n_ary)["index"])
    return fpe.fps[index][0]


def calculate_outlier(fpe, n_ary="RR"):
    # Outlier: molecule with the highest complementary similarity, i.e. the most
    # dissimilar molecule of the set. Returns its mol_id.
    index = np.argmax(calculate_comp_sim(fpe, n_ary=n_ary)["index"])
    return fpe.fps[index][0]


def calculate_comp_sim(fpe, n_ary="RR"):
"""Calculate the complementary similarity for RR, JT, or SM
Arguments
---------
    fpe : FPSim2.FPSim2Engine
        FPSim2 engine
    n_ary : str
        String with the initials of the desired similarity index to calculate the iSIM from.
        Only RR, JT, or SM are available. For other indexes use get_sim_dict.
    Returns
    -------
    comp_sims : np.ndarray
        1D array with the complementary similarities of all the molecules in the set.
"""

data = unpack_fpe(fpe)
c_total = np.sum(data, axis=0)
n_objects = fpe.fps.shape[0] - 1
m = len(c_total)

comp_matrix = c_total - data
a = comp_matrix * (comp_matrix - 1) / 2

if n_ary == "RR":
comp_sims = np.sum(a, axis=1) / (m * n_objects * (n_objects - 1) / 2)

elif n_ary == "JT":
comp_sims = np.sum(a, axis=1) / np.sum(
(a + comp_matrix * (n_objects - comp_matrix)), axis=1
)

elif n_ary == "SM":
comp_sims = np.sum(
(a + (n_objects - comp_matrix) * (n_objects - comp_matrix - 1) / 2), axis=1
) / (m * n_objects * (n_objects - 1) / 2)
    else:
        raise ValueError(f"Unsupported n_ary {n_ary!r}: only 'RR', 'JT' and 'SM' are available")
    return np.rec.fromarrays([fpe.fps[:, 0], comp_sims], names=("mol_id", "index"))
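
For reference, a minimal usage sketch of the new contrib module (not part of this commit); the "fps.h5" path is a placeholder for an existing FPSim2 fingerprint file:

# Minimal usage sketch of FPSim2.contrib.isim; "fps.h5" is a placeholder path.
from FPSim2 import FPSim2Engine
from FPSim2.contrib.isim.isim_comp import (
    get_sim_dict,
    calculate_medoid,
    calculate_outlier,
    calculate_comp_sim,
)

fpe = FPSim2Engine("fps.h5")  # placeholder: an FP file created with FPSim2

print(get_sim_dict(fpe))                   # {'JT': ..., 'RR': ..., 'SM': ...}
print(calculate_medoid(fpe, n_ary="JT"))   # mol_id of the most representative molecule
print(calculate_outlier(fpe, n_ary="JT"))  # mol_id of the most dissimilar molecule

comp_sims = calculate_comp_sim(fpe, n_ary="JT")  # recarray with fields "mol_id" and "index"
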
14 changes: 14 additions & 0 deletions FPSim2/contrib/isim/utils.py
@@ -0,0 +1,14 @@
import numpy as np

def unpack_fpe(fpe):
    # Unpack the packed uint64 fingerprint words into one column per bit, as the
    # iSIM functions operate on explicit binary matrices. fpe.fps[:, 1:-1] drops
    # the mol_id (first) and popcount (last) columns.
    data = fpe.fps[:, 1:-1].view("uint8")
    bits = np.unpackbits(data[:, np.newaxis], axis=1).ravel()
    data = bits.reshape(int(bits.size / (data.shape[1] * 8)), data.shape[1] * 8)
    return data

def get_c_total_from_fpe(fpe):
    # Column-wise sum of the unpacked fingerprints (count of "on" bits per bit
    # position), accumulated molecule by molecule instead of unpacking the whole
    # set into memory at once.
    data = np.zeros(((fpe.fps.shape[1] - 2) * 64), dtype="uint64")
    for m in fpe.fps[:, 1:-1].view("uint8"):
        data += np.unpackbits(m[:, np.newaxis], axis=1).ravel()
    return data
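
As a standalone illustration of the unpacking step above (plain numpy, independent of FPSim2): each packed 64-bit word, viewed as 8 uint8 bytes, expands to 64 fingerprint bits.

# Toy example of the unpacking idea; the value 37 is arbitrary.
import numpy as np

word = np.array([37], dtype=np.uint64)   # one packed fingerprint word (0b100101)
as_bytes = word.view("uint8")            # 8 uint8 bytes per uint64 word
bits = np.unpackbits(as_bytes[:, np.newaxis], axis=1).ravel()
print(bits.shape, int(bits.sum()))       # (64,) 3 -> three bits set in 37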
