In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm.notebook import tqdm
from functools import partial
import gudhi as gd

# Load data/drugbank.json into `db`; later cells iterate over `db.values()`.
# JSON text is UTF-8 by specification, so the encoding is pinned here instead
# of being left to the platform's locale default.
with open("data/drugbank.json", mode = "r", encoding = "utf-8") as drugbank:
    db = json.load(drugbank)

In [2]:
%matplotlib inline
def build_point_cloud(
    ms : dict,
    num : int,
    sigma : float
):
    """Sample every ``energy*`` spectrum of ``ms`` as a normalised sum of Gaussians.

    Every entry of ``ms`` whose key starts with ``"energy"`` is read as a
    spectrum with an ``"mz"`` and an ``"intens"`` sequence. Each
    ``(mz, intens)`` pair contributes a Gaussian of height ``intens`` and
    width ``sigma`` centred at ``mz``. The Gaussians of one spectrum are
    summed on an equidistant grid shared by all spectra, and the sum is
    divided by its largest sample.

    Parameters
    ----------
    ms : dict
        Record holding one or more ``energy*`` entries; other keys are ignored.
    num : int
        Number of grid points. Converted with ``int``, so ``1e3`` is accepted.
        Must be at least 1 after conversion.
    sigma : float
        Width of each Gaussian, in the same unit as the ``mz`` values.
        Must be strictly positive.

    Returns
    -------
    tuple
        ``(mm_range, mixtures)`` where ``mm_range`` is the ``(min, max)`` pair
        over all ``mz`` values of the energy entries, and ``mixtures`` is a
        ``(num, n_energies)`` array with one column per energy entry, in the
        iteration order of ``ms``. A spectrum whose sampled mixture is zero
        everywhere (no peaks, or only zero intensities) yields an all-zero
        column instead of NaNs.

    Raises
    ------
    AssertionError
        If ``sigma <= 0`` or ``int(num) <= 0``.
    ValueError
        If ``ms`` has no energy entry with at least one ``mz`` value.
    """
    assert sigma > 0
    # Convert first: a fractional value such as 0.5 would pass `num > 0`
    # and then truncate to an empty grid.
    num = int(num)
    assert num > 0

    energies = [v for k, v in ms.items() if k.startswith("energy")]
    all_mz = [mz for energy in energies for mz in energy["mz"]]
    if not all_mz:
        raise ValueError(
            "ms contains no 'energy*' entry with at least one mz value"
        )
    mm_range = (min(all_mz), max(all_mz))

    # equidistant sampling
    x_def = np.linspace(
        *mm_range,
        num = num,
    )
    mixtures = np.zeros((num, len(energies)))
    for idx, energy in enumerate(energies):
        # Accumulate in place rather than materialising one array per peak.
        mix = np.zeros(num)
        for peak, intens in zip(energy["mz"], energy["intens"]):
            mix += intens * np.exp(-.5 * np.power((x_def - peak) / sigma, 2))
        top = mix.max()
        # Dividing an all-zero mixture by its maximum would give 0/0 = NaN;
        # leave that column at zero instead.
        if top != 0:
            mixtures[:, idx] = mix / top

    return (mm_range, mixtures)

# One (mm_range, mixtures) pair per record in `db`: 1e3 grid points and a
# Gaussian width of 4 for every record. tqdm shows progress over the list.
point_clouds = [
    build_point_cloud(record, 1e3, 4)
    for record in tqdm(list(db.values()))
]

  0%|          | 0/9701 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
from random import choice
# Plot every column of one randomly chosen point cloud, one panel per column.
mm_range, mx = choice(point_clouds)
fig = plt.figure(figsize = (20,10))
# Rebuild the sampling grid from the matrix itself so there is exactly one
# x value per row, whatever `num` the point clouds were built with.
x_def = np.linspace(
    *mm_range,
    num = mx.shape[0],
)
n_panels = mx.shape[1]
for idx in range(n_panels):
    ax = fig.add_subplot(1, n_panels, idx+1)
    ax.plot(x_def, mx[:,idx])
    ax.set_xlabel("m/z")
    ax.set_ylabel("normalised intensity")
    ax.set_title(f"column {idx}")
plt.show()

In [None]:
%matplotlib inline
# 3-D scatter of one randomly chosen point cloud, using the first three
# columns of its mixture matrix as the x, y and z coordinates.
_, mx = choice(point_clouds)
fig, ax = plt.subplots(
    figsize=(10,10),
    subplot_kw={"projection": "3d"},
)
xs, ys, zs = mx[:,0], mx[:,1], mx[:,2]
ax.scatter(xs, ys, zs, s = 10, marker = ".")

def tf_distance_matrix(pc, max_edge_length = .3, max_dimension = 2):
    """Pairwise Euclidean distances of a point cloud plus its Rips simplex tree.

    Parameters
    ----------
    pc : tuple
        Two-element sequence whose second item is the point matrix, with one
        point per row. The first item is ignored.
    max_edge_length : float, optional
        Passed to ``gd.RipsComplex`` as ``max_edge_length``. Defaults to 0.3,
        the value that used to be hard-coded.
    max_dimension : int, optional
        Passed to ``create_simplex_tree`` as ``max_dimension``. Defaults to 2,
        the value that used to be hard-coded.

    Returns
    -------
    tuple
        ``(distances, simplex_tree)``: the ``(n, n)`` float32 NumPy distance
        matrix and the simplex tree built from it. Both used to be computed
        and then discarded, because the function had no return statement.
    """
    _, mx = pc
    mx = tf.convert_to_tensor(mx, dtype=tf.float32)
    # Broadcasting (1, n, d) against (n, 1, d) yields all n*n row differences.
    t1 = tf.reshape(mx, (1,mx.shape[0],mx.shape[1]))
    t2 = tf.reshape(mx, (mx.shape[0],1,mx.shape[1]))
    # Hand gudhi a plain NumPy array rather than a TensorFlow tensor.
    # NOTE(review): .numpy() requires eager execution (the TF2 default).
    distances = tf.norm(t1-t2, ord='euclidean', axis=2,).numpy()
    skeleton = gd.RipsComplex(
        distance_matrix = distances,
        max_edge_length = max_edge_length
    )
    simplex_tree = skeleton.create_simplex_tree(max_dimension = max_dimension)
    return distances, simplex_tree

In [None]:
# Pairwise Euclidean distances between the rows of one random point cloud,
# thresholded into a boolean adjacency matrix.
_, mx = choice(point_clouds)
mx = tf.convert_to_tensor(mx, dtype=tf.float32)
# Broadcasting (1, n, d) against (n, 1, d) yields all n*n row differences.
t1 = tf.reshape(mx, (1,mx.shape[0],mx.shape[1]))
t2 = tf.reshape(mx, (mx.shape[0],1,mx.shape[1]))

result = tf.norm(t1-t2, ord='euclidean', axis=2,)

fig = plt.figure()
result = result.numpy()
# Two points are adjacent when they are closer than 0.4.
result = (result < 0.4)
# Every point is at distance 0 from itself; clear the diagonal in one call
# instead of visiting all n*n entries in a Python double loop.
np.fill_diagonal(result, False)

plt.imshow(result)

In [None]:
import networkx as nx
%matplotlib inline

# Read `result` as an adjacency matrix and draw the resulting graph.
G = nx.Graph(result)

# Node i is placed at the first two coordinates of row i of `mx`.
node_positions = dict(enumerate(mx[:,:2]))
fig = plt.figure(figsize=(10,10))
nx.draw(G, pos = node_positions, node_size = 10)