In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt

# Load the spectra file into `db`.
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly
# instead of relying on the platform's locale default (which differs on
# e.g. Windows and would mis-decode any non-ASCII character).
with open("data/Drugbank2019_MS_CFMID4.0_FRAGMENTS.json", mode="r", encoding="utf-8") as drugbank:
    db = json.load(drugbank)

In [2]:
%matplotlib notebook
# Tunable parameters of the Gaussian smoothing applied to every spectrum.
SIGMA = 10          # standard deviation of each Gaussian (same units as the "mz" values)
GRID_POINTS = 1000  # number of samples on the common mass axis (= points per cloud)
GRID_PADDING = 100  # margin added below the smallest and above the largest peak


def spectrum_point_cloud(spectra, sigma=SIGMA, n_points=GRID_POINTS, padding=GRID_PADDING):
    """Turn the peak lists of one `db` entry into a point cloud.

    Every value of ``spectra`` must be a mapping with an ``"mz"`` and an
    ``"intens"`` sequence.  Each such peak list is replaced by a sum of
    Gaussians (centre = peak position, height = peak intensity, width =
    ``sigma``) sampled on a mass axis shared by all values of ``spectra``.
    The axis runs from ``min(mz) - padding`` to ``max(mz) + padding``.

    Parameters
    ----------
    spectra : dict
        Mapping whose values each provide ``"mz"`` and ``"intens"``.
    sigma : float
        Standard deviation of the Gaussians.
    n_points : int
        Number of samples on the mass axis.
    padding : float
        Margin added on both sides of the observed mass range.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_points, len(spectra))``; column ``i`` holds the
        sampled curve of the ``i``-th value of ``spectra``.  A peak list
        without any peaks yields an all-zero column.

    Raises
    ------
    ValueError
        If none of the values of ``spectra`` contains a peak, because the
        mass axis cannot be defined in that case.
    """
    levels = list(spectra.values())

    # Empty peak lists are left out of the range computation; a bare
    # max()/min() over them would abort with an unhelpful error message.
    populated = [level["mz"] for level in levels if len(level["mz"]) > 0]
    if not populated:
        raise ValueError("no peaks found in any spectrum")
    lowest = min(min(masses) for masses in populated)
    highest = max(max(masses) for masses in populated)

    grid = np.linspace(lowest - padding, highest + padding, num=n_points)
    cloud = np.zeros((n_points, len(levels)))
    for column, level in enumerate(levels):
        # zip() pairs positions with intensities and ignores surplus entries
        # if the two lists differ in length.
        pairs = list(zip(level["mz"], level["intens"]))
        if not pairs:
            continue  # nothing to add, the column stays zero
        centres, heights = np.array(pairs, dtype=float).T
        # Broadcasting yields one row per peak and one column per grid sample.
        bumps = heights[:, None] * np.exp(
            -0.5 * np.square((grid[None, :] - centres[:, None]) / sigma)
        )
        cloud[:, column] = bumps.sum(axis=0)
    return cloud


point_clouds = []
for entry_id, ms in db.items():
    # Remove the "frag" entry in place (as before) so that only values with
    # "mz"/"intens" lists remain.
    # NOTE(review): "frag" presumably holds fragment annotations - confirm.
    ms.pop("frag", None)

    try:
        mixtures = spectrum_point_cloud(ms)
    except ValueError as err:
        # Re-raise with the key of the offending entry to ease debugging.
        raise ValueError(f"entry {entry_id!r}: {err}") from err
    point_clouds.append(mixtures)

# To inspect a single cloud visually, scatter the first three columns of
# `mixtures` on a 3D matplotlib axis.

In [7]:
from gtda.homology import VietorisRipsPersistence as Persistence
from gtda.diagrams import PersistenceEntropy
from gtda.pipeline import Pipeline
from gtda.diagrams import Filtering

# Feature extraction: persistence diagrams -> filtering -> persistence entropy.

# Step 1: Vietoris-Rips persistence in homology dimensions 0 and 1.
diagram_step = Persistence(homology_dimensions=[0, 1], n_jobs=3)

# Step 2: diagram filtering with epsilon = 0.01.
# NOTE(review): presumably discards features with lifetime <= epsilon - confirm.
filter_step = Filtering(epsilon=0.01)

# Step 3: persistence entropy of the filtered diagrams, not normalised.
entropy_step = PersistenceEntropy(normalize=False, n_jobs=3)

pipeline = Pipeline([
    ('diagram', diagram_step),
    ('filter', filter_step),
    ('entropy', entropy_step),
])

# Fit and apply all steps to the whole list of point clouds in one go.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so the
# last run was aborted - presumably because it is slow; confirm before re-running.
features = pipeline.fit_transform(point_clouds)

KeyboardInterrupt: 