In [None]:
import keras
import numpy
from matchms.importing import load_from_msp
import tensorflow as tf
import tensorflow_datasets as tfds


In [None]:
# Read every record yielded by the MSP loader into memory so the spectra can
# be traversed more than once by the cells that follow.
msp_path = "GCMS DB-Public-KovatsRI-VS3.msp"
spectra = [spectrum for spectrum in load_from_msp(msp_path)]



In [None]:
# Pull one metadata column per key out of the loaded spectra; each column is a
# list aligned with `spectra` (same length, same order).
smiles, exact_mass, ri = (
    [spectrum.get(key) for spectrum in spectra]
    for key in ("smiles", "exactmass", "retentionindex")
)

In [None]:
def safe_convert_float(x):
    """Convert ``x`` to a finite float, falling back to ``0.0``.

    ``0.0`` is the sentinel for "no usable value": it is returned when ``x``
    cannot be converted (``None``, non-numeric text, an integer too large for
    a float) and also when the conversion yields NaN or an infinity.

    Parameters
    ----------
    x : object
        Value to convert; anything accepted by ``float()`` converts normally.

    Returns
    -------
    float
        The converted value, or ``0.0`` when no finite number is available.
    """
    try:
        val = float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "missing". A bare `except`
        # would also swallow KeyboardInterrupt / SystemExit.
        return 0.0
    # NaN / inf are non-zero, so they would pass a non-zero filter and then
    # turn every mean/max computed from the data into NaN.
    if not numpy.isfinite(val):
        return 0.0
    return val
    

In [None]:
# Coerce both metadata columns to floats; entries that cannot be converted
# take the fallback value returned by safe_convert_float.
exact_mass = list(map(safe_convert_float, exact_mass))
ri = list(map(safe_convert_float, ri))

In [None]:
# Positions holding a non-zero exact mass, and positions holding a non-zero
# retention index.
exact_mass_idx = list(numpy.flatnonzero(exact_mass))
ri_idx = list(numpy.flatnonzero(ri))

# Keep only the positions where both values are non-zero.
indices = numpy.intersect1d(exact_mass_idx, ri_idx)

In [None]:
# Select the rows listed in `indices`: x is the exact mass (model input),
# y is the retention index (regression target).
x = numpy.asarray(exact_mass).take(indices)
y = numpy.asarray(ri).take(indices)


In [None]:
# Centre both variables on zero by subtracting their means.
x_demean = numpy.subtract(x, x.mean())
y_demean = numpy.subtract(y, y.mean())

In [None]:
# Largest absolute deviation from the mean, per variable.
x_max = numpy.abs(x_demean).max()
y_max = numpy.abs(y_demean).max()

In [None]:
# Scale the centred values into [-1, 1] by dividing by the largest ABSOLUTE
# deviation (x_max / y_max). Dividing by max(x_demean) -- the largest positive
# deviation -- leaves values below -1 whenever the most extreme point lies
# under the mean, and the network's tanh output cannot produce such targets.
x_normalized = x_demean / x_max
y_normalized = y_demean / y_max

In [None]:
# Per-sample datasets, left unshuffled and unbatched so that element i of
# x_train still corresponds to element i of y_train.
x_train = tf.data.Dataset.from_tensor_slices(x_normalized)
y_train = tf.data.Dataset.from_tensor_slices(y_normalized)

# Zip first, THEN shuffle and batch: each (x, y) pair moves through the shuffle
# buffer as one element. Shuffling the two datasets separately before zipping
# would pair every input with an unrelated target.
ds = tf.data.Dataset.zip((x_train, y_train)).shuffle(2048).batch(64)


In [None]:
# Keyword arguments shared by every ReLU hidden layer.
relu_layer_kwargs = dict(activation=tf.nn.relu, kernel_initializer='normal')

# Scalar input, passed through a ReLU whose threshold is -1.
inputs = tf.keras.Input(shape=(1,))
act0 = tf.keras.activations.relu(inputs, threshold=-1)

# Fully connected stack: 1000 (no activation) -> 2000 -> 2000 -> 1000 -> 1000.
dense0 = tf.keras.layers.Dense(1000, kernel_initializer='normal')(act0)
dense1 = tf.keras.layers.Dense(2000, **relu_layer_kwargs)(dense0)
dense2 = tf.keras.layers.Dense(2000, **relu_layer_kwargs)(dense1)
dense3 = tf.keras.layers.Dense(1000, **relu_layer_kwargs)(dense2)
dense4 = tf.keras.layers.Dense(1000, **relu_layer_kwargs)(dense3)

# Single tanh output unit, so predictions are confined to (-1, 1).
outputs = tf.keras.layers.Dense(
    1, activation=tf.nn.tanh, kernel_initializer='normal'
)(dense4)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Regression set-up: mean absolute error is the training loss and RMSE is
# reported alongside it. "accuracy" is a classification metric and carries no
# information for a continuous target, so it is not tracked.
model.compile(
    loss=tf.keras.losses.MeanAbsoluteError(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")],
)

In [None]:
# Five epochs over the batched tf.data pipeline; the returned History object
# is stored in `history`.
history = model.fit(x=ds, epochs=5)

In [None]:
# Continue training the same model directly on the in-memory arrays.
# Per the Keras documentation, validation_split holds out the LAST 20% of the
# samples (taken before shuffling), so the hold-out depends on array order.
# `workers`, `use_multiprocessing` and `max_queue_size` are omitted: they only
# apply to generator / Sequence input and are not accepted by Keras 3's fit().
# The other omitted arguments merely restated the documented defaults.
history = model.fit(
    x=x_normalized,
    y=y_normalized,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
    shuffle=True,
)

In [None]:
# The History object itself only prints as an opaque repr; the per-epoch loss
# and metric values are stored in its `.history` dict.
print(history.history)