Load dependencies

In [1]:
import numpy as np
import pandas as pd
import os
import sys
import json
import tensorflow as tf
import NN_Predictor

In [2]:
from pathlib import Path

# Directory the notebook is run from; the input and output folders live in the
# "files" directory next to it (i.e. under the parent of the working directory).
path = Path.cwd()

# sys.path entries must be plain strings (other types are ignored by the import
# system), so convert the Path. The membership check keeps the cell idempotent
# when it is re-run in the same kernel.
if str(path) not in sys.path:
    sys.path.append(str(path))

file_path = os.path.join(path.parent,'files', 'NN')
output_path = os.path.join(path.parent,'files', 'custom scores')

# Later cells write their results into output_path, so make sure it exists.
os.makedirs(output_path, exist_ok=True)

# Reaction ids stored alongside the network, converted to a string array.
rxn_ids = np.load(file_path+'/rxn_ids.npy').astype('str')

#Tensorflow complains because the NN is not optimized perfectly, so only
#messages of level ERROR and above are let through its Python logger
tf.get_logger().setLevel('ERROR')

## Introduction

The purpose of this notebook is to show some examples of using the neural network (NN) to make predictions.

## Making predictions using the NN

Load in your favourite Neural Network

In [3]:
# Load the saved network (NN_full.h5) from the NN data folder
NN = NN_Predictor.load_NN(path=f"{file_path}/NN_full.h5")

2022-01-20 16:24:33.114324: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-20 16:24:33.144216: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3192695000 Hz
2022-01-20 16:24:33.145568: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55975952cff0 executing computations on platform Host. Devices:
2022-01-20 16:24:33.145603: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Host, Default Version
2022-01-20 16:24:33.149296: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Load in the data. The current NN requires a binary list indicating the presence of each reaction; it is important that this list is in the right order (maybe something that I can solve).
First, let's try it with a single reaction set.

In [4]:
# Binary reaction-presence vector for a single example reaction set
input_data = np.load(f"{file_path}/example_binary.npy")

This example already has the right order, so no worries: we can immediately make our prediction.

In [5]:
# Run the loaded network on the single binary reaction list loaded above
prediction = NN_Predictor.make_prediction(input_data,NN)

We can then save this as a dictionary (.json) so we know which prediction corresponds to which reaction.

In [6]:
# Pair every reaction id with its predicted value. The values are cast with
# astype('float') first -- presumably so that json can serialise them.
pdic = dict(zip(rxn_ids,prediction.astype('float')))

# Use a context manager so the file is closed even if json.dump raises.
with open(output_path+"/prediction_example.json", 'w') as p_file:
    json.dump(pdic, p_file)

In [7]:
# Display the reaction id -> prediction mapping that was written to the json file
pdic

{'rxn00708_c': 0.760888397693634,
 'rxn03236_c': 0.9464927315711975,
 'rxn00611_c': 0.15143105387687683,
 'rxn07435_c': 0.9705636501312256,
 'rxn05746_c': 0.21239891648292542,
 'rxn01678_c': 0.947871208190918,
 'rxn00289_c': 0.7557255625724792,
 'rxn02201_c': 0.12500518560409546,
 'rxn05561_c': 0.11122691631317139,
 'rxn02286_c': 0.28739410638809204,
 'rxn00394_c': 0.20634570717811584,
 'rxn07433_c': 0.33714747428894043,
 'rxn07431_c': 0.9566936492919922,
 'rxn05567_c': 0.8707337379455566,
 'rxn01265_c': 0.22565758228302002,
 'rxn00836_c': 0.9200906157493591,
 'rxn08977_c': 0.8182903528213501,
 'rxn00642_c': 0.6981823444366455,
 'rxn00874_c': 0.2900041341781616,
 'rxn00834_c': 0.29496458172798157,
 'rxn05527_c': 0.9285405278205872,
 'rxn08180_c': 0.15021759271621704,
 'rxn01269_c': 0.8876037001609802,
 'rxn00872_c': 0.46971264481544495,
 'rxn05563_c': 0.7852461338043213,
 'rxn05318_c': 0.751314640045166,
 'rxn00782_c': 0.565354585647583,
 'rxn00392_c': 0.8438130617141724,
 'rxn00747_c'

What if we only have a list of reaction ids? (The following part could maybe be integrated into the `make_prediction` function; this was a bit of a random idea, but it seems to work.)

In [8]:
# Seeded generator so the "random" example gives the same reaction set on every
# Restart & Run All; change the seed to draw a different set.
rng = np.random.default_rng(0)

#creates a random list of 500 reactions (sampled without replacement)
rand_reaction_set = rng.choice(rxn_ids, 500, replace=False)

#converts this to the right format
b_list = NN_Predictor.convert_reaction_list(rand_reaction_set)


# Multiple predictions

We can also make predictions for multiple models at the same time, starting with a reaction presence dataframe where the rows are the different reactions and the columns are the model ids:

In [9]:
#multiple binary reaction lists (csv)
# The first csv column is used as the row index (reaction ids); the remaining
# columns are one per model.
input_path = file_path+'/Sample_reaction_presence.csv'
df = pd.read_csv(input_path, index_col=0)
model_ids = df.columns
# NOTE: this rebinds rxn_ids (earlier loaded from rxn_ids.npy) to the csv's row
# index, so re-running earlier cells afterwards will see these ids instead.
# NOTE(review): assumes the csv rows are already in the order the NN expects -- TODO confirm
rxn_ids = df.index
# Transpose so that each row of input_data is one model's reaction list
input_data= df.to_numpy().T

We can then create a prediction with the same order of reaction and model ids:

In [10]:
# Predict for all models in one call, using the network loaded earlier
prediction = NN_Predictor.make_prediction(input_data, NN)

# Label the result with reaction ids as rows and model ids as columns, then
# store it next to the other custom scores
df_p = pd.DataFrame(prediction, index=rxn_ids, columns=model_ids)
df_p.to_csv(f"{output_path}/Multiple_NN_predictions.csv")

In [11]:
# Display the prediction table (rows: reaction ids, columns: model ids)
df_p

Unnamed: 0,573074.3,1038927.4,2136177.3,693444.3,1267768.4,1952948.3,1952881.3,1163752.6,84724.3
rxn00836_c,9.998161e-01,9.999973e-01,0.884641,9.999725e-01,0.769475,0.998747,0.918376,0.000431,0.996692
rxn02201_c,9.994148e-01,1.000000e+00,0.996596,9.999999e-01,0.996973,0.999912,0.981015,0.006763,0.997870
rxn09126_c,1.192093e-07,4.738569e-06,0.000000,1.430511e-06,0.000000,0.000133,0.000010,0.000009,0.000000
rxn05440_c,9.721518e-05,9.992779e-01,0.999786,9.996909e-01,0.999981,0.999955,0.999986,0.001043,1.000000
rxn05156_c,7.726848e-04,5.960464e-08,0.000486,4.172325e-07,0.000071,0.000036,0.000097,0.000031,0.000044
...,...,...,...,...,...,...,...,...,...
rxn03395_c,6.884336e-06,9.999667e-01,0.999933,9.999970e-01,0.999990,0.000002,0.002735,0.005030,0.000239
rxn00330_c,9.318737e-01,9.961246e-01,0.953479,9.990464e-01,0.948009,0.000918,0.092615,0.000207,0.971920
rxn05147_c,1.303196e-03,1.788139e-07,0.002296,5.364418e-07,0.000022,0.000664,0.000491,0.000112,0.000124
rxn03445_c,2.600658e-02,9.999993e-01,0.999630,9.999993e-01,0.999614,0.991813,0.999343,0.126049,0.999765
