In [1]:
import sys
import os
sys.path.append('..')
from lib.path import get_training_data_dir, get_predicted_labeled_water_path, get_displaceable_water_path, get_non_displaceable_water_path
from lib.pdb import filter_atoms_and_create_new_pdb
from data_loader.DataLoader import DataLoader
from models.LeNet import LeNet
from lib.helper import get_latest_checkpoint
from tensorflow.keras.losses import BinaryCrossentropy
from custom_losses.dice import dice_loss, dice_coefficient
from tensorflow.keras.metrics import Recall, Precision
import numpy as np

2024-04-28 16:47:54.331031: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-28 16:47:54.331479: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-28 16:47:54.334610: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-28 16:47:54.372359: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Experiment configuration. These values are passed to get_training_data_dir()
# to locate the training data and are reused by later cells to build the
# checkpoint directory and the output paths.
DATA_TYPE = 'gr'
DATA_VOXEL_NUM = 10
CLASSIFYING_RULE = 'WaterClassifyingRuleSurface'
LIGAND_POCKET_DEFINER = 'LigandPocketDefinerOriginal'
LIGAND_VOXEL_NUM = 8
MODEL_NAME = 'LeNet'
# Identifier of the structure under evaluation (presumably a PDB ID -- confirm).
# Defined once so both loader calls below always refer to the same structure.
PDB_ID = '4lkk'

training_data_dir = get_training_data_dir(
    DATA_TYPE,
    DATA_VOXEL_NUM,
    CLASSIFYING_RULE,
    LIGAND_POCKET_DEFINER,
    LIGAND_VOXEL_NUM,
)

# Load the test samples of the selected structure separately for each class
# sub-directory; each call returns the model inputs together with the matching
# water IDs.
data_loader = DataLoader(training_data_dir)
test_data_displaceable, dis_water_ids = data_loader.get_test_data_and_water_ids(
    PDB_ID, os.path.join(training_data_dir, 'displaceable/')
)
test_data_non_displaceable, non_dis_water_ids = data_loader.get_test_data_and_water_ids(
    PDB_ID, os.path.join(training_data_dir, 'non_displaceable/')
)

In [3]:
# Display the water IDs that were loaded for the displaceable test samples.
dis_water_ids

array([ 656,  420,  711,  129,  788,  941,  561,  666,  188, 1069,  216,
        667,  229,  131,   34])

In [5]:
# Hyperparameters and metrics handed to the LeNet constructor.
# The input is a cube with 2 * DATA_VOXEL_NUM + 1 voxels along each of its
# three axes, followed by a trailing dimension of size 1.
input_shape = (DATA_VOXEL_NUM * 2 + 1,) * 3 + (1,)
n_base = 16
learning_rate = 1e-5
metrics = [
    'accuracy',
    dice_coefficient,
    Recall(),
    Precision(),
]
BN = True

2024-04-22 09:18:54.950659: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [6]:
# Checkpoint directory derived from the experiment configuration.
checkpoint_dir = f'../checkpoints/{DATA_TYPE}/data_voxel_num_{DATA_VOXEL_NUM}/{LIGAND_POCKET_DEFINER}/ligand_pocket_voxel_num_{LIGAND_VOXEL_NUM}/{CLASSIFYING_RULE}/{MODEL_NAME}/'
latest_checkpoint = get_latest_checkpoint(checkpoint_dir)

# Rebuild the network with the configured hyperparameters, then restore the
# weights stored in the selected checkpoint.
model = LeNet(
    n_base,
    input_shape,
    learning_rate,
    BinaryCrossentropy(),
    metrics,
    BN=BN,
)
model.load_weights(latest_checkpoint)


  super().__init__(


  trackable.load_own_variables(weights_store.get(inner_path))


In [10]:
def _scores_and_labels(raw_prediction):
    """Split a raw prediction array into per-sample scores and rounded labels.

    Args:
        raw_prediction: array holding exactly one value per sample (the
            reshape below fails otherwise).

    Returns:
        (scores, labels): `scores` is `raw_prediction` flattened to one
        dimension; `labels` is `raw_prediction` rounded to the nearest
        integer, keeping its original shape.
    """
    scores = raw_prediction.reshape(raw_prediction.shape[0])
    labels = raw_prediction.round()
    return scores, labels


# Samples taken from the displaceable class.
prediction_displaceable = model.predict(test_data_displaceable)
prediction_values_displaceable, predicted_label_dis = _scores_and_labels(prediction_displaceable)

# Samples taken from the non-displaceable class.
prediction_non_displaceable = model.predict(test_data_non_displaceable)
prediction_values_non_displaceable, predicted_label_non_dis = _scores_and_labels(prediction_non_displaceable)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step


In [11]:
# np.nonzero returns one index array per dimension of its argument; element
# [0] holds the positions along the first (sample) axis.
# Displaceable samples: predicted 1 -> true positive, predicted 0 -> false negative.
indices_of_TP, indices_of_FN = (
    np.nonzero(predicted_label_dis == label)[0] for label in (1, 0)
)

# Non-displaceable samples: predicted 1 -> false positive, predicted 0 -> true negative.
indices_of_FP, indices_of_TN = (
    np.nonzero(predicted_label_non_dis == label)[0] for label in (1, 0)
)

In [12]:
# Map the sample positions of each confusion category to their water IDs.
water_ids_TP, water_ids_FN = (
    dis_water_ids[rows] for rows in (indices_of_TP, indices_of_FN)
)
water_ids_FP, water_ids_TN = (
    non_dis_water_ids[rows] for rows in (indices_of_FP, indices_of_TN)
)

In [13]:
# Pick the model scores that belong to each confusion category.
prediction_values_TP, prediction_values_FN = (
    prediction_values_displaceable[rows] for rows in (indices_of_TP, indices_of_FN)
)
prediction_values_FP, prediction_values_TN = (
    prediction_values_non_displaceable[rows] for rows in (indices_of_FP, indices_of_TN)
)

In [17]:
# Pair every water ID with its model score, one mapping per confusion category.
water_id2prediction_value_TP = dict(zip(water_ids_TP, prediction_values_TP))
water_id2prediction_value_FN = dict(zip(water_ids_FN, prediction_values_FN))

water_id2prediction_value_FP = dict(zip(water_ids_FP, prediction_values_FP))
water_id2prediction_value_TN = dict(zip(water_ids_TN, prediction_values_TN))

# Print the four mappings for inspection.
print("TP: ", water_id2prediction_value_TP)
print("FN: ", water_id2prediction_value_FN)
print("FP: ", water_id2prediction_value_FP)
print("TN: ", water_id2prediction_value_TN)

TP:  {941: 0.6366372, 666: 0.7736607, 1069: 0.6957607, 216: 0.80631757, 667: 0.82130253, 131: 0.5225806}
FN:  {656: 0.30708146, 420: 0.13442233, 711: 0.47150803, 129: 0.29293793, 788: 0.17954832, 561: 0.34224707, 188: 0.24387303, 229: 0.27798364, 34: 0.2891343}
FP:  {550: 0.6481192, 671: 0.5033118, 668: 0.5660274}
TN:  {682: 0.30184636, 708: 0.25542265, 491: 0.24988605, 458: 0.3332024, 81: 0.15289299, 1028: 0.32673353, 559: 0.4017425, 881: 0.32944325, 78: 0.3838709, 95: 0.28970626, 706: 0.46545634, 372: 0.36406153, 48: 0.17018513}


In [15]:
# Path returned for the "TP" category of structure '4lkk' under the current
# experiment configuration (presumably where the labelled waters get written
# -- confirm against the helper).
output_path = get_predicted_labeled_water_path(
    DATA_TYPE,
    '4lkk',
    DATA_VOXEL_NUM,
    LIGAND_VOXEL_NUM,
    CLASSIFYING_RULE,
    LIGAND_POCKET_DEFINER,
    MODEL_NAME,
    "TP",
)
# Path of the displaceable-water file for the same structure.
input_path = get_displaceable_water_path(
    '4lkk',
    LIGAND_VOXEL_NUM,
    CLASSIFYING_RULE,
    LIGAND_POCKET_DEFINER,
)