In [13]:
import os
import json
from data_loader.SingleDataLoader import SingleDataLoader
from data_loader.DoubleDataLoader import DoubleDataLoader
from models.LeNet import LeNet
from models.AlexNet import Alexnet
from trainer.train import train_func
from lib.path import get_training_data_dir
from custom_losses.dice import dice_loss, dice_coefficient
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras.losses import BinaryFocalCrossentropy, BinaryCrossentropy

In [2]:
# Root folder of the split lists, relative to the notebook's working directory.
data_dir = '../data'

# One entry per split, each named "<split>_list" directly under data_dir.
train_list, test_list, val_list = (
    os.path.join(data_dir, f'{split}_list') for split in ('train', 'test', 'val')
)

In [3]:
# Two feature types; one training-data directory is resolved for each of them.
DATA_TYPE1 = 'gr'
DATA_TYPE2 = 'gist'

# Settings shared by both directories.
DATA_VOXEL_NUM = 10
LIGAND_VOXEL_NUM = 8
CLASSIFYING_RULE = 'WaterClassifyingRuleSurface'
LIGAND_POCKET_DEFINER = 'LigandPocketDefinerOriginal'

# Positional arguments that follow the data type in get_training_data_dir.
_shared_dir_args = (DATA_VOXEL_NUM, CLASSIFYING_RULE, LIGAND_POCKET_DEFINER, LIGAND_VOXEL_NUM)
training_data_dir1, training_data_dir2 = (
    get_training_data_dir(data_type, *_shared_dir_args)
    for data_type in (DATA_TYPE1, DATA_TYPE2)
)

# Show both resolved directories so the selected dataset variant is visible in the output.
print(training_data_dir1, "\n", training_data_dir2)

/home/ito/research/data/training_data/gr/data_voxel_num_10/LigandPocketDefinerOriginal/ligand_pocket_voxel_num_8/WaterClassifyingRuleSurface/ 
 /home/ito/research/data/training_data/gist/data_voxel_num_10/LigandPocketDefinerOriginal/ligand_pocket_voxel_num_8/WaterClassifyingRuleSurface/


In [5]:
# Loader built from the DATA_TYPE1 training directory only; its load_data() is used to read each split list.
data_loader = SingleDataLoader(training_data_dir1)

In [4]:
# Alternative (disabled): build the loader from both training directories instead of one.
# data_loader = DoubleDataLoader(training_data_dir1, training_data_dir2)

In [6]:
# Load (data, labels) for every split with the same loader, in train -> test -> val order.
(train_data, train_labels), (test_data, test_labels), (val_data, val_labels) = (
    data_loader.load_data(split_list)
    for split_list in (train_list, test_list, val_list)
)

NameError: name 'data_loader' is not defined

In [6]:
# Sanity check: report the shape of every loaded array, data first and labels second for each split.
shape_report = (
    ('Train data shape: ', train_data),
    ('Train labels shape: ', train_labels),
    ('Test data shape: ', test_data),
    ('Test labels shape: ', test_labels),
    ('Val data shape: ', val_data),
    ('Val labels shape: ', val_labels),
)
for caption, array in shape_report:
    print(caption, array.shape)

Train data shape:  (36927, 21, 21, 21, 1)
Train labels shape:  (36927,)
Test data shape:  (18287, 21, 21, 21, 1)
Test labels shape:  (18287,)
Val data shape:  (18877, 21, 21, 21, 1)
Val labels shape:  (18877,)


In [10]:
# --- Model input ---
# Cubic single-channel grid with (2 * DATA_VOXEL_NUM + 1) voxels per edge.
input_shape = (DATA_VOXEL_NUM*2+1, DATA_VOXEL_NUM*2+1, DATA_VOXEL_NUM*2+1, 1)

# --- Training hyperparameters (all forwarded to train_func) ---
epochs = 10
batch_size = 64
n_base = 16  # presumably the base number of filters of the network — confirm in the model definition
learning_rate = 1e-5
# NOTE(review): if this is an early-stopping patience in epochs, it exceeds `epochs`
# and can never trigger — confirm in trainer.train.
early_stopping = 300
BN = False  # presumably toggles batch normalisation — confirm in the model definition
dropout = 0.4
model_func = LeNet

# --- Loss and metrics ---
# Candidate losses; index 0 (binary cross-entropy) is the one used for this run.
losses = [BinaryCrossentropy(), dice_loss]
loss= losses[0]
metrics = ['accuracy', dice_coefficient, Recall(), Precision()]

# --- Checkpointing ---
# Dataset-variant sub-path: the last five components of the training-data directory
# (data type / voxel num / pocket definer / ligand voxel num / classifying rule).
# Taking them from the end, after dropping surrounding slashes, keeps this independent
# of how deep the data root sits on the current machine.
path_type = '/'.join(training_data_dir1.strip('/').split('/')[-5:])
# Only {path_type} is interpolated here; "{epoch:04d}" stays a literal placeholder
# (presumably filled in per epoch by the checkpoint callback inside train_func — confirm).
checkpoint_path = f"./checkpoints/{path_type}/LeNet/" + "cp-{epoch:04d}.weights.h5"
model_checkpoint = True

In [11]:
# Class balance of the training labels; summing the labels counts the positives,
# which assumes the positive class is encoded as 1 — TODO confirm.
total = train_labels.shape[0]
pos = train_labels.sum()
neg = total - pos

# Inverse-frequency weights, scaled by total / 2.
class_weight = {
    label: (1 / count) * (total / 2.0)
    for label, count in ((0, neg), (1, pos))
}
weight_for_0, weight_for_1 = class_weight[0], class_weight[1]
print(class_weight)

{0: 1.0427232168069125, 1: 0.960639958376691}


In [12]:
# Arrays for the three splits, keyed by the keyword names train_func is called with.
split_arrays = dict(
    x_train=train_data,
    y_train=train_labels,
    x_test=test_data,
    y_test=test_labels,
    x_val=val_data,
    y_val=val_labels,
)

# Model, optimisation and checkpoint settings defined in the configuration cells.
run_settings = dict(
    input_shape=input_shape,
    model_func=model_func,
    loss=loss,
    metrics=metrics,
    epochs=epochs,
    batch_size=batch_size,
    n_base=n_base,
    learning_rate=learning_rate,
    early_stopping=early_stopping,
    checkpoint_path=checkpoint_path,
    model_checkpoint=model_checkpoint,
    class_weight=class_weight,
    BN=BN,
    dropout=dropout,
)

# clf is used later for .predict() and clf_hist for .history;
# clf_eval presumably holds the evaluation result — confirm in trainer.train.
clf, clf_hist, clf_eval = train_func(**split_arrays, **run_settings)

Epoch 1/10
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - accuracy: 0.5149 - dice_coefficient: 0.5041 - loss: 0.7069 - precision_1: 0.5338 - recall_1: 0.4457 - val_accuracy: 0.5638 - val_dice_coefficient: 0.5169 - val_loss: 0.6845 - val_precision_1: 0.6044 - val_recall_1: 0.4609
Epoch 2/10
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.5685 - dice_coefficient: 0.5262 - loss: 0.6795 - precision_1: 0.5876 - recall_1: 0.5576 - val_accuracy: 0.6078 - val_dice_coefficient: 0.5288 - val_loss: 0.6694 - val_precision_1: 0.6405 - val_recall_1: 0.5559
Epoch 3/10
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.6066 - dice_coefficient: 0.5428 - loss: 0.6601 - precision_1: 0.6300 - recall_1: 0.5974 - val_accuracy: 0.6203 - val_dice_coefficient: 0.5323 - val_loss: 0.6603 - val_precision_1: 0.6681 - val_recall_1: 0.5329
Epoch 4/10
[1m577/577[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [20]:
from lib.helper import make_dir

# Persist the metrics stored in clf_hist.history as JSON under ./history/<dataset variant>/LeNet/.
history_save_path = f"./history/{path_type}/LeNet/training_history.json"
# NOTE(review): make_dir receives the file path; presumably it creates the parent
# directories — confirm in lib.helper.
make_dir(history_save_path)
with open(history_save_path, 'w') as f:
    # default=float converts numpy scalar values (e.g. float32), which json cannot
    # serialise natively; without it json.dump raises TypeError part-way through
    # and leaves a truncated file behind.
    json.dump(clf_hist.history, f, default=float)

In [None]:
# Run the trained model on the held-out test data; the result is inspected in the next cell.
prediction = clf.predict(test_data)

[1m524/524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [11]:
# Sum of the rounded predictions; if the outputs are probabilities in [0, 1] this is the
# number of test samples predicted as class 1 — TODO confirm the model's output range.
prediction.round().sum()

2417.0