# With rgb images

### Load data

In [12]:
import pickle

train_filename = "C:/Users/behl/Desktop/lung disease/train_data_sample_rgb.p"
(train_labels, train_data, train_tensors) = pickle.load(open(train_filename, mode='rb'))

valid_filename = "C:/Users/behl/Desktop/lung disease/valid_data_sample_rgb.p"
(valid_labels, valid_data, valid_tensors) = pickle.load(open(valid_filename, mode='rb'))

test_filename = "C:/Users/behl/Desktop/lung disease/test_data_sample_rgb.p"
(test_labels, test_data, test_tensors) = pickle.load(open(test_filename, mode='rb'))

### CNN model

In [13]:
import time

from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout, Flatten, Dense
from tensorflow.keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras import regularizers, initializers, optimizers

model = Sequential()

model.add(Conv2D(filters=16, 
                 kernel_size=7,
                 padding='same', 
                 activation='relu', 
                 input_shape=train_tensors.shape[1:]))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=32, 
                 kernel_size=5,
                 padding='same', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=64, 
                 kernel_size=5,
                 padding='same', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=128, 
                 kernel_size=5,
                 strides=2,
                 padding='same', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 64, 64, 16)        2368      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 32, 32, 16)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 32, 32, 32)        12832     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 16, 16, 64)        51264     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 4, 4, 128)        

In [14]:
from keras import backend as K

def binary_accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.round(y_pred)))

def precision_threshold(threshold = 0.5):
    def precision(y_true, y_pred):
        threshold_value = threshold
        y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold_value), K.floatx())
        true_positives = K.round(K.sum(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(y_pred)
        precision_ratio = true_positives / (predicted_positives + K.epsilon())
        return precision_ratio
    return precision

def recall_threshold(threshold = 0.5):
    def recall(y_true, y_pred):
        threshold_value = threshold
        y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold_value), K.floatx())
        true_positives = K.round(K.sum(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.clip(y_true, 0, 1))
        recall_ratio = true_positives / (possible_positives + K.epsilon())
        return recall_ratio
    return recall

def fbeta_score_threshold(beta = 1, threshold = 0.5):
    def fbeta_score(y_true, y_pred):
        threshold_value = threshold
        beta_value = beta
        p = precision_threshold(threshold_value)(y_true, y_pred)
        r = recall_threshold(threshold_value)(y_true, y_pred)
        bb = beta_value ** 2
        fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
        return fbeta_score
    return fbeta_score

In [15]:
model.compile(optimizer='sgd', loss='binary_crossentropy', 
              metrics=[precision_threshold(threshold = 0.5), 
                       recall_threshold(threshold = 0.5), 
                       fbeta_score_threshold(beta=0.5, threshold = 0.5),
                      'accuracy'])

In [16]:
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
import numpy as np

epochs = 20
batch_size = 32

earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=1, mode='auto')
log = CSVLogger('C:/Users/behl/Desktop/lung disease/log_bCNN_rgb.csv')
checkpointer = ModelCheckpoint(filepath='C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)

start = time.time()

model.fit(train_tensors, train_labels, 
          validation_data=(valid_tensors, valid_labels),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer, log, earlystop], verbose=1)

# Show total training time
print("training time: %.2f minutes"%((time.time()-start)/60))

Train on 3400 samples, validate on 1100 samples
Epoch 1/20
Epoch 00001: val_loss improved from inf to 0.68607, saving model to C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5
Epoch 2/20
Epoch 00002: val_loss improved from 0.68607 to 0.68453, saving model to C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5
Epoch 3/20
Epoch 00003: val_loss improved from 0.68453 to 0.68240, saving model to C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5
Epoch 4/20
Epoch 00004: val_loss improved from 0.68240 to 0.68048, saving model to C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5
Epoch 5/20
Epoch 00005: val_loss did not improve from 0.68048
Epoch 6/20
Epoch 00006: val_loss improved from 0.68048 to 0.66701, saving model to C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5
Epoch 7/20
Epoch 00007: val_loss improved from 0.66701 to 0.66497, saving model to C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5
Epoch 8/20
Epoch 0

### Metric

In [17]:
model.load_weights('C:/Users/behl/Desktop/lung disease/bCNN.best.from_scratch.hdf5')
prediction = model.predict(test_tensors)
print("prediction : ",prediction)

prediction :  [[0.35952947]
 [0.48853812]
 [0.35816407]
 [0.5495918 ]
 [0.4896742 ]
 [0.3756183 ]
 [0.33958054]
 [0.41460478]
 [0.39357018]
 [0.62492836]
 [0.41811556]
 [0.42484617]
 [0.66258085]
 [0.431732  ]
 [0.34676644]
 [0.45423985]
 [0.5434803 ]
 [0.4915854 ]
 [0.28965187]
 [0.26826322]
 [0.59350455]
 [0.31966802]
 [0.3396483 ]
 [0.6227741 ]
 [0.3854418 ]
 [0.32744223]
 [0.3828693 ]
 [0.60828936]
 [0.42556056]
 [0.4118018 ]
 [0.47780445]
 [0.38084736]
 [0.41738263]
 [0.4094715 ]
 [0.5622003 ]
 [0.49149832]
 [0.4980952 ]
 [0.35620612]
 [0.4367329 ]
 [0.629926  ]
 [0.56741977]
 [0.47670692]
 [0.34167254]
 [0.57914835]
 [0.52061766]
 [0.48598462]
 [0.28888804]
 [0.34921688]
 [0.40357167]
 [0.6367646 ]
 [0.42901433]
 [0.52885664]
 [0.40109435]
 [0.45560098]
 [0.2793401 ]
 [0.5718026 ]
 [0.49445602]
 [0.3300076 ]
 [0.33885825]
 [0.41859818]
 [0.43269372]
 [0.29652205]
 [0.5615706 ]
 [0.5477281 ]
 [0.60410756]
 [0.661713  ]
 [0.47408876]
 [0.38235903]
 [0.3300176 ]
 [0.4449988 ]
 [0.41

In [18]:
threshold = 0.5
beta = 0.5

pre = K.eval(precision_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
rec = K.eval(recall_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
fsc = K.eval(fbeta_score_threshold(beta = beta, threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

print ("Precision: %f %%\nRecall: %f %%\nFscore: %f %%"% (pre, rec, fsc))

AttributeError: module 'tensorflow' has no attribute 'get_default_session'

In [9]:
K.eval(binary_accuracy(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

0.50258511

In [10]:
prediction[:30]

array([[ 0.46597424],
       [ 0.57600302],
       [ 0.30108091],
       [ 0.50593966],
       [ 0.61561286],
       [ 0.6416322 ],
       [ 0.29955843],
       [ 0.30611175],
       [ 0.42266327],
       [ 0.40697429],
       [ 0.48799837],
       [ 0.34801716],
       [ 0.55648535],
       [ 0.52279401],
       [ 0.62823325],
       [ 0.35642451],
       [ 0.50304908],
       [ 0.42197177],
       [ 0.72991049],
       [ 0.50801474],
       [ 0.31001693],
       [ 0.49956188],
       [ 0.50922167],
       [ 0.47676209],
       [ 0.36952221],
       [ 0.3691574 ],
       [ 0.63229394],
       [ 0.49165967],
       [ 0.53164726],
       [ 0.54903966]], dtype=float32)

In [11]:
threshold = 0.4
beta = 0.5

pre = K.eval(precision_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
rec = K.eval(recall_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
fsc = K.eval(fbeta_score_threshold(beta = beta, threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

print ("Precision: %f %%\nRecall: %f %%\nFscore: %f %%"% (pre, rec, fsc))

Precision: 0.543257 %
Recall: 0.816444 %
Fscore: 0.582220 %


In [12]:
threshold = 0.6
beta = 0.5

pre = K.eval(precision_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
rec = K.eval(recall_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
fsc = K.eval(fbeta_score_threshold(beta = beta, threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

print ("Precision: %f %%\nRecall: %f %%\nFscore: %f %%"% (pre, rec, fsc))

Precision: 0.672000 %
Recall: 0.321224 %
Fscore: 0.551543 %


# With gray images

In [13]:
import pickle

train_filename = "data_preprocessed/train_data_sample_gray.p"
(train_labels, train_data, train_tensors) = pickle.load(open(train_filename, mode='rb'))

valid_filename = "data_preprocessed/valid_data_sample_gray.p"
(valid_labels, valid_data, valid_tensors) = pickle.load(open(valid_filename, mode='rb'))

test_filename = "data_preprocessed/test_data_sample_gray.p"
(test_labels, test_data, test_tensors) = pickle.load(open(test_filename, mode='rb'))

In [15]:
import time

from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout, Flatten, Dense
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras import regularizers, initializers, optimizers

model = Sequential()

model.add(Conv2D(filters=16, 
                 kernel_size=7,
                 padding='same', 
                 activation='relu', 
                 input_shape=train_tensors.shape[1:]))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=32, 
                 kernel_size=5,
                 padding='same', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=64, 
                 kernel_size=5,
                 padding='same', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=128, 
                 kernel_size=5,
                 strides=2,
                 padding='same', 
                 activation='relu'))
model.add(MaxPooling2D(pool_size=2))

model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 64, 64, 16)        800       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 32, 32, 16)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 32, 32, 32)        12832     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 16, 16, 64)        51264     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 4, 4, 128)         204928    
__________

In [16]:
from keras import backend as K

def binary_accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.round(y_pred)))

def precision_threshold(threshold = 0.5):
    def precision(y_true, y_pred):
        threshold_value = threshold
        y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold_value), K.floatx())
        true_positives = K.round(K.sum(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(y_pred)
        precision_ratio = true_positives / (predicted_positives + K.epsilon())
        return precision_ratio
    return precision

def recall_threshold(threshold = 0.5):
    def recall(y_true, y_pred):
        threshold_value = threshold
        y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold_value), K.floatx())
        true_positives = K.round(K.sum(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.clip(y_true, 0, 1))
        recall_ratio = true_positives / (possible_positives + K.epsilon())
        return recall_ratio
    return recall

def fbeta_score_threshold(beta = 1, threshold = 0.5):
    def fbeta_score(y_true, y_pred):
        threshold_value = threshold
        beta_value = beta
        p = precision_threshold(threshold_value)(y_true, y_pred)
        r = recall_threshold(threshold_value)(y_true, y_pred)
        bb = beta_value ** 2
        fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
        return fbeta_score
    return fbeta_score

In [17]:
model.compile(optimizer='sgd', loss='binary_crossentropy', 
              metrics=[precision_threshold(threshold = 0.5), 
                       recall_threshold(threshold = 0.5), 
                       fbeta_score_threshold(beta=0.5, threshold = 0.5),
                      'accuracy'])

In [18]:
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
import numpy as np

epochs = 20
batch_size = 32

earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=1, mode='auto')
log = CSVLogger('saved_models/log_bCNN_gray.csv')
checkpointer = ModelCheckpoint(filepath='saved_models/bCNN_gray.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)

start = time.time()

model.fit(train_tensors, train_labels, 
          validation_data=(valid_tensors, valid_labels),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer, log, earlystop], verbose=1)

# Show total training time
print("training time: %.2f minutes"%((time.time()-start)/60))

Train on 3400 samples, validate on 1100 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 00012: early stopping
training time: 0.40 minutes


In [19]:
model.load_weights('saved_models/bCNN_gray.best.from_scratch.hdf5')
prediction = model.predict(test_tensors)

In [20]:
threshold = 0.5
beta = 0.5

pre = K.eval(precision_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
rec = K.eval(recall_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
fsc = K.eval(fbeta_score_threshold(beta = beta, threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

print ("Precision: %f %%\nRecall: %f %%\nFscore: %f %%"% (pre, rec, fsc))

Precision: 0.577114 %
Recall: 0.480331 %
Fscore: 0.554758 %


In [21]:
K.eval(binary_accuracy(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

0.51713276

In [22]:
prediction[:30]

array([[ 0.3081094 ],
       [ 0.24159142],
       [ 0.52262437],
       [ 0.59462857],
       [ 0.3100515 ],
       [ 0.62393486],
       [ 0.47555083],
       [ 0.48481095],
       [ 0.47963724],
       [ 0.46049529],
       [ 0.52123272],
       [ 0.38751996],
       [ 0.35624275],
       [ 0.53882909],
       [ 0.63341409],
       [ 0.47135681],
       [ 0.61958778],
       [ 0.42561847],
       [ 0.51211774],
       [ 0.29424879],
       [ 0.38310093],
       [ 0.28851342],
       [ 0.35126474],
       [ 0.65281165],
       [ 0.48659411],
       [ 0.43335259],
       [ 0.32977027],
       [ 0.65944982],
       [ 0.6016748 ],
       [ 0.62601507]], dtype=float32)

In [23]:
threshold = 0.4
beta = 0.5

pre = K.eval(precision_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
rec = K.eval(recall_threshold(threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))
fsc = K.eval(fbeta_score_threshold(beta = beta, threshold = threshold)(K.variable(value=test_labels),
                                   K.variable(value=prediction)))

print ("Precision: %f %%\nRecall: %f %%\nFscore: %f %%"% (pre, rec, fsc))

Precision: 0.523416 %
Recall: 0.786749 %
Fscore: 0.560968 %
