In [1]:
import numpy as np
import os
from dicompylercore import dicomparser
import cv2
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.models import model_from_json
import matplotlib.pyplot as plt
import math
import sys

Using TensorFlow backend.


In [2]:
def proportional_resize(image, max_side):
    """Shrink ``image`` so that neither side exceeds ``max_side`` pixels.

    The longer side is set to ``max_side`` and the other side is scaled by
    the same factor (truncated to an integer). Images that already fit keep
    their size but are still passed through ``cv2.resize``.

    Args:
        image: array whose first two axes are (height, width).
        max_side: maximum allowed length, in pixels, of either side.

    Returns:
        The array returned by ``cv2.resize``.
    """
    src_height, src_width = image.shape[0], image.shape[1]
    if src_height > max_side or src_width > max_side:
        if src_height > src_width:
            height = max_side
            # Truncation can yield 0 for very elongated images; cv2.resize
            # rejects a zero-sized target, so keep at least one pixel.
            width = max(1, int(height / src_height * src_width))
        else:
            width = max_side
            height = max(1, int(width / src_width * src_height))
    else:
        height, width = src_height, src_width
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (width, height))

In [3]:
def _class_index(label):
    """Map a class directory name to its integer label (None if unknown)."""
    if label == "norm":
        return 1
    if label == "pathology":
        return 0
    return None


def prepare_dataset(path, new, image_size=None):
    """Build image and label arrays for one dataset split.

    The label of every file is the name of the directory that contains it:
    ``"norm"`` maps to 1 and ``"pathology"`` maps to 0; files in any other
    directory are not added to the result. Hidden files (leading ``.``) are
    ignored.

    Args:
        path: root directory of the split. Its last component names the
            sub-directory of ``lungs-set`` used as the PNG cache.
        new: if true, read ``.dcm`` files below ``path``, preprocess them and
            write each result as a PNG into ``lungs-set/<split>/<label>/``.
            If false, read the previously written PNGs from that cache
            instead (``path`` is then only used for its last component).
        image_size: optional ``(width, height)`` passed to ``cv2.resize`` for
            the returned images. Defaults to ``(128, 128)`` when ``new`` is
            true and ``(256, 256)`` otherwise, matching the sizes that were
            previously hard-coded in each branch.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays; both are empty when no
        matching file is found.
    """
    import warnings

    d_set = []
    labels = []
    # normpath drops a trailing separator, which would otherwise make the
    # split name empty and point the cache at the whole "lungs-set" tree.
    folder = os.path.basename(os.path.normpath(path))
    cache_root = os.path.join("lungs-set", folder)

    if new:
        size = (128, 128) if image_size is None else image_size
        for root, _directories, files in os.walk(path):
            label = os.path.basename(root)
            for file in files:
                if file.startswith("."):
                    continue
                f, extension = os.path.splitext(file)
                if extension != ".dcm":
                    continue
                parsed = dicomparser.DicomParser(os.path.join(root, file))
                print(file)
                image = np.array(parsed.GetImage(), dtype=np.uint8)
                # MONOCHROME1 images are inverted so that every image uses
                # the same intensity polarity.
                if parsed.GetImageData()["photometricinterpretation"] == "MONOCHROME1":
                    image = 255 - image
                image = cv2.equalizeHist(image)
                image = cv2.medianBlur(image, 3)
                scaled_image = proportional_resize(image, 512)
                # NOTE(review): `lf` is not imported anywhere in the visible
                # notebook; this branch raises NameError unless it is
                # defined elsewhere.
                lungs_image = lf.get_lungs(scaled_image, 7)
                # cv2.imwrite does not create missing directories, so make
                # sure the cache directory exists before writing.
                out_dir = os.path.join(cache_root, label)
                os.makedirs(out_dir, exist_ok=True)
                cv2.imwrite(os.path.join(out_dir, f + ".png"), lungs_image)
                class_index = _class_index(label)
                if class_index is None:
                    continue
                d_set.append(cv2.resize(lungs_image, size))
                labels.append(class_index)
    else:
        size = (256, 256) if image_size is None else image_size
        for root, _directories, files in os.walk(cache_root):
            class_index = _class_index(os.path.basename(root))
            if class_index is None:
                # Nothing in this directory belongs to a known class.
                continue
            for file in files:
                if file.startswith("."):
                    continue
                file_path = os.path.join(root, file)
                # Flag 0 is passed to cv2.imread as before (grayscale per
                # the OpenCV documentation).
                image = cv2.imread(file_path, 0)
                if image is None:
                    # cv2.imread signals an unreadable file with None
                    # instead of raising.
                    warnings.warn("Skipping unreadable image: " + file_path)
                    continue
                image = np.array(image, dtype=np.uint8)
                d_set.append(cv2.resize(image, size))
                labels.append(class_index)
    return np.array(d_set), np.array(labels)

In [4]:
# Load the cached train/test splits and prepare them for the network.
np.random.seed(42)
def_width = 256
def_height = 256
# With new=False, prepare_dataset only uses the last path component
# ("train" / "test") to locate the PNG cache under ./lungs-set.
train = "/Volumes/DATA/KAMI/lung_dataset/train"
test = "/Volumes/DATA/KAMI/lung_dataset/test"
x_train, y_train = prepare_dataset(train, False)
x_test, y_test = prepare_dataset(test, False)
# optional to shuffle the dataset
#x_train, y_train = shuffle(x_train, y_train, random_state=0)
#x_test, y_test = shuffle(x_test, y_test, random_state=0)
# Scale 8-bit pixel values to the [0, 1] range.
x_train = x_train.astype(dtype="float32") / 255
x_test = x_test.astype(dtype="float32") / 255
# Image arrays are indexed (rows, columns) == (height, width); add a
# trailing single-channel axis.
x_train = x_train.reshape(x_train.shape[0], def_height, def_width, 1)
x_test = x_test.reshape(x_test.shape[0], def_height, def_width, 1)
# One-hot encode the two classes.
y_train = np_utils.to_categorical(y_train, 2)
y_test = np_utils.to_categorical(y_test, 2)

In [5]:
# Rebuild the network from its saved architecture (JSON); the context
# manager closes the file even if reading fails.
with open('model_256.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("weights_256.h5")
# evaluate loaded model on test data
loaded_model.compile(loss="categorical_crossentropy", optimizer="SGD", metrics=["accuracy"])
score = loaded_model.evaluate(x_test, y_test, verbose=0)
# Index 1 is the first compiled metric (accuracy); index 0 is presumably
# the loss, as in the Keras documentation.
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

acc: 70.50%


In [7]:
# Raw model outputs for every test sample; presumably one score per class
# (two columns) -- TODO confirm against the model's output layer.
predictions = loaded_model.predict(x_test)

In [34]:
def count_base_metrics(predictions, y_test):
    """Tally confusion-matrix counts from column 0 of scores and targets.

    For each sample, ``round(predictions[i][0])`` is the predicted flag and
    ``y_test[i][0]`` the actual flag; a value of 1 means "positive".
    (With the one-hot labels built in this notebook, column 0 presumably
    corresponds to class index 0 -- verify against the label encoding.)

    Args:
        predictions: sequence of per-sample score rows.
        y_test: sequence of per-sample target rows, indexable like
            ``predictions``.

    Returns:
        Tuple ``(tp, tn, fp, fn, cond_pos, cond_neg)`` where ``cond_pos`` and
        ``cond_neg`` count the samples whose actual flag is non-zero / zero.
    """
    true_pos = true_neg = false_pos = false_neg = 0
    actual_pos = actual_neg = 0
    for row, scores in enumerate(predictions):
        predicted = round(scores[0])
        actual = y_test[row][0]

        if predicted == 0 and actual == 0:
            true_neg += 1
        elif predicted == 0 and actual == 1:
            false_neg += 1
        elif predicted == 1 and actual == 0:
            false_pos += 1
        else:
            # Every remaining combination is counted as a true positive.
            true_pos += 1

        if actual == 0:
            actual_neg += 1
        else:
            actual_pos += 1
    return (true_pos, true_neg, false_pos, false_neg, actual_pos, actual_neg)

In [35]:
def count_metrics(tp,tn,fp,fn,cond_pos,cond_neg):
    """Turn confusion-matrix counts into rates.

    Args:
        tp, tn, fp, fn: true/false positive/negative counts.
        cond_pos: number of actually positive samples.
        cond_neg: number of actually negative samples.

    Returns:
        Tuple ``(TPR, TNR, FPR, FNR)``. A rate whose denominator is zero
        (no samples of that class) is undefined and returned as NaN instead
        of raising ``ZeroDivisionError``.
    """
    def _rate(count, total):
        # Guard against a class that is absent from the evaluated set.
        return count / total if total else float("nan")

    TPR = _rate(tp, cond_pos)
    TNR = _rate(tn, cond_neg)
    FPR = _rate(fp, cond_neg)
    FNR = _rate(fn, cond_pos)
    return(TPR,TNR,FPR,FNR)

In [36]:
# Confusion-matrix counts on the test set, then the derived rates.
base_counts = count_base_metrics(predictions, y_test)
tp, tn, fp, fn, cond_pos, cond_neg = base_counts
TPR, TNR, FPR, FNR = count_metrics(*base_counts)

In [45]:
# Report the four rates, one per line.
for caption, rate in (
    ("True Positive Rate: ", TPR),
    ("True Negative Rate: ", TNR),
    ("False Positive Rate: ", FPR),
    ("False Negative Rate: ", FNR),
):
    print(caption, rate)

True Positive Rate:  0.875
True Negative Rate:  0.5327510917030568
False Positive Rate:  0.4672489082969432
False Negative Rate:  0.125
