In [60]:
import tensorflow
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import scipy
import dicom
import numpy as np
import os

In [98]:
# dimensions of our images.
img_width, img_height = 64, 64

pathTrainNeg = "./fibrosis_patches_8/train/norm"
pathTrainPos = "./fibrosis_patches_8/train/path"
pathTestPos = "./fibrosis_patches_8/test/path"
pathTestNeg = "./fibrosis_patches_8/test/norm"
nb_train_samples = 10000
nb_validation_samples = 2000
epochs = 50
batch_size = 16

In [18]:
# kind of preprocessing and normalizing dicom images
def load_and_normalize_dicom(path):
    dicom1 = dicom.read_file(path)
    dicom_img = dicom1.pixel_array.astype(np.float64)
    mn = dicom_img.min()
    mx = dicom_img.max()
    if (mx - mn) != 0:
        dicom_img = (dicom_img - mn)/(mx - mn)
    else:
        dicom_img[:, :] = 0
    return dicom_img

In [65]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [69]:
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_width, img_height)
else:
    input_shape = (img_width, img_height, 1)

In [112]:
model = Sequential()

model.add(Conv2D(16, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
#model.add(Dropout(0.25))

model.add(Conv2D(16, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
#model.add(Dropout(0.25))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [95]:
lstFilesTrainNeg = []
lstFilesTrainPos = []
lstFilesTestNeg = []
lstFilesTestPos = []
for dirName, subdirList, fileList in os.walk(pathTrainNeg):
    for filename in fileList:
        if ".dcm" in filename.lower():
            lstFilesTrainNeg.append(os.path.join(dirName,filename))
for dirName, subdirList, fileList in os.walk(pathTrainPos):
    for filename in fileList:
        if ".dcm" in filename.lower():
            lstFilesTrainPos.append(os.path.join(dirName,filename))
for dirName, subdirList, fileList in os.walk(pathTestPos):
    for filename in fileList:
        if ".dcm" in filename.lower():
            lstFilesTestPos.append(os.path.join(dirName,filename))
for dirName, subdirList, fileList in os.walk(pathTestNeg):
    for filename in fileList:
        if ".dcm" in filename.lower():
            lstFilesTestNeg.append(os.path.join(dirName,filename))

In [99]:
# reading the dataset
X_train=[]
y_train=[]
X_test=[]
y_test=[]

for idx,img in enumerate(lstFilesTrainNeg):
    X_train.append(load_and_normalize_dicom(img))
    y_train.append(0)
    if idx>nb_train_samples:
        break

for idx,img in enumerate(lstFilesTrainPos):
    X_train.append(load_and_normalize_dicom(img))
    y_train.append(1)
    if idx>nb_train_samples:
        break

(X_train, y_train) = unison_shuffled_copies(np.asarray(X_train), np.asarray(y_train))
X_train = np.asanyarray(X_train)
X_train = X_train.reshape(X_train.shape[0], img_width, img_height,1)
for idx,img in enumerate(lstFilesTestNeg):
    X_test.append(load_and_normalize_dicom(img))
    y_test.append(0)
    if idx>nb_validation_samples:
        break
for idx,img in enumerate(lstFilesTestPos):
    X_test.append(load_and_normalize_dicom(img))
    y_test.append(1)
    if idx>nb_validation_samples:
        break
X_test = np.asanyarray(X_test)
X_test = X_test.reshape(X_test.shape[0], img_width, img_height,1)

In [113]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,  validation_data=(X_test, y_test), shuffle=False)

Train on 19736 samples, validate on 4004 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
 3488/19736 [====>.........................] - ETA: 64s - loss: 0.1898 - acc: 0.9289

KeyboardInterrupt: 

Train on 19736 samples, validate on 4004 samples
Epoch 1/50
19736/19736 [==============================] - 81s - loss: 0.6475 - acc: 0.6190 - val_loss: 0.5621 - val_acc: 0.7373
Epoch 2/50
19736/19736 [==============================] - 80s - loss: 0.5807 - acc: 0.6891 - val_loss: 0.5320 - val_acc: 0.7110
Epoch 3/50
19736/19736 [==============================] - 82s - loss: 0.5337 - acc: 0.7239 - val_loss: 0.5859 - val_acc: 0.6496
Epoch 4/50
19736/19736 [==============================] - 80s - loss: 0.4795 - acc: 0.7614 - val_loss: 0.7152 - val_acc: 0.5937

In [118]:
# some paragraphs about augumentation
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from PIL import Image
dest_path = os.path.dirname(os.path.realpath("./")) + "/augum_data/"
datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')
x = load_and_normalize_dicom('fibrosis_patches_8/train/norm/1_1.dcm')
x = x.reshape((1,)+x.shape+(1,))
# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory
i = 0
for batch in datagen.flow(x, batch_size=1,
                          save_to_dir=dest_path, save_prefix='aug', save_format='jpeg'):
    i += 1
    if i > 20:
        break  # otherwise the generator would loop indefinitely

FileNotFoundError: [Errno 2] No such file or directory: '/Users/ilyas/Documents/Учеба/KAMI/augum_data/aug_0_3155.jpeg'