In [1]:
import pandas as pd
import numpy as np
import Preprocess5
import pydicom
import matplotlib.pyplot as plt
import cv2
from pympler import asizeof

In [2]:
# Root folder of the RSNA intracranial-hemorrhage dataset, relative to this notebook.
path = '../../input/rsna-intracranial-hemorrhage-detection/'
# Shared preprocessing helper; later cells call its resizing, transform and label methods.
preprocess = Preprocess5.Preprocess(path)

In [3]:
# Batch size: number of DICOM files (and 6*N label rows) taken per train/test slice.
# NOTE(review): a later cell reassigns N = 100, so re-running earlier cells afterwards
# silently changes their slice sizes.
N = 500

In [4]:
# Glob pattern for the stage-1 training DICOM files, built from the dataset root
# so the location is spelled out in one place only.
path_dcmFiles = path + 'stage_1_train_images/*.dcm'

# Keep only the first N matching files for the initial training run.
all_dcm_files = Preprocess5.Preprocess.read_dcm_files(path_dcmFiles)
dcm_files = all_dcm_files[:N]
len(dcm_files)

500

In [5]:
# Target size handed to preprocess.resizing for every slice.
desired_size = (128, 128)

# Read each DICOM file and let the preprocessor produce a resized pixel array.
converted_training_images = [
    preprocess.resizing(pydicom.dcmread(file_name), desired_size)
    for file_name in dcm_files
]

In [6]:
# Report the in-memory footprint (pympler asizeof, in MiB) and dtype of the resized arrays.
print(f'{N} pixel arrays: {asizeof.asizeof(converted_training_images)//2**20} MB')
print(f'dtype: {converted_training_images[0].dtype}')

500 pixel arrays: 187 MB
dtype: float64


In [7]:
# fig=plt.figure(figsize=(15, 10))
# columns = 5; rows = 6
# for i in range(1, columns*rows +1):
#     fig.add_subplot(rows, columns, i) 
#     plt.imshow(converted_training_images[i], cmap=plt.cm.bone)



In [8]:
# Apply the preprocessor's per-image transform (reported as "normalized" below).
training_imgs = preprocess.transform_all_pixel_arrays(converted_training_images)
print(f'{N} normalized pixel arrays: {asizeof.asizeof(training_imgs)//2**20} MB')
print(f'dtype: {training_imgs[0].dtype}')
# Stack everything into a single ndarray for Keras (recorded shape: (500, 128, 128, 3)).
training_imgs = np.array(training_imgs)
print(f'training_imgs.shape: {training_imgs.shape}')

500 normalized pixel arrays: 187 MB
dtype: float64
training_imgs.shape: (500, 128, 128, 3)


## Finally reading in the labels for training

In [9]:
# Load the full label table, then keep the first 6*N rows for the N training images.
# The preview below shows six rows per image ID ('any' plus five sub-types), hence the factor 6.
# NOTE(review): assumes the label rows are ordered like dcm_files — confirm in Preprocess5.
labels_all = preprocess.get_all_labels('stage_1_train.csv')
labels = labels_all[:6*N]
labels.head(12)

Unnamed: 0,ID,Label,Sub_type,PatientID
361025,ID_000039fa0_any,0,any,000039fa0
361020,ID_000039fa0_epidural,0,epidural,000039fa0
361021,ID_000039fa0_intraparenchymal,0,intraparenchymal,000039fa0
361022,ID_000039fa0_intraventricular,0,intraventricular,000039fa0
361023,ID_000039fa0_subarachnoid,0,subarachnoid,000039fa0
361024,ID_000039fa0_subdural,0,subdural,000039fa0
3626117,ID_00005679d_any,0,any,00005679d
3626112,ID_00005679d_epidural,0,epidural,00005679d
3626113,ID_00005679d_intraparenchymal,0,intraparenchymal,00005679d
3626114,ID_00005679d_intraventricular,0,intraventricular,00005679d


In [10]:
# Keep only the 'Label' column; there are still six entries per image at this point.
training_labels = labels['Label']
len(training_labels)

3000

In [11]:
# Collapse the per-row labels into one entry per image via Preprocess.categorize
# (the recorded length drops from 3000 to 500).
# NOTE(review): presumably encodes the six binary flags as one of 2**6 = 64 class ids,
# matching the 64-unit softmax used by the model — confirm in Preprocess5.
training_labels = np.array(training_labels)
training_labels = preprocess.categorize(training_labels)
len(training_labels)

500

In [12]:
# Convert the categorized labels to an ndarray so they can be passed to model.fit.
training_labels = np.array(training_labels) 

In [13]:
import tensorflow as tf

# Small CNN: two conv/pool stages followed by a dense classifier head.
# The 64-way softmax pairs with sparse_categorical_crossentropy, i.e. integer class ids.
# NOTE(review): 64 presumably equals 2**6 combinations of the six binary labels — confirm
# against Preprocess.categorize.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                           input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 57600)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               7372928   
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8

In [14]:
# Train for two epochs, holding out the last 10% of the samples for validation.
model.fit(
    x=training_imgs,
    y=training_labels,
    batch_size=10,
    epochs=2,
    validation_split=0.10,
    shuffle=True,
)

Train on 450 samples, validate on 50 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x1c3d06b7f0>

## Saving the model

In [15]:
# Save only the weights under the prefix 'weights'; a later cell restores them into
# model2 with load_weights('weights').
model.save_weights('weights')

## Alternative version to save the model and the weights

In [16]:
# Architecture goes to model.json, trained parameters to model.h5.
architecture_json = model.to_json()
with open("model.json", "w") as json_out:
    json_out.write(architecture_json)

model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


## Creating new model with the same weights

In [17]:
# Fresh network with the same layer stack as `model`, so the saved weights fit exactly.
model2 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                           input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='softmax'),
])
model2.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model2.summary()

# Restore the parameters written by model.save_weights('weights').
model2.load_weights('weights')

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 126, 126, 64)      1792      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 63, 63, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 61, 61, 64)        36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 30, 30, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 57600)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               7372928   
_________________________________________________________________
dense_3 (Dense)              (None, 64)               

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1c3cde6dd8>

## Create new model from the Json and the H5 weights

In [18]:
import tensorflow as tf

# Rebuild the architecture from the JSON written by the save cell. The `with` block
# guarantees the file handle is closed even if reading fails.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = tf.keras.models.model_from_json(loaded_model_json)

# Restore the trained parameters from the HDF5 weights file.
# The model is not compiled in this cell; call compile() before fit()/evaluate().
loaded_model.load_weights("model.h5")
print("Loaded model from disk")
loaded_model.summary()

Loaded model from disk
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 57600)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               7372928   
_________________________________________________________________
dense_1 (Dense)              (Non

## Let's test/evaluate on the next N images

In [19]:
# The next N files (positions N .. 2N-1) are used as the evaluation set.
# NOTE(review): this lists the directory again and assumes the same ordering as the
# first call — confirm that read_dcm_files returns a deterministic order.
dcm_files2 = Preprocess5.Preprocess.read_dcm_files(path_dcmFiles)[N:2*N]
len(dcm_files2)


500

In [20]:
# Same target size as used for the training images.
desired_size = (128, 128)

# Read and resize every evaluation DICOM file.
converted_testing_images = [
    preprocess.resizing(pydicom.dcmread(file_name), desired_size)
    for file_name in dcm_files2
]

In [21]:
# Report the in-memory footprint (pympler asizeof, in MiB) and dtype of the resized test arrays.
print(f'{N} pixel arrays: {asizeof.asizeof(converted_testing_images)//2**20} MB')
print(f'dtype: {converted_testing_images[0].dtype}')

500 pixel arrays: 187 MB
dtype: float64


In [22]:
# Apply the same per-image transform that was used for the training images.
testing_imgs = preprocess.transform_all_pixel_arrays(converted_testing_images)
print(f'{N} normalized pixel arrays: {asizeof.asizeof(testing_imgs)//2**20} MB')
print(f'dtype: {testing_imgs[0].dtype}')
# Stack into a single ndarray for Keras.
testing_imgs = np.array(testing_imgs)
# The label now names the array actually being reported (it used to say training_imgs).
print(f'testing_imgs.shape: {testing_imgs.shape}')

500 normalized pixel arrays: 187 MB
dtype: float64
training_imgs.shape: (500, 128, 128, 3)


In [23]:
# Label rows for the evaluation images: the second block of 6*N rows (six rows per image).
labels2 = labels_all[6*N: 2*6*N]
labels2.head(12)

Unnamed: 0,ID,Label,Sub_type,PatientID
1870853,ID_00318f225_any,0,any,00318f225
1870848,ID_00318f225_epidural,0,epidural,00318f225
1870849,ID_00318f225_intraparenchymal,0,intraparenchymal,00318f225
1870850,ID_00318f225_intraventricular,0,intraventricular,00318f225
1870851,ID_00318f225_subarachnoid,0,subarachnoid,00318f225
1870852,ID_00318f225_subdural,0,subdural,00318f225
1738613,ID_0031b5cf8_any,1,any,0031b5cf8
1738608,ID_0031b5cf8_epidural,0,epidural,0031b5cf8
1738609,ID_0031b5cf8_intraparenchymal,1,intraparenchymal,0031b5cf8
1738610,ID_0031b5cf8_intraventricular,0,intraventricular,0031b5cf8


In [24]:
# Keep only the 'Label' column; there are still six entries per image at this point.
testing_labels = labels2['Label']
len(testing_labels)

3000

In [25]:
# Collapse the six per-image rows into one entry per image, mirroring the training labels
# (the recorded length drops from 3000 to 500).
testing_labels = np.array(testing_labels)
testing_labels = preprocess.categorize(testing_labels)

# Convert to ndarray so the labels can be passed to model.evaluate.
testing_labels = np.array(testing_labels)
len(testing_labels)

500

In [26]:
# Evaluate the in-memory `model` (not the reloaded copies) on the held-out images.
test_loss, test_accuracy = model.evaluate(testing_imgs, testing_labels, verbose=0)
print(f'Test loss: {test_loss}, Test accuracy: {test_accuracy}')

Test loss: 0.6629268136024475, Test accuracy: 0.8740000128746033


## Prediction

In [27]:
# Class probabilities for every evaluation image; predictions[0] is the 64-way softmax
# vector for the first one.
predictions = model.predict(testing_imgs, verbose=0)
predictions[0]

array([8.37070882e-01, 1.13038674e-04, 1.45212776e-04, 8.00281018e-03,
       1.55167378e-04, 1.93863679e-02, 5.49856981e-04, 3.10031086e-04,
       1.25942708e-04, 1.28439385e-02, 4.27910039e-04, 3.68300622e-04,
       9.40275568e-05, 6.17166143e-03, 1.19708908e-04, 3.19614119e-05,
       1.68291619e-04, 2.92985421e-02, 1.92215943e-04, 7.61692238e-04,
       2.29988975e-04, 7.03549897e-03, 7.01124445e-05, 9.55161959e-05,
       3.41043313e-04, 5.14581567e-03, 8.19731562e-04, 3.33591779e-05,
       3.78512079e-04, 1.21367211e-03, 1.83366690e-04, 8.62542656e-04,
       1.04124592e-04, 4.06914800e-02, 1.20759221e-04, 3.92936025e-04,
       6.21730796e-05, 3.31179891e-03, 7.51925691e-05, 4.68610699e-04,
       1.51188040e-04, 4.02721763e-03, 1.07004489e-04, 3.44214961e-04,
       7.12275578e-05, 3.05827078e-03, 1.11504269e-04, 1.18191434e-04,
       2.61053909e-04, 5.87284006e-03, 5.97130856e-04, 7.54681387e-05,
       1.40968186e-04, 5.23125380e-03, 5.12547267e-05, 1.71776090e-04,
      

## Looping/Loading next N training images

In [None]:
def load_next_training_batch(batch_index=0):
    """Load the ``batch_index``-th block of N images with its labels and train ``model`` on it.

    Reads the notebook globals ``N``, ``path_dcmFiles``, ``preprocess``, ``labels_all``
    and ``model``. Returns nothing; the global ``model`` is trained in place.

    Two defects of the earlier draft are fixed here: the label slice now follows
    ``batch_index`` (it was pinned to rows ``6*N:2*6*N``), and ``model.fit`` receives the
    batch loaded by this call instead of the global ``training_imgs``/``training_labels``.

    Note: a later cell defines another function with this same name, which replaces
    this one once that cell has run.
    """
    i = batch_index

    # Files i*N .. (i+1)*N - 1 of the directory listing.
    batch_files = Preprocess5.Preprocess.read_dcm_files(path_dcmFiles)[i*N : (i+1)*N]

    desired_size = (128, 128)
    converted_images = []
    for file_name in batch_files:
        dcm_data = pydicom.dcmread(file_name)
        converted_images.append(preprocess.resizing(dcm_data, desired_size))

    print(f'{N} pixel arrays: {asizeof.asizeof(converted_images)//2**20} MB')
    print(f'dtype: {converted_images[0].dtype}')

    batch_imgs = preprocess.transform_all_pixel_arrays(converted_images)
    print(f'{N} normalized pixel arrays: {asizeof.asizeof(batch_imgs)//2**20} MB')
    print(f'dtype: {batch_imgs[0].dtype}')
    batch_imgs = np.array(batch_imgs)
    print(f'training_imgs.shape: {batch_imgs.shape}')

    # Six label rows per image, so the row window is six times the file window.
    batch_rows = labels_all[i*(6*N) : (i+1)*(6*N)]
    batch_labels = np.array(preprocess.categorize(np.array(batch_rows['Label'])))

    # Train on the batch that was just loaded.
    model.fit(batch_imgs, batch_labels, epochs=2, validation_split=0.10,
              shuffle=True, batch_size=10)

# Architecture goes to model.json, trained parameters to model.h5.
architecture_json = model.to_json()
with open("model.json", "w") as json_out:
    json_out.write(architecture_json)

model.save_weights("model.h5")
print("Saved model to disk")



In [39]:
def load_next_training_batch(batch_index=0):
    """Load, preprocess and label the ``batch_index``-th block of N training images.

    Reads the notebook globals ``N``, ``path_dcmFiles``, ``preprocess`` and
    ``labels_all``. Progress information (memory footprint, dtype, shape) is printed.

    Args:
        batch_index: Zero-based index of the block of N files to load.

    Returns:
        Tuple ``(training_images, training_labels)`` of ndarrays for the block.
    """
    first, last = batch_index * N, (batch_index + 1) * N
    batch_files = Preprocess5.Preprocess.read_dcm_files(path_dcmFiles)[first:last]

    desired_size = (128, 128)
    converted_training_images = [
        preprocess.resizing(pydicom.dcmread(file_name), desired_size)
        for file_name in batch_files
    ]
    print(f'{N} pixel arrays: {asizeof.asizeof(converted_training_images)//2**20} MB')
    print(f'dtype: {converted_training_images[0].dtype}')

    training_images = preprocess.transform_all_pixel_arrays(converted_training_images)
    print(f'{N} normalized pixel arrays: {asizeof.asizeof(training_images)//2**20} MB')
    print(f'dtype: {training_images[0].dtype}')
    training_images = np.array(training_images)
    print(f'training_images.shape: {training_images.shape}')

    # Six label rows per image, so the row window is six times the file window.
    label_rows = labels_all[6 * first : 6 * last]
    categorized = preprocess.categorize(np.array(label_rows['Label']))

    return training_images, np.array(categorized)

def train_model(model, training_images, training_labels):
    """Compile ``model`` and fit it on one batch of images and labels.

    The model is compiled on every call with the Adam optimizer and sparse categorical
    cross-entropy, then trained for two epochs with a 10% validation split.

    Args:
        model: Object exposing Keras-style ``compile`` and ``fit`` methods.
        training_images: Input samples passed to ``fit``.
        training_labels: Targets passed to ``fit``.

    Returns:
        The same ``model`` object, after training.
    """
    compile_options = {
        'optimizer': 'adam',
        'loss': 'sparse_categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    fit_options = {
        'epochs': 2,
        'validation_split': 0.10,
        'shuffle': True,
        'batch_size': 10,
    }
    model.compile(**compile_options)
    model.fit(training_images, training_labels, **fit_options)
    return model

def save_model(model):
    """Write the model's architecture to ``model.json`` and its weights to ``model.h5``.

    Both files are created in the current working directory; existing files are
    overwritten. Prints a confirmation line when done.

    Args:
        model: Object exposing Keras-style ``to_json`` and ``save_weights`` methods.
    """
    # Serialize first, so a failure in to_json() does not truncate an existing file.
    architecture = model.to_json()
    with open("model.json", "w") as handle:
        handle.write(architecture)

    # Weights are stored separately in HDF5 format.
    model.save_weights("model.h5")
    print("Saved model to disk")

def load_model_with_weights():
    """Rebuild a model from ``model.json`` and load its weights from ``model.h5``.

    Both files are read from the current working directory. The architecture is taken
    from the JSON file read here, not from any notebook global, so the function also
    works on a fresh kernel.

    Returns:
        The reconstructed model with weights loaded. It is not compiled by this
        function; compile it before training or evaluating.
    """
    import tensorflow as tf

    # The context manager closes the file even if reading raises.
    with open('model.json', 'r') as json_file:
        model_json = json_file.read()

    model = tf.keras.models.model_from_json(model_json)
    # load weights into new model
    model.load_weights("model.h5")
    print("Loaded model from disk")

    return model

In [40]:
# Incremental training: load a batch, train, persist to disk, then reload from disk.
# NOTE(review): this rebinds the global N (500 earlier) and overwrites the global
# training_labels, so earlier cells behave differently if re-run after this one.
N = 100
for batch_index in range(3):
    training_images, training_labels = load_next_training_batch(batch_index)
    model = train_model(model, training_images, training_labels)
    save_model(model)
    # Continue from the on-disk copy; train_model compiles it again on the next pass.
    model = load_model_with_weights()


100 pixel arrays: 37 MB
dtype: float64
100 normalized pixel arrays: 37 MB
dtype: float64
training_images.shape: (100, 128, 128, 3)
Train on 90 samples, validate on 10 samples
Epoch 1/2
Epoch 2/2
Saved model to disk
Loaded model from disk
100 pixel arrays: 37 MB
dtype: float64
100 normalized pixel arrays: 37 MB
dtype: float64
training_images.shape: (100, 128, 128, 3)
Train on 90 samples, validate on 10 samples
Epoch 1/2
Epoch 2/2
Saved model to disk
Loaded model from disk
100 pixel arrays: 37 MB
dtype: float64
100 normalized pixel arrays: 37 MB
dtype: float64
training_images.shape: (100, 128, 128, 3)
Train on 90 samples, validate on 10 samples
Epoch 1/2
Epoch 2/2
Saved model to disk
Loaded model from disk
