# FRACTURE DETECTION

### PROJECT: https://github.com/iamndlovu/trauma_series_detector

### Import all the Dependencies

In [2]:
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from IPython.display import HTML

### Set all the Constants

In [3]:
# Hyper-parameters shared by the cells below.
BATCH_SIZE = 32    # batch size handed to the dataset loader
IMAGE_SIZE = 256   # side length (pixels) used for both image height and width
CHANNELS = 3       # channel count used in the model input shape
EPOCHS = 60        # number of epochs requested from model.fit

### Import data into tensorflow dataset object

### We will use image_dataset_from_directory api to load all images in tensorflow dataset: https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory

In [4]:
from struct import unpack
from tqdm import tqdm
import os


# Human-readable names for the two-byte JPEG markers handled by the checker below.
marker_mapping = dict([
    (0xFFD8, "Start of Image"),
    (0xFFE0, "Application Default Header"),
    (0xFFDB, "Quantization Table"),
    (0xFFC0, "Start of Frame"),
    (0xFFC4, "Define Huffman Table"),
    (0xFFDA, "Start of Scan"),
    (0xFFD9, "End of Image"),
])


class JPEG:
    """Minimal JPEG structure checker used to spot corrupt image files.

    The whole file is read into memory on construction. ``decode`` then walks
    the marker segments; it never decodes any pixel data.
    """

    def __init__(self, image_file):
        """Read the raw bytes of ``image_file`` into ``self.img_data``."""
        with open(image_file, 'rb') as f:
            self.img_data = f.read()

    def decode(self):
        """Walk the marker segments until the End of Image marker is found.

        Returns:
            None, once an End of Image marker (0xFFD9) is reached.

        Raises:
            TypeError: if the data is exhausted before End of Image, or if
                the data ends in a Start of Scan marker.
            struct.error: if fewer than two bytes remain where a marker or a
                segment length is expected.
        """
        data = self.img_data
        while True:
            marker, = unpack(">H", data[0:2])
            if marker == 0xffd8:
                # Start of Image has no payload: step over the marker itself.
                data = data[2:]
            elif marker == 0xffd9:
                # End of Image reached: the structure is considered valid.
                return
            elif marker == 0xffda:
                if len(data) <= 2:
                    # Only the marker is left, so data[-2:] would make no
                    # progress and the loop would never terminate.
                    raise TypeError("issue reading jpeg file")
                # Skip the scan data and inspect the final two bytes next.
                data = data[-2:]
            else:
                # Any other segment: a big-endian length follows the marker
                # and counts its own two bytes.
                lenchunk, = unpack(">H", data[2:4])
                data = data[2 + lenchunk:]
            if len(data) == 0:
                raise TypeError("issue reading jpeg file")


# Check every file inside the sub-directories of the image folder with the
# JPEG structure checker and delete the ones it rejects.
# An earlier source location, '../input/chestpelviscspinescans', was copied
# into the working directory with `cp -r`.
bads = []
img_dir = './imgs'

for dirName, subdirList, fileList in os.walk(img_dir):
    for subdir in subdirList:
        subdirPath = os.path.join(dirName, subdir)
        for img in tqdm(os.listdir(subdirPath)):
            imagePath = os.path.join(subdirPath, img)
            if not os.path.isfile(imagePath):
                # Nested directories are reached by os.walk on a later pass;
                # opening one as a file would raise outside the try below.
                continue
            image = JPEG(imagePath)
            try:
                image.decode()
            except Exception:
                # Not a bare except: a KeyboardInterrupt must stop the scan
                # instead of marking the current file for deletion.
                bads.append(imagePath)


for imagePath in bads:
    os.remove(imagePath)

In [5]:
# Build a batched, shuffled tf.data.Dataset from the image folder.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    directory="./imgs",
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    shuffle=True,
    seed=123,
)

**Watch video below on tensorflow input pipeline first if you don't know about tensorflow datasets**

In [6]:
# Render an embedded YouTube player as this cell's output.
HTML("""
<iframe width="560" height="315" src="https://www.youtube.com/embed/VFEOskzhhbc" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
""")

In [7]:
# Class labels exposed by the loaded dataset; the bare name on the last line displays them.
class_names = dataset.class_names
class_names

In [8]:
# Pull a single batch and print the image tensor shape and the label values.
for image_batch, labels_batch in dataset.take(1):
    print(image_batch.shape)
    print(labels_batch.numpy())

### Visualize some of the images from our dataset

In [9]:
# Show the first 12 images of one batch in a 3x4 grid, each titled with its class name.
plt.figure(figsize=(10, 10))
for image_batch, labels_batch in dataset.take(1):
    for i in range(12):
        ax = plt.subplot(3, 4, i + 1)
        # Cast to uint8 before handing the pixel values to imshow.
        plt.imshow(image_batch[i].numpy().astype("uint8"))
        plt.title(class_names[labels_batch[i]])
        plt.axis("off")

### Function to Split Dataset

Dataset should be bifurcated into 3 subsets, namely:
1. Training: Dataset to be used while training
2. Validation: Dataset to be tested against while training
3. Test: Dataset to be tested against after we trained a model

In [10]:
def get_dataset_partitions_tf(ds, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    """Split a dataset into train, validation and test subsets.

    Args:
        ds: dataset supporting ``len()``, ``shuffle``, ``take`` and ``skip``.
        train_split: fraction of the elements assigned to the training subset.
        val_split: fraction of the elements assigned to the validation subset.
        test_split: fraction for the test subset; the test subset actually
            receives whatever remains after the other two.
        shuffle: whether to shuffle ``ds`` (fixed seed) before splitting.
        shuffle_size: buffer size passed to ``ds.shuffle``.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple.

    Raises:
        AssertionError: if the three fractions do not sum to 1.
    """
    # Compare with a tolerance: a sum such as 0.6 + 0.3 + 0.1 is not exactly
    # 1.0 in floating point, so an exact equality check rejects valid splits.
    assert abs((train_split + test_split + val_split) - 1) < 1e-9

    ds_size = len(ds)

    if shuffle:
        # Keep one fixed order across iterations. With the default
        # reshuffling, every pass over the subsets sees a new order, so the
        # take/skip slices below would not stay disjoint.
        # NOTE(review): an input dataset that reshuffles itself on each
        # iteration can still mix the subsets — confirm against the caller.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    remainder = ds.skip(train_size)
    val_ds = remainder.take(val_size)
    test_ds = remainder.skip(val_size)

    return train_ds, val_ds, test_ds

In [11]:
# Split the loaded dataset using the function's default fractions.
train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)

In [12]:
# Number of elements (batches) in the training split.
len(train_ds)

In [13]:
# Number of elements (batches) in the validation split.
len(val_ds)

In [14]:
# Number of elements (batches) in the test split.
len(test_ds)

### Cache, Shuffle, and Prefetch the Dataset

In [15]:
# Apply the same cache -> shuffle -> prefetch chain to each of the three splits.
train_ds, val_ds, test_ds = (
    split.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
)

## Building the Model

### Creating a Layer for Resizing and Normalization
Before we feed our images to network, we should be resizing it to the desired size. 
Moreover, to improve model performance, we should normalize the image pixel values (keeping them in the range 0 to 1 by dividing by 255).
This should happen while training as well as inference. Hence we can add that as a layer in our Sequential Model.

You might be wondering why we need to resize a (256,256) image to (256,256) again. You are right, we don't need to, but this will be useful when we are done with the training and start using the model for predictions. At that time someone can supply an image that is not (256,256) and this layer will resize it.

In [16]:
# Preprocessing stage: resize to IMAGE_SIZE x IMAGE_SIZE, then multiply pixel values by 1/255.
# NOTE(review): layers.experimental.preprocessing looks like the older namespace
# for these layers — confirm it exists in the installed TensorFlow version.
resize_and_rescale = tf.keras.Sequential([
  layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
  layers.experimental.preprocessing.Rescaling(1./255),
])

### Data Augmentation
Data Augmentation is needed when we have less data, this boosts the accuracy of our model by augmenting the data.

In [17]:
# Augmentation stage: random horizontal/vertical flips followed by a random
# rotation (factor 0.2).
data_augmentation = tf.keras.Sequential([
  layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
  layers.experimental.preprocessing.RandomRotation(0.2),
])

#### Applying Data Augmentation to Train Dataset

In [18]:
# Run the augmentation pipeline on the images of every training element
# (called with training=True); labels pass through unchanged.
train_ds = train_ds.map(
    lambda x, y: (data_augmentation(x, training=True), y)
).prefetch(buffer_size=tf.data.AUTOTUNE)

**Watch below video if you are not familiar with data augmentation**

In [19]:
# Render an embedded YouTube player as this cell's output.
HTML("""
<iframe width="560" height="315" src="https://www.youtube.com/embed/mTVf7BN7S8w" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
""")

### Model Architecture
We use a CNN coupled with a Softmax activation in the output layer. The model starts with the resizing and normalization layers; data augmentation is applied to the training dataset beforehand rather than inside the model.

**We are going to use convolutional neural network (CNN) here. CNN is popular for image classification tasks. Watch below video to understand fundamentals of CNN**

In [20]:
# Render an embedded YouTube player as this cell's output.
HTML("""
<iframe width="560" height="315" src="https://www.youtube.com/embed/zfiSAzpy9NM" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
""")

In [21]:
# Shape used to build the model. The batch dimension is None so the built
# model is not tied to BATCH_SIZE.
input_shape = (None, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = len(class_names)

# Resize/rescale, then six Conv2D + MaxPooling2D stages, then a small dense
# head ending in a softmax over the classes.
# No input_shape is passed to the first Conv2D: that argument describes a
# single sample (no batch dimension) and the layer is not the first one in
# the model. The input shape is supplied through model.build() below.
model = models.Sequential([
    resize_and_rescale,
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])

model.build(input_shape=input_shape)

In [22]:
# Print the layer-by-layer summary of the built model.
model.summary()

### Compiling the Model
We use `adam` Optimizer, `SparseCategoricalCrossentropy` for losses, `accuracy` as a metric

In [23]:
# from_logits=False because the model's last layer already applies softmax.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [None]:
# Train on the already-batched training dataset and validate on val_ds after
# each epoch. batch_size is not passed: the dataset was batched when it was
# created, and Keras documents that batch_size should not be specified for
# dataset inputs.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    verbose=1,
    epochs=EPOCHS,
)

In [None]:
# Evaluate the trained model on the test split; `scores` holds what model.evaluate returns.
scores = model.evaluate(test_ds)

### Plotting the Accuracy and Loss Curves

In [None]:
# Display the object returned by model.fit.
history

In [None]:
# Show the container type used for the recorded loss values.
type(history.history['loss'])

In [None]:
# Peek at the start of the recorded training loss.
history.history['loss'][:5] # show loss for first 5 epochs

In [None]:
# Pull the recorded training/validation metric series out of the history
# for plotting in the next cell.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
# Plot accuracy and loss side by side. The x-axis is taken from the recorded
# history rather than EPOCHS, so the plot still works when fewer epochs were
# completed than requested (a length mismatch makes plt.plot raise).
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

### Run prediction on a sample image

In [None]:
import numpy as np
# Take one test batch, show its first image with the true label, then predict
# the whole batch and report the predicted class of that first image.
for images_batch, labels_batch in test_ds.take(1):
    
    first_image = images_batch[0].numpy().astype('uint8')
    first_label = labels_batch[0].numpy()
    
    print("first image to predict")
    plt.imshow(first_image)
    print("actual label:",class_names[first_label])
    
    batch_prediction = model.predict(images_batch)
    # argmax over the per-class scores gives the index into class_names.
    print("predicted label:",class_names[np.argmax(batch_prediction[0])])

### Write a function for inference

In [None]:
def predict(model, img):
    """Classify a single image and report the winning class and its confidence.

    Args:
        model: model exposing ``predict``.
        img: one image without a batch dimension, in a form accepted by
            ``tf.keras.preprocessing.image.img_to_array``.

    Returns:
        A ``(predicted_class, confidence)`` tuple. ``predicted_class`` is the
        entry of the notebook-level ``class_names`` with the highest score.
        ``confidence`` is that score as a percentage rounded to two decimals.
    """
    # Convert the argument itself, not a notebook-level loop variable, so the
    # function works for any image the caller passes in.
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    # The model expects a batch, so add a leading batch dimension of size 1.
    img_array = tf.expand_dims(img_array, 0)

    predictions = model.predict(img_array)

    predicted_class = class_names[np.argmax(predictions[0])]
    confidence = round(100 * (np.max(predictions[0])), 2)
    return predicted_class, confidence

**Now run inference on few sample images**

In [None]:
# Show the first 9 images of one test batch in a 3x3 grid, each titled with
# its actual class, predicted class and confidence.
plt.figure(figsize=(15, 15))
for images, labels in test_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        
        predicted_class, confidence = predict(model, images[i].numpy())
        actual_class = class_names[labels[i]] 
        
        plt.title(f"Actual: {actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%")
        
        plt.axis("off")

### Saving the Model
We save the trained model to `./models/trauma_series_model.h5`.

In [None]:
# Persist the trained model to a single .h5 file (path relative to the working directory).
model.save("./models/trauma_series_model.h5")