<a href="https://colab.research.google.com/github/dmunger27/dental-xray-segmentation/blob/main/Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install cloudpathlib
!pip install boto3
!pip install tensorflow
!pip install pillow
!pip install opencv-python
!pip install focal_loss

In [3]:
import os
import shutil
import pandas as pd
import numpy as np
from cloudpathlib import CloudPath
from cloudpathlib import S3Client
from cloudpathlib import S3Path
import matplotlib.pyplot as plt
import boto3
import glob
import PIL
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import load_img, img_to_array, array_to_img
from focal_loss import BinaryFocalLoss
from sklearn.model_selection import train_test_split


In [5]:
# Set environment variables
key_data = pd.read_csv('rootkey.csv')
#os.environ['AWS_ACCESS_KEY'] = ''
#os.environ['AWS_SECRET_KEY'] = ''

In [6]:
# Create files in local storage for x-ray data

def gatherS3Data(s3_path, folder_name):
  s3_client = S3Client(aws_access_key_id=os.getenv('AWS_ACCESS_KEY'), aws_secret_access_key=os.getenv('AWS_SECRET_KEY'))
  x_rays_images = s3_client.CloudPath(s3_path)
  x_rays_images.download_to(folder_name)

gatherS3Data('s3://ads-599-capstone-data/ads-599-team7/cleaned_abnormalities/', 'abnormality_masks')
gatherS3Data('s3://ads-599-capstone-data/ads-599-team7/cleaned_original/', 'original_images')
gatherS3Data('s3://ads-599-capstone-data/ads-599-team7/cleaned_teeth/', 'teeth_masks')

In [7]:
# Load and prepare the data
img_size = (256, 256)
def load_prep(directory, dim, dtype, color_mode):
  data = sorted(glob.glob(directory + '/*'))
  num_imgs = len(data)
  img_size = (256, 256)
  model_imgs = np.zeros((num_imgs,) + img_size + (dim,), dtype=dtype)
  for i in range(num_imgs):
    if color_mode=='grayscale':
      model_imgs[i] = img_to_array(load_img(data[i], color_mode = color_mode, target_size=img_size)) > 100
      model_imgs[i] = model_imgs[i].astype('uint8')
    else:
      model_imgs[i] = img_to_array(load_img(data[i], color_mode = color_mode, target_size=img_size))
  return model_imgs

original = load_prep('original_images', 3, 'float32', 'rgb')
teeth = load_prep('teeth_masks', 1, 'uint8', 'grayscale')
abnormality = load_prep('abnormality_masks', 1, 'uint8', 'grayscale')

In [8]:
# Filter abnormalities to include only visible masks
def subset_abnormalities(ab_list):
  filtered = []
  for i in range(len(ab_list)):
    if np.sum(ab_list[i]) > 0:
      filtered.append(i)
  return filtered

subset_list = subset_abnormalities(abnormality)
ab_subset = [abnormality[i] for i in subset_list]
orig_subset = [original[i] for i in subset_list]
num_imgs = len(subset_list)
img_size = (256, 256)
ab_subset_imgs = np.zeros((num_imgs,) + img_size + (1,), dtype='uint8')
orig_subset_imgs = np.zeros((num_imgs,) + img_size + (3,), dtype='float32')
for i in range(num_imgs):
  ab_subset_imgs[i] = ab_subset[i]
  orig_subset_imgs[i] = orig_subset[i]

In [9]:
# Train-test-valid split of data
def train_valid_test(array_name):
  train, test = train_test_split(array_name, test_size=0.1, random_state=27)
  # Train test split again to obtain validation set
  train, val = train_test_split(train, test_size=0.1, random_state=27)
  return train, val, test


train_orig, val_orig, test_orig = train_valid_test(original)
train_teeth, val_teeth, test_teeth = train_valid_test(teeth)
train_ab, val_ab, test_ab = train_valid_test(ab_subset_imgs)

# Subset train and valid
sub_train_orig, sub_val_orig, sub_test_orig = train_valid_test(orig_subset_imgs)

In [10]:
# Create up block function
def up_block(neurons, concat_layer, input):
  up = layers.Conv2DTranspose(neurons, (2,2), strides=(2,2), activation='relu', padding='same')(input)
  up = layers.concatenate([up, concat_layer])
  conv = layers.Conv2D(neurons, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(up)
  conv = layers.Conv2D(neurons, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(conv)
  return conv

In [14]:
#  Function for U-Net model
def unet_model(img_size):
  inputs = keras.Input(shape=img_size + (3,))
  x = layers.Rescaling(1./255)(inputs)
  # Base model
  base = keras.applications.MobileNetV2(
      include_top=False,
      weights="imagenet",
      input_shape=img_size + (3,),
      input_tensor=x
  )
  base.trainable = False
  # Extract decoder path
  down0 = base.get_layer('input_3').output
  down1 = base.get_layer('block_1_expand_relu').output
  down2 = base.get_layer('block_3_expand_relu').output
  down3 = base.get_layer('block_6_expand_relu').output
  x = base.get_layer('block_13_expand_relu').output 
  # Upsample path
  conv6 = up_block(256, down3, x)
  conv7 = up_block(128, down2, conv6)
  conv8 = up_block(64, down1, conv7)
  conv9 = up_block(32, down0, conv8)
  outputs = layers.Conv2D(1, 1, activation='sigmoid', padding='same')(conv9)

  model = keras.Model(inputs, outputs)
  return model

In [15]:
# Get model structure and print the summary
model = unet_model(img_size=img_size)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling_2 (Rescaling)        (None, 256, 256, 3)  0           ['input_3[0][0]']                
                                                                                                  
 Conv1 (Conv2D)                 (None, 128, 128, 32  864         ['rescaling_2[0][0]']            
                                )                                                                 
                                                                                              

In [None]:
train_ab[0]

In [24]:
# Compile and train the model for tooth detection
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt, loss=BinaryFocalLoss(gamma=2), metrics=['accuracy', keras.metrics.MeanIoU(num_classes=2)])
# Save best model
callback = [
    keras.callbacks.ModelCheckpoint("abnormality_segmentation.keras", 
                                    save_best_only=True)
]
class_weight = {0: 1.,
                1: 100.}
teeth_model = model.fit(sub_train_orig, train_ab,
                        epochs=20,
                        callbacks=callback,
                        batch_size=64,
                        validation_data=(sub_val_orig, val_ab),
                        class_weight=class_weight)

ValueError: ignored