# Data preprocessing

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.python.keras.engine import data_adapter
from tensorflow.keras import backend as K

In [2]:
# Locations of the annotation CSVs and of the PNG folders they refer to.
train_csv_directory = '../hdadataset2021/Bone Age Datasets/Bone Age Training Set/'
train_directory = '../hdadataset2021/Bone Age Datasets/Bone Age Training Set/boneage-training-dataset/'
val_csv_directory = '../hdadataset2021/'
val_directory = '../hdadataset2021/boneage-validation-dataset/'

# Annotation tables; the image folders above are only read later, when the
# tf.data pipelines decode the PNG files.
train_csv_path = train_csv_directory + 'train.csv'
val_csv_path = val_csv_directory + 'Validation Dataset.csv'
df_train = pd.read_csv(train_csv_path)
df_val = pd.read_csv(val_csv_path)

In [3]:
# Pipeline constants.
BATCH_SIZE = 16
IMG_SIZE = (300, 300, 1)  # used as (height, width, channels) by the resize layer and the model input

# Training triples: image id (as string), the 'male' column cast to float,
# and the bone age cast to float.
ids = df_train['id'].values.astype(str)
labels = df_train['male'].values.astype(float)
ages = df_train['boneage'].values.astype(float)
train_columns = (ids, labels, ages)
train_ds = tf.data.Dataset.from_tensor_slices(train_columns)

# Validation triples: same layout, but the validation CSV uses different
# column names. NOTE: `ids`, `labels` and `ages` are deliberately re-bound
# here, so after this cell they hold the validation values.
ids = df_val['Image ID'].values.astype(str)
labels = df_val['male'].values.astype(float)
ages = df_val['Bone Age (months)'].values.astype(float)
val_columns = (ids, labels, ages)
val_ds = tf.data.Dataset.from_tensor_slices(val_columns)

In [4]:
def read_train_image(image_file, label, age):
    """Load one training image and pair it with its targets.

    The file ``train_directory + image_file + '.png'`` is read and decoded
    as a single-channel PNG.

    Args:
        image_file: image id, i.e. the file name without the ``.png`` suffix.
        label: sex flag for this image (passed through unchanged).
        age: bone age for this image (passed through unchanged).

    Returns:
        ``(image, [label], [age])``. The two scalars are wrapped in
        one-element lists so they carry an explicit dimension; without it
        the dims are unknown when training the model.
    """
    png_path = train_directory + image_file+ '.png'
    encoded = tf.io.read_file(png_path)
    decoded = tf.io.decode_png(encoded, channels=1)
    return decoded, [label], [age]

def read_val_image(image_file, label, age):
    """Load one validation image and pair it with its targets.

    The file ``val_directory + image_file + '.png'`` is read and decoded as
    a single-channel PNG.

    Args:
        image_file: image id, i.e. the file name without the ``.png`` suffix.
        label: sex flag for this image (passed through unchanged).
        age: bone age for this image (passed through unchanged).

    Returns:
        ``(image, [label], [age])``. The two scalars are wrapped in
        one-element lists so they carry an explicit dimension; without it
        the dims are unknown when the model consumes them.
    """
    png_path = val_directory + image_file + '.png'
    encoded = tf.io.read_file(png_path)
    decoded = tf.io.decode_png(encoded, channels=1)
    return decoded, [label], [age]

# Deterministic preprocessing shared by every split: resize to the model's
# input height/width and rescale pixel values by 1/255.
resize_layer = tf.keras.layers.experimental.preprocessing.Resizing(height=IMG_SIZE[0], width=IMG_SIZE[1])
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1/255.)

# Random augmentation layers (used by `augment`).
# Zoom factors are given as (lower, upper): negative values zoom in, positive
# values zoom out. The bounds were previously written as (0.2, -0.3), i.e. in
# reversed order; the sampled interval is the same, but the tuple now follows
# the documented (lower, upper) contract.
zoom_layer = tf.keras.layers.experimental.preprocessing.RandomZoom(height_factor=(-0.3, 0.2),
                                                                   width_factor=(-0.3, 0.2),
                                                                   fill_mode='constant')
flip_layer = tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal')

def brightness_function(image):
    """Randomly shift image brightness and clamp the result to [0, 1].

    Args:
        image: image tensor; the clamp bounds assume pixel values already
            rescaled to the [0, 1] range.

    Returns:
        The brightness-jittered image with every value limited to [0, 1].
    """
    jittered = tf.image.random_brightness(image, max_delta=0.3)
    # Lower bound first, then upper bound.
    return tf.math.minimum(tf.math.maximum(jittered, 0.), 1.)

def augment(image, label, age):
    """Apply the training-time augmentations to the image only.

    The image goes through random zoom, random horizontal flip and random
    brightness (in that order); ``label`` and ``age`` are returned untouched.

    Returns:
        ``(augmented_image, label, age)``.
    """
    augmented = brightness_function(flip_layer(zoom_layer(image)))
    return augmented, label, age

In [5]:
AUTOTUNE = tf.data.experimental.AUTOTUNE #tf.data.AUTOTUNE

# Training pipeline.
# 1) Decode + resize + rescale once and cache the result on disk, so later
#    epochs skip PNG decoding. The maps run in parallel; tf.data keeps the
#    element order deterministic by default.
train_ds = (
    train_ds
    .map(read_train_image, num_parallel_calls=AUTOTUNE)
    .map(lambda image, label, age: (normalization_layer(resize_layer(image)), label, age),
         num_parallel_calls=AUTOTUNE)
    .cache('./train_cache_300')
)
# 2) Shuffle, batch, augment whole batches, and prefetch as the LAST step so
#    that complete, augmented batches are prepared while the model trains.
train_ds = (
    train_ds
    .shuffle(buffer_size=1000)
    .batch(BATCH_SIZE)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: same preprocessing, no shuffling or augmentation.
val_ds = (
    val_ds
    .map(read_val_image, num_parallel_calls=AUTOTUNE)
    .map(lambda image, label, age: (normalization_layer(resize_layer(image)), label, age),
         num_parallel_calls=AUTOTUNE)
    .cache('./val_cache_300')
)
# Batch first, then prefetch: prefetching before batching only buffers single
# images and leaves batch assembly on the critical path.
val_ds = val_ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [6]:
#image = list(val_ds.take(1).as_numpy_iterator())[0][0][1]

In [7]:
%matplotlib inline
# Optional visual sanity check (disabled): shows the `image` sampled by the
# commented-out line in the previous cell; uncomment both to use it.
#import matplotlib.pyplot as plt
#_ = plt.imshow(image)

# Inception Model

In [8]:
class BoneAgeInception(tf.keras.Model):
    """InceptionV3-based bone-age regressor, optionally conditioned on sex.

    The class wraps a functional Keras model stored in ``self.model``:

    * ``gender=True``: the image goes through an InceptionV3 backbone
      (no top, randomly initialised) and global average pooling; the sex
      flag goes through a 100-unit dense layer; both are concatenated and
      mapped to a single ReLU output.
    * ``gender=False``: the pooled image features are mapped directly to the
      single ReLU output; the sex flag is accepted but ignored.

    ``train_step`` / ``test_step`` are overridden because the datasets yield
    ``(images, sex_flags, ages)`` triples rather than Keras' usual ``(x, y)``.
    """

    def __init__(self, gender=True):
        """Build the wrapped functional model.

        Args:
            gender: when True (default) the model takes the sex flag as a
                second input; when False it uses the image only.
        """
        super(BoneAgeInception, self).__init__()

        # Remembered so `call` knows which inputs the wrapped model expects.
        self._use_gender = bool(gender)

        # Backbone without classification head; weights=None means it is
        # trained from scratch on inputs of shape IMG_SIZE.
        model = tf.keras.applications.InceptionV3(
                                include_top=False,
                                weights=None,
                                input_shape=IMG_SIZE)

        initializer = tf.keras.initializers.GlorotNormal()

        if gender:
            # One scalar per sample; shape must be a tuple, hence (1,).
            gender_input = tf.keras.Input(shape=(1,))
            gender_output = tf.keras.layers.Dense(100, activation='relu', kernel_initializer=initializer)(gender_input)

            model_global_avg = tf.keras.layers.GlobalAveragePooling2D()(model.output)

            concat_output = tf.keras.layers.Concatenate(name='gender_and_feat._concat')([model_global_avg, gender_output])

            overall_output = tf.keras.layers.Dense(1,activation='relu', kernel_regularizer='l2', kernel_initializer=initializer)(concat_output)

            self.model = tf.keras.Model(inputs=[model.input, gender_input], outputs = overall_output)
        else:
            # Image-only variant: same regression head, without the sex branch.
            # Previously this branch was empty, leaving `self.model` undefined.
            model_global_avg = tf.keras.layers.GlobalAveragePooling2D()(model.output)

            overall_output = tf.keras.layers.Dense(1,activation='relu', kernel_regularizer='l2', kernel_initializer=initializer)(model_global_avg)

            self.model = tf.keras.Model(inputs=model.input, outputs = overall_output)

    def call(self, x, training=False):
        """Run the forward pass.

        Args:
            x: pair ``(imgs, labels)`` of image batch and sex-flag batch. The
                flags are ignored when the model was built with
                ``gender=False``.
            training: forwarded to the wrapped model.

        Returns:
            The wrapped model's output (one value per sample).
        """
        imgs, labels = x

        if self._use_gender:
            return self.model([imgs, labels], training=training)
        return self.model(imgs, training=training)

    def train_step(self, data):
        """One optimisation step on a batch ``(imgs, labels, ages)``.

        Returns:
            Dict mapping each metric name to its current value.
        """
        data = data_adapter.expand_1d(data)
        imgs, labels, ages = data

        with tf.GradientTape() as tape:
            y_pred = self((imgs, labels), training=True)  # Forward pass

            # Pass the layers' regularisation losses explicitly; otherwise the
            # 'l2' kernel regulariser on the output layer never reaches the
            # optimised loss.
            loss = self.compiled_loss(ages, y_pred,
                                      regularization_losses=self.losses)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        self.compiled_metrics.update_state(ages, y_pred)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Evaluate one batch ``(imgs, labels, ages)`` without weight updates.

        Returns:
            Dict mapping each metric name to its current value.
        """
        data = data_adapter.expand_1d(data)
        imgs, labels, ages = data

        y_pred = self((imgs, labels), training=False)  # Forward pass

        # Called for its side effect: it updates the tracked loss metric.
        # Regularisation losses are included so the reported validation loss
        # is computed the same way as the training loss.
        self.compiled_loss(ages, y_pred, regularization_losses=self.losses)

        self.compiled_metrics.update_state(ages, y_pred)
        return {m.name: m.result() for m in self.metrics}

    def summary(self, *args, **kwargs):
        """Print the summary of the wrapped functional model.

        Any arguments are forwarded to ``self.model.summary`` unchanged.
        ``Model.summary`` prints by itself and returns ``None``, so its result
        is returned rather than printed (printing it emitted a stray "None").
        """
        return self.model.summary(*args, **kwargs)

In [9]:
# Gender-aware model (gender=True is the constructor default).
bonageinception = BoneAgeInception()

# Save weights only, and only when the monitored validation MAE improves.
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='./ckpts/inception/best_val_inception_100ep.h5',
    monitor='val_mae',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

def scheduler(epoch, lr):
    """Piecewise-constant learning-rate schedule.

    Args:
        epoch: epoch index supplied by the LearningRateScheduler callback.
        lr: learning rate currently in use.

    Returns:
        ``lr`` unchanged while ``epoch < 20``; then 1e-3 while ``epoch < 40``,
        1e-4 while ``epoch < 60``, and 1e-5 afterwards.
    """
    if epoch < 20:
        return lr

    # (exclusive upper epoch bound, learning rate) pairs, checked in order.
    stages = ((40, 1e-3), (60, 1e-4))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    return 1e-5

lr_scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)
training_callbacks = [ckpt_callback, lr_scheduler_callback]

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
# NOTE(review): `metric` is created but not passed to compile(); the string
# 'mae' below is what yields the 'mae' / 'val_mae' names in the logs.
metric = tf.keras.metrics.MeanAbsoluteError()
bonageinception.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae'],
)

bonageinception.fit(
    train_ds,
    validation_data=val_ds,
    callbacks=training_callbacks,
    epochs=100,
    verbose=2,
)


Epoch 00001: LearningRateScheduler reducing learning rate to 0.009999999776482582.
Epoch 1/100


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


Epoch 00001: val_mae improved from inf to 37.20489, saving model to ./ckpts/inception/best_val_inception_100ep.h5
789/789 - 109s - loss: 1644.1091 - mae: 31.6735 - val_loss: 2010.2118 - val_mae: 37.2049

Epoch 00002: LearningRateScheduler reducing learning rate to 0.009999999776482582.
Epoch 2/100

Epoch 00002: val_mae improved from 37.20489 to 32.90630, saving model to ./ckpts/inception/best_val_inception_100ep.h5
789/789 - 108s - loss: 1568.6165 - mae: 31.5525 - val_loss: 1888.4045 - val_mae: 32.9063

Epoch 00003: LearningRateScheduler reducing learning rate to 0.009999999776482582.
Epoch 3/10

### Test set evaluation

In [9]:
test_directory = '../hdadataset2021/Bone Age Datasets/Bone Age Test Set/Test Set Images/'

df_test = pd.read_csv('./test.csv')

# Image ids as strings and sex encoded as 1.0 for 'M', 0.0 otherwise.
ids = df_test['id'].values.astype(str)
labels = (df_test['Sex'].values == 'M').astype(float)

# Bone ages are written with a decimal comma: replace ',' with '.' and convert
# to float. Done with vectorised string ops instead of a per-element Python
# loop; casting to str first also tolerates cells pandas already parsed as
# numbers.
ages = (
    df_test['boneage']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
    .values
)

test_ds = tf.data.Dataset.from_tensor_slices((ids, labels, ages))

def read_test_image(image_file, label, age):
    """Load one test image and pair it with its targets.

    The file ``test_directory + image_file + '.png'`` is read and decoded as
    a single-channel PNG.

    Args:
        image_file: image id, i.e. the file name without the ``.png`` suffix.
        label: sex flag for this image (passed through unchanged).
        age: bone age for this image (passed through unchanged).

    Returns:
        ``(image, [label], [age])``. The two scalars are wrapped in
        one-element lists so they carry an explicit dimension; without it
        the dims are unknown when the model consumes them.
    """
    png_path = test_directory + image_file + '.png'
    encoded = tf.io.read_file(png_path)
    decoded = tf.io.decode_png(encoded, channels=1)
    return decoded, [label], [age]

# Decode, resize and rescale the test images exactly like the other splits.
test_ds = (
    test_ds
    .map(read_test_image)
    .map(lambda image, label, age: (normalization_layer(resize_layer(image)), label, age))
    .cache()
)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

# Materialise the (unbatched) test set into NumPy arrays for predict().
test_iterator = test_ds.as_numpy_iterator()
# Number of samples, taken from the table instead of a hard-coded 200: a
# mismatch would otherwise raise IndexError (too many rows) or leave
# uninitialised np.empty entries (too few rows).
N = len(df_test)
test_images = np.empty(shape=(N, IMG_SIZE[0], IMG_SIZE[1], 1), dtype=np.float32)
test_labels = np.empty(shape=(N,), dtype=np.float32)
test_ages = np.empty(shape=(N,), dtype=np.float32)
for i, item in enumerate(test_iterator):
    test_images[i] = item[0]
    # item[1] and item[2] are one-element arrays (the [label] / [age] lists
    # returned by read_test_image); take the scalar explicitly rather than
    # relying on NumPy's deprecated size-1-array-to-scalar conversion.
    test_labels[i] = item[1][0]
    test_ages[i] = item[2][0]

print(test_images.shape)
print(test_labels.shape)
print(test_ages.shape)

(200, 300, 300, 1)
(200,)
(200,)


In [12]:
# Restore the weights from the file path the ModelCheckpoint callback writes to
# (best validation MAE).
bonageinception.load_weights('./ckpts/inception/best_val_inception_100ep.h5')

In [13]:
# Images and sex flags are passed as the (imgs, labels) pair that
# BoneAgeInception.call unpacks.
bonageinception_pred = bonageinception.predict((test_images, test_labels))

In [14]:
# Mean absolute error between predicted and reference bone ages on the test
# set (same unit as the 'boneage' column — presumably months; TODO confirm).
np.mean(np.abs(bonageinception_pred.squeeze() - test_ages.squeeze()))

7.2804284

In [None]:
# Prints the layer summary of the wrapped functional model (self.model).
bonageinception.summary()