# **I. Downloading dataset**


Install the Kaggle package.

In [None]:
! pip install -q kaggle

Upload your Kaggle API credentials file (`kaggle.json`). To generate it, open your Kaggle account settings and click "Create New API Token".

In [None]:
from google.colab import files

# files.upload() returns a dict mapping file names to the uploaded bytes. Leaving it as
# the last expression of the cell would echo the content of kaggle.json (the API token)
# into the saved cell output, so bind the result and show only the file names.
uploaded = files.upload()
print("Uploaded:", ", ".join(uploaded))

Copy the `kaggle.json` file to the `~/.kaggle` folder and restrict its permissions.

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

Import dataset from Kaggle.

In [None]:
# Download the kmader/rsna-bone-age dataset with the Kaggle CLI.
# The extraction cell further down expects the archive to be named rsna-bone-age.zip.
! kaggle datasets download -d kmader/rsna-bone-age

Mount Google Drive and extract the files into the selected directory.

In [None]:
from google.colab import drive

# Make Google Drive available under /content/drive for the rest of the notebook.
drive.mount(mountpoint="/content/drive")

In [None]:
! unzip rsna-bone-age.zip -d "/content/drive/My Drive/Colab Notebooks/SSIP2021_v2"

# **II. Importing dataset**

Move to the selected directory.

In [None]:
# Work from the project folder on Drive so that the relative CSV path used by the
# next cell resolves.
%cd "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2"

Read the CSV data.

In [None]:
import pandas as pd

# Training metadata; the path is relative to the current working directory.
train_csv = "boneage-training-dataset.csv"
df_train = pd.read_csv(train_csv)

# **III. Data exploration**

Show the first 10 cases.

In [None]:
# Rich display of the first ten rows of the training metadata.
df_train.head(n=10)

Present descriptive statistics.

In [None]:
# Summary statistics of df_train, using describe()'s default column selection.
df_train.describe()

Count the number of females and males.

In [None]:
# Number of rows per value of the `male` column.
df_train["male"].value_counts()

Plot the overall bone-age distribution, then the bone-age distribution for each sex separately.

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def _plot_boneage_hist(values, color, title):
  """Draw one histogram of bone-age values with a title and axis labels.

  values: the bone-age values to bin.
  color:  fill colour of the bars.
  title:  figure title.
  """
  fig, ax = plt.subplots()
  ax.hist(values, edgecolor='black', color=color, alpha=0.7)
  ax.set(title=title, xlabel='boneage', ylabel='Number of cases')
  plt.show()


_plot_boneage_hist(df_train['boneage'], 'green', 'Bone age distribution (all cases)')

# Select rows with a boolean mask instead of Series.where(): where() keeps every row
# and fills the non-matching ones with NaN, which then get passed to the histogram.
df_male = df_train.loc[df_train['male'].eq(True), 'boneage']
_plot_boneage_hist(df_male, 'blue', 'Bone age distribution (male)')

df_female = df_train.loc[df_train['male'].eq(False), 'boneage']
_plot_boneage_hist(df_female, 'red', 'Bone age distribution (female)')

Print the number of training images.

In [None]:
import os

# Number of entries in the extracted training image folder.
train_image_dir = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2/boneage-training-dataset/boneage-training-dataset"
len(os.listdir(train_image_dir))

Plot some selected images.

In [None]:
import cv2
from matplotlib import pyplot as plt

figsize = (5,5)
images = ["1425.png", "15555.png", "15003.png"]
image_dir = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2/boneage-training-dataset/boneage-training-dataset/"

for file_name in images:
  image = cv2.imread(image_dir + file_name)
  # cv2.imread signals a missing/unreadable file by returning None instead of raising.
  if image is None:
    print("Could not read " + file_name + "; skipping.")
    continue
  plt.figure(figsize=figsize)
  # OpenCV loads channels in BGR order, while matplotlib expects RGB.
  plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  plt.title(file_name)
plt.show()

# **IV. Data preprocessing**

Preprocessing of images with CLAHE.

In [None]:
import cv2
import os

DIR_PATH = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2/boneage-training-dataset/boneage-training-dataset/"

NEW_DIR_PATH = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2/boneage-training-dataset/boneage-training-dataset/HE/"

# cv2.imwrite does not create missing folders (it just returns False), so make sure
# the output folder exists before processing.
os.makedirs(NEW_DIR_PATH, exist_ok=True)

def clahe_equal(file_name):
  """Apply CLAHE to the lightness channel of one training image and save the result.

  The image `file_name` is read from DIR_PATH, converted to LAB, its first (L) plane is
  equalized with CLAHE (clipLimit=2.0, 16x16 tiles), converted back to BGR and written
  under the same file name into NEW_DIR_PATH.

  Raises:
    IOError: if the input cannot be read as an image or the output cannot be written.
  """
  image = cv2.imread(os.path.join(DIR_PATH, file_name))
  if image is None:
    raise IOError("Could not read image: " + os.path.join(DIR_PATH, file_name))
  lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
  # cv2.split may return a tuple; copy into a list so that plane 0 can be replaced.
  lab_planes = list(cv2.split(lab))
  clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(16,16))
  # This step was commented out, which made the cell write unmodified copies; it is
  # restored so training images get the same equalization as the test images.
  lab_planes[0] = clahe.apply(lab_planes[0])
  lab = cv2.merge(lab_planes)
  final_image = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
  if not cv2.imwrite(os.path.join(NEW_DIR_PATH, file_name), final_image):
    raise IOError("Could not write image: " + os.path.join(NEW_DIR_PATH, file_name))

for file_name in sorted(os.listdir(DIR_PATH)):
  # The output folder HE/ lives inside DIR_PATH, so the listing also contains it;
  # only regular files are images to process.
  if os.path.isfile(os.path.join(DIR_PATH, file_name)):
    clahe_equal(file_name)

In [None]:
import cv2
import os

DIR_PATH = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2/boneage-test-dataset/boneage-test-dataset"

# NOTE(review): the output goes to SSIP2021_v5 while every other path in the notebook
# uses SSIP2021_v2 - confirm this is the intended destination.
NEW_DIR_PATH = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v5/boneage-test-dataset/boneage-test-dataset/"

# cv2.imwrite does not create missing folders (it just returns False).
os.makedirs(NEW_DIR_PATH, exist_ok=True)

def clahe_equal(file_name):
  """Apply CLAHE to the lightness channel of one test image and save the result.

  The image `file_name` is read from DIR_PATH, converted to LAB, its first (L) plane is
  equalized with CLAHE (clipLimit=2.0, 16x16 tiles), converted back to BGR and written
  under the same file name into NEW_DIR_PATH.

  Raises:
    IOError: if the input cannot be read as an image or the output cannot be written.
  """
  # DIR_PATH has no trailing slash, so plain string concatenation would produce a
  # non-existent path; os.path.join inserts the separator when it is needed.
  image = cv2.imread(os.path.join(DIR_PATH, file_name))
  if image is None:
    raise IOError("Could not read image: " + os.path.join(DIR_PATH, file_name))
  lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
  # cv2.split may return a tuple; copy into a list so that plane 0 can be replaced.
  lab_planes = list(cv2.split(lab))
  clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(16,16))
  lab_planes[0] = clahe.apply(lab_planes[0])
  lab = cv2.merge(lab_planes)
  final_image = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
  if not cv2.imwrite(os.path.join(NEW_DIR_PATH, file_name), final_image):
    raise IOError("Could not write image: " + os.path.join(NEW_DIR_PATH, file_name))

for file_name in sorted(os.listdir(DIR_PATH)):
  # Skip anything that is not a regular file (e.g. sub-folders).
  if os.path.isfile(os.path.join(DIR_PATH, file_name)):
    clahe_equal(file_name)

Feature engineering.

In [None]:
# df_test is not created by any earlier cell, so load the test metadata here
# (path relative to the project folder, like the training CSV).
df_test = pd.read_csv("boneage-test-dataset.csv")

# Use the same column name for the case identifier as in df_train.
df_test = df_test.rename(columns={'Case ID': 'id'})

In [None]:
def _is_male(value):
  """Return True when `value` denotes a male case.

  Accepts booleans (as in the training `male` column) as well as text labels such as
  'M' / 'F'. A plain truthiness test is wrong for text: every non-empty string is truthy.
  """
  if isinstance(value, str):
    return value.strip().upper() in ('M', 'MALE', 'TRUE')
  return bool(value)

# Encoding used in both frames: 0 = male, 1 = female.
df_train['sex'] = df_train['male'].apply(lambda x: 0 if _is_male(x) else 1)
del(df_train['male'])

df_test['sex'] = df_test['Sex'].apply(lambda x: 0.0 if _is_male(x) else 1.0)
del(df_test['Sex'])

In [None]:
def _to_png_name(case_id):
  """Return the image file name for a case id, adding '.png' only if it is missing."""
  name = str(case_id)
  # The guard keeps the cell idempotent: re-running it must not produce 'x.png.png'.
  return name if name.endswith('.png') else name + '.png'

df_train['id'] = df_train['id'].apply(_to_png_name)
df_test['id'] = df_test['id'].apply(_to_png_name)

# **V. Transfer learning**

In [None]:
import tensorflow.keras as K
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training configuration: square input side in pixels, images per batch, training epochs.
image_size, batch_size, epochs = 256, 8, 10

In [None]:
# Shared input tensor: square RGB images of side image_size.
input_t = K.Input(shape=(image_size, image_size, 3))

# VGG16 convolutional base with ImageNet weights and without its classifier head.
base_model = VGG16(weights="imagenet", include_top=False, input_tensor=input_t)

# Mark every layer of the base as trainable.
for layer in base_model.layers:
  layer.trainable = True

In [None]:
# Regression head on top of the base model:
# flatten -> dropout(0.3) -> dense(128, relu) -> dense(1, linear).
last_layer = base_model.output
m1 = K.layers.Flatten()(last_layer)

# Layers are constructed in the order in which they are applied.
head_layers = [
  K.layers.Dropout(rate=0.3),
  K.layers.Dense(units=128, activation='relu'),
  K.layers.Dense(units=1, activation='linear'),
]
output = m1
for head_layer in head_layers:
  output = head_layer(output)

model_only_regression = K.Model(inputs=input_t, outputs=output)
model_only_regression.summary()

In [None]:
import tensorflow as tf

# NOTE: this cell rebinds `base_model`, `output` and `model_only_regression`, so after it
# runs the model defined in the previous cells is no longer reachable under those names.

# MobileNet base with ImageNet weights and no classifier head, fed by the existing input_t.
base_model = tf.keras.applications.MobileNet(weights='imagenet', include_top=False, input_tensor=input_t)

# Regression head: global average pooling followed by a single linear output unit.
output = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
output = tf.keras.layers.Dense(1, activation='linear')(output)

model_only_regression = tf.keras.Model(inputs=base_model.input, outputs=output)

model_only_regression.summary()

In [None]:
# The model that ends up in `model_only_regression` is the MobileNet regressor, so the
# images must go through MobileNet's preprocessing rather than the VGG16
# `preprocess_input` imported at the top of this section.
mobilenet_preprocess = tf.keras.applications.mobilenet.preprocess_input

# One image folder for both subsets. Previously the training subset pointed at
# ".../SSIP2021/..." while the validation subset used ".../SSIP2021_v2/...".
TRAIN_IMAGE_DIR = "/content/drive/MyDrive/Colab Notebooks/SSIP2021_v2/boneage-training-dataset/boneage-training-dataset/"

VALIDATION_SPLIT = 0.20

# Augmenting generator, used for the training subset only.
data_gen=ImageDataGenerator(
  height_shift_range=0.2,
  width_shift_range=0.2,
  horizontal_flip=True,
  vertical_flip=False,
  preprocessing_function=mobilenet_preprocess,
  zoom_range=0.2,
  validation_split=VALIDATION_SPLIT
)

# Same split fraction but no augmentation, so validation metrics are measured on
# unmodified images. The subset boundaries depend only on the row order of the
# dataframe and on validation_split, so both generators agree on the split.
data_gen_val=ImageDataGenerator(
  preprocessing_function=mobilenet_preprocess,
  validation_split=VALIDATION_SPLIT
)

data_gen_test=ImageDataGenerator(
  preprocessing_function=mobilenet_preprocess,
)

# `label_mode` is an argument of image_dataset_from_directory, not of
# flow_from_dataframe, and has been dropped; class_mode='raw' yields the numeric
# `boneage` values as regression targets.
train_generator=data_gen.flow_from_dataframe(
  dataframe=df_train,
  directory=TRAIN_IMAGE_DIR,
  x_col="id",
  y_col="boneage",
  subset="training",
  batch_size=batch_size,
  seed=42,
  shuffle=True,
  target_size=(image_size,image_size),
  class_mode='raw',
  color_mode='rgb',
)

validation_generator=data_gen_val.flow_from_dataframe(
  dataframe=df_train,
  directory=TRAIN_IMAGE_DIR,
  x_col="id",
  y_col="boneage",
  subset="validation",
  batch_size=batch_size,
  seed=42,
  shuffle=True,
  target_size=(image_size,image_size),
  class_mode='raw',
  color_mode='rgb'
)

In [None]:
def define_model_parameters(model):
  """Compile `model` for regression and print its summary.

  The model is compiled with mean-squared-error loss and the Adam optimizer, and
  reports mean squared error and mean absolute error as metrics.

  Args:
    model: an object exposing Keras-style `compile(...)` and `summary()` methods.

  Returns:
    None.
  """
  model.compile(loss='mse', optimizer='adam', metrics=['MeanSquaredError', 'MeanAbsoluteError'])
  # summary() prints the table itself; wrapping it in print() would additionally
  # print its return value.
  model.summary()

In [None]:
# Compile the regression model and show its summary.
define_model_parameters(model=model_only_regression)

In [None]:
# No earlier cell defines `model_checkpoint_callback`, so create it here: keep the model
# with the lowest validation loss seen during training. The file is written relative
# to the current working directory.
model_checkpoint_callback = K.callbacks.ModelCheckpoint(
  filepath="model_only_regression_best.h5",
  monitor="val_loss",
  mode="min",
  save_best_only=True,
)

history_only_regression = model_only_regression.fit(
  train_generator,
  validation_data=validation_generator,
  epochs=epochs,
  callbacks=[model_checkpoint_callback],
)