In [None]:
from IPython.display import HTML, display

In [None]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

# Show which TensorFlow version this kernel imported.
print(f"TensorFlow V{tf.__version__} 🦾")

In [None]:
# Auxiliary libraries
import os
import functools
import pandas as pd
import numpy as np
import requests
import pathlib
import matplotlib.pyplot as plt

%matplotlib inline

from PIL import Image
from google import auth
from sklearn.model_selection import train_test_split
from datetime import datetime

In [None]:
# bone_base_dir = os.path.join('..', '/kaggle/input', 'i2a2-bone-age-regression')

# train_data_file = f"{bone_base_dir}/train.csv"
# test_data_file = f"{bone_base_dir}/test.csv"
# sample_data_file = f"{bone_base_dir}/sample_submission.csv"

# raw_train_df = pd.read_csv(train_data_file)
# test_df = pd.read_csv(test_data_file)
# sample_df = pd.read_csv(sample_data_file)

In [None]:
from keras.utils import get_file

BONE_AGE_DATASET_URL = 'https://storage.googleapis.com/bone-age-regression.henriquesilva.dev'

# Fetch the image archive and ask Keras to unpack it after download.
path_to_zip = get_file(
    'images.zip',
    origin = f"{BONE_AGE_DATASET_URL}/images.zip",
    archive_format = 'zip',
    extract = True,
)

# Fetch the two CSV files (train first, then test); nothing to extract for these.
train_data_file, test_data_file = (
    get_file(csv_name, origin = f"{BONE_AGE_DATASET_URL}/{csv_name}", extract = False)
    for csv_name in ('train.csv', 'test.csv')
)

# Load both tables into DataFrames.
raw_train_df, test_df = pd.read_csv(train_data_file), pd.read_csv(test_data_file)

In [None]:
# Bucket the ages into 10 equal-width bins.
raw_train_df['boneage_category'] = pd.cut(raw_train_df['boneage'], bins = 10)

# Normalisation constants. Note the divisor is TWO standard deviations, not one.
boneage_mean = raw_train_df['boneage'].mean()
boneage_div = 2 * raw_train_df['boneage'].std()

# Centred and scaled regression target.
raw_train_df['boneage_zscore'] = (raw_train_df['boneage'] - boneage_mean) / boneage_div

# 0 where patientSex is 'F', 1 for every other value.
raw_train_df['male'] = (raw_train_df['patientSex'] != 'F').astype(int)

In [None]:
# Render five random training rows as an HTML table (cell contents are not escaped).
HTML(raw_train_df.sample(n = 5).to_html(escape = False))

In [None]:
# Recompute the age buckets so this cell does not depend on an earlier one having run.
raw_train_df['boneage_category'] = pd.cut(raw_train_df['boneage'], 10)

# Draw 400 rows (with replacement) from every observed (age bucket, sex) group so
# each group is equally represented. GroupBy.sample keeps all columns, including
# the grouping ones, and the fixed seed makes the resampled set reproducible
# (2018 is the same seed the train/validation split in this notebook uses).
train_df = (
    raw_train_df
    .groupby(['boneage_category', 'male'], observed = True)
    .sample(n = 400, replace = True, random_state = 2018)
    .reset_index(drop = True)
)
print('New Data Size:', train_df.shape[0], 'Old Size:', raw_train_df.shape[0])

# Histograms of age and sex after balancing.
train_df[['boneage', 'male']].hist(figsize = (10, 5))

In [None]:
# Hard-coded location of the image folder.
# NOTE(review): presumably where get_file unpacked images.zip — confirm for this runtime.
data_dir = pathlib.Path("/root/.keras/datasets") / "images"

In [None]:
# Full path of each image file. The previous lambda ignored the file name and
# stored the bare directory for every row.
raw_train_df['path'] = raw_train_df['fileName'].map(lambda fn: str(data_dir / str(fn)))

# train_df was resampled from raw_train_df before 'path' existed, and the
# path-based generators further down read train_df['path'], so add it there too.
train_df['path'] = train_df['fileName'].map(lambda fn: str(data_dir / str(fn)))

In [None]:
# Full path of each test image. female_test_df is only created in a later cell,
# so writing to it here fails on a fresh kernel; attaching the column to test_df
# instead lets the per-sex subsets derived from test_df inherit it.
# The path now includes the file name (the old lambda stored the bare directory).
test_df['path'] = test_df['fileName'].map(lambda fn: str(data_dir / str(fn)))

In [None]:
# Split the training table by sex. Only a 'male' indicator column is created in
# this notebook (1 = not 'F', 0 = 'F'); there is no 'female' column, so the
# female subset is selected as male == 0.
raw_female_train_df = raw_train_df[raw_train_df['male'] == 0]
raw_male_train_df = raw_train_df[raw_train_df['male'] == 1]

print('female', raw_female_train_df.shape[0], 'male', raw_male_train_df.shape[0])

In [None]:
# Split the test table by sex. The mask must come from test_df itself: a mask
# built from raw_train_df has a different index and selects unrelated rows
# (or fails to align at all).
female_test_df = test_df[test_df['patientSex'] == 'F']
male_test_df = test_df[test_df['patientSex'] == 'M']

print('female', female_test_df.shape[0], 'male', male_test_df.shape[0])

In [None]:
# Hold out 25% of the rows for validation, stratified by age bucket.
# NOTE: this rebinds train_df, so re-running the cell shrinks it again.
train_df, valid_df = train_test_split(
  train_df,
  stratify = train_df['boneage_category'],
  random_state = 2018,
  test_size = 0.25,
)

print('train', len(train_df), 'validation', len(valid_df))

In [None]:
# Same 75/25 stratified split, restricted to the male training rows.
male_train_df, male_valid_df = train_test_split(
  raw_male_train_df,
  stratify = raw_male_train_df['boneage_category'],
  random_state = 2018,
  test_size = 0.25,
)

print('train', len(male_train_df), 'validation', len(male_valid_df))

In [None]:
data_dir = pathlib.Path("/root/.keras/datasets/images/")

# Count the PNG files directly inside data_dir and show the total as a heading.
image_count = sum(1 for _ in data_dir.glob('*.png'))
display(HTML(f"<h1>{image_count} images</h1>"))

In [None]:
# Show nine random training images in a 3x3 grid, titled with age and sex.
row, col = 3, 3
plt.figure(figsize=(15, 15))

sample_rows = raw_train_df[['fileName','patientSex','boneage']].sample(9).values
for i, (fileName, patientSex, boneage) in enumerate(sample_rows, start=1):
    file_id = str(fileName)
    image = Image.open(f"{data_dir}/{file_id}")

    plt.subplot(row, col, i)
    plt.imshow(image)
    plt.title(f"Boneage: {boneage} months | Sex: {'Male' if patientSex == 'M' else 'Female'}")
    plt.axis('off')

plt.show()

In [None]:
# Show nine random test images in a 3x3 grid, titled with sex only
# (only fileName and patientSex are read from test_df here).
row, col = 3, 3
plt.figure(figsize=(15, 15))

sample_rows = test_df[['fileName','patientSex']].sample(9).values
for i, (fileName, patientSex) in enumerate(sample_rows, start=1):
    file_id = str(fileName)
    image = Image.open(f"{data_dir}/{file_id}")

    plt.subplot(row, col, i)
    plt.imshow(image)
    plt.title(f"Sex: {'Male' if patientSex == 'M' else 'Female'}")
    plt.axis('off')

plt.show()

In [None]:
boneage_values = raw_train_df['boneage']

# Highest age in the dataset
print(f"MAX age: {boneage_values.max()!s} months")

# Lowest age in the dataset
print(f"MIN age: {boneage_values.min()!s} months")

# Mean age
mean_bone_age = boneage_values.mean()
print(f"mean: {mean_bone_age!s}")

# Median age
print(f"median: {boneage_values.median()!s}")

# Standard deviation of the ages
std_bone_age = boneage_values.std()

# Models can perform better on normalised targets: plain z-score (one std),
# unlike 'boneage_zscore', which divides by boneage_div.
raw_train_df['bone_age_z'] = (boneage_values - mean_bone_age) / std_bone_age

# Look at the table again
print(raw_train_df.head())

In [None]:
def flipped(original, augmented):
  """Draw `original` and `augmented` side by side in a new figure, axes hidden.

  Args:
    original: image shown in the left panel (anything plt.imshow accepts).
    augmented: image shown in the right panel.
  """
  plt.figure()
  for panel, picture in enumerate((original, augmented), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(picture)
    plt.axis('off')



In [None]:
from keras.preprocessing.image import ImageDataGenerator

# "<data_dir>/<fileName>" for every row of train_df, in row order.
images = [f"{data_dir}/{fn}" for fn in train_df['fileName']]

# Target (height, width) passed to the generators below.
IMG_SIZE = (512, 512)

# Shared augmentation/normalisation pipeline: per-image centring and scaling,
# random horizontal flips, small shifts, rotations, shear and zoom.
core_idg = ImageDataGenerator(
    samplewise_center = True,
    samplewise_std_normalization = True,
    horizontal_flip = True,
    vertical_flip = False,
    height_shift_range = 0.1,
    width_shift_range = 0.1,
    rotation_range = 10,
    shear_range = 0.05,
    fill_mode = 'nearest',
    zoom_range = 0.15,
)

In [None]:
# Preview a horizontal flip on the first five training images.
# The flipped tensor used to be passed to the ImageDataGenerator constructor
# (where it became a constructor flag and the result was thrown away); it is
# now shown next to the original with the `flipped` helper defined above.
for image_path in images[:5]:
    image_string = tf.io.read_file(image_path)
    # NOTE(review): the notebook globs '*.png' files; confirm decode_jpeg accepts them here.
    image = tf.image.decode_jpeg(image_string, channels=3)
    flipped(image, tf.image.flip_left_right(image))

In [None]:
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    """Build a directory iterator, then overwrite its file list and labels from a DataFrame.

    Args:
        img_data_gen: object exposing ``flow_from_directory`` (e.g. an ImageDataGenerator).
        in_df: DataFrame with one row per image.
        path_col: column of ``in_df`` holding each image's path.
        y_col: column of ``in_df`` holding each image's target value.
        **dflow_args: forwarded unchanged to ``flow_from_directory``.

    Returns:
        The iterator returned by ``flow_from_directory`` with ``filenames``,
        ``classes``, ``samples``, ``n`` and ``directory`` replaced.

    NOTE(review): relies on the iterator's private ``_set_index_array`` and on it
    reading ``filenames``/``directory`` lazily — confirm for the installed Keras.
    """
    row_count = in_df.shape[0]
    image_paths = in_df[path_col].values
    # Directory of the first image; only used to construct the iterator.
    root_dir = os.path.dirname(image_paths[0])
    print('## Ignore next message from keras, values are replaced anyways')
    iterator = img_data_gen.flow_from_directory(root_dir,
                                                class_mode = 'sparse',
                                                **dflow_args)
    # Swap in the DataFrame's files and targets.
    iterator.filenames = image_paths
    iterator.classes = np.stack(in_df[y_col].values)
    iterator.samples = iterator.n = row_count
    # Must run after `n` is updated.
    iterator._set_index_array()
    iterator.directory = '' # since we have the full path
    print(f'Reinserting dataframe: {row_count} images')
    return iterator

In [None]:
# Options common to every iterator built in this cell.
flow_options = dict(path_col = 'path',
                    y_col = 'boneage_zscore',
                    target_size = IMG_SIZE,
                    color_mode = 'grayscale')

train_gen = flow_from_dataframe(core_idg, train_df, batch_size = 128, **flow_options)

# we can use much larger batches for evaluation
valid_gen = flow_from_dataframe(core_idg, valid_df, batch_size = 128, **flow_options)

# used a fixed dataset for evaluating the algorithm: one big batch drawn from valid_df
test_X, test_Y = next(flow_from_dataframe(core_idg, valid_df, batch_size = 500, **flow_options))

In [None]:
# Pull one training batch and show up to 16 of its images; each title converts
# the normalised target back to months.
t_x, t_y = next(train_gen)
fig, m_axs = plt.subplots(4, 4, figsize = (16, 16))
for (c_ax, c_x, c_y) in zip(m_axs.flatten(), t_x, t_y):
    age_months = c_y*boneage_div+boneage_mean
    c_ax.imshow(c_x[:,:,0], cmap = 'bone', vmin = -3, vmax = 3)
    c_ax.set_title('%2.0f months' % (age_months))
    c_ax.axis('off')

In [None]:
from keras.applications.mobilenet import MobileNet
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, BatchNormalization
from keras.models import Sequential
from keras.metrics import mean_absolute_error

# MobileNet backbone without its classification head, randomly initialised,
# sized to the images of the batch previewed above.
base_mobilenet_model = MobileNet(input_shape = t_x.shape[1:],
                                 include_top = False,
                                 weights = None)

# Regression head: normalise -> backbone -> normalise -> global pool -> dropout -> one linear unit.
bone_age_model = Sequential([
    BatchNormalization(input_shape = t_x.shape[1:]),
    base_mobilenet_model,
    BatchNormalization(),
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(1, activation = 'linear'), # linear is what 16bit did
])

def mae_months(in_gt, in_pred):
    """Mean absolute error with both inputs multiplied by boneage_div."""
    return mean_absolute_error(boneage_div*in_gt, boneage_div*in_pred)

bone_age_model.compile(loss = 'mse',
                       optimizer = 'adam',
                       metrics = [mae_months])

bone_age_model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('bone_age')

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min', save_weights_only = True)

# Multiply the learning rate by 0.8 after 10 epochs without improvement.
# `min_delta` replaces the legacy `epsilon` keyword, matching the second
# callbacks cell in this notebook.
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=10, verbose=1, mode='auto', min_delta=0.0001, cooldown=5, min_lr=0.0001)

# Stop once validation loss has not improved for 5 epochs.
early = EarlyStopping(monitor="val_loss",
                      mode="min",
                      patience=5) # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]

In [None]:
# Train on small batches and validate against the fixed (test_X, test_Y) batch.
# Model.fit accepts generators directly; fit_generator is deprecated.
train_gen.batch_size = 16
bone_age_model.fit(train_gen,
                   validation_data = (test_X, test_Y),
                   epochs = 10,
                   callbacks = callbacks_list)

In [None]:
# Load the weights stored at weight_path (the checkpoint callback's save target).
bone_age_model.load_weights(weight_path)

In [None]:
# Predict on the fixed batch, then map predictions and targets back to months
# by undoing the (value - boneage_mean) / boneage_div normalisation.
pred_Y = bone_age_model.predict(test_X, batch_size = 16, verbose = True)
pred_Y = boneage_div*pred_Y+boneage_mean
test_Y_months = boneage_div*test_Y+boneage_mean

In [None]:
# Scatter of predicted vs. actual age; the blue line is the perfect-prediction diagonal.
fig, ax1 = plt.subplots(1,1, figsize = (6,6))
for y_values, fmt, series_label in ((pred_Y, 'r.', 'predictions'),
                                    (test_Y_months, 'b-', 'actual')):
    ax1.plot(test_Y_months, y_values, fmt, label = series_label)
ax1.legend()
ax1.set_xlabel('Actual Age (Months)')
ax1.set_ylabel('Predicted Age (Months)')

In [None]:
# Pick 8 random positions in the fixed batch (repeats possible) and show each
# image with its actual and predicted age, then save the figure.
rand_idx = np.random.choice(range(test_X.shape[0]), 8)
fig, m_axs = plt.subplots(4, 2, figsize = (16, 32))
for (c_ax, idx) in zip(m_axs.flatten(), rand_idx):
    title_values = (test_Y_months[idx], pred_Y[idx])
    c_ax.imshow(test_X[idx, :,:,0], cmap = 'bone')
    c_ax.set_title('Age: %2.1f\nPredicted Age: %2.1f' % title_values)
    c_ax.axis('off')
fig.savefig('trained_img_predictions.png', dpi = 300)

In [None]:
# Options shared by the RGB iterators below. `flip_vertical` was dropped: it is
# not a flow_from_dataframe argument (flipping is configured on core_idg).
image_flow_options = dict(
    directory = '/root/.keras/datasets/images',
    x_col = 'fileName',
    y_col = 'boneage_zscore',
    shuffle = True,
    class_mode = 'other',
    color_mode = 'rgb',
    target_size = IMG_SIZE)

train_gen = core_idg.flow_from_dataframe(
    dataframe = train_df,
    batch_size = 16,
    **image_flow_options)

# Iterator over the validation rows (the name test_gen is kept for later cells).
test_gen = core_idg.flow_from_dataframe(
    dataframe = valid_df,
    batch_size = 16,
    **image_flow_options)

# Fixed evaluation batch. It must come from labelled rows with the normalised
# age as target: later cells convert test_Y back to months, which cannot work
# with the 'patientSex' strings of the unlabeled test_df.
test_X, test_Y  = next(core_idg.flow_from_dataframe(
    dataframe = valid_df,
    batch_size = 32,
    **image_flow_options))

In [None]:
# One batch from the current train_gen; show up to 16 images (first channel
# only), each titled with its target converted back to months.
t_x, t_y = next(train_gen)
fig, m_axs = plt.subplots(4, 4, figsize = (16, 16))
for (c_ax, c_x, c_y) in zip(m_axs.flatten(), t_x, t_y):
    first_channel = c_x[:,:,0]
    c_ax.imshow(first_channel, cmap = 'bone', vmin = -3, vmax = 3)
    c_ax.set_title('%2.0f months' % (c_y*boneage_div+boneage_mean))
    c_ax.axis('off')

In [None]:
from keras.applications.mobilenet import MobileNet
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, BatchNormalization
from keras.models import Sequential
from keras.metrics import mean_absolute_error

# Input shape follows the most recently drawn training batch (t_x).
base_mobilenet_model = MobileNet(input_shape = t_x.shape[1:],
                                 include_top = False,
                                 weights = None)

# Same architecture as the earlier model cell, rebuilt for the new input shape.
bone_age_model = Sequential([
    BatchNormalization(input_shape = t_x.shape[1:]),
    base_mobilenet_model,
    BatchNormalization(),
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(1, activation = 'linear'), # linear is what 16bit did
])

def mae_months(in_gt, in_pred):
    """Mean absolute error with both inputs multiplied by boneage_div."""
    return mean_absolute_error(boneage_div*in_gt, boneage_div*in_pred)

bone_age_model.compile(loss = 'mse',
                       optimizer = 'adam',
                       metrics = [mae_months])

bone_age_model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

# File that receives the best weights.
weight_path = '%s_weights.best.hdf5' % 'bone_age'

# Save weights only, and only when validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    weight_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only = True,
    verbose=1,
)

# Scale the learning rate by 0.8 after 10 stagnant epochs, never below 1e-4.
reduceLROnPlat = ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.8,
    patience=10,
    min_delta=0.0001,
    cooldown=5,
    min_lr=0.0001,
    verbose=1,
)

# probably needs to be more patient, but kaggle time is limited
early = EarlyStopping(monitor="val_loss", mode="min", patience=5)

callbacks_list = [checkpoint, early, reduceLROnPlat]

In [None]:
# Mean absolute error expressed in months.
def mae_in_months(x_p, y_p):
    '''Return the MAE of the two inputs after rescaling each to months.

    Each input is mapped through ``std_bone_age * value + mean_bone_age``.
    NOTE(review): the generators in this notebook use 'boneage_zscore', which is
    scaled by boneage_div rather than std_bone_age — confirm which is intended.
    '''
    first_months = std_bone_age*x_p + mean_bone_age
    second_months = std_bone_age*y_p + mean_bone_age
    return mean_absolute_error(first_months, second_months)

In [None]:
def plot_it(history, metric='mae_months'):
    '''Plot the training and validation curves of one metric.

    Args:
        history: object returned by model training; its ``history`` dict must
            contain ``metric`` and ``'val_' + metric``.
        metric: key of the metric to plot. Defaults to 'mae_months', the metric
            the model in this notebook is compiled with (the previously
            hard-coded 'mae_in_months' is never recorded).
    '''
    fig, ax = plt.subplots( figsize=(20,10))
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    plt.title('Model Error')
    plt.ylabel('error')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper right')
    ax.grid(color='black')
    plt.show()

In [None]:
# Stop when validation loss has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience= 5,
                              verbose=0, mode='auto')

# Model checkpoint: stores the best model so it can be used for the final test.
# https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/ModelCheckpoint
mc = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True)

# TensorBoard callback, currently disabled:
# logdir = os.path.join(logs_dir,datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
# tensorboard_callback = TensorBoard(logdir, histogram_freq = 1)

# Reduce the learning rate by 10x when validation loss stops improving for 10 epochs.
# https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/ReduceLROnPlateau
red_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

callbacks = [early_stopping,mc, red_lr_plat]


# Fit the model.
# * Model.fit accepts generators directly; fit_generator is deprecated.
# * Validation uses test_gen, the RGB iterator over valid_df. valid_gen yields
#   grayscale batches and does not match this model's RGB input.
history = bone_age_model.fit(train_gen,
                            steps_per_epoch = 300,
                            validation_data = test_gen,
                            validation_steps = 1,
                            epochs = 30,
                            callbacks= callbacks)

# Show the training curves.
#%tensorboard --logdir logs
plot_it(history)

In [None]:
# model_2.load_weights('best_model.h5')
# Map predictions and targets back to months. The model is trained on
# 'boneage_zscore' = (boneage - boneage_mean) / boneage_div, so the inverse must
# use boneage_div (two standard deviations), not std_bone_age.
# NOTE(review): requires test_Y to hold normalised ages — verify the cell that builds test_X/test_Y.
pred = boneage_mean + boneage_div*(bone_age_model.predict(test_X, batch_size = 32, verbose = True))
test_months = boneage_mean + boneage_div*(test_Y)

# Author's note: some steps in this section are not fully understood yet.
# Sort the batch by target and take 8 evenly spaced samples.
ord_ind = np.argsort(test_Y)
ord_ind = ord_ind[np.linspace(0, len(ord_ind)-1, 8).astype(int)] # take 8 evenly spaced ones
fig, axs = plt.subplots(4, 2, figsize = (15, 30))
for (ind, ax) in zip(ord_ind, axs.flatten()):
    ax.imshow(test_X[ind, :,:,0], cmap = 'bone')
    # Months are divided by 12 to title the panels in years.
    ax.set_title('Age: %fY\nPredicted Age: %fY' % (test_months[ind]/12.0,
                                                           pred[ind]/12.0))
    ax.axis('off')
fig.savefig('trained_image_predictions.png', dpi = 300)

In [None]:
# Scatter of predicted vs. actual age (months); the blue line marks perfect predictions.
fig, ax = plt.subplots(figsize = (7,7))
for y_values, fmt, series_label in ((pred, 'r.', 'predictions'),
                                    (test_months, 'b-', 'actual')):
    ax.plot(test_months, y_values, fmt, label = series_label)
ax.legend(loc = 'upper right')
ax.set_xlabel('Actual Age (Months)')
ax.set_ylabel('Predicted Age (Months)')

In [None]:
# Ten random rows of the female test subset.
female_test_df.sample(n = 10)

In [None]:
# test - data generator
# test_data_generator = ImageDataGenerator(preprocessing_function = preprocess_input)

# # é daqui que vai sair a base de teste para gerar o data set de submissão
# test_generator = val_data_generator.flow_from_dataframe(
#     dataframe = df_test,
#     directory = '/kaggle/output/working/preview/',
#     x_col = 'fileName',
#     y_col = 'patientSex',
#     batch_size = 32,
#     #seed = 42,
#     shuffle = True,
#     class_mode = 'other',
#     flip_vertical = True,
#     color_mode = 'rgb',
#     target_size = (img_size, img_size))

# test_gen = flow_from_dataframe(core_idg,
#                                in_df = test_df,
#                                 directory = '/root/.keras/datasets/images',
#                              path_col = 'fileName',
#                             y_col = 'patientSex', 
#                             target_size = IMG_SIZE,
#                              color_mode = 'grayscale',
#                             batch_size = 8) 



# test_data_generator = ImageDataGenerator(preprocessing_function = preprocess_input)

# # é daqui que vai sair a base de teste para gerar o data set de submissão

# val_data_generator = ImageDataGenerator(preprocessing_function = preprocess_input)

# Iterator over the unlabeled test images, used to build the submission.
# * shuffle = False: predictions are later paired with test_gen.filenames by
#   position, so batches must come in DataFrame order.
# * color_mode = 'rgb': must match the 3-channel input of the model trained
#   on the RGB iterators above.
# * `flip_vertical` was dropped: it is not a flow_from_dataframe argument.
# NOTE(review): core_idg also applies random augmentation at prediction time — confirm intended.
test_gen = core_idg.flow_from_dataframe(
    dataframe = test_df,
    directory = '/root/.keras/datasets/images',
    x_col = 'fileName',
    y_col = 'patientSex',
    batch_size = 64,
    shuffle = False,
    class_mode = 'other',
    color_mode = 'rgb',
    target_size = IMG_SIZE)

In [None]:
# NOTE(review): presumably rewinds the iterator to its first batch — confirm against the Keras iterator API.
test_gen.reset()

In [None]:
# First image path in the female training subset. The subset is named
# raw_female_train_df (female_train_df is never defined), and .iloc[0] is used
# because index label 0 is not guaranteed to be in a filtered frame.
raw_female_train_df['path'].iloc[0]

In [None]:
# Three random rows of the female test subset.
female_test_df.sample(n = 3)

In [None]:
# First five rows of the test table (five is head()'s default).
test_df.head()

In [None]:
# Remove the helper 'path' column if it is present. errors='ignore' keeps the
# cell re-runnable and avoids a KeyError when the column was never added.
test_df = test_df.drop(columns=['path'], errors='ignore')

In [None]:
# NOTE(review): presumably rewinds the iterator so prediction starts at the first batch — confirm.
test_gen.reset()

In [None]:
# Predict with the test_gen iterator created in the cells above.
# test_gen.reset()
# Model.predict accepts generators directly; predict_generator is deprecated.
y_pred = bone_age_model.predict(test_gen)
predicted = y_pred.flatten()
# Undo the target normalisation. The model is trained on 'boneage_zscore' =
# (boneage - boneage_mean) / boneage_div, so the inverse must use boneage_div
# and boneage_mean rather than std_bone_age.
predicted_months = boneage_mean + boneage_div*(predicted)
# NOTE(review): pairing by position is only valid if test_gen iterates in file order (shuffle off) — confirm.
filenames=test_gen.filenames
results=pd.DataFrame({"fileName":filenames,
                      "boneage": predicted_months})
# Save the predictions for the Kaggle submission; download the file from the outputs.
results.to_csv("results_10.csv",index=False)

In [None]:
# First five rows of the submission table.
results.head(5)