In [3]:
import os

import tensorflow as tf

import pandas as pd
import numpy as np

In [4]:
def data_append(X, ages, sexes, image_path, age, sex):
    """Append one sample to the three parallel lists and hand them back.

    The lists are extended in place; the same list objects are returned so the
    caller can rebind them in a single assignment.
    """
    for bucket, item in ((X, image_path), (ages, age), (sexes, sex)):
        bucket.append(item)
    return X, ages, sexes

In [5]:
def load_dataset(full_path, truth):
    """Collect image paths, bone ages and sex flags for the labelled PNGs in a folder.

    Args:
        full_path: Directory scanned (non-recursively) for ``<case id>.png`` files.
        truth: DataFrame with the columns ``'Case ID'``,
            ``'Ground truth bone age (months)'`` and ``'Sex'``.

    Returns:
        Tuple ``(X_test, age_test, sex_test)`` of equally long lists: image
        paths, ages as ``int``, and sex encoded as ``True`` for ``'M'`` and
        ``False`` for ``'F'`` (any other value is passed through unchanged).
        Entries are ordered by file name so repeated runs agree.

    Files that are not ``.png``, whose stem is not an integer, or whose case id
    is absent from ``truth`` are skipped.
    """
    # Build the lookups once instead of rescanning the truth table for every
    # file. If a case id appears more than once, its first row is used.
    labelled = truth.drop_duplicates(subset='Case ID').set_index('Case ID')
    age_by_id = labelled['Ground truth bone age (months)'].to_dict()
    sex_by_id = labelled['Sex'].to_dict()
    sex_codes = {'M': True, 'F': False}

    X_test = []
    age_test = []
    sex_test = []

    # os.listdir returns names in arbitrary order; sort for reproducibility.
    for image_name in sorted(os.listdir(full_path)):
        if not image_name.endswith('.png'):
            continue

        try:
            case_id = int(image_name[:-4])
        except ValueError:
            # Not a ``<case id>.png`` file, so it cannot match a truth row.
            continue

        if case_id not in age_by_id:
            continue

        sex_of_this = sex_by_id[case_id]
        X_test.append(os.path.join(full_path, image_name))
        age_test.append(int(age_by_id[case_id]))
        sex_test.append(sex_codes.get(sex_of_this, sex_of_this))

    return X_test, age_test, sex_test

In [6]:
# Image folders for the two test variants, resolved against the working directory.
unprocessed_path = os.path.join(os.getcwd(), 'data/test/cut')
processed_path = os.path.join(os.getcwd(), 'data/test/reshaped')

# Ground-truth sheet shared by both variants.
test_truth = pd.read_excel('./data/test/test.xlsx')
X_unprocessed, age_unprocessed, sex_unprocessed = load_dataset(unprocessed_path, test_truth)
X_processed, age_processed, sex_processed = load_dataset(processed_path, test_truth)

# Per-image features; the 'image' column holds file names, so strip the
# 4-character extension and keep the integer case id for the join below.
test_features = pd.read_csv('./data/test/maxes.csv').assign(
    image=lambda df: df['image'].str[:-4].astype(int)
)

unprocessed_df = pd.DataFrame({'age': age_unprocessed, 'sex': sex_unprocessed})

# Join features with the truth rows, then normalise column names and encode sex as 1/0.
processed_df = (
    test_features
    .merge(test_truth, left_on='image', right_on='Case ID')
    .drop(columns=['image'])
    .rename(columns={'Sex': 'sex', 'Ground truth bone age (months)': 'age'})
    .assign(sex=lambda df: df['sex'].replace({'M': 1, 'F': 0}))
)

In [7]:
def process_image(path_df, label):
    """Turn an ``((image path, branch features), label)`` element into model input.

    The file at the path is read, decoded as PNG, cast to float32 and divided
    by 255.0. The branch features and the label are passed through untouched.
    """
    image_path, branch_features = path_df
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.cast(tf.image.decode_png(raw_bytes), tf.float32) / 255.0
    return (pixels, branch_features), label

In [8]:
def create_dataset_images(img_paths, second_branch_df, ages, batch_size):
    """Build a batched ``tf.data`` pipeline of ``((image, features), age)`` elements.

    ``second_branch_df`` is cast to float32, sliced row-wise alongside
    ``img_paths`` and ``ages``, and every element is passed through
    ``process_image`` before batching and prefetching one batch ahead.
    """
    branch_inputs = second_branch_df.astype('float32')
    slices = tf.data.Dataset.from_tensor_slices(((img_paths, branch_inputs), ages))
    return (
        slices
        .map(process_image)
        .batch(batch_size=batch_size)
        .prefetch(buffer_size=1)
    )

In [23]:
batch_size = 32

# X_processed is ordered by directory listing, while processed_df is ordered by
# the features/truth merge. Pairing them positionally can attach an image to
# another case's sex, features and age, so re-order the paths to follow
# processed_df['Case ID']. The case id is parsed from the file name the same way
# load_dataset does; a KeyError here means a processed_df row has no image.
processed_path_by_id = {int(os.path.basename(path)[:-4]): path for path in X_processed}
X_processed_aligned = [processed_path_by_id[case_id] for case_id in processed_df['Case ID']]

# Unprocessed images + sex (paths and labels come from the same load_dataset
# call, so they are already aligned).
unprocessed_dataset = create_dataset_images(img_paths=X_unprocessed,
                                     second_branch_df=unprocessed_df['sex'],
                                     ages=unprocessed_df['age'],
                                     batch_size=batch_size)
# Processed images + sex.
processed_dataset = create_dataset_images(img_paths=X_processed_aligned,
                                          second_branch_df=processed_df['sex'],
                                          ages=processed_df['age'],
                                          batch_size=batch_size)
# Processed images + every remaining column (sex and the extracted features).
features_dataset = create_dataset_images(img_paths=X_processed_aligned,
                                          second_branch_df=processed_df.drop(['age', 'Case ID'], axis=1),
                                          ages=processed_df['age'],
                                          batch_size=batch_size)

In [10]:
# Restore the saved Keras model from ./models/unprocessed_50_epochs; it is run
# on unprocessed_dataset in the next cell.
unprocessed_model = tf.keras.models.load_model('./models/unprocessed_50_epochs')

In [11]:
# Predict on the unprocessed test set and tabulate per-sample errors.
# The factor 200 converts between the model's output scale and months
# (presumably the normalisation used during training -- confirm).
unprocessed_results = unprocessed_model.predict(unprocessed_dataset)
unprocessed_results_df = pd.DataFrame(unprocessed_results, columns=['Model ouput']).assign(**{
    'Output Age': lambda df: df['Model ouput'] * 200,
    'Real Age': unprocessed_df['age'] / 200,
    'Error': lambda df: df['Model ouput'] - df['Real Age'],
    'Error Months': lambda df: df['Error'] * 200,
    'Absolute Error': lambda df: df['Error'].abs(),
    'Squared Error': lambda df: df['Error'] ** 2,
})
unprocessed_results_df



Unnamed: 0,Model ouput,Output Age,Real Age,Error,Error Months,Absolute Error,Squared Error
0,0.722045,144.408936,0.840,-0.117955,-23.591059,0.117955,0.013913
1,0.830717,166.143387,0.845,-0.014283,-2.856606,0.014283,0.000204
2,0.242140,48.427952,0.365,-0.122860,-24.572049,0.122860,0.015095
3,0.589275,117.854942,0.760,-0.170725,-34.145059,0.170725,0.029147
4,0.610914,122.182755,0.675,-0.064086,-12.817249,0.064086,0.004107
...,...,...,...,...,...,...,...
195,0.569395,113.878944,0.665,-0.095605,-19.121058,0.095605,0.009140
196,0.727276,145.455170,0.645,0.082276,16.455170,0.082276,0.006769
197,0.779735,155.947021,0.835,-0.055265,-11.052982,0.055265,0.003054
198,0.765628,153.125534,0.675,0.090628,18.125536,0.090628,0.008213


In [12]:
# Aggregate the per-sample errors into RMSE and MAE, rescaled by 200 to months.
unprocessed_rmse = 200 * np.sqrt(np.mean(unprocessed_results_df['Squared Error']))
unprocessed_mae = np.mean(unprocessed_results_df['Absolute Error']) * 200
print('Unprocessed Model:',
      f'\tRMSE: {unprocessed_rmse}',
      f'\tMAE:  {unprocessed_mae}',
      sep='\n')

Unprocessed Model:
	RMSE: 25.201613092162557
	MAE:  19.776304013729096


In [13]:
# Restore the saved Keras model from ./models/base_fixed_50_epochs; it is run
# on processed_dataset in the next cell.
processed_model = tf.keras.models.load_model('./models/base_fixed_50_epochs')

In [14]:
# Predict on the processed test set and tabulate per-sample errors next to the
# case id. The factor 200 converts between the model's output scale and months
# (presumably the normalisation used during training -- confirm).
processed_results = processed_model.predict(processed_dataset)
processed_results_df = pd.DataFrame(processed_results, columns=['Model ouput']).assign(**{
    'ID': processed_df['Case ID'],
    'Output Age': lambda df: df['Model ouput'] * 200,
    'Real Age': processed_df['age'] / 200,
    'Error': lambda df: df['Model ouput'] - df['Real Age'],
    'Error Months': lambda df: df['Error'] * 200,
    'Absolute Error': lambda df: df['Error'].abs(),
    'Squared Error': lambda df: df['Error'] ** 2,
})
processed_results_df



Unnamed: 0,Model ouput,ID,Output Age,Real Age,Error,Error Months,Absolute Error,Squared Error
0,0.823742,4360,164.748459,0.844671,-0.020929,-4.185795,0.020929,0.000438
1,0.838921,4361,167.784149,0.848263,-0.009343,-1.868535,0.009343,0.000087
2,0.309167,4362,61.833382,0.366281,-0.057114,-11.422731,0.057114,0.003262
3,0.776587,4363,155.317337,0.764313,0.012273,2.454673,0.012273,0.000151
4,0.703953,4364,140.790588,0.677285,0.026668,5.333628,0.026668,0.000711
...,...,...,...,...,...,...,...,...
195,0.697188,4555,139.437698,0.667434,0.029755,5.950942,0.029755,0.000885
196,0.690979,4556,138.195724,0.646272,0.044706,8.941224,0.044706,0.001999
197,0.806363,4557,161.272568,0.838425,-0.032062,-6.412460,0.032062,0.001028
198,0.726242,4558,145.248413,0.679902,0.046341,9.268101,0.046341,0.002147


In [15]:
# Aggregate the processed model's per-sample errors into RMSE and MAE,
# rescaled by 200 to months.
processed_rmse = 200 * np.sqrt(np.mean((processed_results_df['Squared Error'])))
processed_mae = np.mean(processed_results_df['Absolute Error']) * 200
# The heading names the model actually being reported (it was copy-pasted as
# 'Unprocessed Model:' from the first metrics cell).
print(f'Processed Model:\n'
      f'\tRMSE: {processed_rmse}\n'
      f'\tMAE:  {processed_mae}')

Unprocessed Model:
	RMSE: 12.373001428541864
	MAE:  9.222783359710217


In [16]:
# Restore the saved Keras model from ./models/full_fixed_50_epochs; it is run
# on features_dataset in the next cell.
features_model = tf.keras.models.load_model('./models/full_fixed_50_epochs')

In [25]:
# Predict with the features model and tabulate per-sample errors next to the
# case id. The factor 200 converts between the model's output scale and months
# (presumably the normalisation used during training -- confirm).
features_results = features_model.predict(features_dataset)
features_results_df = pd.DataFrame(features_results, columns=['Model ouput']).assign(**{
    'ID': processed_df['Case ID'],
    'Output Age': lambda df: df['Model ouput'] * 200,
    'Real Age': processed_df['age'] / 200,
    'Error': lambda df: df['Model ouput'] - df['Real Age'],
    'Error Months': lambda df: df['Error'] * 200,
    'Absolute Error': lambda df: df['Error'].abs(),
    'Squared Error': lambda df: df['Error'] ** 2,
})
features_results_df



Unnamed: 0,Model ouput,ID,Output Age,Real Age,Error,Error Months,Absolute Error,Squared Error
0,0.785482,4360,157.096390,0.844671,-0.059189,-11.837863,0.059189,0.003503
1,0.673844,4361,134.768860,0.848263,-0.174419,-34.883810,0.174419,0.030422
2,0.267024,4362,53.404701,0.366281,-0.099257,-19.851412,0.099257,0.009852
3,0.699714,4363,139.942886,0.764313,-0.064599,-12.919785,0.064599,0.004173
4,0.705308,4364,141.061584,0.677285,0.028023,5.604638,0.028023,0.000785
...,...,...,...,...,...,...,...,...
195,0.703443,4555,140.688690,0.667434,0.036010,7.201936,0.036010,0.001297
196,0.639001,4556,127.800262,0.646272,-0.007271,-1.454231,0.007271,0.000053
197,0.754251,4557,150.850266,0.838425,-0.084174,-16.834761,0.084174,0.007085
198,0.728614,4558,145.722870,0.679902,0.048713,9.742554,0.048713,0.002373


In [26]:
# Aggregate the features model's per-sample errors into RMSE and MAE,
# rescaled by 200 to months.
features_rmse = 200 * np.sqrt(np.mean((features_results_df['Squared Error'])))
features_mae = np.mean(features_results_df['Absolute Error']) * 200
# The heading names the model actually being reported (it was copy-pasted as
# 'Unprocessed Model:' from the first metrics cell).
print(f'Features Model:\n'
      f'\tRMSE: {features_rmse}\n'
      f'\tMAE:  {features_mae}')

Unprocessed Model:
	RMSE: 19.19494305743658
	MAE:  14.868095504827403


### Result by age/gender