In [22]:
import os

import tensorflow as tf

import pandas as pd
import numpy as np

In [5]:
def data_append(X, ages, sexes, image_path, age, sex):
    X.append(image_path)
    ages.append(age)
    sexes.append(sex)
    return X, ages, sexes

In [6]:
def load_dataset(full_path, truth):
    X_test = []
    age_test = []
    sex_test = []

    image_names = os.listdir(full_path)
    for i, image_name in enumerate(image_names):
        if not image_name.endswith('.png'):
            continue

        id = int(image_name[:-4])

        if not (id in truth['Case ID'].to_list()):
            continue
        idx = (truth['Case ID'] == id)
        age_of_this = int(truth['Ground truth bone age (months)'][idx])
        sex_of_this = (truth['Sex'][idx]).values[0]
        if sex_of_this=='M':
            sex_of_this = True
        if  sex_of_this=='F':
            sex_of_this = False
        X_test, age_test, sex_test = data_append(X_test, age_test, sex_test, os.path.join(full_path, image_name), age_of_this, sex_of_this)

    return X_test, age_test, sex_test

In [14]:
unprocessed_path = os.path.join(os.getcwd(), 'data/test/cut')
processed_path = os.path.join(os.getcwd(), 'data/test/reshaped')
test_truth = pd.read_excel('./data/test/test.xlsx')
X_unprocessed, age_unprocessed, sex_unprocessed = load_dataset(unprocessed_path, test_truth)
X_processed, age_processed, sex_processed = load_dataset(processed_path, test_truth)

test_features = pd.read_csv('./data/test/maxes.csv')
test_features['image'] = test_features['image'].str[:-4].astype(int)

unprocessed_df = pd.DataFrame({'age':age_unprocessed, 'sex':sex_unprocessed})
processed_df = pd.merge(test_features, test_truth, left_on='image', right_on='Case ID')
processed_df = processed_df.drop(['Case ID', 'image', 'Ground truth bone age (months)'],
                                                   axis=1).rename({'Sex': 'sex'}, axis=1)
processed_df['sex'] = processed_df['sex'].replace({'M': 1, 'F': 0})

In [16]:
def process_image(path_df, label):
    path, df = path_df
    # Get the image
    img = tf.io.read_file(path)
    # Decode the PNG
    img = tf.image.decode_png(img)
    # Cast image to float32
    img = tf.cast(img, tf.float32)
    # Normalize image
    img = img / 255.0

    return (img, df), label

In [17]:
def create_dataset_images(img_paths, second_branch_df, ages, batch_size):
    second_branch_df = second_branch_df.astype('float32')
    dataset = tf.data.Dataset.from_tensor_slices(((img_paths, second_branch_df), ages)).map(process_image)

    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset.prefetch(buffer_size=1)

    return dataset

In [37]:
batch_size = 32
unprocessed_dataset = create_dataset_images(img_paths=X_unprocessed,
                                     second_branch_df=unprocessed_df['sex'],
                                     ages=unprocessed_df['age'],
                                     batch_size=batch_size)
processed_dataset = create_dataset_images(img_paths=X_processed,
                                          second_branch_df=processed_df['sex'],
                                          ages=processed_df['age'],
                                          batch_size=batch_size)

KeyError: 'age'

In [19]:
unprocessed_model = tf.keras.models.load_model('./models/unprocessed_50_epochs')

In [25]:
unprocessed_results = unprocessed_model.predict(unprocessed_dataset)
unprocessed_results_df = pd.DataFrame(unprocessed_results, columns=['Model ouput'])
unprocessed_results_df['Output Age'] = unprocessed_results_df['Model ouput'] * 200
unprocessed_results_df['Real Age'] = unprocessed_df['age'] / 200
unprocessed_results_df['Error'] = unprocessed_results_df['Model ouput'] - unprocessed_results_df['Real Age']
unprocessed_results_df['Error Months'] = unprocessed_results_df['Error'] * 200
unprocessed_results_df['Absolute Error'] = unprocessed_results_df['Error'].abs()
unprocessed_results_df['Squared Error'] = unprocessed_results_df['Error'] ** 2
unprocessed_results_df



Unnamed: 0,Model ouput,Output Age,Real Age,Error,Error Months,Absolute Error,Squared Error
0,0.722045,144.408936,0.840,-0.117955,-23.591059,0.117955,0.013913
1,0.830717,166.143387,0.845,-0.014283,-2.856606,0.014283,0.000204
2,0.242140,48.427952,0.365,-0.122860,-24.572049,0.122860,0.015095
3,0.589275,117.854942,0.760,-0.170725,-34.145059,0.170725,0.029147
4,0.610914,122.182755,0.675,-0.064086,-12.817249,0.064086,0.004107
...,...,...,...,...,...,...,...
195,0.569395,113.878944,0.665,-0.095605,-19.121058,0.095605,0.009140
196,0.727276,145.455170,0.645,0.082276,16.455170,0.082276,0.006769
197,0.779735,155.947021,0.835,-0.055265,-11.052982,0.055265,0.003054
198,0.765628,153.125534,0.675,0.090628,18.125536,0.090628,0.008213


In [35]:
unprocessed_rmse = 200 * np.sqrt(np.mean((unprocessed_results_df['Squared Error'])))
unprocessed_mae = np.mean(unprocessed_results_df['Absolute Error']) * 200
print(f'Unprocessed Model:\n'
      f'\tRMSE: {unprocessed_rmse}\n'
      f'\tMAE:  {unprocessed_mae}')

Unprocessed Model:
	RMSE: 25.201630342190374
	MAE:  19.776293731927872


In [36]:
base_model = tf.keras.models.load_model('./models/base_50_epochs')

In [None]:
base_results = base_model.predict(processed_dataset)
base_results_df = pd.DataFrame(base_results, columns=['Model ouput'])
base_results_df['Output Age'] = base_results_df['Model ouput'] * 200
base_results_df['Real Age'] = processed_df['age'] / 200
base_results_df['Error'] = base_results_df['Model ouput'] - base_results_df['Real Age']
base_results_df['Error Months'] = base_results_df['Error'] * 200
base_results_df['Absolute Error'] = base_results_df['Error'].abs()
base_results_df['Squared Error'] = base_results_df['Error'] ** 2
base_results_df