In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

import pandas as pd
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = [20, 5]

from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

import tensorflow as tf
tf.keras.backend.clear_session()

In [2]:
# Load the measurements (semicolon-separated, decimal comma) and keep only
# rows without any missing value.
dataset = pd.read_csv('measurements.csv', sep=';', decimal=',').dropna()

# Discard the three leading columns; the positional slice below refers to the
# columns that remain after this step.
dataset = dataset.drop(columns=dataset.columns[[0, 1, 2]])

# Target: per row, the smallest of the three airway measurements.
max_tube_diameter = dataset[['AP cricoïde', 'AP fin de trachée', 'T fin de trachée']].min(axis=1)

# Keep the first three remaining columns as predictors and append the target
# as the last column.
dataset = dataset.iloc[:, :3].assign(max_tube_diameter=max_tube_diameter)

# Available tube sizes (OD, presumably outer diameter), in ascending order.
cuffless_OD = np.array([2.9, 3.6, 4.2, 4.9, 5.5, 6.2, 6.8, 7.5, 8.2, 8.8, 9.6])
cuffed_OD = np.array([4.2, 5.5, 6.8, 7.5, 8.2, 8.8, 9.6, 10.2, 10.9, 11.5, 12.1, 12.8, 13.5])

In [3]:
# Reproducible 80/20 split: sample the training rows with a fixed seed, then
# use every remaining row as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=42)
test_dataset = dataset.drop(index=train_dataset.index)

# Summary statistics of the training split, one row per column.
train_dataset.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age en mois,154.0,85.993506,51.391169,1.0,36.0,85.5,131.75,176.0
sexe (0=M/1=F),154.0,0.350649,0.47873,0.0,0.0,0.0,1.0,1.0
poids,154.0,27.76039,15.704075,4.0,15.0,25.0,36.55,90.0
max_tube_diameter,154.0,7.432532,1.997008,2.81,6.1925,7.295,8.7675,12.6


In [4]:
# Separate each split into its predictor columns and the target column.
train_target = train_dataset['max_tube_diameter'].copy()
train_features = train_dataset.drop(columns='max_tube_diameter')

test_target = test_dataset['max_tube_diameter'].copy()
test_features = test_dataset.drop(columns='max_tube_diameter')

In [5]:
# Reference tube sizes derived from the measured diameter.
# For every test row, pick the largest available OD that is strictly smaller
# than the measured diameter; when no size is smaller, fall back to the
# smallest size in the table.
measured = test_dataset['max_tube_diameter'].to_numpy()

for column, od_table in (('OD cuffless ref', cuffless_OD), ('OD cuffed ref', cuffed_OD)):
    # On an ascending table, searchsorted(side='left') returns how many entries
    # are strictly below each value, so "count - 1" is the index of the largest
    # such entry. Clamping at 0 covers values at or below the smallest size.
    size_idx = np.maximum(np.searchsorted(od_table, measured, side='left') - 1, 0)
    test_dataset[column] = od_table[size_idx]

# Linear Regression

In [6]:
# Elastic-net regression using age as the only predictor. The candidate
# l1_ratio values are compared with 5-fold cross-validation.
age_train = train_features[['age en mois']].to_numpy()  # 2-D column, as scikit-learn expects

reg = ElasticNetCV(
    l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
    cv=5,
    random_state=42,
)
reg.fit(age_train, train_target)

ElasticNetCV(cv=5, l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
             random_state=42)

In [7]:
# Diameter predicted by the age-only regression for every test row.
test_dataset['max_tube_diameter LR'] = reg.predict(np.array(test_features['age en mois']).reshape(-1, 1))

# Map each predicted diameter to a tube size: the largest available OD that is
# strictly smaller than the prediction, or the smallest size when none is.
predicted = test_dataset['max_tube_diameter LR'].to_numpy()

for column, od_table in (('OD cuffless LR', cuffless_OD), ('OD cuffed LR', cuffed_OD)):
    # On an ascending table, searchsorted(side='left') returns how many entries
    # are strictly below each value, so "count - 1" is the index of the largest
    # such entry. Clamping at 0 covers values at or below the smallest size.
    size_idx = np.maximum(np.searchsorted(od_table, predicted, side='left') - 1, 0)
    test_dataset[column] = od_table[size_idx]

# Random Forest

In [None]:
# Exhaustive search over 5 * 8 * 7 * 5 = 1400 random-forest configurations,
# each scored with 5-fold cross-validation (7000 fits in total).
hyper_parameters = {
    'n_estimators': [100, 300, 500, 800, 1200],
    'max_depth': [5, 8, 15, 25, 30, 50, 100, 150],
    'min_samples_split': [2, 5, 10, 15, 30, 50, 100],
    'min_samples_leaf': [1, 2, 5, 10, 15],
}

forest = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=forest,
    param_grid=hyper_parameters,
    cv=5,
    verbose=1,
    n_jobs=20,  # number of parallel workers
)

# `fit` returns the fitted search object, so `best_rf` is the GridSearchCV
# instance itself; later cells call `predict` on it.
best_rf = grid_search.fit(train_features, train_target)

Fitting 5 folds for each of 1400 candidates, totalling 7000 fits


[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    1.4s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:    7.3s
[Parallel(n_jobs=20)]: Done 410 tasks      | elapsed:   17.4s
[Parallel(n_jobs=20)]: Done 760 tasks      | elapsed:   31.1s
[Parallel(n_jobs=20)]: Done 1210 tasks      | elapsed:   48.9s
[Parallel(n_jobs=20)]: Done 1760 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done 2410 tasks      | elapsed:  1.6min


In [None]:
# Diameter predicted by the tuned random forest for every test row.
test_dataset['max_tube_diameter RF'] = best_rf.predict(test_features)

# Map each predicted diameter to a tube size: the largest available OD that is
# strictly smaller than the prediction, or the smallest size when none is.
predicted = test_dataset['max_tube_diameter RF'].to_numpy()

for column, od_table in (('OD cuffless RF', cuffless_OD), ('OD cuffed RF', cuffed_OD)):
    # On an ascending table, searchsorted(side='left') returns how many entries
    # are strictly below each value, so "count - 1" is the index of the largest
    # such entry. Clamping at 0 covers values at or below the smallest size.
    size_idx = np.maximum(np.searchsorted(od_table, predicted, side='left') - 1, 0)
    test_dataset[column] = od_table[size_idx]

# Neural Network

In [None]:
# Pin TensorFlow's global seed so weight initialisation and batch shuffling
# are repeatable across runs, matching the random_state=42 used for the other
# models (best effort: some ops may still be non-deterministic).
tf.random.set_seed(42)

# Feature standardisation layer; its statistics are computed from the training
# features only.
normalizer = tf.keras.layers.experimental.preprocessing.Normalization()
normalizer.adapt(np.array(train_features))

# Small regression network: normalisation -> 64 ReLU units -> 1 linear output.
model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])


model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=[tf.keras.metrics.RootMeanSquaredError(),
            tf.keras.metrics.MeanAbsoluteError()])

# NOTE(review): the held-out test split is used as validation data here, so the
# val_* curves in `history` are test-set metrics. Avoid using them for model or
# epoch selection; consider a separate validation split instead.
history = model.fit(x=train_features, y=train_target,
                    validation_data=(test_features, test_target),
                    epochs=300)

In [None]:
# Diameter predicted by the neural network for every test row.
# One batched call on the same feature frame the other models use replaces
# per-row inference that relied on the features being the first three columns
# of `test_dataset`. The (n, 1) float32 output is flattened and cast to float64.
test_dataset['max_tube_diameter NN'] = model.predict(test_features, verbose=0).ravel().astype(float)

# Map each predicted diameter to a tube size: the largest available OD that is
# strictly smaller than the prediction, or the smallest size when none is.
predicted = test_dataset['max_tube_diameter NN'].to_numpy()

for column, od_table in (('OD cuffless NN', cuffless_OD), ('OD cuffed NN', cuffed_OD)):
    # On an ascending table, searchsorted(side='left') returns how many entries
    # are strictly below each value, so "count - 1" is the index of the largest
    # such entry. Clamping at 0 covers values at or below the smallest size.
    size_idx = np.maximum(np.searchsorted(od_table, predicted, side='left') - 1, 0)
    test_dataset[column] = od_table[size_idx]

# Results

In [None]:
# Compare the three models' predicted diameters with the measured values.
# The test rows are not ordered by age, so sort first; otherwise each line
# zigzags back and forth across the x-axis.
by_age = test_dataset.sort_values('age en mois')
age = by_age['age en mois']

plt.plot(age, by_age['max_tube_diameter LR'], 'b', label='LR');
plt.plot(age, by_age['max_tube_diameter RF'], 'r', label='RF');
plt.plot(age, by_age['max_tube_diameter NN'], 'g', label='NN');
# Measured values are individual observations, so draw them as markers.
plt.plot(age, by_age['max_tube_diameter'], 'o', color='orange', label='Measured');
plt.xlabel("Age (months)");
# The axis is shared by all four series, not only the LR prediction.
plt.ylabel("Max Tube Diameter");
plt.legend();

In [None]:
# Share of test rows where each model's tube size is acceptable.
#   "Adapted": predicted size is not larger than the reference size.
#   "Exact":   predicted size equals the reference size.
n_test = len(test_dataset)
sections = (('--- Cuffless predictions ---', 'cuffless'),
            ('--- Cuffed prediction ---', 'cuffed'))
models = ('LR', 'RF', 'NN')

for position, (header, tube) in enumerate(sections):
    if position:
        print()  # blank line between the two sections
    reference = test_dataset['OD {} ref'.format(tube)]

    print(header)
    print()
    for model_name in models:
        predicted = test_dataset['OD {} {}'.format(tube, model_name)]
        print('{} Adapted diameter: {:.2%}'.format(model_name, (predicted <= reference).sum() / n_test))
    print()
    for model_name in models:
        predicted = test_dataset['OD {} {}'.format(tube, model_name)]
        print('{} Exact diameter: {:.2%}'.format(model_name, (predicted == reference).sum() / n_test))

In [None]:
def plot_loss(history, name):
    """Draw training and validation loss per epoch on the current axes.

    Args:
        history: object whose ``history`` dict holds the 'loss' and
            'val_loss' sequences (as returned by ``model.fit``).
        name: text shown in parentheses in the y-axis label.
    """
    axes = plt.gca()
    for key in ('loss', 'val_loss'):
        axes.plot(history.history[key], label=key)
    axes.set_xlabel('Epoch')
    axes.set_ylabel('Loss (' + name + ')')
    axes.legend()
    axes.grid(True)
    
def plot_rmse(history, name):
    """Draw training and validation RMSE per epoch on the current axes.

    Args:
        history: object whose ``history`` dict holds the
            'root_mean_squared_error' and 'val_root_mean_squared_error'
            sequences (as returned by ``model.fit``).
        name: text shown in parentheses in the y-axis label.
    """
    axes = plt.gca()
    curves = (('root_mean_squared_error', 'rmse'),
              ('val_root_mean_squared_error', 'val_rmse'))
    for key, label in curves:
        axes.plot(history.history[key], label=label)
    axes.set_xlabel('Epoch')
    axes.set_ylabel('Metric (' + name + ')')
    axes.legend()
    axes.grid(True)
    
# Lowest validation MAE / RMSE reached at any epoch.
# NOTE(review): this is the best epoch, not necessarily the last one, while
# predictions come from the model as it stands after the final epoch, so
# these figures can be optimistic.
print('MAE_val: {}'.format(min(history.history['val_mean_absolute_error'])))
print('RMSE_val: {}'.format(min(history.history['val_root_mean_squared_error'])))

# The model is compiled with loss='mse', so the loss curve is MSE, not MAE.
plot_loss(history, 'MSE');
plt.show();
plot_rmse(history, 'RMSE');

In [None]:
# Preview the first test rows, including the reference and predicted columns
# added to `test_dataset` by the earlier cells.
test_dataset.head()