# Conditional density estimation with noise regularization

IMPORTANT NOTE:
The cde library will work only for tensorflow versions between 1.4 and 1.7.
Here I am using tensorflow_gpu in version 1.7.
Initially, we will predict the CDE of age given the MRI high-level features (DTI excluded for the moment).
As a density estimation model for the moment I choose MDN.

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from utils import create_dataset_mri
from cde.density_estimator import MixtureDensityNetwork
import numpy as np
import tensorflow as tf
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error

Instructions for updating:
Use the retry module or similar alternatives.


In [3]:
import tensorflow as tf
# torch version
print(tf.__version__)

1.7.0


In [4]:
# Create dataset
target = 'Age'
# use all MRI high-level features, no DTI
data = create_dataset_mri(SCORE = target)
# for the moment, remove the diagnosis colums
try:
    data.drop(columns=['DX_01_Cat'], inplace=True)
except:
    pass
try:
    data.drop(columns=['DX_01_Sub'], inplace=True)
except:
    pass
try:
    data.drop(columns=['DX_01'], inplace=True)
except:
    pass

In [5]:
# labels and features
y = data[target]
X = data.drop([target, 'ID'], axis=1)

In [6]:
print(X.shape)
print(y.shape)
y = np.array(y)
X = np.array(X)
y = y.reshape((1146,1))

(1146, 369)
(1146,)


In [7]:
# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=42)

In [8]:
# Set model parameters
ndim_x=369
ndim_y=1
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X.shape[0]
d = X.shape[1]+y.shape[1]
h = n**(-1/(d+1))
print(h)

0.9811924987692464


In [9]:
model = MixtureDensityNetwork('MDN', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)

In [10]:
model.fit(X_train, y_train)

1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: 1313.019
mean log-loss train: 1.4703


In [16]:
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape

(253, 1)

In [19]:
y_pred

array([[ 9.86504173],
       [ 7.66265774],
       [ 7.37785149],
       [12.88258266],
       [10.84910107],
       [ 6.29711723],
       [ 6.00925112],
       [ 7.65075493],
       [ 8.8582077 ],
       [11.84934425],
       [15.25678921],
       [ 7.12550068],
       [11.75008297],
       [12.01037979],
       [ 9.29751682],
       [10.82301521],
       [ 7.17250156],
       [12.62624741],
       [ 7.9437933 ],
       [ 7.89524841],
       [ 8.08544064],
       [ 8.68732262],
       [ 9.20664024],
       [12.39980888],
       [ 8.14702988],
       [17.71029472],
       [10.34744263],
       [16.85252953],
       [ 7.22358322],
       [11.3884058 ],
       [ 6.19844437],
       [16.12165451],
       [15.25907135],
       [13.61967373],
       [10.03926849],
       [19.51402664],
       [12.89876842],
       [ 7.49852324],
       [17.07832336],
       [ 7.8304534 ],
       [13.6093483 ],
       [ 9.09165859],
       [ 6.66081667],
       [ 7.53991699],
       [17.0955658 ],
       [11

In [18]:
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

Test MSE: 5.800377586364281
