# Conditional density estimation with noise regularization

In [1]:
import warnings
warnings.filterwarnings('ignore')
from utils import create_dataset_mri, cv_for_cde, create_dataset_eeg
from cde.density_estimator import MixtureDensityNetwork
import numpy as np
import tensorflow as tf
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from tensorflow.python.keras.activations import tanh
from sklearn.impute import SimpleImputer
import pandas as pd

Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
import tensorflow as tf
# torch version
print(tf.__version__)

1.7.0


## Structural MRI

In [3]:
# Create dataset
target = 'Age'
# use all MRI high-level features, no DTI
data = create_dataset_mri(SCORE = target)
# for the moment, remove the diagnosis colums
data.drop(columns=['DX_01_Cat', 'DX_01_Sub', 'DX_01'], inplace=True)

In [4]:
# labels and features
y = data[target]
X = data.drop([target, 'ID'], axis=1)
print(X.shape)
print(y.shape)
y = np.array(y)
X = np.array(X)
y = y.reshape((-1,1))

# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=2)

(1146, 369)
(1146,)


In [5]:
# Set model parameters
ndim_x=X_train.shape[1]
ndim_y=y_train.shape[1]
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X_train.shape[0]
d = X_train.shape[1]+y_train.shape[1]
h = n**(-1/(d+1))
print('h = {}'.format(h))

# Define the model
model = MixtureDensityNetwork('MDN_mri', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)
# Fit
model.fit(X_train, y_train)
# Predict
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

h = 0.9818524371920933
1000/1000 [100%] ██████████████████████████████ Elapsed: 10s | loss: 1300.356
mean log-loss train: 1.4562
Test MSE: 4.840087141773545


In [8]:
# CV results
res = cv_for_cde(X_train, y_train.flatten(), 'mri', h, n_splits=5)
np.mean(res)

1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: 1065.688
mean log-loss train: 1.4926
MSE: 4.645851777119789
1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: 1024.148
mean log-loss train: 1.4344
MSE: 4.778558200695333
1000/1000 [100%] ██████████████████████████████ Elapsed: 12s | loss: 1045.872
mean log-loss train: 1.4648
MSE: 5.456636122440835
1000/1000 [100%] ██████████████████████████████ Elapsed: 12s | loss: 1014.773
mean log-loss train: 1.4193
MSE: 6.753902714941601
1000/1000 [100%] ██████████████████████████████ Elapsed: 12s | loss: 1052.664
mean log-loss train: 1.4723
MSE: 4.895140938532837


5.30601795074608

## Structural MRI + DTI

In [9]:
# Create dataset
target = 'Age'
# use all MRI high-level features, plus DTI
data = create_dataset_mri(SCORE = target, DTI = True)
# for the moment, remove the diagnosis colums
data.drop(columns=['DX_01_Cat', 'DX_01_Sub', 'DX_01'], inplace=True)

In [10]:
# labels and features
y = data[target]
X = data.drop([target, 'ID'], axis=1)
print(X.shape)
print(y.shape)
y = np.array(y)
X = np.array(X)
y = y.reshape((-1,1))
# Impute missing DTI values
imp = SimpleImputer(strategy = 'median')
X = imp.fit_transform(X)
# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=2)

(838, 390)
(838,)


In [11]:
# Set model parameters
ndim_x=X_train.shape[1]
ndim_y=y_train.shape[1]
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X.shape[0]
d = X.shape[1]+y.shape[1]
h = n**(-1/(d+1))
# Define the model
model = MixtureDensityNetwork('MDN_dti', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)
# Fit
model.fit(X_train, y_train)
# Predict
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

1000/1000 [100%] ██████████████████████████████ Elapsed: 12s | loss: 950.702
mean log-loss train: 1.4559
Test MSE: 4.517503224792356


In [12]:
# CV results
res = cv_for_cde(X_train, y_train.flatten(), 'dti', h, n_splits=5)
np.mean(res)

1000/1000 [100%] ██████████████████████████████ Elapsed: 13s | loss: 788.403
mean log-loss train: 1.5104
MSE: 5.876351321458095
1000/1000 [100%] ██████████████████████████████ Elapsed: 14s | loss: 730.318
mean log-loss train: 1.3991
MSE: 5.383017929219776
1000/1000 [100%] ██████████████████████████████ Elapsed: 14s | loss: 769.633
mean log-loss train: 1.4744
MSE: 4.347566019282965
1000/1000 [100%] ██████████████████████████████ Elapsed: 14s | loss: 757.914
mean log-loss train: 1.4492
MSE: 7.485911999841195
1000/1000 [100%] ██████████████████████████████ Elapsed: 16s | loss: 748.438
mean log-loss train: 1.4310
MSE: 4.518440574361241


5.5222575688326545

## EEG

In [13]:
# Create dataset
target = 'Age'
# eeg cluster features
data = create_dataset_eeg(SCORE = target, clusters = True)
# for the moment, remove the diagnosis colums
data.drop(columns=['DX_01_Cat', 'DX_01_Sub', 'DX_01'], inplace=True)
# Features and labels
y = data[target]
X = data.drop([target, 'id'], axis=1)
y = np.array(y)
X = np.array(X)
print(X.shape)
y = y.reshape((-1,1))
# Impute missing EEG values
imp = SimpleImputer(strategy = 'median')
X = imp.fit_transform(X)
# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=2)

(1306, 300)


In [14]:
# Set model parameters
ndim_x=X_train.shape[1]
ndim_y=y_train.shape[1]
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X.shape[0]
d = X.shape[1]+y.shape[1]
h = n**(-1/(d+1))
print('h = {}'.format(h))
# Define the model
model = MixtureDensityNetwork('MDN_eeg', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)
# Fit
model.fit(X_train, y_train)
# Predict
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

h = 0.9765226193230893
1000/1000 [100%] ██████████████████████████████ Elapsed: 17s | loss: 1542.786
mean log-loss train: 1.5155
Test MSE: 5.594303000894464


In [15]:
# CV results
res = cv_for_cde(X_train, y_train.flatten(), 'eeg', h, n_splits=5)
np.mean(res)

1000/1000 [100%] ██████████████████████████████ Elapsed: 17s | loss: 1261.812
mean log-loss train: 1.5501
MSE: 5.361740153461321
1000/1000 [100%] ██████████████████████████████ Elapsed: 18s | loss: 1251.756
mean log-loss train: 1.5378
MSE: 6.201627949652423
1000/1000 [100%] ██████████████████████████████ Elapsed: 19s | loss: 1206.154
mean log-loss train: 1.4818
MSE: 6.097960528900607
1000/1000 [100%] ██████████████████████████████ Elapsed: 20s | loss: 1237.301
mean log-loss train: 1.5182
MSE: 6.487517571261879
1000/1000 [100%] ██████████████████████████████ Elapsed: 22s | loss: 1266.509
mean log-loss train: 1.5540
MSE: 5.262103052869523


5.8821898512291515

# Comments

Not necessary to scale the input data (the model already does normalization).
Note that with DTI we obtain results similar to pure MRI. EEG data gives the worst results.