# Conditional density estimation with noise regularization

In [1]:
import warnings
warnings.filterwarnings('ignore')
from utils import create_dataset_mri, cv_for_cde, create_dataset_eeg
from cde.density_estimator import MixtureDensityNetwork
import numpy as np
import tensorflow as tf
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from tensorflow.python.keras.activations import tanh
from sklearn.impute import SimpleImputer
import pandas as pd

Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
import tensorflow as tf
# torch version
print(tf.__version__)

1.7.0


## Structural MRI

In [3]:
# Create dataset
target = 'Age'
# use all MRI high-level features, no DTI
data = create_dataset_mri(SCORE = target)
# for the moment, remove the diagnosis colums
data.drop(columns=['DX_01_Cat', 'DX_01_Sub', 'DX_01'], inplace=True)

In [4]:
# labels and features
y = data[target]
X = data.drop([target, 'ID'], axis=1)
print(X.shape)
print(y.shape)
y = np.array(y)
X = np.array(X)
y = y.reshape((-1,1))

# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=2)

(1146, 369)
(1146,)


In [5]:
# Set model parameters
ndim_x=X_train.shape[1]
ndim_y=y_train.shape[1]
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X_train.shape[0]
d = X_train.shape[1]+y_train.shape[1]
h = n**(-1/(d+1))
print('h = {}'.format(h))

# Define the model
model = MixtureDensityNetwork('MDN_mri', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)
# Fit
model.fit(X_train, y_train)
# Predict
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

h = 0.9818524371920933
1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: 1300.356
mean log-loss train: 1.4562
Test MSE: 4.840087141773545


In [6]:
# CV results
res = cv_for_cde(data=X_train, labels=y_train.flatten(), name = 'mri', n_splits=5)
np.mean(res)

1000/1000 [100%] ██████████████████████████████ Elapsed: 10s | loss: -469.010
mean log-loss train: -0.6569
MSE: 5.056738633647421
1000/1000 [100%] ██████████████████████████████ Elapsed: 20s | loss: -484.294
mean log-loss train: -0.6783
MSE: 4.961049731611744
1000/1000 [100%] ██████████████████████████████ Elapsed: 10s | loss: -519.910
mean log-loss train: -0.7282
MSE: 6.214937640528559
1000/1000 [100%] ██████████████████████████████ Elapsed: 10s | loss: -495.980
mean log-loss train: -0.6937
MSE: 5.245402047576165
1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: -526.526
mean log-loss train: -0.7364
MSE: 6.50257150902764


5.596139912478305

## Structural MRI + DTI

In [7]:
# Create dataset
target = 'Age'
# use all MRI high-level features, plus DTI
data = create_dataset_mri(SCORE = target, DTI = True)
# for the moment, remove the diagnosis colums
data.drop(columns=['DX_01_Cat', 'DX_01_Sub', 'DX_01'], inplace=True)

In [8]:
# labels and features
y = data[target]
X = data.drop([target, 'ID'], axis=1)
print(X.shape)
print(y.shape)
y = np.array(y)
X = np.array(X)
y = y.reshape((-1,1))
# Impute missing DTI values
imp = SimpleImputer(strategy = 'median')
X = imp.fit_transform(X)
# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=2)

(838, 390)
(838,)


In [9]:
# Set model parameters
ndim_x=X_train.shape[1]
ndim_y=y_train.shape[1]
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X.shape[0]
d = X.shape[1]+y.shape[1]
h = n**(-1/(d+1))
# Define the model
model = MixtureDensityNetwork('MDN_dti', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)
# Fit
model.fit(X_train, y_train)
# Predict
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: 950.702
mean log-loss train: 1.4559
Test MSE: 4.517503224792356


In [10]:
# CV results
res = cv_for_cde(data=X_train, labels=y_train.flatten(), n_splits=5, name = 'dti')
np.mean(res)

1000/1000 [100%] ██████████████████████████████ Elapsed: 11s | loss: -383.187
mean log-loss train: -0.7341
MSE: 4.859856948265746
1000/1000 [100%] ██████████████████████████████ Elapsed: 12s | loss: -387.726
mean log-loss train: -0.7428
MSE: 6.701996293895531
1000/1000 [100%] ██████████████████████████████ Elapsed: 13s | loss: -415.854
mean log-loss train: -0.7967
MSE: 6.577864446742694
1000/1000 [100%] ██████████████████████████████ Elapsed: 14s | loss: -384.318
mean log-loss train: -0.7348
MSE: 6.590415030457063
1000/1000 [100%] ██████████████████████████████ Elapsed: 15s | loss: -370.332
mean log-loss train: -0.7081
MSE: 4.778028214741852


5.901632186820577

## EEG

In [11]:
# Create dataset
target = 'Age'
# eeg cluster features
data = create_dataset_eeg(SCORE = target, clusters = True)
# for the moment, remove the diagnosis colums
data.drop(columns=['DX_01_Cat', 'DX_01_Sub', 'DX_01'], inplace=True)
# Features and labels
y = data[target]
X = data.drop([target, 'id'], axis=1)
y = np.array(y)
X = np.array(X)
print(X.shape)
y = y.reshape((-1,1))
# Impute missing EEG values
imp = SimpleImputer(strategy = 'median')
X = imp.fit_transform(X)
# split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.22, random_state=2)

(1306, 300)


In [12]:
# Set model parameters
ndim_x=X_train.shape[1]
ndim_y=y_train.shape[1]
# We try the "faster decay rate for non-gaussian data" proposed in the paper: h = n^(-1/(d+1))
n = X.shape[0]
d = X.shape[1]+y.shape[1]
h = n**(-1/(d+1))
print('h = {}'.format(h))
# Define the model
model = MixtureDensityNetwork('MDN_eeg', ndim_x, ndim_y, n_centers=10, hidden_sizes=(16, 16), hidden_nonlinearity=tf.nn.tanh,
               n_training_epochs=1000, x_noise_std=h, y_noise_std=h, adaptive_noise_fn=None, entropy_reg_coef=0.0,
               weight_decay=0.0, weight_normalization=True, data_normalization=True, dropout=0.0, l2_reg=0.0, l1_reg=0.0,
               random_seed=42)
# Fit
model.fit(X_train, y_train)
# Predict
y_pred = model.mean_(X_test)
y_pred = y_pred.reshape((-1,1))
y_pred.shape
print('Test MSE: {}'.format(mean_squared_error(y_pred, y_test)))

h = 0.9765226193230893
1000/1000 [100%] ██████████████████████████████ Elapsed: 49s | loss: 1542.786
mean log-loss train: 1.5155
Test MSE: 5.594303000894464


In [13]:
# CV results
res = cv_for_cde(data=X_train, labels=y_train.flatten(), n_splits=5, name = 'eeg')
np.mean(res)

1000/1000 [100%] ██████████████████████████████ Elapsed: 67s | loss: -179.687
mean log-loss train: -0.2207
MSE: 6.189956021311957
1000/1000 [100%] ██████████████████████████████ Elapsed: 58s | loss: -234.595
mean log-loss train: -0.2882
MSE: 7.306372974784494
1000/1000 [100%] ██████████████████████████████ Elapsed: 59s | loss: -239.557
mean log-loss train: -0.2943
MSE: 6.0561494244488205
1000/1000 [100%] ██████████████████████████████ Elapsed: 70s | loss: -199.253
mean log-loss train: -0.2445
MSE: 5.789622660751444
1000/1000 [100%] ██████████████████████████████ Elapsed: 67s | loss: -248.613
mean log-loss train: -0.3050
MSE: 5.778252691272942


6.2240707545139315

# Comments

Not necessary to scale the input data (the model already does normalization).
Note that with DTI we obtain results similar to pure MRI. EEG data gives the worst results.