# Neural network models with Gaussian noise regularization

In [1]:
#libraries

#tensorflow-related
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow import feature_column

#basic
import numpy as np
import pandas as pd

#sklearn-related
from sklearn.model_selection import train_test_split

#custom
import utils

In [2]:
tf.keras.backend.clear_session()  # For easy reset of notebook state.
#tf.keras.backend.set_floatx('float64')

In [3]:
# import data: MRI/EEG/...

dataframe = utils.create_dataset_mri(select_disease = None, select_category = None, SCORE = 'Age', thickness= True, volume=True, subcortical=True, DTI = False)

  if (await self.run_code(code, result,  async_=asy)):


In [4]:
# detect which columns have nan samples and find which samples are those

def find_columns_with_nan_samples(dataframe): 
    
    # dataframe is a pandas DataFrame
    
    columns_nans = {}
    for column in dataframe.columns: 
        nan_col_vector = dataframe[column].isna()
        samples_with_nan = []
        if nan_col_vector.values.any(): 
            for index, value in enumerate(nan_col_vector):
                if value == True: 
                    samples_with_nan.append(index)
            columns_nans[column] =  samples_with_nan
    return columns_nans


In [5]:
# find columns with object as dtype

def find_columns_with_dtype_object(dataframe):
    
    columns_object_type = []
    for column in dataframe.columns:
        if dataframe[column].dtypes == 'object': 
            columns_object_type.append(column)
    return columns_object_type
    

In [6]:
cols_with_nans = find_columns_with_nan_samples(dataframe)
print(cols_with_nans)

obj_cols = find_columns_with_dtype_object(dataframe)
print(obj_cols)

{'DX_01_Cat': [1110], 'DX_01_Sub': [0, 1, 4, 8, 10, 11, 14, 17, 18, 19, 20, 21, 27, 28, 29, 30, 33, 37, 40, 41, 42, 44, 45, 49, 50, 52, 53, 55, 58, 59, 60, 64, 65, 68, 69, 71, 72, 74, 76, 77, 79, 80, 81, 82, 83, 87, 88, 93, 94, 95, 98, 99, 100, 101, 105, 107, 110, 112, 113, 115, 116, 117, 118, 119, 120, 121, 123, 124, 129, 131, 132, 133, 136, 137, 144, 147, 152, 160, 161, 162, 165, 166, 169, 170, 174, 180, 185, 187, 189, 191, 192, 194, 196, 197, 199, 200, 203, 204, 211, 212, 213, 216, 220, 221, 222, 225, 226, 232, 236, 237, 239, 240, 241, 242, 243, 244, 245, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 263, 265, 266, 267, 268, 270, 271, 277, 279, 280, 281, 282, 283, 287, 288, 289, 290, 292, 295, 296, 297, 300, 301, 305, 307, 310, 312, 313, 314, 316, 317, 318, 320, 321, 323, 327, 328, 329, 330, 333, 339, 340, 351, 352, 354, 355, 356, 357, 358, 362, 363, 365, 366, 368, 376, 377, 381, 382, 385, 386, 388, 393, 397, 403, 404, 405, 409, 410, 411, 413, 415, 419, 425, 426, 431, 432, 434, 

Then one sees what columns with NaN and/or 'object' as type one wants to keep. In this case kept 'DX_01_Cat'.

In [7]:
# drop samples that are nan in selected columns 
for sample in cols_with_nans['DX_01_Cat']:
    dataframe = dataframe.drop(sample, axis=0)

In [8]:
# convert object variables into categorical variables (if they don't have too much NaN)
dataframe['DX_01_Cat'] = pd.Categorical(dataframe['DX_01_Cat'])
dataframe['DX_01_Cat'] = dataframe['DX_01_Cat'].cat.codes

In [9]:
# drop other columns with NaNs and/or 'object' as type + 'ID'
id_column = dataframe.pop('ID')
dataframe = dataframe.drop(['DX_01_Sub', 'DX_01'], axis=1)

In [10]:
# print dtype of the columns: useful for feature_columns
dataframe.dtypes

Age                               float64
DX_01_Cat                            int8
lh_G.S_frontomargin_thickness     float64
lh_G.S_occipital_inf_thickness    float64
lh_G.S_paracentral_thickness      float64
                                   ...   
rh_frontalpole_volume               int64
rh_temporalpole_volume              int64
rh_transversetemporal_volume        int64
rh_insula_volume                    int64
GlobalCorticalThickness           float64
Length: 371, dtype: object

## Model 

In [11]:
# SELECT TARGET VARIABLE
target_variable = 'Age'     #header of the variable to use as label (target), i.e. value to be predicted

# HYPERPARAMETERS
Gaussian_regularization = True
std_dev = tf.Variable(1.0)     # std deviation of added Gaussian noise

# PIPELINE PARAMETERS
batch_size_train = 8
batch_size_eval = 8
n_epochs = 2
loss ='mse'          # mean squared error
metrics =['mae']    # mean absolute error
test_set_size = 0.2

In [12]:

dataframe_c = dataframe.copy()
target = dataframe_c.pop(target_variable)

#TRAIN/TEST SPLIT
dataframe_train, dataframe_test, target_train, target_test = train_test_split(dataframe_c, target, test_size=test_set_size, shuffle=True)

# DATASET API 
train_dataset = tf.data.Dataset.from_tensor_slices((dict(dataframe_train), target_train)).batch(batch_size_train)
test_dataset = tf.data.Dataset.from_tensor_slices((dict(dataframe_test), target_test)).batch(batch_size_eval)

#FEATURE LAYER 
#columns are all numeric 
feature_columns = []

for header in dataframe_c.columns:
    feature_columns.append(feature_column.numeric_column(header))
    
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [13]:
#CREATE MODEL
model = tf.keras.Sequential()
model.add(feature_layer)
if Gaussian_regularization == True:
    model.add(layers.GaussianNoise(std_dev))        # Gaussian Noise regularization layer
model.add(layers.Dense(64, activation='tanh'))
model.add(layers.Dense(64, activation='tanh'))
model.add(layers.Dense(1, activation="linear"))


#CONFIGURE MODEL
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss=loss,      
              metrics=metrics)  

In [14]:
#TRAIN MODEL
print('# Fit model on training data')
history = model.fit(train_dataset, epochs=n_epochs)

print ('Losses:', history.history['loss'])

# Fit model on training data


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/2
Epoch 2/2
Losses: [16.234350142000025, 14.101581378795174]


In [15]:
# TEST MODEL
print('\n# Evaluate on test data')
mse, mae = model.evaluate(test_dataset)
print('mse, mae:', mse, mae)


# Evaluate on test data
mse, mae: 14.100794611306027 2.909834
