In [1]:
%config IPCompleter.greedy=True
from IPython.display import IFrame

import os
import time

import pandas as pd
import numpy as np
import math

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

from sklearn import preprocessing as prep
import sklearn.model_selection as model_selection
from sklearn import cluster
from sklearn import mixture

import tensorflow as tf
from tensorflow import keras
import keras.backend as K

import h5py

Using TensorFlow backend.


In [2]:
# strategy = tf.distribute.MirroredStrategy()

In [3]:
TEST_IDS = [map_id.split('.')[0] for map_id in sorted(os.listdir('00_Data/fMRI_test'))]
TRAIN_IDS = [map_id.split('.')[0] for map_id in sorted(os.listdir('00_Data/fMRI_train'))]
ALL_IDS = TRAIN_IDS + TEST_IDS
REVEAL_IDS_S2 = pd.read_csv('00_Data/reveal_ID_site2.csv', dtype=str).values
NOREVEAL_IDS = [i for i in ALL_IDS if i not in REVEAL_IDS_S2]

In [4]:
print(len(ALL_IDS), len(REVEAL_IDS_S2), len(NOREVEAL_IDS))

11754 510 11244


In [5]:
data = pd.read_csv('00_Data/train_scores_full.csv')
data

Unnamed: 0,Id,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2
0,10001,57.436077,30.571975,62.553736,53.325130,51.427998
1,10002,59.580851,50.969456,67.470628,60.651856,58.311361
2,10004,71.413018,53.152498,58.012103,52.418389,62.536641
3,10007,38.617381,49.197021,65.674285,40.151376,34.096421
4,10008,35.326582,15.769168,65.782269,44.643805,50.448485
...,...,...,...,...,...,...
5872,21654,53.103634,50.951656,62.168022,49.389400,53.020847
5873,21665,38.246437,48.018227,59.522285,45.697098,53.208160
5874,21674,69.414169,58.593918,60.298779,49.865669,47.863167
5875,21693,62.009209,54.272484,60.474388,52.325031,52.989803


In [6]:
def get_inputs(idx):
    #MRI inputs
    patient_SM = h5py.File('00_Data/fMRI_all/{0}.mat'.format(idx), mode='r')
    patient_SM = np.array(patient_SM.get('SM_feature'))
#     print(patient_SM.shape)
    k = 1
    ki_padding = 3
    
    arr_regions = []
    for i in range(patient_SM.shape[0]):
        sample_map = patient_SM[i,:,:,:]
        if k > 1:
            map_shape = sample_map.shape
            shape_pad = ((map_shape[0]//k + 1)*k - map_shape[0],
                         (map_shape[1]//k + 1)*k - map_shape[1],
                         (map_shape[2]//k + 1)*k - map_shape[2])

            npad = (((0 if shape_pad[0]%2==0 else shape_pad[0]//2), (shape_pad[0]//2 if shape_pad[0]%2==0 else shape_pad[0]//2+1)),    
                    ((0 if shape_pad[1]%2==0 else shape_pad[0]//2), (shape_pad[1]//2 if shape_pad[1]%2==0 else shape_pad[1]//2+1)),    
                    ((0 if shape_pad[2]%2==0 else shape_pad[0]//2), (shape_pad[2]//2 if shape_pad[2]%2==0 else shape_pad[2]//2+1)))

            sample_map_padded = np.pad(sample_map, pad_width=npad, mode='constant', constant_values=0)

            sx = sample_map_padded.shape[0] / k
            sy = sample_map_padded.shape[1] / k
            sz = sample_map_padded.shape[2] / k
            for kz in range(k):
                for ky in range(k):
                    for kx in range(k):
                        ki_region = sample_map_padded[int(kx*sx): int(kx*sx + sx - 1), 
                                                     int(ky*sy): int(ky*sy + sy - 1), 
                                                     int(kz*sz): int(kz*sz + sz - 1)]
                        #padding i-th region by 3 pixels
                        ki_region_padded = np.pad(ki_region, pad_width=ki_padding, mode='constant', constant_values=0)
                        arr_regions.append(ki_region_padded)
        else:
            map_shape = sample_map.shape
            shape_pad = ((map_shape[0]//2 + 1)*2 - map_shape[0],
                         (map_shape[1]//2 + 1)*2 - map_shape[1],
                         (map_shape[2]//2 + 1)*2 - map_shape[2])

            npad = (((0 if shape_pad[0]%2==0 else shape_pad[0]//2+1), (0 if shape_pad[0]%2==0 else shape_pad[0]//2+1)),    
                    ((0 if shape_pad[1]%2==0 else shape_pad[0]//2+1), (0 if shape_pad[1]%2==0 else shape_pad[1]//2+1)),    
                    ((0 if shape_pad[2]%2==0 else shape_pad[0]//2+1), (0 if shape_pad[2]%2==0 else shape_pad[2]//2+1)))

            sample_map_padded = np.pad(sample_map, pad_width=npad, mode='constant', constant_values=0)
            
#             sample_map_padded = np.pad(sample_map, pad_width=ki_padding, mode='constant', constant_values=0)
            arr_regions.append(sample_map_padded)
            
    X_mri = np.stack(arr_regions, axis=3)
#     print(X_mri.shape)
    return X_mri

In [7]:
# get_inputs('10002')[0].shape

In [8]:
def new_py_function(func, inp, Tout, name=None):
    
    def wrapped_func(*flat_inp):
        reconstructed_inp = tf.nest.pack_sequence_as(inp, flat_inp,
                                                     expand_composites=True)
        out = func(*reconstructed_inp)
        return tf.nest.flatten(out, expand_composites=True)
    
    flat_Tout = tf.nest.flatten(Tout, expand_composites=True)
    flat_out = tf.py_function(func=wrapped_func, 
                              inp=tf.nest.flatten(inp, expand_composites=True),
                              Tout=[_tensor_spec_to_dtype(v) for v in flat_Tout],
                              name=name)
    spec_out = tf.nest.map_structure(_dtype_to_tensor_spec, Tout, expand_composites=True)
    out = tf.nest.pack_sequence_as(spec_out, flat_out, expand_composites=True)
    return out

def _dtype_to_tensor_spec(v):
    return tf.TensorSpec(None, v) if isinstance(v, tf.dtypes.DType) else v

def _tensor_spec_to_dtype(v):
    return v.dtype if isinstance(v, tf.TensorSpec) else v

In [9]:
def get_dataset(data, batch_size):
    data = np.array([int(i) for i in data])
    data = tf.data.Dataset.from_tensor_slices(data)
    data = data.shuffle(buffer_size=12000, seed=30, reshuffle_each_iteration=True)
    
    data = data.map(lambda idx: new_py_function(get_inputs, inp=[idx], 
                                                    Tout=tf.TensorSpec(shape=(None, 52, 66, 56, 53), dtype=tf.dtypes.float64), 
                                                name=None), 
                     num_parallel_calls=tf.data.experimental.AUTOTUNE, 
                     deterministic=False)
    data = data.batch(batch_size, drop_remainder=False)
    data = data.prefetch(tf.data.experimental.AUTOTUNE)
    return data

In [10]:
batch_size = 8

ds_train = get_dataset(ALL_IDS, batch_size)
ds_reveal_s2 = get_dataset(REVEAL_IDS_S2, batch_size)
ds_noreveal = get_dataset(NOREVEAL_IDS, batch_size)

In [11]:
# for i in ds_train.take(1):
#     pass

In [12]:
INPUT_SHAPE_mri = (52, 66, 56, 53)

In [13]:
def create_model(input_shape, filters=[32, 16, 8, 2]):
    
    #============================================================================
    # ENCODER
    #============================================================================
    inputs_mri = keras.layers.Input(shape=INPUT_SHAPE_mri, name='inpupt_mri')

    # convolution block #1
    x = keras.layers.Conv3D(filters[0], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
                                  kernel_initializer=keras.initializers.he_normal(seed=30),
                                  bias_initializer=keras.initializers.Constant(0.01))(inputs_mri)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = keras.layers.Conv3D(filters[0], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
                                  kernel_initializer=keras.initializers.he_normal(seed=30),
                                  bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = keras.layers.MaxPooling3D(pool_size=(2, 2, 2), strides=(2,2,2))(x)
#     x, p1_idx = tf.nn.max_pool_with_argmax(x, ksize=[2], strides=[2], padding='SAME', name="p1")
    x = keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, 
                                              beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros',
                                              moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, 
                                              beta_constraint=None, gamma_constraint=None)(x)

    # convolution block #2
    x = keras.layers.Conv3D(filters[1], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
                                  kernel_initializer=keras.initializers.he_normal(seed=30),
                                  bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = keras.layers.Conv3D(filters[1], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
                                  kernel_initializer=keras.initializers.he_normal(seed=30),
                                  bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = keras.layers.MaxPooling3D(pool_size=(2, 2, 2), strides=(2,2,2))(x)
    x = keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, 
                                              beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros',
                                              moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, 
                                              beta_constraint=None, gamma_constraint=None)(x)

    # convolution block #3
    x = keras.layers.Conv3D(filters[2], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
                                  kernel_initializer=keras.initializers.he_normal(seed=30),
                                  bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = keras.layers.Conv3D(filters[2], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
                                  kernel_initializer=keras.initializers.he_normal(seed=30),
                                  bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = keras.layers.MaxPooling3D(pool_size=(2, 2, 2), strides=(2,2,2))(x)
    x = keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, 
                                              beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros',
                                              moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, 
                                              beta_constraint=None, gamma_constraint=None)(x)

    # convolution block #4
#     x = keras.layers.Conv3D(filters[3], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
#                                   kernel_initializer=keras.initializers.he_normal(seed=30),
#                                   bias_initializer=keras.initializers.Constant(0.01))(x)
#     x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
#     x = keras.layers.Conv3D(filters[3], kernel_size=(3, 3, 3), strides=(1,1,1), padding='same',
#                                   kernel_initializer=keras.initializers.he_normal(seed=30),
#                                   bias_initializer=keras.initializers.Constant(0.01))(x)
#     x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
#     x = keras.layers.MaxPooling3D(pool_size=(2, 2, 2), strides=(2,2,2))(x)
#     x = keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, 
#                                               beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros',
#                                               moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, 
#                                               beta_constraint=None, gamma_constraint=None)(x)
    

    flatten = keras.layers.Flatten(data_format='channels_last')(x)

    encoded = keras.layers.Dense(2,
                               kernel_initializer=keras.initializers.he_normal(seed=30),
                               bias_initializer=keras.initializers.Constant(5.))(flatten)
    encoded = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.5))(encoded)

    
    #============================================================================
    # DECODER
    #============================================================================
    x = keras.layers.Dense(filters[2]*int(input_shape[0]/8)*int(input_shape[1]/8)*int(input_shape[2]/8),
                           kernel_initializer=keras.initializers.he_normal(seed=30),
                           bias_initializer=keras.initializers.Constant(5.))(encoded)
    
    x = keras.layers.Reshape((int(input_shape[0]/8), int(input_shape[1]/8), int(input_shape[2]/8), filters[2]))(x)
    
    # convolution block #4
#     x = tf.keras.layers.UpSampling3D(size=(2, 2, 2))(x)
#     x = tf.keras.layers.Conv3DTranspose(filters[2], kernel_size=(1, 1, 2), strides=(1,1,1), padding='valid',
#                                         kernel_initializer=keras.initializers.he_normal(seed=30),
#                                         bias_initializer=keras.initializers.Constant(0.01))(x)
#     x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    
    # convolution block #3
    x = tf.keras.layers.UpSampling3D(size=(2, 2, 2))(x)
    x = tf.keras.layers.Conv3DTranspose(filters[2], kernel_size=(2, 1, 1), strides=(1,1,1), padding='valid',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.Conv3DTranspose(filters[2], kernel_size=(2, 1, 1), strides=(1,1,1), padding='same',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    
    # convolution block #2
    x = tf.keras.layers.UpSampling3D(size=(2, 2, 2))(x)
    x = tf.keras.layers.Conv3DTranspose(filters[1], kernel_size=(1, 2, 1), strides=(1,1,1), padding='valid',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.Conv3DTranspose(filters[1], kernel_size=(1, 2, 1), strides=(1,1,1), padding='same',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    
    # convolution block #1
    x = tf.keras.layers.UpSampling3D(size=(2, 2, 2))(x)
    x = tf.keras.layers.Conv3DTranspose(filters[0], kernel_size=(1, 1, 1), strides=(1,1,1), padding='valid',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.Conv3DTranspose(filters[0], kernel_size=(1, 1, 1), strides=(1,1,1), padding='same',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.Conv3DTranspose(input_shape[3], kernel_size=(1, 1, 1), strides=(1,1,1), padding='valid',
                                        kernel_initializer=keras.initializers.he_normal(seed=30),
                                        bias_initializer=keras.initializers.Constant(0.01))(x)
    x = tf.keras.layers.PReLU(alpha_initializer=keras.initializers.Constant(0.01))(x)
    
    decoded = x
    
    #============================================================================
    # COMPILE
    #============================================================================
    autoencoder = keras.Model(inputs=inputs_mri, outputs=decoded, name='autoencoder')
    encoder = keras.Model(inputs=inputs_mri, outputs=encoded, name='encoder')

    optim = tf.keras.optimizers.Adadelta(learning_rate=0.001, rho=0.95)

    METRICS = [keras.metrics.RootMeanSquaredError(name='rmse')]

    autoencoder.compile(loss='mse', metrics=METRICS, optimizer=optim)
    return autoencoder, encoder

In [14]:
autoencoder, encoder = create_model(INPUT_SHAPE_mri)

In [15]:
autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inpupt_mri (InputLayer)      [(None, 52, 66, 56, 53)]  0         
_________________________________________________________________
conv3d (Conv3D)              (None, 52, 66, 56, 32)    45824     
_________________________________________________________________
p_re_lu (PReLU)              (None, 52, 66, 56, 32)    6150144   
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 52, 66, 56, 32)    27680     
_________________________________________________________________
p_re_lu_1 (PReLU)            (None, 52, 66, 56, 32)    6150144   
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 26, 33, 28, 32)    0         
_________________________________________________________________
batch_normalization (BatchNo (None, 26, 33, 28, 32)    

In [16]:
autoencoder.load_weights('./99_Training_checkpoints/mri_clustering/run_02/model_weights_02.h5')

In [17]:
y_reveal_s2_enc = np.genfromtxt('y_reveal_s2_enc.csv', delimiter=',')
y_noreveal_enc = np.genfromtxt('y_noreveal_enc.csv', delimiter=',')

y_reveal_s2_enc_mean = np.genfromtxt('y_reveal_s2_enc_mean.csv', delimiter=',')
y_noreveal_enc_mean = np.genfromtxt('y_noreveal_enc_mean.csv', delimiter=',')

In [18]:
y_reveal_s2_enc_mean

array([-0.1474206 , -0.07687219])

In [19]:
y_noreveal_enc_mean

array([-0.10948444, -0.10004571])

In [20]:
print(y_reveal_s2_enc.shape, y_noreveal_enc.shape)

(510, 2) (11244, 2)


In [21]:
y_all = np.append(y_reveal_s2_enc, y_noreveal_enc, axis=0)

In [22]:
y_all.shape

(11754, 2)

In [23]:
n_clusters = 2

In [24]:
class ClusteringLayer(keras.layers.Layer):
    """
    Clustering layer converts input sample (feature) to soft label.

    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: degrees of freedom parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = keras.layers.InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = keras.layers.InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.        
                 q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it.
                 q_ij can be interpreted as the probability of assigning sample i to cluster j.
                 (i.e., a soft assignment)
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [25]:
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = keras.Model(inputs=encoder.input, outputs=[clustering_layer, autoencoder.output])

In [26]:
kmeans = cluster.KMeans(n_clusters=n_clusters, init=np.array([y_reveal_s2_enc_mean, y_noreveal_enc_mean]))
y_pred_km = kmeans.fit_predict(y_all)
kmeans.cluster_centers_

  return self.fit(X, sample_weight=sample_weight).labels_


array([[-0.27640564, -0.22242   ],
       [ 0.19237857,  0.12753383]])

In [27]:
gm = mixture.GaussianMixture(n_components=2, 
                             covariance_type='full', 
                             tol=0.0001, 
                             reg_covar=1e-06, 
                             max_iter=1000, 
                             n_init=1, 
                             init_params='kmeans', 
                             weights_init=[1-len(y_reveal_s2_enc)/len(y_all),
                                           1-len(y_noreveal_enc)/len(y_all)], 
                             means_init=[y_reveal_s2_enc_mean, y_noreveal_enc_mean], 
                             precisions_init=None, 
                             random_state=30, 
                             verbose=0, 
                             verbose_interval=10)
y_pred_gm = gm.fit_predict(y_all)
gm.means_

array([[ 0.04507672,  0.05673662],
       [-0.22917223, -0.21675597]])

In [28]:
init_centers_km = kmeans.cluster_centers_
init_centers_gm = gm.means_

In [29]:
model.get_layer(name='clustering').set_weights([init_centers_gm])
optim = tf.keras.optimizers.Adadelta(learning_rate=0.001, rho=0.95)
model.compile(loss=['kld', 'mse'], optimizer=optim)

In [30]:
y_pred_last = np.copy(y_pred_gm)

In [31]:
# loss_object = tf.keras.losses.MeanSquaredError(reduction=losses_utils.ReductionV2.AUTO, name='mse')

In [32]:
# # computing an auxiliary target distribution
# def target_distribution(q):
#     weight = q ** 2 / q.sum(0)
#     return (weight.T / weight.sum(1)).T

In [33]:
# def loss(model, x, y, training):
#     # computing an auxiliary target distribution
#     def target_distribution(q):
#         weight = q ** 2 / q.sum(0)
#         return (weight.T / weight.sum(1)).T
    
#     # training=training is needed only if there are layers with different
#     # behavior during training versus inference (e.g. Dropout).
#     q, _ = model(x, training=training)
#     p = target_distribution(q)  # update the auxiliary target distribution p
    
#     return loss_object(y_true=y, y_pred=y_)

In [34]:
# def grad(model, inputs, targets):
#     with tf.GradientTape() as tape:
#         loss_value = loss(model, inputs, targets, training=True)
#     return loss_value, tape.gradient(loss_value, model.trainable_variables)

In [35]:
def target_distribution(q):
    weight = q ** 2 / q.sum(0)
    print(weight.shape)
    return (weight.T / weight.sum(1)).T

In [36]:
q, _  = model.predict(ds_train, verbose=1)

  16/1470 [..............................] - ETA: 59:54  

ResourceExhaustedError:  OOM when allocating tensor with shape[8,32,52,66,56] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node model/conv3d/Conv3D (defined at <ipython-input-36-15bfa8924b98>:1) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_predict_function_2181]

Function call stack:
predict_function


In [None]:
p = target_distribution(q)

In [37]:
loss = 0
index = 0
# maxiter = 1470
update_interval = 64
# index_array = np.arange(x.shape[0])

In [38]:
tol = 0.001 # tolerance threshold to stop training

In [None]:
# for ite in range(int(maxiter)):
for step, x_batch in enumerate(ds_train):
    if step % update_interval == 0:
        q = model.predict(x, verbose=0)
        p = target_distribution(q)  # update the auxiliary target distribution p

        # evaluate the clustering performance
        y_pred = q.argmax(1)
        if y is not None:
            acc = np.round(metrics.acc(y, y_pred), 5)
            nmi = np.round(metrics.nmi(y, y_pred), 5)
            ari = np.round(metrics.ari(y, y_pred), 5)
            loss = np.round(loss, 5)
            print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)

        # check stop criterion
        delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
        y_pred_last = np.copy(y_pred)
        if ite > 0 and delta_label < tol:
            print('delta_label ', delta_label, '< tol ', tol)
            print('Reached tolerance threshold. Stopping training.')
            break
    idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])]
    loss = model.train_on_batch(x=x[idx], y=p[idx])
    index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

model.save_weights(save_dir + '/conv_DEC_model_final.h5')

In [None]:
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(ds_train):

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables autodifferentiation.
        with tf.GradientTape() as tape:

            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch

            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * 64))