In [1]:
import os
import sys
import math
import argparse
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
from sklearn.decomposition import PCA
from typing import Dict, List, Tuple, Iterable, Union, Optional, Set, Sequence, Callable, DefaultDict, Any

# Keras imports
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LeakyReLU, PReLU, ELU, ThresholdedReLU, Lambda, Reshape, LayerNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.layers import SpatialDropout1D, SpatialDropout2D, SpatialDropout3D, add, concatenate
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Activation, Flatten, LSTM, RepeatVector
from tensorflow.keras.layers import Conv1D, Conv2D, Conv3D, UpSampling1D, UpSampling2D, UpSampling3D, MaxPooling1D
from tensorflow.keras.layers import MaxPooling2D, MaxPooling3D, AveragePooling1D, AveragePooling2D, AveragePooling3D, Layer
from tensorflow.keras.layers import SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Concatenate, Add
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalAveragePooling2D, GlobalAveragePooling3D


# ML4CVD Imports
from ml4cvd.TensorMap import TensorMap
from ml4cvd.arguments import parse_args
from ml4cvd.models import make_multimodal_multitask_model, train_model_from_generators, make_hidden_layer_model, _conv_layer_from_kind_and_dimension
from ml4cvd.tensor_generators import TensorGenerator, big_batch_from_minibatch_generator, test_train_valid_tensor_generators
from ml4cvd.recipes import plot_predictions, infer_hidden_layer_multimodal_multitask

# IPython imports
%matplotlib inline
import matplotlib.pyplot as plt


Tensor = tf.Tensor

# Activations implemented as Keras layers, keyed by the string accepted for `activation`.
# Every value is a layer *instance* so that all entries can be used the same way.
ACTIVATION_CLASSES = {
    'leaky': LeakyReLU(),
    'prelu': PReLU(),
    'elu': ELU(),
    'thresh_relu': ThresholdedReLU(),  # was the bare class, unlike its siblings
}
# Activations implemented as plain functions; these get wrapped in an `Activation` layer.
ACTIVATION_FUNCTIONS = {
    'swish': tf.nn.swish,
    'gelu': tfa.activations.gelu,
    'lisht': tfa.activations.lisht,
    'mish': tfa.activations.mish,
}
# Normalization layer classes (instantiated with no arguments by `_normalization_layer`).
NORMALIZATION_CLASSES = {
    'batch_norm': BatchNormalization,
    'layer_norm': LayerNormalization,
    'instance_norm': tfa.layers.InstanceNormalization,
    'poincare_norm': tfa.layers.PoincareNormalize,
}
CONV_REGULARIZATION_CLASSES = {
    # class name -> (dimension -> class)
    'spatial_dropout': {2: SpatialDropout1D, 3: SpatialDropout2D, 4: SpatialDropout3D},
    # defaultdict calls its factory with NO arguments, so the lambda must not declare a parameter
    'dropout': defaultdict(lambda: Dropout),
}
DENSE_REGULARIZATION_CLASSES = {
    'dropout': Dropout,  # TODO: add l1, l2
}

In [2]:

def _activation_layer(activation: str) -> Activation:
    """
    Return a new activation layer for the given activation name.

    Names found in ACTIVATION_CLASSES produce a fresh layer of the registered kind on every call, so that
    separate call sites never share one layer object (and therefore never share weights or a layer name).
    Any other name is wrapped in an `Activation` layer, using the function registered in ACTIVATION_FUNCTIONS
    when there is one and otherwise passing the name itself through to Keras.

    :param activation: Key of ACTIVATION_CLASSES / ACTIVATION_FUNCTIONS, or a name Keras' `Activation` accepts
    :return: A Keras layer applying the activation
    """
    registered = ACTIVATION_CLASSES.get(activation, None)
    if registered is None:
        return Activation(ACTIVATION_FUNCTIONS.get(activation, None) or activation)
    if isinstance(registered, type):
        # The registry holds a layer class: build it with its default arguments.
        return registered()
    # The registry holds a layer instance: rebuild an equivalent layer from its config.
    # The name is dropped so Keras assigns a new unique one instead of duplicating the template's name.
    config = registered.get_config()
    config.pop('name', None)
    return type(registered).from_config(config)


def _normalization_layer(norm: str) -> Layer:
    """
    Build the normalization registered under `norm` in NORMALIZATION_CLASSES.

    :param norm: Key of NORMALIZATION_CLASSES; any falsy value (None, '') means "no normalization"
    :return: A new instance of the registered class, or an identity callable when `norm` is falsy
    """
    if norm:
        norm_class = NORMALIZATION_CLASSES[norm]
        return norm_class()
    return lambda tensor: tensor


def _regularization_layer(dimension: int, regularization_type: str, rate: float):
    if not regularization_type:
        return lambda x: x
    if regularization_type in DENSE_REGULARIZATION_CLASSES:
        return DENSE_REGULARIZATION_CLASSES[regularization_type](rate)
    return CONV_REGULARIZATION_CLASSES[regularization_type][dimension](rate)


def _calc_start_shape(
        num_upsamples: int, output_shape: Tuple[int, ...], upsample_rates: Sequence[int], channels: int,
) -> Tuple[int, ...]:
    """
    Given the number of blocks in the decoder and the upsample rates, return required input shape to get to output shape
    """
    upsample_rates = list(upsample_rates) + [1] * len(output_shape)
    return tuple((shape // rate**num_upsamples for shape, rate in zip(output_shape[:-1], upsample_rates))) + (channels,)




class FlatToStructure:
    """
    Takes a flat input, applies a dense layer, then restructures to output_shape.

    The dense layer has one unit per element of `output_shape`; activation and normalization are applied to
    the flat dense output before it is reshaped.
    """
    def __init__(
            self,
            output_shape: Tuple[int, ...],
            activation: str,
            normalization: str,
    ):
        # Attribute name kept as-is even though it stores the *output* shape.
        self.input_shapes = output_shape
        flat_units = int(np.prod(output_shape))
        self.dense = Dense(units=flat_units)
        self.activation = _activation_layer(activation)
        self.reshape = Reshape(output_shape)
        self.norm = _normalization_layer(normalization)

    def __call__(self, x: Tensor) -> Tensor:
        projected = self.dense(x)
        activated = self.activation(projected)
        normalized = self.norm(activated)
        return self.reshape(normalized)


def _conv_layer_from_kind_and_dimension(
        dimension: int, conv_layer_type: str, conv_x: List[int], conv_y: List[int], conv_z: List[int],
) -> Tuple[Layer, List[Tuple[int, ...]]]:
    """
    Pick the Keras convolution class for a layer kind and tensor dimension, and build its kernel sizes.

    Kernel sizes are made by zipping the per-axis size lists that apply to the chosen layer
    (x only, x and y, or x, y and z), giving one tuple per convolution.

    NOTE: this definition shadows the function of the same name imported from ml4cvd.models.

    :raises ValueError: if the (conv_layer_type, dimension) combination is not supported
    """
    # (kind, dimension) -> (layer class, number of kernel axes)
    supported = {
        ('conv', 4): (Conv3D, 3),
        ('conv', 3): (Conv2D, 2),
        ('conv', 2): (Conv1D, 1),
        ('separable', 3): (SeparableConv2D, 2),
        ('separable', 2): (SeparableConv1D, 1),
        ('depth', 3): (DepthwiseConv2D, 2),
    }
    choice = supported.get((conv_layer_type, dimension))
    if choice is None:
        raise ValueError(f'Unknown convolution type: {conv_layer_type} for dimension: {dimension}')
    conv_layer, num_axes = choice
    per_axis_sizes = (conv_x, conv_y, conv_z)[:num_axes]
    return conv_layer, list(zip(*per_axis_sizes))


def _upsampler(dimension, pool_x, pool_y, pool_z):
    if dimension == 4:
        return UpSampling3D(size=(pool_x, pool_y, pool_z))
    elif dimension == 3:
        return UpSampling2D(size=(pool_x, pool_y))
    elif dimension == 2:
        return UpSampling1D(size=pool_x)
    

    
def _one_by_n_kernel(dimension):
    return tuple([1] * (dimension - 1))


class DenseConvolutionalBlock:
    """
    A block of convolutions in which each convolution sees the block input and all earlier outputs.

    One convolution is created per kernel built from `conv_x`/`conv_y`/`conv_z`; each is followed by
    activation, regularization and normalization. After every convolution except the last, the result is
    appended to the running list of tensors and the concatenation of that list feeds the next convolution.
    """
    def __init__(
            self,
            *,
            dimension: int,
            block_size: int,
            conv_layer_type: str,
            filters: int,
            conv_x: List[int],
            conv_y: List[int],
            conv_z: List[int],
            activation: str,
            normalization: str,
            regularization: str,
            regularization_rate: float,
    ):
        conv_layer, kernels = _conv_layer_from_kind_and_dimension(dimension, conv_layer_type, conv_x, conv_y, conv_z)
        # `conv_layer` is a class, not an instance, so it must be compared by identity;
        # the previous isinstance() check was always False and passed `filters` to DepthwiseConv2D,
        # which does not take that argument.
        if conv_layer is DepthwiseConv2D:
            self.conv_layers = [conv_layer(kernel_size=kernel, padding='same') for kernel in kernels]
        else:
            self.conv_layers = [conv_layer(filters=filters, kernel_size=kernel, padding='same') for kernel in kernels]
        self.activations = [_activation_layer(activation) for _ in range(block_size)]
        self.normalizations = [_normalization_layer(normalization) for _ in range(block_size)]
        self.regularizations = [_regularization_layer(dimension, regularization, regularization_rate) for _ in range(block_size)]
        print(f'Dense Block Convolutional Layers (num_filters, kernel_size): {list(zip([filters]*len(kernels), kernels))}')

    def __call__(self, x: Tensor) -> Tensor:
        dense_connections = [x]
        last_index = len(self.conv_layers) - 1
        stages = zip(self.conv_layers, self.activations, self.normalizations, self.regularizations)
        for i, (convolve, activate, normalize, regularize) in enumerate(stages):
            x = normalize(regularize(activate(convolve(x))))
            if i < last_index:  # output of block does not get concatenated to
                dense_connections.append(x)
                x = Concatenate()(dense_connections[:])  # [:] is necessary because of tf weirdness
        return x

    
class ConvDecoder2:
    """
    Convolutional decoder: repeated (upsample, dense block) stages followed by a 1x...x1 output convolution.

    One DenseConvolutionalBlock is built per entry of `filters_per_dense_block` (paired positionally with the
    entries of `conv_x`, `conv_y` and `conv_z`). The final convolution has `tensor_map_out.shape[-1]` filters
    and takes its activation and layer name from `tensor_map_out`.
    """
    def __init__(
            self,
            *,
            tensor_map_out: TensorMap,
            filters_per_dense_block: List[int],
            conv_layer_type: str,
            conv_x: List[int],
            conv_y: List[int],
            conv_z: List[int],
            block_size: int,
            activation: str,
            normalization: str,
            regularization: str,
            regularization_rate: float,
            upsample_x: int,
            upsample_y: int,
            upsample_z: int,
    ):
        dimension = tensor_map_out.axes()

        blocks = []
        for filters, x, y, z in zip(filters_per_dense_block, conv_x, conv_y, conv_z):
            # Every convolution inside one block uses the same kernel size.
            blocks.append(
                DenseConvolutionalBlock(
                    dimension=tensor_map_out.axes(),
                    conv_layer_type=conv_layer_type,
                    filters=filters,
                    conv_x=[x]*block_size,
                    conv_y=[y]*block_size,
                    conv_z=[z]*block_size,
                    block_size=block_size,
                    activation=activation,
                    normalization=normalization,
                    regularization=regularization,
                    regularization_rate=regularization_rate,
                ),
            )
        self.dense_blocks = blocks

        # The output layer is always a plain convolution, whatever `conv_layer_type` the blocks use.
        output_conv_class, _ = _conv_layer_from_kind_and_dimension(dimension, 'conv', conv_x, conv_y, conv_z)
        self.conv_label = output_conv_class(
            tensor_map_out.shape[-1],
            _one_by_n_kernel(dimension),
            activation=tensor_map_out.activation,
            name=tensor_map_out.output_name(),
        )

        # One more upsampler than dense blocks is created; __call__ zips the two lists, so the extra one is unused.
        num_upsamplers = len(filters_per_dense_block) + 1
        self.upsamples = [_upsampler(dimension, upsample_x, upsample_y, upsample_z) for _ in range(num_upsamplers)]
        print(f'Decode has: {list(enumerate(zip(self.dense_blocks, self.upsamples)))}')

    def __call__(self, x: Tensor) -> Tensor:
        for upsample, dense_block in zip(self.upsamples, self.dense_blocks):
            x = dense_block(upsample(x))
        return self.conv_label(x)
    
    

In [3]:
def make_paired_autoencoder_model(
    pairs: List[Tuple[TensorMap, TensorMap]],
    **kwargs
) -> Model:
    """
    Build and compile a Keras model with one encoder per TensorMap of every pair, one distance output per pair,
    and one convolutional decoder per output TensorMap fed by the concatenation of all encodings.

    NOTE: apart from `kwargs['model_layers']`, all configuration is read from the module-level `args`
    namespace rather than from `kwargs`, and `args` is mutated: `args.tensor_maps_in` is temporarily narrowed
    to a single map while each encoder is built, and on return `args.tensor_maps_in` is a list of the original
    inputs while `args.tensor_maps_out` is the original outputs followed by one synthetic 'paired_*' TensorMap
    per pair.

    :param pairs: (left, right) TensorMaps to encode; each must be in `args.tensor_maps_in` at call time
    :param kwargs: Must contain 'model_layers'; when it is not None, weights are loaded by name from that path
    :return: The compiled model
    """
    # One Keras Input per input TensorMap, shared by every encoder that uses it.
    inputs = {tm: Input(shape=tm.shape, name=tm.input_name()) for tm in args.tensor_maps_in}
    # Remember the caller's output maps (the dict values are never read) before args is modified below.
    original_outputs = {tm:1 for tm in args.tensor_maps_out}
    multimodal_activations = []
    desired_distance_tm = []
    my_metrics = {}  # NOTE(review): never used in this function
    outputs = []
    losses = []
    for left, right in pairs:
        # make_multimodal_multitask_model is configured from args, so restrict args to the single input map
        # before building each side's model and taking its hidden layer as the encoder.
        args.tensor_maps_in = [left]
        left_model = make_multimodal_multitask_model(**args.__dict__)
        encode_left = make_hidden_layer_model(left_model, [left], args.hidden_layer)
        h_left = encode_left(inputs[left])
        
        args.tensor_maps_in = [right]
        right_model = make_multimodal_multitask_model(**args.__dict__)     
        encode_right = make_hidden_layer_model(right_model, [right], args.hidden_layer)
        h_right = encode_right(inputs[right])        
        
        # Synthetic output map for the pair's distance; its tensor_from_file ignores its arguments
        # and always returns a single zero.
        tff = lambda tm, hd5, d: np.zeros((1,))
        tm0 = TensorMap(f'paired_{left.name}_{right.name}', shape=(1,), tensor_from_file=tff)
        desired_distance_tm.append(tm0)
        
        # Mean of the squared element-wise differences between the two encodings (last axis, keepdims)
        l2_layer = Lambda(lambda tensors: K.mean(K.square(tensors[0] - tensors[1]), axis=-1, keepdims=True), name=tm0.output_name())
        l2_distance = l2_layer([h_left, h_right])
        outputs.append(l2_distance)
        # NOTE(review): the distance output is trained with binary_crossentropy -- confirm this is intended
        losses.append('binary_crossentropy')
        multimodal_activations.extend([h_left, h_right])
        
    multimodal_activation = Concatenate()(multimodal_activations)
    
    pre_decoder_shapes: Dict[TensorMap, Optional[Tuple[int, ...]]] = {}  # NOTE(review): never populated or read
    # At this point args.tensor_maps_out is still the caller's original list of output maps.
    for tm in args.tensor_maps_out:
        # Shape the decoder must start from so that its upsampling stages end at tm.shape
        shape = _calc_start_shape(num_upsamples=len(args.dense_blocks), output_shape=tm.shape, 
                                  upsample_rates=[args.pool_x, args.pool_y, args.pool_z], 
                                  channels=args.dense_blocks[-1])    
        
        restructure = FlatToStructure(output_shape=shape, activation=args.activation, 
                                      normalization=args.dense_normalize)
        
        decode = ConvDecoder2(
            tensor_map_out=tm,
            filters_per_dense_block=args.dense_blocks[::-1],
            conv_layer_type=args.conv_type,
            conv_x=args.conv_x,
            conv_y=args.conv_y,
            conv_z=args.conv_z,
            block_size=args.block_size,
            activation=args.activation,
            normalization=args.conv_normalize,
            regularization=args.conv_regularize,
            regularization_rate=args.conv_regularize_rate,
            upsample_x=args.pool_x,
            upsample_y=args.pool_y,
            upsample_z=args.pool_z,
        )
        
        outputs.append(decode(restructure(multimodal_activation)))
        losses.append(tm.loss)

    # Publish the final input/output maps on the global args.
    # NOTE(review): `outputs` lists the distance outputs first, while this list puts the 'paired_*' maps last.
    args.tensor_maps_out =  list(original_outputs.keys()) + desired_distance_tm
    args.tensor_maps_in = list(inputs.keys())
    
    # NOTE(review): optimizer settings are hard-coded here; args.learning_rate is not consulted.
    opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    #outputs.reverse()  # Make paired loss last
    #losses.reverse()
    m = Model(inputs=list(inputs.values()), outputs=outputs)
    m.compile(optimizer=opt, loss=losses)
    m.summary()
    
    # Raises KeyError if the caller did not pass `model_layers` at all.
    if kwargs['model_layers'] is not None:
        m.load_weights(kwargs['model_layers'], by_name=True)
        print(f"Loaded model weights from:{kwargs['model_layers']}")
        
    return m

In [None]:
# Training cell: emulate the command line that parse_args() reads, build the paired autoencoder, train it.
# NOTE: the --tensors path is absolute and machine-specific.
sys.argv = ['train', 
            '--tensors', '/mnt/disks/segmented-sax-lax/2020-07-07/', 
            '--input_tensors', 'lax_2ch_diastole_slice0_3d', 'lax_3ch_diastole_slice0_3d', 
            '--output_tensors', 'lax_2ch_diastole_slice0_3d', 'lax_3ch_diastole_slice0_3d',
            '--activation', 'swish',
            '--conv_layers', '48',
            '--conv_x', '3', '3', '3',
            '--conv_y', '3', '3', '3', 
            '--conv_z', '3', '3', '3', 
            '--dense_blocks', '48',
            '--block_size', '3',
            '--dense_layers', '256',
            '--pool_x', '2',
            '--pool_y', '2',
            '--batch_size', '1',
            '--patience', '32',
            '--epochs', '248',
            '--learning_rate', '0.001',
            '--training_steps', '256',
            '--validation_steps', '30',
            '--test_steps', '2',
            '--num_workers', '4',
            '--inspect_model',
            '--tensormap_prefix', 'ml4cvd.tensormap.ukb.mri',
            '--id', 'lax_2ch_3ch_diastole_paired_autoencoder_swish']
args = parse_args()
# Pair the two input maps in the order given to --input_tensors.
pairs = [(args.tensor_maps_in[0], args.tensor_maps_in[1])]
# Order matters: make_paired_autoencoder_model reads the global `args` and rewrites
# args.tensor_maps_in / args.tensor_maps_out, and the generators below are built from the updated args.
overparameterized_model = make_paired_autoencoder_model(pairs, **args.__dict__)
generate_train, generate_valid, generate_test = test_train_valid_tensor_generators(**args.__dict__)
train_model_from_generators(
        overparameterized_model, generate_train, generate_valid, args.training_steps, args.validation_steps, args.batch_size,
        args.epochs, args.patience, args.output_folder, args.id, args.inspect_model, args.inspect_show_labels,
)

2020-08-18 13:16:37,557 - logger:25 - INFO - Logging configuration was loaded. Log messages can be found at ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/log_2020-08-18_13-16_0.log.
2020-08-18 13:16:37,693 - arguments:414 - INFO - Command Line was: 
./scripts/tf.sh train --tensors /mnt/disks/segmented-sax-lax/2020-07-07/ --input_tensors lax_2ch_diastole_slice0_3d lax_3ch_diastole_slice0_3d --output_tensors lax_2ch_diastole_slice0_3d lax_3ch_diastole_slice0_3d --activation swish --conv_layers 48 --conv_x 3 3 3 --conv_y 3 3 3 --conv_z 3 3 3 --dense_blocks 48 --block_size 3 --dense_layers 256 --pool_x 2 --pool_y 2 --batch_size 1 --patience 32 --epochs 248 --learning_rate 0.001 --training_steps 256 --validation_steps 30 --test_steps 2 --num_workers 4 --inspect_model --tensormap_prefix ml4cvd.tensormap.ukb.mri --id lax_2ch_3ch_diastole_paired_autoencoder_swish

2020-08-18 13:16:37,725 - models:379 - INFO - Residual Block Convolutional Layers (num_filters, kernel_size): [(48

2020-08-18 13:16:39,706 - models:379 - INFO - Residual Block Convolutional Layers (num_filters, kernel_size): [(48, (3, 3))]
2020-08-18 13:16:39,712 - models:414 - INFO - Dense Block Convolutional Layers (num_filters, kernel_size): [(48, (3, 3)), (48, (3, 3)), (48, (3, 3))]
2020-08-18 13:16:39,722 - models:414 - INFO - Dense Block Convolutional Layers (num_filters, kernel_size): [(48, (3, 3)), (48, (3, 3)), (48, (3, 3))]
2020-08-18 13:16:39,729 - models:414 - INFO - Dense Block Convolutional Layers (num_filters, kernel_size): [(48, (3, 3)), (48, (3, 3)), (48, (3, 3))]
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_lax_3ch_diastole_slice0_3 [(None, 200, 160, 1) 0                                            
__________________________________________________________________________________________________
conv2d_10 (C

Dense Block Convolutional Layers (num_filters, kernel_size): [(48, (3, 3)), (48, (3, 3)), (48, (3, 3))]
Decode has: [(0, (<__main__.DenseConvolutionalBlock object at 0x7f907c043400>, <tensorflow.python.keras.layers.convolutional.UpSampling2D object at 0x7f907c043d68>))]
Dense Block Convolutional Layers (num_filters, kernel_size): [(48, (3, 3)), (48, (3, 3)), (48, (3, 3))]
Decode has: [(0, (<__main__.DenseConvolutionalBlock object at 0x7f906efc7da0>, <tensorflow.python.keras.layers.convolutional.UpSampling2D object at 0x7f906efd1710>))]
Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_lax_2ch_diastole_slice0_3 [(None, 200, 160, 1) 0                                            
__________________________________________________________________________________________________
input_lax_3ch_diastole_slice0_3 [(None, 200, 

2020-08-18 13:16:44,566 - models:1316 - INFO - Saving architecture diagram to:./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/architecture_graph_lax_2ch_3ch_diastole_paired_autoencoder_swish.png
2020-08-18 13:16:45,457 - tensor_generators:151 - INFO - Started 3 train workers with cache size 0.875GB.
2020-08-18 13:16:45,868 - tensor_generators:151 - INFO - Started 1 validation workers with cache size 0.875GB.
Train for 256 steps, validate for 1 steps
2020-08-18 13:17:54,096 - models:1254 - INFO - Spent:68.75 seconds training, Samples trained on:256 Per sample training speed:0.269 seconds.
2020-08-18 13:17:59,859 - models:1260 - INFO - Spent:5.76 seconds predicting, Samples inferred:256 Per sample inference speed:0.1799 seconds.
Train for 256 steps, validate for 30 steps
Epoch 1/248
Epoch 00001: val_loss improved from inf to 0.47352, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 2/248
E

Epoch 17/248
Epoch 00017: val_loss improved from 0.34005 to 0.32355, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 18/248
Epoch 00018: val_loss did not improve from 0.32355
Epoch 19/248
Epoch 00019: val_loss improved from 0.32355 to 0.31567, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 20/248
Epoch 00020: val_loss improved from 0.31567 to 0.31458, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 21/248
Epoch 00021: val_loss did not improve from 0.31458
Epoch 22/248
Epoch 00022: val_loss improved from 0.31458 to 0.30753, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 23/248
Epoch 00023: val_loss improved from 0.30753 to 0.30747, saving model to

Epoch 25/248
Epoch 00025: val_loss did not improve from 0.29613
Epoch 26/248
Epoch 00026: val_loss did not improve from 0.29613
Epoch 27/248
Epoch 00027: val_loss improved from 0.29613 to 0.28554, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 28/248
Epoch 00028: val_loss did not improve from 0.28554
Epoch 29/248
Epoch 00029: val_loss did not improve from 0.28554
Epoch 30/248
Epoch 00030: val_loss improved from 0.28554 to 0.28083, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 31/248
Epoch 00031: val_loss did not improve from 0.28083
Epoch 32/248
Epoch 00032: val_loss improved from 0.28083 to 0.27815, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 33/248
Epoch 00033: val_loss improved from 0.27815 to 0.27600, saving model to ./re

Epoch 34/248
Epoch 00034: val_loss improved from 0.27600 to 0.26948, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 35/248
Epoch 00035: val_loss did not improve from 0.26948
Epoch 36/248
Epoch 00036: val_loss improved from 0.26948 to 0.26207, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 37/248
Epoch 00037: val_loss improved from 0.26207 to 0.25664, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 38/248
Epoch 00038: val_loss did not improve from 0.25664
Epoch 39/248
Epoch 00039: val_loss did not improve from 0.25664
Epoch 40/248
Epoch 00040: val_loss did not improve from 0.25664
Epoch 41/248
Epoch 00041: val_loss did not improve from 0.25664
Epoch 42/248
Epoch 00042: val_loss improved from 0.25664 to 0.25129, saving model to ./re

Epoch 43/248
Epoch 00043: val_loss improved from 0.25129 to 0.24547, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 44/248
Epoch 00044: val_loss did not improve from 0.24547
Epoch 45/248
Epoch 00045: val_loss did not improve from 0.24547
Epoch 46/248
Epoch 00046: val_loss did not improve from 0.24547
Epoch 47/248
Epoch 00047: val_loss improved from 0.24547 to 0.24179, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 48/248
Epoch 00048: val_loss did not improve from 0.24179
Epoch 49/248
Epoch 00049: val_loss improved from 0.24179 to 0.23467, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 50/248
Epoch 00050: val_loss improved from 0.23467 to 0.22818, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2

Epoch 52/248
Epoch 00052: val_loss did not improve from 0.22818
Epoch 53/248
Epoch 00053: val_loss did not improve from 0.22818
Epoch 54/248
Epoch 00054: val_loss did not improve from 0.22818
Epoch 55/248
Epoch 00055: val_loss did not improve from 0.22818
Epoch 56/248
Epoch 00056: val_loss did not improve from 0.22818
Epoch 57/248
Epoch 00057: val_loss improved from 0.22818 to 0.22791, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 58/248
Epoch 00058: val_loss improved from 0.22791 to 0.22262, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 59/248
Epoch 00059: val_loss did not improve from 0.22262
Epoch 60/248
Epoch 00060: val_loss did not improve from 0.22262
Epoch 61/248
Epoch 00061: val_loss improved from 0.22262 to 0.21589, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3c

Epoch 00070: val_loss improved from 0.21251 to 0.21186, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 71/248
Epoch 00071: val_loss improved from 0.21186 to 0.20905, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 72/248
Epoch 00072: val_loss did not improve from 0.20905
Epoch 73/248
Epoch 00073: val_loss did not improve from 0.20905
Epoch 74/248
Epoch 00074: val_loss improved from 0.20905 to 0.20848, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 75/248
Epoch 00075: val_loss did not improve from 0.20848
Epoch 76/248
Epoch 00076: val_loss did not improve from 0.20848
Epoch 77/248
Epoch 00077: val_loss did not improve from 0.20848
Epoch 78/248
Epoch 00078: val_loss improved from 0.20848 to 0.20646, saving model to ./recipes_output/

Epoch 79/248
Epoch 00079: val_loss did not improve from 0.20646
Epoch 80/248
Epoch 00080: val_loss did not improve from 0.20646
Epoch 81/248
Epoch 00081: val_loss improved from 0.20646 to 0.19959, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 82/248
Epoch 00082: val_loss did not improve from 0.19959
Epoch 83/248
Epoch 00083: val_loss improved from 0.19959 to 0.19538, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 84/248
Epoch 00084: val_loss did not improve from 0.19538
Epoch 85/248
Epoch 00085: val_loss did not improve from 0.19538
Epoch 86/248
Epoch 00086: val_loss did not improve from 0.19538
Epoch 87/248
Epoch 00087: val_loss did not improve from 0.19538
Epoch 88/248
Epoch 00088: val_loss did not improve from 0.19538
Epoch 89/248
Epoch 00089: val_loss improved from 0.19538 to 0.19280, saving model to ./recipes

Epoch 00097: val_loss did not improve from 0.19247
Epoch 98/248
Epoch 00098: val_loss improved from 0.19247 to 0.19161, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 99/248
Epoch 00099: val_loss improved from 0.19161 to 0.19142, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 100/248
Epoch 00100: val_loss did not improve from 0.19142
Epoch 101/248
Epoch 00101: val_loss did not improve from 0.19142
Epoch 102/248
Epoch 00102: val_loss did not improve from 0.19142
Epoch 103/248
Epoch 00103: val_loss improved from 0.19142 to 0.17826, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 104/248
Epoch 00104: val_loss did not improve from 0.17826
Epoch 105/248
Epoch 00105: val_loss did not improve from 0.17826


Epoch 106/248
Epoch 00106: val_loss did not improve from 0.17826
Epoch 107/248
Epoch 00109: val_loss did not improve from 0.17826
Epoch 110/248
Epoch 00110: val_loss did not improve from 0.17826
Epoch 111/248
Epoch 00111: val_loss did not improve from 0.17826
Epoch 112/248
Epoch 00112: val_loss did not improve from 0.17826
Epoch 113/248
Epoch 00113: val_loss did not improve from 0.17826
Epoch 114/248
Epoch 00114: val_loss did not improve from 0.17826
Epoch 115/248
Epoch 00115: val_loss did not improve from 0.17826
Epoch 116/248
Epoch 00116: val_loss did not improve from 0.17826
Epoch 117/248

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00128: val_loss did not improve from 0.17466
Epoch 129/248
Epoch 00129: val_loss improved from 0.17466 to 0.17219, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 130/248
Epoch 00130: val_loss did not improve from 0.17219
Epoch 131/248
Epoch 00131: val_loss did not improve from 0.17219
Epoch 132/248
Epoch 00132: val_loss did not improve from 0.17219
Epoch 133/248
Epoch 00133: val_loss did not improve from 0.17219
Epoch 134/248
Epoch 00134: val_loss did not improve from 0.17219
Epoch 135/248
Epoch 00135: val_loss did not improve from 0.17219
Epoch 136/248

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00147: val_loss did not improve from 0.16889
Epoch 148/248
Epoch 00148: val_loss did not improve from 0.16889
Epoch 149/248
Epoch 00149: val_loss did not improve from 0.16889
Epoch 150/248
Epoch 00150: val_loss improved from 0.16889 to 0.16142, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 151/248
Epoch 00151: val_loss did not improve from 0.16142
Epoch 152/248
Epoch 00152: val_loss did not improve from 0.16142
Epoch 153/248
Epoch 00153: val_loss did not improve from 0.16142
Epoch 154/248
Epoch 00154: val_loss did not improve from 0.16142
Epoch 155/248
Epoch 00155: val_loss did not improve from 0.16142
Epoch 156/248

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00166: val_loss did not improve from 0.15902
Epoch 167/248
Epoch 00167: val_loss did not improve from 0.15902
Epoch 168/248
Epoch 00168: val_loss did not improve from 0.15902
Epoch 169/248
Epoch 00169: val_loss did not improve from 0.15902
Epoch 170/248
Epoch 00170: val_loss did not improve from 0.15902
Epoch 171/248
Epoch 00171: val_loss did not improve from 0.15902
Epoch 172/248
Epoch 00172: val_loss did not improve from 0.15902
Epoch 173/248
Epoch 00173: val_loss did not improve from 0.15902
Epoch 174/248
Epoch 00174: val_loss did not improve from 0.15902
Epoch 175/248

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 00186: val_loss improved from 0.15284 to 0.15252, saving model to ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5
Epoch 187/248
Epoch 00187: val_loss did not improve from 0.15252
Epoch 188/248
Epoch 00188: val_loss did not improve from 0.15252
Epoch 189/248
Epoch 00189: val_loss did not improve from 0.15252
Epoch 190/248
Epoch 00190: val_loss did not improve from 0.15252
Epoch 191/248
Epoch 00191: val_loss did not improve from 0.15252
Epoch 192/248
Epoch 00192: val_loss did not improve from 0.15252
Epoch 193/248

In [None]:
# Inference cell: re-parse arguments and write hidden-layer (latent space) activations from a saved model.
# NOTE: the --tensors path is absolute and machine-specific.
# NOTE(review): --conv_layers, --dense_blocks, --block_size, --dense_layers and --batch_size differ from the
# training cell (24/24/4/512/2 here vs 48/48/3/256/1 there) -- confirm whether they matter when --model_file is given.
sys.argv = ['train', 
            '--tensors', '/mnt/disks/segmented-sax-lax/2020-07-07/', 
            '--input_tensors', 'lax_2ch_diastole_slice0_3d', 'lax_3ch_diastole_slice0_3d', 
            '--output_tensors',  'lax_2ch_diastole_slice0_3d', 'lax_3ch_diastole_slice0_3d', 'LVM',
            '--activation', 'swish',
            '--conv_layers', '24',
            '--conv_x', '3', '3', '3',
            '--conv_y', '3', '3', '3',
            '--conv_z', '3', '3', '3',
            '--dense_blocks', '24',
            '--block_size', '4',
            '--dense_layers', '512',
            '--pool_x', '2',
            '--pool_y', '2',
            '--batch_size', '2',
            '--patience', '32',
            '--epochs', '292',
            '--learning_rate', '0.001',
            '--training_steps', '256',
            '--validation_steps', '30',
            '--test_steps', '2',
            '--num_workers', '4',
            # Auto-generated Keras layer name; presumably the Concatenate of the paired encodings --
            # verify against the model summary, since such names depend on layer creation order.
            '--hidden_layer', 'concatenate_12',
            # Same path the training cell's log reports saving its checkpoint to.
            '--model_file', './recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5',
            '--tensormap_prefix', 'ml4cvd.tensormap.ukb.mri',
            # Same --id as the training cell.
            '--id', 'lax_2ch_3ch_diastole_paired_autoencoder_swish']
args = parse_args()
#plot_predictions(args)
infer_hidden_layer_multimodal_multitask(args)

2020-08-19 09:47:08,241 - logger:25 - INFO - Logging configuration was loaded. Log messages can be found at ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/log_2020-08-19_09-47_0.log.
2020-08-19 09:47:08,370 - arguments:414 - INFO - Command Line was: 
./scripts/tf.sh train --tensors /mnt/disks/segmented-sax-lax/2020-07-07/ --input_tensors lax_2ch_diastole_slice0_3d lax_3ch_diastole_slice0_3d --output_tensors lax_2ch_diastole_slice0_3d lax_3ch_diastole_slice0_3d LVM --activation swish --conv_layers 24 --conv_x 3 3 3 --conv_y 3 3 3 --conv_z 3 3 3 --dense_blocks 24 --block_size 4 --dense_layers 512 --pool_x 2 --pool_y 2 --batch_size 2 --patience 32 --epochs 292 --learning_rate 0.001 --training_steps 256 --validation_steps 30 --test_steps 2 --num_workers 4 --hidden_layer concatenate_12 --model_file ./recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/lax_2ch_3ch_diastole_paired_autoencoder_swish.h5 --tensormap_prefix ml4cvd.tensormap.ukb.mri --id lax_2ch_3ch_diasto

2020-08-19 09:47:14,164 - recipes:292 - INFO - Dummy output shape is: (1, 512) latent dimensions: 512
2020-08-19 10:08:40,074 - recipes:316 - INFO - Wrote:500 rows of latent space inference.  Last tensor:/mnt/disks/segmented-sax-lax/2020-07-07/1686043.hd5
2020-08-19 10:27:50,032 - recipes:316 - INFO - Wrote:1000 rows of latent space inference.  Last tensor:/mnt/disks/segmented-sax-lax/2020-07-07/2253565.hd5
2020-08-19 10:45:05,420 - recipes:316 - INFO - Wrote:1500 rows of latent space inference.  Last tensor:/mnt/disks/segmented-sax-lax/2020-07-07/2755254.hd5
2020-08-19 11:02:25,288 - recipes:316 - INFO - Wrote:2000 rows of latent space inference.  Last tensor:/mnt/disks/segmented-sax-lax/2020-07-07/3305476.hd5
2020-08-19 11:21:18,589 - recipes:316 - INFO - Wrote:2500 rows of latent space inference.  Last tensor:/mnt/disks/segmented-sax-lax/2020-07-07/3864291.hd5
2020-08-19 11:39:02,405 - recipes:316 - INFO - Wrote:3000 rows of latent space inference.  Last tensor:/mnt/disks/segmented-

In [None]:
# Build `latent_df`: phenotype table joined with the hidden-layer inference TSV and TTN labels.
# NOTE(review): the CSV/TSV paths below are absolute and machine-specific.
hidden_inference = './recipes_output/lax_2ch_3ch_diastole_paired_autoencoder_swish/hidden_inference_lax_2ch_3ch_diastole_paired_autoencoder_swish.tsv'
df = pd.read_csv('/home/sam/ml/trained_models/lax_4ch_diastole_autoencode_leaky_converge/tensors_all_union.csv')
# Quick look at the age distribution of the phenotype table before any merging.
df['21003_Age-when-attended-assessment-centre_2_0'].plot.hist(bins=30)

df2 = pd.read_csv(hidden_inference, sep='\t')
# Coerce both join keys to numeric so they compare equal; unparseable values become NaN (errors='coerce').
# These two assignments modify df and df2 in place.
df['fpath'] = pd.to_numeric(df['fpath'], errors='coerce')
df2['sample_id'] = pd.to_numeric(df2['sample_id'], errors='coerce')
#df.info()
# Inner join: keep only rows whose df.fpath matches a df2.sample_id.
latent_df = pd.merge(df, df2, left_on='fpath', right_on='sample_id', how='inner')
#latent_df.info()
df3 = pd.read_csv('/home/sam/tsvs/ttn_disease.tsv', sep='\t')
# df4 is loaded but not used anywhere in this cell.
df4 = pd.read_csv('/home/sam/csvs/has_exome.csv')
# Right join: every row of latent_df is kept; df3 columns are NaN where df3 has no matching sample_id.
latent_df = pd.merge(df3, latent_df, left_on='sample_id', right_on='sample_id', how='right')
latent_df.info()
# 'has_ttntv' presumably comes from ttn_disease.tsv -- verify against that file's header.
print(latent_df['has_ttntv'].value_counts())

In [None]:
def pca_on_matrix(matrix, pca_components):
    """ Fits an untruncated PCA on 'matrix' and keeps the leading 'pca_components' projections.

    Prints the cumulative variance explained by the retained components and the
    shapes before/after reduction, then draws the scree plot via plot_scree().

    'matrix' is passed straight to PCA.fit / PCA.transform and must expose '.shape'.
    Returns a tuple: (fitted PCA object, projection sliced to the first 'pca_components' columns).
    """
    decomposition = PCA()
    decomposition.fit(matrix)
    variance_ratios = decomposition.explained_variance_ratio_
    print(f'PCA explains {100*np.sum(variance_ratios[:pca_components]):0.1f}% of variance with {pca_components} top PCA components.')
    # Project onto every component first, then slice off the leading columns.
    full_projection = decomposition.transform(matrix)
    top_projection = full_projection[:, :pca_components]
    print(f'PCA reduces matrix shape:{top_projection.shape} from matrix shape: {matrix.shape}')
    plot_scree(pca_components, 100*variance_ratios)
    return decomposition, top_projection

def plot_scree(pca_components, percent_explained):
    """ Draws a scree plot and saves it under ./results/.

    'percent_explained' is a sequence with the percentage of variance explained by
    each principal component (at least one entry is required).  A vertical red
    line marks 'pca_components', and a text label reports the cumulative
    percentage explained by the first 'pca_components' entries.

    The figure is written to ./results/pca_<pca_components>_of_<total>_testimonials.png;
    the directory is created when missing.  Returns None.
    """
    total_components = len(percent_explained)
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(range(total_components), percent_explained, 'g.-', linewidth=1)
    ax.axvline(x=pca_components, c='r', linewidth=3)
    label = f'{np.sum(percent_explained[:pca_components]):0.1f}% of variance explained by top {pca_components} of {total_components} components'
    # The label sits at the height of the second component; with a single
    # component there is no index 1, so fall back to the only entry.
    label_height = percent_explained[min(1, total_components - 1)]
    ax.text(pca_components + 0.02*total_components, label_height, label)
    ax.set_title('Scree Plot')
    ax.set_xlabel('Principal Components')
    ax.set_ylabel('% of Variance Explained by Each Component')
    figure_path = f'./results/pca_{pca_components}_of_{total_components}_testimonials.png'
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(os.path.dirname(figure_path), exist_ok=True)
    fig.savefig(figure_path)
    
def unit_vector(vector):
    """ Returns 'vector' divided by its norm (as computed by np.linalg.norm).  """
    magnitude = np.linalg.norm(vector)
    return vector / magnitude

def angle_between(v1, v2):
    """ Returns the angle in radians separating vectors 'v1' and 'v2'.

    Both inputs are normalized with unit_vector() and the dot product is clipped
    to [-1, 1] before arccos, so rounding error cannot push it out of range::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    cosine = np.dot(unit_vector(v1), unit_vector(v2))
    bounded_cosine = np.clip(cosine, -1.0, 1.0)
    return np.arccos(bounded_cosine)

def directions_in_latent_space(stratify_column, stratify_thresh, split_column, split_thresh, latent_cols, latent_df):
    """ Prints how the hit-vs-miss direction of one column changes across a split on another.

    Rows of 'latent_df' are "hits" when latent_df[stratify_column] >= stratify_thresh
    and "misses" when it is < stratify_thresh.  The same grouping is repeated
    separately for rows with latent_df[split_column] >= split_thresh (suffix 1)
    and rows with latent_df[split_column] < split_thresh (suffix 2).

    For each group the mean over rows of the 'latent_cols' columns is taken, and
    these angles (radians, via angle_between) are printed:
      * miss mean vs hit mean within split group 1 and within split group 2,
      * direction (hit mean - miss mean) of group 1 vs group 2,
      * overall direction vs the direction of group 1 and of group 2.

    Nothing is returned; the report goes to stdout.
    """
    stratify_values = latent_df[stratify_column]
    split_values = latent_df[split_column]
    # Misses are computed with '<' rather than by negating the hit mask, so rows
    # whose value is NaN land in neither group.
    is_hit = stratify_values >= stratify_thresh
    is_miss = stratify_values < stratify_thresh
    split_high = split_values >= split_thresh
    split_low = split_values < split_thresh

    def _latent_rows(mask):
        # Latent columns of the rows selected by a boolean mask, as a NumPy array.
        return latent_df.loc[mask][latent_cols].to_numpy()

    # Overall direction from the miss mean to the hit mean, ignoring the split column.
    miss_mean_vector = np.mean(_latent_rows(is_miss), axis=0)
    hit_mean_vector = np.mean(_latent_rows(is_hit), axis=0)
    strat_vector = hit_mean_vector - miss_mean_vector

    hit1 = _latent_rows(is_hit & split_high)
    miss1 = _latent_rows(is_miss & split_high)
    hit2 = _latent_rows(is_hit & split_low)
    miss2 = _latent_rows(is_miss & split_low)

    miss_mean_vector1 = np.mean(miss1, axis=0)
    hit_mean_vector1 = np.mean(hit1, axis=0)
    angle1 = angle_between(miss_mean_vector1, hit_mean_vector1)
    miss_mean_vector2 = np.mean(miss2, axis=0)
    hit_mean_vector2 = np.mean(hit2, axis=0)
    angle2 = angle_between(miss_mean_vector2, hit_mean_vector2)

    # Directions within each split group, compared with each other and with the overall direction.
    h1_vector = hit_mean_vector1 - miss_mean_vector1
    h2_vector = hit_mean_vector2 - miss_mean_vector2
    angle3 = angle_between(h1_vector, h2_vector)
    angle4 = angle_between(strat_vector, h1_vector)
    angle5 = angle_between(strat_vector, h2_vector)

    print(f'\n Between {stratify_column}, and splits: {split_column}\n',
          f'Angles: {angle1:.4f}, {angle2:.4f} \n'
          f'Direction angles: split-high vs split-low {angle3:.4f}, overall vs split-high {angle4:.4f}, overall vs split-low {angle5:.4f} \n'
          f'stratify threshold: {stratify_thresh}, split thresh: {split_thresh}, \n'
          f'miss_mean_vector1 shape {miss_mean_vector1.shape}, hit_mean_vector2:{hit_mean_vector2.shape} \n'
          f'Hit1 shape {hit1.shape}, miss1:{miss1.shape} threshold:{stratify_thresh}\n'
          f'Hit2 shape {hit2.shape}, miss2:{miss2.shape}\n')

def stratify_latent_space(stratify_column, stratify_thresh, latent_cols, latent_df):
    """ Prints summary statistics separating hits from misses in the latent columns.

    Hits are rows with latent_df[stratify_column] >= stratify_thresh, misses are
    rows with a value < stratify_thresh.  The report contains the angle (radians)
    between the two group means, the group shapes, the Euclidean distance between
    the means, and for each group the standard deviation taken along axis 1
    (across the latent columns of each row) averaged over rows.

    Nothing is returned; the report goes to stdout.
    """
    column_values = latent_df[stratify_column]
    rows_at_or_above = latent_df.loc[column_values >= stratify_thresh]
    rows_below = latent_df.loc[column_values < stratify_thresh]
    hit = rows_at_or_above[latent_cols].to_numpy()
    miss = rows_below[latent_cols].to_numpy()

    miss_center = np.mean(miss, axis=0)
    hit_center = np.mean(hit, axis=0)
    angle = angle_between(miss_center, hit_center)

    separation = np.linalg.norm(hit_center - miss_center)
    hit_spread = np.std(hit, axis=1).mean()
    miss_spread = np.std(miss, axis=1).mean()
    report = (f'Angle between {stratify_column} and all others: {angle}, \n'
              f'Hit shape {hit.shape}, miss:{miss.shape} threshold:{stratify_thresh}\n'
              f'Distance: {separation:.3f}, Hit std {hit_spread:.3f}, miss std:{miss_spread:.3f}\n')
    print(report)
    
def plot_pcs(sides, color_key):
    """ Scatter-plots consecutive column pairs of 'matrix_reduce' on a sides x sides grid.

    Panel i shows column i against column i+1 of the notebook-level array
    'matrix_reduce', with points colored by latent_df[color_key]; each panel gets
    its own colorbar.  The grid has sides*sides panels, so 'matrix_reduce' needs
    at least sides*sides + 1 columns.

    NOTE(review): 'matrix_reduce' and 'latent_df' are read from notebook globals.
    pca_on_matrix() names its result 'matrix_reduced' -- confirm that a global
    called 'matrix_reduce' is actually defined before calling this.
    """
    # The colors are the same for every panel, so look them up once.
    colors = latent_df[color_key].to_numpy()
    # squeeze=False keeps 'axes' a 2D array even for sides == 1, where
    # plt.subplots would otherwise return a single Axes without .ravel().
    f, axes = plt.subplots(sides, sides, figsize=(16, 16), squeeze=False)
    for i, ax in enumerate(axes.ravel()):
        points = ax.scatter(matrix_reduce[:, i], matrix_reduce[:, i+1], c=colors)
        # Label each panel so the plotted component pair can be read off the figure.
        ax.set_xlabel(f'PC {i}')
        ax.set_ylabel(f'PC {i+1}')
        f.colorbar(points, ax=ax)
        

