In [1]:
# Imports
import os
import sys
import pickle
import random
from typing import List, Dict, Callable
from collections import defaultdict, Counter

import csv
import gzip
import h5py
import shutil
import zipfile
import pydicom
import numpy as np


# Keras imports
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import History
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import model_to_dot
from tensorflow.keras.layers import LeakyReLU, PReLU, ELU, ThresholdedReLU, Lambda, Reshape, LayerNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.layers import SpatialDropout1D, SpatialDropout2D, SpatialDropout3D, add, concatenate
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Activation, Flatten, LSTM, RepeatVector
from tensorflow.keras.layers import Conv1D, Conv2D, Conv3D, UpSampling1D, UpSampling2D, UpSampling3D, MaxPooling1D
from tensorflow.keras.layers import MaxPooling2D, MaxPooling3D, AveragePooling1D, AveragePooling2D, AveragePooling3D, Layer
from tensorflow.keras.layers import SeparableConv1D, SeparableConv2D, DepthwiseConv2D


%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import gridspec

from ml4cvd.defines import StorageType
from ml4cvd.arguments import parse_args, TMAPS, _get_tmap
from ml4cvd.TensorMap import TensorMap, Interpretation
from ml4cvd.tensor_generators import test_train_valid_tensor_generators, big_batch_from_minibatch_generator
from ml4cvd.models import train_model_from_generators, make_multimodal_multitask_model, _inspect_model, train_model_from_generators, make_hidden_layer_model
from ml4cvd.recipes import test_multimodal_multitask, train_multimodal_multitask, saliency_maps, _predict_and_evaluate

# Constants
# Folder handed to the `--tensors` command-line option when the arguments are built below.
# NOTE(review): the log output saved with this notebook reports
# `--tensors /mnt/disks/brains-all-together/2020-02-11/`, which differs from this value —
# confirm which folder is intended before re-running.
HD5_FOLDER = '/mnt/disks/ecg-rest-38k-tensors/2020-03-14/'

In [2]:
# Emulate the command line of the `train` recipe so parse_args() can build `args` in-notebook.
# Options that take a value are listed as (flag, value) pairs; the bare switch is appended last.
cli_options = [
    ('--tensors', HD5_FOLDER),
    ('--input_tensors', 't2_20_slices_1'),
    ('--output_tensors', 'sex'),
    ('--training_steps', '96'),
    ('--validation_steps', '24'),
    ('--test_steps', '24'),
    ('--epochs', '2'),
    ('--batch_size', '4'),
    ('--id', 't2_20_slices_1_slice_share'),
]
sys.argv = ['train'] + [token for option in cli_options for token in option] + ['--inspect_model']

args = parse_args()

2020-05-22 11:36:06,862 - logger:25 - INFO - Logging configuration was loaded. Log messages can be found at ./recipes_output/t2_20_slices_1_slice_share/log_2020-05-22_11-36_0.log.
2020-05-22 11:36:06,864 - arguments:372 - INFO - Command Line was: 
./scripts/tf.sh train --tensors /mnt/disks/brains-all-together/2020-02-11/ --input_tensors t2_20_slices_1 --output_tensors sex --training_steps 96 --validation_steps 24 --test_steps 24 --epochs 2 --batch_size 4 --id t2_20_slices_1_slice_share --inspect_model

2020-05-22 11:36:06,866 - arguments:373 - INFO - Total TensorMaps: 556 Arguments are Namespace(activation='relu', aligned_dimension=16, alpha=0.5, anneal_max=2.0, anneal_rate=0.0, anneal_shift=0.0, app_csv=None, b_slice_force=None, balance_csvs=[], batch_size=4, bigquery_credentials_file='/mnt/ml4cvd/projects/jamesp/bigquery/bigquery-viewer-credentials.json', bigquery_dataset='broad-ml4cvd.ukbb7089_r10data', block_size=3, bottleneck_type=<BottleneckType.FlattenRestructure: 1>, cache_size

In [3]:
# Slice-sharing model: a single slice encoder is built once and then called on every slice
# of the input volume; the per-slice embeddings are concatenated, passed through the dense
# layers from `args`, and decoded into one output head per output TensorMap.
slice_axis = -1  # axis of volume_tm.shape to slice along; negative or non-negative both work
volume_tm = args.tensor_maps_in[0]
volume_axes = len(volume_tm.shape)
if not -volume_axes <= slice_axis < volume_axes:
    raise ValueError(f'Can not handle slice axis {slice_axis} with original shape {volume_tm.shape}')
# Express the axis as a negative offset from the end. The Keras input carries an extra leading
# batch dimension, so only offsets counted from the last axis line up between
# volume_tm.shape and in_volume.
axis_from_end = slice_axis if slice_axis < 0 else slice_axis - volume_axes

slices = volume_tm.shape[slice_axis]
print(f' original shape: {volume_tm.shape}')
slice_shape = list(volume_tm.shape)
del slice_shape[slice_axis]
slice_tm = TensorMap(f'slice_{volume_tm.input_name()}', shape=tuple(slice_shape))
print(f'Slice name: {slice_tm.name} slice shape: {slice_shape} slices: {slices} original shape: {volume_tm.shape}')

# Build the per-slice encoder. tensor_maps_in is overridden for this call only, so `args`
# itself is left unmodified.
slice_model_kwargs = dict(vars(args), tensor_maps_in=[slice_tm])
slice_model = make_multimodal_multitask_model(**slice_model_kwargs)
# 'embed' is presumably the name of the hidden layer to expose — confirm against make_hidden_layer_model.
embed_slice_model = make_hidden_layer_model(slice_model, [slice_tm], 'embed')
embed_slice_model.summary()

in_volume = Input(shape=volume_tm.shape, name=volume_tm.input_name())
# Index tuple equivalent to in_volume[..., i, :, :] with one ':' per axis after the sliced one.
trailing_axes = (slice(None),) * (-axis_from_end - 1)
# The same embed_slice_model instance is called on each slice, so all slices share its weights.
embeddings = [embed_slice_model(in_volume[(Ellipsis, i) + trailing_axes]) for i in range(slices)]
multimodal_activation = concatenate(embeddings, axis=-1)
for units in args.dense_layers:
    multimodal_activation = Dense(units=units, activation=args.activation)(multimodal_activation)

# build decoders
losses = []
my_metrics = {}
loss_weights = []
output_predictions = {}
tensor_maps_out = args.tensor_maps_out
for tm in tensor_maps_out:
    if tm.is_categorical():
        output_predictions[tm] = Dense(units=tm.shape[0], activation='softmax', name=tm.output_name())(multimodal_activation)
    elif tm.axes() == 1:
        output_predictions[tm] = Dense(units=tm.shape[0],
                                       activation=tm.activation,
                                       kernel_regularizer=tf.keras.regularizers.l1(0.01),
                                       name=tm.output_name())(multimodal_activation)
    else:
        # Fail here with a clear message instead of a KeyError when the Model is assembled below.
        raise ValueError(f'Can not build an output head for {tm.output_name()} with shape {tm.shape}')
    losses.append(tm.loss)
    loss_weights.append(tm.loss_weight)
    my_metrics[tm.output_name()] = tm.metrics

m = Model(inputs=[in_volume], outputs=[output_predictions[tm] for tm in tensor_maps_out])
m.summary()

 original shape: (256, 256, 20)
Slice name: slice_input_t2_20_slices_1_continuous slice shape: [256, 256] slices: 20 original shape: (256, 256, 20)
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_slice_input_t2_20_slices_ [(None, 256, 256)]   0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 256, 32)      24608       input_slice_input_t2_20_slices_1_
__________________________________________________________________________________________________
activation (Activation)         (None, 256, 32)      0           conv1d[0][0]                     
__________________________________________________________________________________________________
max_pooling1d (MaxPooling1D)    (None, 128, 3

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_slice_input_t2_20_slices_ [(None, 256, 256)]   0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 256, 32)      24608       input_slice_input_t2_20_slices_1_
__________________________________________________________________________________________________
activation (Activation)         (None, 256, 32)      0           conv1d[0][0]                     
__________________________________________________________________________________________________
max_pooling1d (MaxPooling1D)    (None, 128, 32)      0           activation[0][0]                 
____________________________________________________________________________________________

In [4]:
# Rebuild `args` from sys.argv, then create the train / validation / test tensor generators
# from the full set of parsed arguments.
args = parse_args()
generate_train, generate_valid, generate_test = test_train_valid_tensor_generators(**vars(args))

2020-05-22 11:36:12,332 - logger:25 - INFO - Logging configuration was loaded. Log messages can be found at ./recipes_output/t2_20_slices_1_slice_share/log_2020-05-22_11-36_0.log.
2020-05-22 11:36:12,333 - arguments:372 - INFO - Command Line was: 
./scripts/tf.sh train --tensors /mnt/disks/brains-all-together/2020-02-11/ --input_tensors t2_20_slices_1 --output_tensors sex --training_steps 96 --validation_steps 24 --test_steps 24 --epochs 2 --batch_size 4 --id t2_20_slices_1_slice_share --inspect_model

2020-05-22 11:36:12,334 - arguments:373 - INFO - Total TensorMaps: 556 Arguments are Namespace(activation='relu', aligned_dimension=16, alpha=0.5, anneal_max=2.0, anneal_rate=0.0, anneal_shift=0.0, app_csv=None, b_slice_force=None, balance_csvs=[], batch_size=4, bigquery_credentials_file='/mnt/ml4cvd/projects/jamesp/bigquery/bigquery-viewer-credentials.json', bigquery_dataset='broad-ml4cvd.ukbb7089_r10data', block_size=3, bottleneck_type=<BottleneckType.FlattenRestructure: 1>, cache_size

In [5]:
# Compile the slice-sharing model with the losses, loss weights and metrics collected from
# the output TensorMaps, then train it with the train / validation generators.
# `learning_rate` replaces the deprecated `lr` alias, which newer Keras releases reject.
opt = Adam(learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
m.compile(optimizer=opt, loss=losses, loss_weights=loss_weights, metrics=my_metrics)
train_model_from_generators(m, generate_train, generate_valid, args.training_steps, args.validation_steps, 
                            args.batch_size, args.epochs, args.patience, args.output_folder, args.id, 
                            args.inspect_model, args.inspect_show_labels)

2020-05-22 11:36:15,540 - models:1318 - INFO - Saving architecture diagram to:./recipes_output/t2_20_slices_1_slice_share/architecture_graph_t2_20_slices_1_slice_share.png
2020-05-22 11:36:20,214 - tensor_generators:149 - INFO - Started 3 train workers with cache size 0.875GB.
2020-05-22 11:36:22,194 - tensor_generators:149 - INFO - Started 1 validation workers with cache size 0.875GB.
Train for 96 steps, validate for 1 steps
2020-05-22 11:37:43,639 - models:1257 - INFO - Spent:83.58 seconds training, Samples trained on:384 Per sample training speed:0.218 seconds.
2020-05-22 11:39:01,175 - models:1262 - INFO - Spent:77.53 seconds predicting, Samples inferred:384 Per sample inference speed:0.2019 seconds.
Train for 96 steps, validate for 24 steps
Epoch 1/2
Epoch 00001: val_loss improved from inf to 0.68312, saving model to ./recipes_output/t2_20_slices_1_slice_share/t2_20_slices_1_slice_share.h5
Epoch 2/2
Epoch 00002: val_loss improved from 0.68312 to 0.67920, saving model to ./recipes_

<tensorflow.python.keras.engine.training.Model at 0x7ff4e04bf208>

In [6]:
# Gather `args.test_steps` mini-batches from the test generator into one big batch and
# evaluate the trained model on it. `out_path` is <output_folder>/<id>/ with a trailing slash.
out_path = os.path.join(args.output_folder, f'{args.id}/')
test_data, test_labels, test_paths = big_batch_from_minibatch_generator(generate_test, args.test_steps)
_predict_and_evaluate(
    m, test_data, test_labels,
    args.tensor_maps_in, args.tensor_maps_out,
    args.batch_size, args.hidden_layer,
    out_path, test_paths, args.alpha,
)

2020-05-22 11:41:31,118 - tensor_generators:149 - INFO - Started 3 test workers with cache size 0.0GB.
2020-05-22 11:41:45,912 - tensor_generators:484 - INFO - Made a big batch of tensors with key:input_t2_20_slices_1_continuous and shape:(96, 256, 256, 20).
2020-05-22 11:41:45,914 - tensor_generators:484 - INFO - Made a big batch of tensors with key:output_Sex_Male_0_0_categorical and shape:(96, 2).
2020-05-22 11:41:53,512 - plots:98 - INFO - For tm:Sex_Male_0_0 with channel map:{'Sex_Female_0_0': 0, 'Sex_Male_0_0': 1} examples:96
2020-05-22 11:41:53,515 - plots:99 - INFO - 
Sum Truth:[59. 37.] 
Sum pred :[49.87045 46.12956]
2020-05-22 11:41:53,547 - plots:1580 - INFO - prAUC Label Sex_Female_0_0 mean precision:0.770 n=59
2020-05-22 11:41:53,552 - plots:1580 - INFO - prAUC Label Sex_Male_0_0 mean precision:0.648 n=37
2020-05-22 11:41:53,772 - plots:1595 - INFO - Saved Precision Recall curve at: ./recipes_output/t2_20_slices_1_slice_share/precision_recall_Sex_Male_0_0.png
2020-05-22 11

{'Sex_Female_0_0': 0.7311039853412734, 'Sex_Male_0_0': 0.7311039853412735}