<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Model-Training" data-toc-modified-id="Model-Training-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Model Training</a></span></li><li><span><a href="#Explore-xArray" data-toc-modified-id="Explore-xArray-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Explore xArray</a></span></li><li><span><a href="#DICOM-Images" data-toc-modified-id="DICOM-Images-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>DICOM Images</a></span></li><li><span><a href="#Nifti-Maker" data-toc-modified-id="Nifti-Maker-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Nifti Maker</a></span></li><li><span><a href="#xarray-generation" data-toc-modified-id="xarray-generation-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>xarray generation</a></span></li><li><span><a href="#xarray-viewer" data-toc-modified-id="xarray-viewer-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>xarray viewer</a></span></li><li><span><a href="#Slurm-Analysis" data-toc-modified-id="Slurm-Analysis-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Slurm Analysis</a></span></li></ul></div>

# New Data Exploration

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from datetime import datetime
from collections import OrderedDict
import numpy as np
import pickle as pkl
import pandas as pd
import xarray as xr
import SimpleITK as sitk
from tqdm import tqdm_notebook

import holoviews as hv
from holoviews import opts
import panel as pn
import hvplot.pandas
hv.extension('bokeh')
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (10,8)

from mre.plotting import patient_series_viewer, chaos_viewer, xr_viewer, hv_dl_vis_chaos, xr_viewer_chaos
from mre.preprocessing import make_nifti_atlas_v2, make_nifti_atlas_v3, make_xr_dataset_for_chaos
from mre.segmentation import ChaosDataset
from mre.train_seg_model import train_seg_model 
from mre import pytorch_arch_old

from torch.utils.data import Dataset, DataLoader
import torchvision.utils
from torchsummary import summary
import torch
import torch.nn as nn
from collections import defaultdict
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import models


## Model Training

In [3]:
# Release unused cached GPU memory held by PyTorch before starting a new run.
torch.cuda.empty_cache()

# --- Explicit arguments ---------------------------------------------------
data_dir = Path('/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets')
data_file = 'xarray_chaos_v2.nc'
out_dir = '/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/'
verbose = True

# Timestamp of this run. It is used as the model version unless one of the
# earlier version strings below is un-commented.
now = datetime.today().strftime('%Y-%m-%d_%H-%M-%S')
version = None
# version = '2019-10-31_11-33-07'
# version = '2019-11-06_18-27-49'
version = now if version is None else version
# model_version = f'chaos_notebook_test_{version}'
model_version = version

# --- Keyword arguments forwarded verbatim to train_seg_model --------------
kwargs = dict(
    subj='001',
    val=['002', '003', '101', '102'],
    def_seq_mode='all',
    model_arch='3D',
    loss='dice',
    transform=True,

    # NOTE(review): judging by the "LR" lines in the training log, the learning
    # rate starts at `lr` and is scaled by `gamma` every `step_size` epochs.
    step_size=60,
    num_epochs=300,
    lr=3e-2,
    batch_size=8,

    # Per-split sequence mode and spatial augmentation flags.
    test_seq_mode='all',
    test_aug=True,
    train_aug=True,
    val_aug=False,

    # Per-split colour augmentation flags.
    train_color_aug=True,
    val_color_aug=False,
    test_color_aug=True,

    bce_weight=0.2,
    gamma=0.3,

    worker_init_fn='rand_epoch',

    dry_run=False
)

print(now)
output = train_seg_model(
    data_path=data_dir,
    data_file=data_file,
    output_path=out_dir,
    model_version=model_version,
    verbose=verbose,
    **kwargs
)

#model_path = Path(out_dir, 'trained_models', subj, f'model_{model_version}.pkl')
#model = pytorch_arch_old.GeneralUNet3D(n_layers, 1, model_cap, 1, channel_growth, False, False)
#model_dict = torch.load(model_path, map_location='cuda:0')
##model_dict = torch.load(model_path)
#model_dict = OrderedDict([(key[7:], val) for key, val in model_dict.items()])
#model.load_state_dict(model_dict, strict=True)
#model.eval()

2020-07-09_13-27-31
{'train_trans': True, 'train_clip': True, 'train_aug': True, 'train_sample': 'shuffle', 'val_trans': True, 'val_clip': True, 'val_aug': False, 'val_sample': 'shuffle', 'test_trans': True, 'test_clip': True, 'test_aug': True, 'train_seq_mode': None, 'val_seq_mode': None, 'test_seq_mode': 'all', 'def_seq_mode': 'all', 'seed': 100, 'worker_init_fn': 'rand_epoch', 'subj': '001', 'batch_size': 8, 'model_cap': 16, 'lr': 0.03, 'step_size': 60, 'gamma': 0.3, 'num_epochs': 300, 'dry_run': False, 'coord_conv': False, 'loss': 'dice', 'model_arch': '3D', 'n_layers': 3, 'in_channels': 1, 'out_channels_final': 1, 'channel_growth': False, 'transfer_layer': False, 'bce_weight': 0.2, 'resize': False, 'val': ['002', '003', '101', '102'], 'transform': True}
<xarray.Dataset>
Dimensions:   (sequence: 4, subject: 40, x: 256, y: 256, z: 32)
Coordinates:
  * subject   (subject) object '001' '003' '008' '013' ... '126' '128' '130'
  * sequence  (sequence) object 't1_in' 't1_out' 't2' 'ct'
 

val: bce: 0.103335, dice: 0.389717, loss: 0.246526
saving best model
1m 21s
Epoch 13/299
----------
LR 0.03
train: bce: 0.114616, dice: 0.467159, loss: 0.290887
val: bce: 0.153678, dice: 0.553404, loss: 0.353541
1m 21s
Epoch 14/299
----------
LR 0.03
train: bce: 0.123190, dice: 0.473738, loss: 0.298464
val: bce: 0.216110, dice: 0.562958, loss: 0.389534
1m 20s
Epoch 15/299
----------
LR 0.03
train: bce: 0.112610, dice: 0.444540, loss: 0.278575
val: bce: 0.217674, dice: 0.687545, loss: 0.452609
1m 20s
Epoch 16/299
----------
LR 0.03
train: bce: 0.117259, dice: 0.454579, loss: 0.285919
val: bce: 0.490606, dice: 0.555795, loss: 0.523200
1m 20s
Epoch 17/299
----------
LR 0.03
train: bce: 0.113323, dice: 0.437442, loss: 0.275383
val: bce: 0.495566, dice: 0.664137, loss: 0.579852
1m 20s
Epoch 18/299
----------
LR 0.03
train: bce: 0.109544, dice: 0.433804, loss: 0.271674
val: bce: 0.268576, dice: 0.429606, loss: 0.349091
1m 20s
Epoch 19/299
----------
LR 0.03
train: bce: 0.090917, dice: 0.3832

train: bce: 0.039147, dice: 0.162058, loss: 0.100602
val: bce: 0.049125, dice: 0.164997, loss: 0.107061
saving best model
1m 20s
Epoch 70/299
----------
LR 0.009
train: bce: 0.036986, dice: 0.160240, loss: 0.098613
val: bce: 0.053950, dice: 0.179032, loss: 0.116491
1m 21s
Epoch 71/299
----------
LR 0.009
train: bce: 0.036787, dice: 0.158435, loss: 0.097611
val: bce: 0.057074, dice: 0.185101, loss: 0.121088
1m 21s
Epoch 72/299
----------
LR 0.009
train: bce: 0.036272, dice: 0.153774, loss: 0.095023
val: bce: 0.046394, dice: 0.163839, loss: 0.105117
saving best model
1m 21s
Epoch 73/299
----------
LR 0.009
train: bce: 0.035885, dice: 0.151653, loss: 0.093769
val: bce: 0.044061, dice: 0.162659, loss: 0.103360
saving best model
1m 21s
Epoch 74/299
----------
LR 0.009
train: bce: 0.037767, dice: 0.154153, loss: 0.095960
val: bce: 0.048138, dice: 0.166929, loss: 0.107533
1m 21s
Epoch 75/299
----------
LR 0.009
train: bce: 0.036377, dice: 0.157727, loss: 0.097052
val: bce: 0.044336, dice: 0.1

train: bce: 0.026496, dice: 0.114704, loss: 0.070600
val: bce: 0.040975, dice: 0.142765, loss: 0.091870
1m 20s
Epoch 125/299
----------
LR 0.0026999999999999997
train: bce: 0.025702, dice: 0.116423, loss: 0.071063
val: bce: 0.040899, dice: 0.141233, loss: 0.091066
1m 20s
Epoch 126/299
----------
LR 0.0026999999999999997
train: bce: 0.026304, dice: 0.115595, loss: 0.070950
val: bce: 0.039882, dice: 0.139992, loss: 0.089937
saving best model
1m 21s
Epoch 127/299
----------
LR 0.0026999999999999997
train: bce: 0.031022, dice: 0.121252, loss: 0.076137
val: bce: 0.042098, dice: 0.143738, loss: 0.092918
1m 20s
Epoch 128/299
----------
LR 0.0026999999999999997
train: bce: 0.028090, dice: 0.120237, loss: 0.074163
val: bce: 0.044718, dice: 0.150699, loss: 0.097708
1m 20s
Epoch 129/299
----------
LR 0.0026999999999999997
train: bce: 0.026035, dice: 0.118230, loss: 0.072132
val: bce: 0.045790, dice: 0.154446, loss: 0.100118
1m 21s
Epoch 130/299
----------
LR 0.0026999999999999997
train: bce: 0.02

In [5]:
# `output` holds either four items (inputs, targets, names, predictions) or
# something whose first element is indexable by 'test' and iterable; in the
# latter case a single batch is pulled from it and no prediction is shown.
if len(output) != 4:
    inputs, targets, names = next(iter(output[0]['test']))
    model_pred = None
else:
    inputs, targets, names, model_pred = output
    print('yes')
#if model:
#    #inputs.to('cuda:0')
#    model_pred = torch.zeros_like(inputs)
#    for i in tqdm_notebook(range(1)):
#            model_pred[i, :] = model(inputs[i:i+1, :])
#            model_pred[i, :] = torch.sigmoid(model_pred[i, :])
##             # ones = torch.ones_like(model_pred[i, j, :])
##             # zeros = torch.zeros_like(model_pred[i, j, :])
##             # model_pred[:, i, :] = torch.where(model_pred[:, i, :]>3e-3, ones, zeros)
##     inputs.to('cpu')
# hv_dl_vis_chaos(inputs, targets, names, ['t1_in', 't1_out', 't2'], model_pred)
# hv_dl_vis_chaos(inputs, targets, ['1'], ['seq'], model_pred)

# Placeholder labels are passed instead of `names`.
# NOTE(review): three labels presumably assume a batch of three samples — confirm.
sample_labels = ['1','2','3']
sequence_labels = ['seq']
hv_dl_vis_chaos(inputs, targets, sample_labels, sequence_labels, model_pred)

yes
(3, 1, 32, 256, 256)
(3, 1, 32, 256, 256)
[['1', '2', '3'], ['seq'], range(0, 32), range(0, -256, -1), range(0, 256)]
(3, 1, 32, 256, 256)
here


## Explore xArray

In [None]:
# Location of the pre-built CHAOS xarray dataset.
data_dir = Path('/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets')
ds_path = Path(data_dir, 'xarray_chaos_v2.nc')

# Read everything into memory inside a context manager so the underlying
# NetCDF file handle is closed as soon as loading finishes. This leaves no
# open handle or lock on the file if it is regenerated later in the notebook.
with xr.open_dataset(ds_path) as ds_on_disk:
    ds = ds_on_disk.load()
ds

In [None]:
#xr_viewer_chaos(ds)

In [4]:
# Persist the training output. The `with` block guarantees the file is flushed
# and closed even if pickling fails part-way through.
with open('/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/seg_example_output.pkl', 'wb') as pkl_file:
    pkl.dump(output, pkl_file)

In [5]:
# Restore the previously saved training output; the handle is closed on exit.
# NOTE(review): unpickling executes arbitrary code — only load files you wrote yourself.
with open('/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/seg_example_output.pkl', 'rb') as pkl_file:
    output = pkl.load(pkl_file)

In [11]:
# Scratch check: draw ten random integers from the half-open range [0, 2).
np.random.randint(low=0, high=2, size=10)

array([0, 0, 1, 1, 1, 1, 0, 0, 0, 0])

## DICOM Images

In [17]:
# Scratch dictionary with mixed value types (two ints and a list of strings).
a = dict(lol=1, wut=2, huh=['ga', 'ba', 'la'])

In [19]:
# Print every key next to its value, in insertion order.
for key, value in a.items():
    print(key, value)

lol 1
wut 2
huh ['ga', 'ba', 'la']


In [30]:
# patient_series_viewer(data_dir, 'DICOMA/PA1/ST0')
# NOTE(review): `data_dir_ct` is not assigned in any cell visible in this
# notebook; it presumably survives from earlier kernel state. Define it
# explicitly before this call so the cell works after Restart & Run All.
patient_series_viewer(data_dir_ct, '1', img_type='DICOM_CHAOS_CT')

/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/CT/1/DICOM_anon
direction (1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)
origin (-167.78590393066406, -176.04379272460938, -84.0)
spacing (0.687671, 0.687671, 1.5)
(96, 512, 512)
:Dataset   [xDICOM_anon  CHAOS^CT_SET_1,yDICOM_anon  CHAOS^CT_SET_1,zDICOM_anon  CHAOS^CT_SET_1]   (MRIDICOM_anon  CHAOS^CT_SET_1)



## Nifti Maker

In [None]:
# Scratch check: draw a single random integer from the half-open range [0, 3).
np.random.randint(low=0, high=3)

In [None]:
#make_nifti_atlas_v2()

In [15]:
# Build the NIfTI files for the CHAOS patients.
# NOTE(review): the meaning of the three positional arguments (True, True, None)
# is not visible from this notebook — check mre.preprocessing.make_nifti_atlas_v3
# and consider passing them by keyword.
make_nifti_atlas_v3(True, True, None)

HBox(children=(FloatProgress(value=0.0, description='Patients', max=32.0, style=ProgressStyle(description_widt…

/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/CT/3/DICOM_anon does not exist.
Cannot make 103_CT
/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/MR/6/T1DUAL/DICOM_anon/InPhase does not exist.
Cannot make 006_t1_pre_in_MR
/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/MR/6/T1DUAL/DICOM_anon/OutPhase does not exist.
Cannot make 006_t1_pre_out_MR
/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/MR/6/T2SPIR/DICOM_anon does not exist.
Cannot make 006_t2_MR
/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/CT/13/DICOM_anon does not exist.
Cannot make 113_CT
/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/MR/14/T1DUAL/DICOM_anon/InPhase does not exist.
Cannot make 014_t1_pre_in_MR
/pghbio/dbmi/batmanlab/bpollack/predictElasticity/data/CHAOS/Train_Sets/MR/14/T1DUAL/DICOM_anon/OutPhase does not exist.
Cannot make 014_t1_pre_out_MR
/pghbio/dbmi/batmanlab

In [None]:
#patient_series_viewer(data_dir, 'NIFTI/01', img_type='NIFTI')

In [5]:
# NOTE(review): `data_dir_new` is not assigned in any cell visible in this
# notebook; it presumably survives from earlier kernel state. Define it
# explicitly before this call so the cell works after Restart & Run All.
chaos_viewer(data_dir_new, 'NIFTI/101')

## xarray generation

In [None]:
ls ../data/CHAOS/Train_Sets/MR/NIFTI/

In [3]:
# Earlier / reduced patient selections, kept for quick switching:
# patients = ["01",  "03",  "08",  "13",  "19",  "21",  "31",  "33",  "36",  "38",
# "02",  "05",  "10",  "15",  "20",  "22",  "32",  "34",  "37",  "39"]
# patients = ['001', '101']

# Full list of 40 subject IDs, ten per row (order is significant and unchanged).
patients = [
    '001', '003', '008', '013', '019', '021', '031', '033', '036', '038',
    '101', '105', '108', '114', '118', '121', '123', '125', '127', '129',
    '002', '005', '010', '015', '020', '022', '032', '034', '037', '039',
    '102', '106', '110', '116', '119', '122', '124', '126', '128', '130',
]

# NOTE(review): 256, 256, 32 presumably are the x/y/z sizes of the resampled
# volumes (they match the dataset dimensions printed during training) — confirm.
ds = make_xr_dataset_for_chaos(patients, 256, 256, 32, 'chaos_v2')

HBox(children=(FloatProgress(value=0.0, description='Patients', max=40.0, style=ProgressStyle(description_widt…


Writing file disk...


## xarray viewer

In [11]:
# Open the viewer on the dataset `ds`, passing 'mask' as the overlay data.
xr_viewer(ds, overlay_data='mask')
#ds

In [None]:
# Setup paths

## Slurm Analysis

In [None]:
# Directory holding the pickled per-job configuration/result files.
config_path = Path(out_dir) / 'config'

In [None]:
# Collect one row per pickled job config, indexed by the file stem (job name).
# The rows are gathered first and the frame is built once: DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and appending inside a loop
# re-copies the whole frame on every iteration.
# NOTE(review): pd.read_pickle unpickles arbitrary objects — only read trusted files.
config_rows = [
    pd.Series(pd.read_pickle(str(f)), name=f.stem)
    for f in config_path.glob('*2019-10-03_12-10-03*.pkl')
]
df = pd.DataFrame(config_rows)

In [None]:
# Lift the column display limit so every config field is visible, then preview.
pd.options.display.max_columns = None
df.head(5)

In [None]:
# Unweighted mean of the three per-sequence test dice scores.
dice_total = df.test_dice_t1_in + df.test_dice_t1_out + df.test_dice_t2
df['test_dice_mean'] = dice_total / 3.0

In [None]:
# Compare runs with and without channel growth, each sorted by t1_out test dice.
rank_metric = 'test_dice_t1_out'
hover_fields = ['model_cap', 'def_seq_mode', 'bce_weight']
job_rename = {'index':'job_name'}

df1 = df.query('channel_growth==1').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df2 = df.query('channel_growth==0').sort_values(rank_metric).reset_index().rename(columns=job_rename)

growth_curve = df1.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='C Growth')
static_curve = df2.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='C Static')
(growth_curve * static_curve).opts(legend_position='top_left', show_legend=True)

In [None]:
# Compare the default sequence modes, each sorted by t1_out test dice.
rank_metric = 'test_dice_t1_out'
hover_fields = ['model_cap', 'def_seq_mode', 'bce_weight', 'job_name']
job_rename = {'index':'job_name'}

df1 = df.query('def_seq_mode=="t1_in"').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df2 = df.query('def_seq_mode=="t1_out"').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df3 = df.query('def_seq_mode=="t2"').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df4 = df.query('def_seq_mode=="random"').sort_values(rank_metric).reset_index().rename(columns=job_rename)

t1_in_curve = df1.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='t1_in')
t1_out_curve = df2.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='t1_out')
t2_curve = df3.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='t2')
random_curve = df4.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='random')
(t1_in_curve * t1_out_curve * t2_curve * random_curve).opts(legend_position='top_left', show_legend=True)

In [None]:
# Compare the three BCE weights, each sorted by best validation loss.
rank_metric = 'best_loss'
hover_fields = ['model_cap', 'def_seq_mode', 'bce_weight']
job_rename = {'index':'job_name'}

df1 = df.query('bce_weight==0.2').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df3 = df.query('bce_weight==0.5').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df4 = df.query('bce_weight==0.8').sort_values(rank_metric).reset_index().rename(columns=job_rename)

low_curve = df1.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='0.2')
mid_curve = df3.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='0.5')
high_curve = df4.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='0.8')
(low_curve * mid_curve * high_curve).opts(legend_position='top_left', show_legend=True)

In [None]:
#
# Compare model capacities, each sorted by the mean test dice over sequences.
rank_metric = 'test_dice_mean'
hover_fields = ['model_cap', 'def_seq_mode', 'bce_weight']
job_rename = {'index':'job_name'}

df0 = df.query('model_cap==4').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df1 = df.query('model_cap==8').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df2 = df.query('model_cap==12').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df3 = df.query('model_cap==16').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df4 = df.query('model_cap==32').sort_values(rank_metric).reset_index().rename(columns=job_rename)

cap4_curve = df0.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='4')
cap8_curve = df1.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='8')
cap12_curve = df2.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='12')
cap16_curve = df3.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='16')
cap32_curve = df4.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='32')
(cap4_curve * cap8_curve * cap12_curve * cap16_curve * cap32_curve).opts(legend_position='top_left', show_legend=True)

In [None]:
# Mean t1_out test dice for every (model_cap, def_seq_mode) combination.
group_keys = ['model_cap', 'def_seq_mode']
df.groupby(group_keys)['test_dice_t1_out'].mean()

Notes: `t1_out` seems to outperform all other sequence modes (including `random`). Best overall so far: `def_seq_mode='t1_out'` with `model_cap=8`. Open question: why would adding additional images decrease performance?

In [None]:
#df.query('model_cap==8 and def_seq_mode=="t1_out" and subj=="01"')

In [None]:
# Compare network depths, each sorted by t1_out test dice.
rank_metric = 'test_dice_t1_out'
hover_fields = ['model_cap', 'def_seq_mode', 'bce_weight']
job_rename = {'index':'job_name'}

df1 = df.query('n_layers==5').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df2 = df.query('n_layers==6').sort_values(rank_metric).reset_index().rename(columns=job_rename)
df3 = df.query('n_layers==7').sort_values(rank_metric).reset_index().rename(columns=job_rename)

depth5_curve = df1.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='5')
depth6_curve = df2.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='6')
depth7_curve = df3.hvplot.line(x='index', y=rank_metric, hover_cols=list(hover_fields), label='7')
(depth5_curve * depth6_curve * depth7_curve).opts(legend_position='top_left', show_legend=True)

In [None]:
class A:
    """Scratch container showing that ``len()`` can be used inside ``__init__``."""

    def __init__(self):
        """Store the integers 0..9 and print how many were stored."""
        self.data = [*range(10)]
        print(len(self))

    def __len__(self):
        """Return the number of stored items."""
        return len(self.data)
    

In [None]:
# Instantiating A prints the length of its data from inside __init__.
# Note that this rebinds the name `a`, which held a dict in an earlier cell.
a = A()

In [None]:
# len() dispatches to A.__len__, i.e. the length of a.data.
len(a)