# Pain in the Net - Laplacian Pyramid Translation Network (LPTN)
Application of the Laplacian Pyramid Translation Network (LPTN) to domain adaptation of diffusion MRI.


Code by:

Tyler Spears - tas6hh@virginia.edu

Dr. Tom Fletcher

---

Based on the following work(s):

* J. Liang, H. Zeng, and L. Zhang, “High-Resolution Photorealistic Image Translation in Real-Time: A Laplacian Pyramid Translation Network,” 2021, pp. 9392–9400. Accessed: Aug. 26, 2021. [Online]. Available: <https://openaccess.thecvf.com/content/CVPR2021/html/Liang_High-Resolution_Photorealistic_Image_Translation_in_Real-Time_A_Laplacian_Pyramid_Translation_CVPR_2021_paper.html>


## Imports & Environment Setup

### Imports

In [None]:
# Automatically re-import project-specific modules.
%load_ext autoreload
%autoreload 1

# imports
import collections
import functools
import io
import datetime
import time
import math
import itertools
import os
import shutil
import pathlib
import copy
import pdb
import inspect
import random
import subprocess
import sys
import warnings
from pathlib import Path
import typing
import zipfile

import ants
import dipy
import dipy.core
import dipy.reconst
import dipy.reconst.dti
import dipy.segment.mask
import dipy.viz
import dipy.viz.regtools
import dotenv

# visualization libraries
%matplotlib inline
import matplotlib as mpl
import mpl_toolkits
import matplotlib.pyplot as plt
import seaborn as sns

import IPython

# Try importing GPUtil for printing GPU specs.
# May not be installed if using CPU only.
try:
    import GPUtil
except ImportError:
    warnings.warn("WARNING: Package GPUtil not found, cannot print GPU specs")
from tabulate import tabulate
from IPython.display import display, Markdown
import ipyplot

# Data management libraries.
import nibabel as nib
import natsort
from natsort import natsorted
import addict
from addict import Addict
import box
from box import Box
import pprint
from pprint import pprint as ppr

# Computation & ML libraries.
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torchio
import pytorch_lightning as pl
import monai

import skimage
import skimage.feature
import skimage.filters
import skimage.measure
import scipy

%aimport pitn
import pitn


# Figure defaults: automatic layout and an RGBA (1, 1, 1, 1) figure face color.
plt.rcParams.update(
    {
        "figure.autolayout": True,
        "figure.facecolor": [1.0, 1.0, 1.0, 1.0],
    }
)

# Set print options for ndarrays/tensors; the settings shared by numpy and torch are
# declared once.
_shared_print_opts = dict(edgeitems=2, threshold=100, linewidth=88)
np.set_printoptions(suppress=True, **_shared_print_opts)
torch.set_printoptions(sci_mode=False, profile="short", **_shared_print_opts)

In [None]:
# Update notebook's environment variables with direnv.
# This requires the python-dotenv package, and direnv be installed on the system
# This will not work on Windows.
# NOTE: This is kind of hacky, and not necessarily safe. Be careful...
# Libraries needed on the python side:
# - os
# - subprocess
# - io
# - dotenv
# - warnings

# Form command to be run in direnv's context. This command will print out
# all environment variables defined in the subprocess/sub-shell.
# The command is given as an argument list and run *without* a shell, so a working
# directory that contains spaces or shell metacharacters is passed through verbatim.
_cwd = os.getcwd()
command = ["direnv", "exec", _cwd, "/usr/bin/env"]
try:
    # Run command in a new subprocess and wait for it to finish.
    proc = subprocess.run(command, stdout=subprocess.PIPE, cwd=_cwd)
except FileNotFoundError:
    # Best-effort step: keep going, but say why no variables were loaded.
    warnings.warn("WARNING: direnv executable not found, environment not updated")
    proc_out = ""
else:
    if proc.returncode != 0:
        warnings.warn(
            f"WARNING: direnv exited with status {proc.returncode}, "
            + "environment variables may be incomplete"
        )
    # Store and format the subprocess' output.
    proc_out = proc.stdout.strip().decode("utf-8")
# Use python-dotenv to load the environment variables by using the output of
# 'direnv exec ...' as a 'dummy' .env file.
dotenv.load_dotenv(stream=io.StringIO(proc_out), override=True);

In [None]:
# torch setup
# Use CUDA when torch reports it as available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# To force CPU execution regardless of availability, override with:
# device = torch.device('cpu')
print(device)

### Specs Recording

In [None]:
%%capture --no-stderr cap
# Capture output and save to log. Needs to be at the *very first* line of the cell.
# Watermark
%load_ext watermark
%watermark --author "Tyler Spears" --updated --iso8601  --python --machine --iversions --githash
if torch.cuda.is_available():

    # GPU information
    # Taken from
    # <https://www.thepythoncode.com/article/get-hardware-system-information-python>.
    # If GPUtil is not installed, skip this step.
    try:
        gpus = GPUtil.getGPUs()
        print("=" * 50, "GPU Specs", "=" * 50)
        list_gpus = []
        for gpu in gpus:
            # get the GPU id
            gpu_id = gpu.id
            # name of GPU
            gpu_name = gpu.name
            driver_version = gpu.driver
            cuda_version = torch.version.cuda
            # get total memory
            gpu_total_memory = f"{gpu.memoryTotal}MB"
            gpu_uuid = gpu.uuid
            list_gpus.append(
                (
                    gpu_id,
                    gpu_name,
                    driver_version,
                    cuda_version,
                    gpu_total_memory,
                    gpu_uuid,
                )
            )

        print(
            tabulate(
                list_gpus,
                headers=(
                    "id",
                    "Name",
                    "Driver Version",
                    "CUDA Version",
                    "Total Memory",
                    "uuid",
                ),
            )
        )
    except NameError:
        print("CUDA Version: ", torch.version.cuda)

else:
    print("CUDA not in use, falling back to CPU")

In [None]:
# `cap` is defined by the `%%capture --no-stderr cap` cell magic of the specs-recording
# cell; printing it displays the output captured there.
print(cap)

### Data Variables & Definitions Setup

In [None]:
# Set up directories
# All root locations come from environment variables.
data_dir = Path(os.environ["DATA_DIR"])

# Directories that contain processed DTI data. Each `*_processed_data_dir` should hold
# one sub-directory per subject, indexed by the eventual `possible_ids`.
processed_data_dir = Path(os.environ["WRITE_DATA_DIR"])
hcp_processed_data_dir = processed_data_dir.joinpath(
    "hcp/derivatives/mean-downsample/scale-2.00mm"
)
clinic_processed_data_dir = processed_data_dir.joinpath(
    "oasis3/derivatives/mean-downsample/scale-orig"
)
# Both processed-data locations must already exist.
assert hcp_processed_data_dir.exists()
assert clinic_processed_data_dir.exists()

results_dir = Path(os.environ["RESULTS_DIR"])
assert results_dir.exists()

tmp_results_dir = Path(os.environ["TMP_RESULTS_DIR"])
assert tmp_results_dir.exists()

### Experiment Logging Setup

In [None]:
# tensorboard experiment logging setup.
EXPERIMENT_NAME = "test_oasis3_unproc_250_epochs"

ts = datetime.datetime.now().replace(microsecond=0).isoformat()
# Break ISO format because many programs don't like having colons ':' in a filename.
ts = ts.replace(":", "_")
experiment_name = ts + "__" + EXPERIMENT_NAME
run_name = experiment_name
print(experiment_name)

# Collect the existing temporary result directories, in case a previous experiment did
# not finish. Hidden entries are recognised by their *name*: the full path string does
# not start with "." for an absolute path, so testing `str(path)` would never exclude
# them. Plain files are skipped as well because `shutil.rmtree` only removes directories.
tmp_dirs = [
    entry
    for entry in tmp_results_dir.glob("*")
    if entry.is_dir() and not entry.name.startswith(".")
]

# Only keep up to N tmp results, counting the directory of the run being set up now;
# hence at most N - 1 existing directories survive.
n_tmp_to_keep = 3
if len(tmp_dirs) > (n_tmp_to_keep - 1):
    print(f"More than {n_tmp_to_keep} temporary results, culling to the most recent")
    # Directory names begin with the timestamp, so a natural sort puts the oldest first.
    tmps_to_delete = natsorted([str(tmp_dir) for tmp_dir in tmp_dirs])[
        : -(n_tmp_to_keep - 1)
    ]
    for tmp_dir in tmps_to_delete:
        shutil.rmtree(tmp_dir)
        print("Deleted temporary results directory ", tmp_dir)

# Results are written to the temporary location while the experiment runs.
experiment_results_dir = tmp_results_dir / experiment_name
# Final target directory, to be made when experiment is complete.
final_experiment_results_dir = results_dir / experiment_name

In [None]:
# Lightning-level logger: hand this object to the pytorch-lightning Trainer so that the
# training/testing loops can log through it.
_tb_logger_kwargs = dict(
    name=experiment_name,
    version="",
    log_graph=False,
    default_hp_metric=False,
)
pl_logger = pl.loggers.TensorBoardLogger(tmp_results_dir, **_tb_logger_kwargs)
# Lower-level logger, used for histograms, images, etc.
logger = pl_logger.experiment

# A separate txt file records streams of events & info besides parameters & results.
log_txt_file = Path(logger.log_dir) / "log.txt"
with open(log_txt_file, "a+") as f:
    f.writelines(
        [
            f"Experiment Name: {experiment_name}\n",
            f"Timestamp: {ts}\n",
            # cap is defined in an ipython magic command
            f"Environment and Hardware Info:\n {cap}\n\n",
        ]
    )

### Experiment Parameters

In [None]:
# Parameters
# `default_box=True` lets nested attribute paths (e.g. `params.train.patch_size`) be
# assigned without creating the intermediate containers first.
params = Box(default_box=True)

# Data params.
params.num_channels = 6
params.hcp.num_subjects = 14
params.clinic.num_subjects = 9
params.clamp_percentiles = (0.01, 99.99)
# params.data_scale_range = None
# Scale input data by the valid values of each channel of the DTI.
# I.e., Dx,x in [0, 1], Dx,y in [-1, 1], Dy,y in [0, 1], Dy,z in [-1, 1], etc.
params.data_scale_range = ((0, -1, 0, -1, -1, 0), (1, 1, 1, 1, 1, 1))

# Network params.
params.num_laplace_high_freq = 3
params.discriminator_downscale_factors = [1, 2, 4]
params.lambda_adversary_loss = 10
params.lambda_reconst_loss_weight = 50
params.use_grad_penalty = False
params.lambda_grad_penalty = 100
# Set the init function to None to change to pytorch default initialization.
# params.net_init.f = None
params.net_init.mean = 0.0
params.net_init.std = 0.02

# Adam optimizer kwargs for each network.
params.optim.gen_kwargs.lr = 2e-4
params.optim.gen_kwargs.betas = (0.5, 0.99)
params.optim.discriminator_kwargs.lr = 2e-4
params.optim.discriminator_kwargs.betas = (0.5, 0.99)

# Training, validation, & testing params
# Patch size must be a multiple of 2**num_laplace_high_freq
params.train.patch_size = (32, 32, 32)
params.batch_size = 32
params.samples_per_subj_per_epoch = 1000
params.max_epochs = 250
params.train.hcp_num_subjects = 13
params.val.hcp_num_subjects = 1

# Create these assert statements because having an invalid number of train/val subjects
# may not be caught in the loading below and cause a silent runtime error.
assert params.train.hcp_num_subjects <= params.hcp.num_subjects
assert params.val.hcp_num_subjects <= params.hcp.num_subjects
# The training and validation subjects together cannot outnumber the loaded subjects;
# otherwise the two sets could not be disjoint.
assert (
    params.train.hcp_num_subjects + params.val.hcp_num_subjects
) <= params.hcp.num_subjects
# Enforce the patch-size constraint stated above along every patch dimension.
assert all(
    side % (2**params.num_laplace_high_freq) == 0
    for side in params.train.patch_size
)

# Record the full parameter set in the experiment's text log.
with open(log_txt_file, "a+") as f:
    f.write(pprint.pformat(params.to_dict()) + "\n")

In [None]:
# Optional weight & bias initialization of conv layers.
@torch.no_grad()
def conv_init_normal(m, mean, std):
    """Initialize a 3D (transposed) convolution's parameters from a normal distribution.

    Intended for use with ``torch.nn.Module.apply``. Modules that are neither
    ``torch.nn.Conv3d`` nor ``torch.nn.ConvTranspose3d`` are left untouched.

    Args:
        m: Module to (possibly) initialize in place.
        mean: Mean of the normal distribution used for weight and bias.
        std: Standard deviation of the normal distribution used for weight and bias.
    """
    if not isinstance(m, (torch.nn.Conv3d, torch.nn.ConvTranspose3d)):
        return
    torch.nn.init.normal_(m.weight, mean=mean, std=std)
    # Layers constructed with `bias=False` have `m.bias is None`; skip them instead of
    # failing inside `normal_`.
    if m.bias is not None:
        torch.nn.init.normal_(m.bias, mean=mean, std=std)


# Register the conv initializer unless it was explicitly disabled. The parameters cell
# documents `params.net_init.f = None` as the switch for pytorch's default
# initialization, so an explicit None must not be overwritten here.
_net_init_disabled = ("f" in params.net_init) and (params.net_init["f"] is None)
if params.net_init.to_dict() and not _net_init_disabled:
    # Bind the configured mean/std so the result takes only the module argument.
    f = functools.partial(
        conv_init_normal, mean=params.net_init.mean, std=params.net_init.std
    )
    params.net_init.f = f

## Data Loading

In [None]:
# Transformation pipeline.
# The input to the laplacian pyramid must be divisible by 2 once for every
# high-frequency level in the pyramid, i.e. by 2**num_laplace_high_freq.
laplace_pyramid_divisible_by_shape = 2**params.num_laplace_high_freq

# Keys of the volumes that are cropped and padded together.
_vol_keys = ["dti", "mask"]
_pre_process_steps = [
    # Crop both volumes to the mask's foreground, with a margin of 3.
    monai.transforms.CropForegroundd(_vol_keys, source_key="mask", margin=3),
    # Pad both volumes up to the divisibility required by the pyramid.
    monai.transforms.DivisiblePadd(_vol_keys, laplace_pyramid_divisible_by_shape),
    # Data are already clipped by percentile (see the pipeline.txt file in each
    # subj directory for details), so `pitn.transforms.ClipPercentileTransformd`
    # with `params.clamp_percentiles` is intentionally not applied here.
    monai.transforms.ToTensord("dti", dtype=torch.float),
    monai.transforms.ToTensord("mask", dtype=torch.bool),
]
pre_process_pipeline = monai.transforms.Compose(_pre_process_steps)

### Load and Pre-Process HCP Data

In [None]:
# Find data directories for each subject.
possible_ids = [
    "sub-397154",
    "sub-224022",
    "sub-140117",
    "sub-751348",
    "sub-894774",
    "sub-156637",
    "sub-227432",
    "sub-303624",
    "sub-185947",
    "sub-810439",
    "sub-753251",
    "sub-644246",
    "sub-141422",
    "sub-135528",
    "sub-103010",
    "sub-700634",
]

## Sub-set the chosen participants for dev and debugging!
selected_ids = random.sample(possible_ids, params.hcp.num_subjects)
if len(possible_ids) > params.hcp.num_subjects:
    warnings.warn(
        "WARNING: Sub-selecting participants for dev and debugging. "
        f"Subj IDs selected: {selected_ids}"
    )
# ### A nested warning! For debugging only.
# warnings.warn("WARNING: Mixing training and testing subjects")
# selected_ids.append(selected_ids[0])
# ###
##

selected_ids = natsorted(selected_ids)

# Map every selected subject ID to its processed-data directory, which must exist.
hcp_subj_dirs: dict = {
    subj_id: hcp_processed_data_dir / f"{subj_id}" for subj_id in selected_ids
}
for subj_id, subj_dir in hcp_subj_dirs.items():
    assert subj_dir.exists()
ppr(hcp_subj_dirs)

In [None]:
# Log the selected HCP subjects to the text log and to the experiment logger.
_hcp_subjs_line = f"Selected HCP Subjects: {selected_ids}\n"
with open(log_txt_file, "a+") as f:
    f.write(_hcp_subjs_line)

logger.add_text("hcp_subjs", pprint.pformat(selected_ids))

In [None]:
# Data loading and processing loop.
hcp_subj_data = list()
# Data reader object for NIFTI files.
nib_reader = monai.data.NibabelReader(as_closest_canonical=False)

# Directory prefixes for each image to be read.
dti_file_prefix = "dti"
# Mask was saved alongside the DTI nifti file.
mask_file_prefix = dti_file_prefix

for subj_id, subj_dir in hcp_subj_dirs.items():
    subj_data = {"subj_id": subj_id}

    # Load the DTI first, then the mask. Each entry is
    # (output key, sub-directory, filename suffix).
    for vol_key, vol_subdir, vol_suffix in (
        ("dti", dti_file_prefix, "dti"),
        ("mask", mask_file_prefix, "mask"),
    ):
        matches = list((subj_dir / vol_subdir).glob(f"{subj_id}*{vol_suffix}.nii.gz"))
        # Make sure the glob pattern only matches one file.
        assert len(matches) == 1
        vol, vol_meta = nib_reader.get_data(nib_reader.read(matches[0]))
        subj_data[vol_key] = vol
        # "<key>_meta_dict" is the default metadata key name for monai.
        subj_data[f"{vol_key}_meta_dict"] = vol_meta

    # Pre-process subject DTIs.
    subj_data = pre_process_pipeline(subj_data)

    # Optionally scale the input data. The scaler is kept with the subject so that it
    # remains available after loading.
    if params.data_scale_range is not None:
        scaler = pitn.data.norm.DTIMinMaxScaler(
            params.data_scale_range[0],
            params.data_scale_range[1],
            dim=(1, 2, 3),
            channel_size=params.num_channels,
        )
        masked_dti = subj_data["dti"] * subj_data["mask"]
        scaled = scaler.scale(masked_dti, stateful=True)
        # Re-apply the mask after scaling.
        subj_data["dti"] = scaled * subj_data["mask"]
        subj_data["scaler"] = scaler

    hcp_subj_data.append(subj_data)

# Create dataset with all HCP subjects included.
hcp_subj_dataset = monai.data.Dataset(hcp_subj_data)

### Load & Pre-Process Clinical Data

In [None]:
# # UVA data loading.
# # Find data directories for each subject.
# clinic_subj_dirs: dict = dict()

# possible_ids = ["001"]

# ## Sub-set the chosen participants for dev and debugging!
# selected_ids = random.sample(possible_ids, params.clinic.num_subjects)
# if params.clinic.num_subjects < len(possible_ids):
#     warnings.warn(
#         "WARNING: Sub-selecting participants for dev and debugging. "
#         + f"Subj IDs selected: {selected_ids}"
#     )
# # ### A nested warning! For debugging only.
# # warnings.warn("WARNING: Mixing training and testing subjects")
# # selected_ids.append(selected_ids[0])
# # ###
# ##

# selected_ids = natsorted(selected_ids)

# for subj_id in selected_ids:
#     clinic_subj_dirs[subj_id] = (
#         clinic_processed_data_dir / f"derivatives/diffusion/sub-{subj_id}/ses-01"
#     )
#     assert clinic_subj_dirs[subj_id].exists()
# ppr(clinic_subj_dirs)

In [None]:
# OASIS3 Dataset

# Find data directories for each subject.
possible_ids = [
    "sub-OAS30188_MR_d3844",
    "sub-OAS30375_MR_d5792",
    "sub-OAS30558_MR_d2148",
    "sub-OAS30643_MR_d0280",
    "sub-OAS30685_MR_d0032",
    "sub-OAS30762_MR_d0043",
    "sub-OAS30770_MR_d1201",
    "sub-OAS30944_MR_d0089",
    "sub-OAS31018_MR_d0041",
    "sub-OAS31157_MR_d4924",
]

## Sub-set the chosen participants for dev and debugging!
selected_ids = random.sample(possible_ids, params.clinic.num_subjects)
if len(possible_ids) > params.clinic.num_subjects:
    warnings.warn(
        "WARNING: Sub-selecting participants for dev and debugging. "
        f"Subj IDs selected: {selected_ids}"
    )
# ### A nested warning! For debugging only.
# warnings.warn("WARNING: Mixing training and testing subjects")
# selected_ids.append(selected_ids[0])
# ###
##

selected_ids = natsorted(selected_ids)

# Map every selected subject ID to its processed-data directory, which must exist.
clinic_subj_dirs: dict = {
    subj_id: clinic_processed_data_dir / f"{subj_id}" for subj_id in selected_ids
}
for subj_id, subj_dir in clinic_subj_dirs.items():
    assert subj_dir.exists()
ppr(clinic_subj_dirs)

In [None]:
# Log the selected clinic subjects to the text log and to the experiment logger.
_clinic_subjs_line = f"Selected Clinic-Scanned Subjects: {selected_ids}\n"
with open(log_txt_file, "a+") as f:
    f.write(_clinic_subjs_line)

logger.add_text("clinic_data_subjs", pprint.pformat(selected_ids))

In [None]:
# Data loading and processing loop.
clinic_subj_data = list()
# Data reader object for NIFTI files.
nib_reader = monai.data.NibabelReader(as_closest_canonical=False)

# Directory prefixes for each image to be read.
dti_file_prefix = "dti"
# Mask is specific to the DTI, so it is located alongside the DTI nifti file.
mask_file_prefix = dti_file_prefix

for subj_id, subj_dir in clinic_subj_dirs.items():
    subj_data = {"subj_id": subj_id}

    # Load the DTI first, then the mask. Each entry is
    # (output key, sub-directory, filename suffix).
    for vol_key, vol_subdir, vol_suffix in (
        ("dti", dti_file_prefix, "dti"),
        ("mask", mask_file_prefix, "mask"),
    ):
        matches = list((subj_dir / vol_subdir).glob(f"{subj_id}*{vol_suffix}.nii.gz"))
        # Make sure the glob pattern only matches one file.
        assert len(matches) == 1
        vol, vol_meta = nib_reader.get_data(nib_reader.read(matches[0]))
        subj_data[vol_key] = vol
        # "<key>_meta_dict" is the default metadata key name for monai.
        subj_data[f"{vol_key}_meta_dict"] = vol_meta

    # Pre-process subject DTIs.
    subj_data = pre_process_pipeline(subj_data)

    # Optionally scale the input data. The scaler is kept with the subject so that it
    # remains available after loading.
    if params.data_scale_range is not None:
        scaler = pitn.data.norm.DTIMinMaxScaler(
            params.data_scale_range[0],
            params.data_scale_range[1],
            dim=(1, 2, 3),
            channel_size=params.num_channels,
        )
        masked_dti = subj_data["dti"] * subj_data["mask"]
        scaled = scaler.scale(masked_dti, stateful=True)
        # Re-apply the mask after scaling.
        subj_data["dti"] = scaled * subj_data["mask"]
        subj_data["scaler"] = scaler

    clinic_subj_data.append(subj_data)

# Create dataset with all "clinical quality" subjects included.
clinic_subj_dataset = monai.data.Dataset(clinic_subj_data)

## Setup of Training Objects

In [None]:
# Designate HCP subjects for training and validation.
hcp_ids = [s["subj_id"] for s in hcp_subj_data]
random.shuffle(hcp_ids)
_n_hcp_train = params.train.hcp_num_subjects
_n_hcp_val = params.val.hcp_num_subjects
if _n_hcp_train + _n_hcp_val > len(hcp_ids):
    raise ValueError(
        f"ERROR: Requested {_n_hcp_train} training + {_n_hcp_val} validation HCP "
        + f"subjects, but only {len(hcp_ids)} are loaded"
    )
hcp_train_ids = hcp_ids[:_n_hcp_train]
# Take the validation subjects from *after* the training slice. Slicing from the start
# of the list again would pick subjects that are also being trained on.
hcp_val_ids = hcp_ids[_n_hcp_train : _n_hcp_train + _n_hcp_val]

# Designate clinic subject IDs for training.
clinic_ids = [s["subj_id"] for s in clinic_subj_data]
random.shuffle(clinic_ids)
# Just select all clinic IDs.
clinic_train_ids = clinic_ids[: params.clinic.num_subjects]

In [None]:
# Set up dataset and data loading objects.
# ! The samplers created here will cause the source domain patches and the target domain
# patches to *not* be aligned in any way; this is intentional for unpaired I2I.

# Set up HCP scan data.
# Training set: one mask-filtered patch dataset per training subject.
source_patch_ds = [
    pitn.data.MaskFilteredPatchDataset3d(
        subj["dti"], mask=subj["mask"], patch_size=params.train.patch_size
    )
    for subj in hcp_subj_data
    if subj["subj_id"] in hcp_train_ids
]

source_train_dataset = torch.utils.data.ConcatDataset(source_patch_ds)
source_train_sampler = pitn.samplers.ConcatDatasetBalancedRandomSampler(
    source_train_dataset.datasets,
    max_samples_per_dataset=params.samples_per_subj_per_epoch,
)

_source_train_loader_kwargs = dict(
    sampler=source_train_sampler,
    batch_size=params.batch_size,
    pin_memory=True,
    num_workers=7,
    persistent_workers=True,
)
source_train_loader = monai.data.DataLoader(
    source_train_dataset, **_source_train_loader_kwargs
)

# Validation set: each validation subject's DTI with a new leading axis of size 1.
source_vol_ds = [
    subj["dti"][None] for subj in hcp_subj_data if subj["subj_id"] in hcp_val_ids
]

source_val_dataset = torch.utils.data.ConcatDataset(source_vol_ds)

source_val_loader = monai.data.DataLoader(
    source_val_dataset,
    batch_size=1,
    shuffle=False,
    pin_memory=True,
    num_workers=0,
)

In [None]:
# Set up clinic scan data: one mask-filtered patch dataset per clinic training subject.
target_patch_ds = [
    pitn.data.MaskFilteredPatchDataset3d(
        subj["dti"], mask=subj["mask"], patch_size=params.train.patch_size
    )
    for subj in clinic_subj_data
    if subj["subj_id"] in clinic_train_ids
]

target_train_dataset = torch.utils.data.ConcatDataset(target_patch_ds)

# Calculate the number of clinic samples per subject to match the total length of the
# source domain dataset: total source samples per epoch divided by the number of clinic
# subjects, rounded down.
_n_source_samples = (
    len(source_train_dataset.datasets) * params.samples_per_subj_per_epoch
)
num_clinic_samples_per_img = np.floor(
    _n_source_samples / len(target_train_dataset.datasets)
).astype(int)

target_train_sampler = pitn.samplers.ConcatDatasetBalancedRandomSampler(
    target_train_dataset.datasets,
    max_samples_per_dataset=num_clinic_samples_per_img,
)

_target_train_loader_kwargs = dict(
    sampler=target_train_sampler,
    batch_size=params.batch_size,
    pin_memory=True,
    num_workers=7,
    persistent_workers=True,
)
target_train_loader = monai.data.DataLoader(
    target_train_dataset, **_target_train_loader_kwargs
)

## Model Definition

In [None]:
class ClinicMatchGAN(pl.LightningModule):
    def __init__(
        self,
        num_channels: int,
        gen_num_high_freq: int = 3,
        discriminator_downsample_factors=None,
        lambda_adversary_loss: float = 1,
        lambda_grad_penalty: float = 1,
        lambda_reconst_loss_weight=1,
        gen_optim_kwargs=None,
        discriminator_optim_kwargs=None,
        weight_init_fn=None,
    ):
        """Build the generator, the discriminator, and the logging containers.

        Args:
            num_channels: Channel count passed to both the LPTN generator and the
                multi-discriminator.
            gen_num_high_freq: Passed to the generator as ``num_high_freq_levels``.
            discriminator_downsample_factors: Passed to the multi-discriminator.
                ``None`` selects ``[1, 2, 4]``.
            lambda_adversary_loss: Weight on the generator's adversarial loss term.
            lambda_grad_penalty: Weight on the discriminator's gradient penalty.
                ``None`` disables the penalty; any other value enables it.
            lambda_reconst_loss_weight: Weight on the generator's reconstruction loss.
            gen_optim_kwargs: Stored in ``self.hparams``; ``None`` selects an empty
                dict. Presumably consumed by the optimizer setup, which is not part
                of this method.
            discriminator_optim_kwargs: Same as ``gen_optim_kwargs``, for the
                discriminator.
            weight_init_fn: Optional callable applied to both networks through
                ``Module.apply``.
        """
        super().__init__()

        # Mutable defaults are created per instance rather than in the signature, where
        # a single list/dict object would be shared by every instance (and stored in
        # each instance's hparams). They are resolved *before* `save_hyperparameters()`
        # so that the recorded hyperparameters hold the effective values, not None.
        if discriminator_downsample_factors is None:
            discriminator_downsample_factors = [1, 2, 4]
        if gen_optim_kwargs is None:
            gen_optim_kwargs = dict()
        if discriminator_optim_kwargs is None:
            discriminator_optim_kwargs = dict()

        self.save_hyperparameters()
        # The gradient penalty is toggled solely by whether a weight was given.
        # NOTE(review): the notebook's params also define a separate `use_grad_penalty`
        # flag; confirm that the instantiation passes `lambda_grad_penalty=None` when
        # the penalty should be off.
        self.hparams.use_grad_penalty = self.hparams.lambda_grad_penalty is not None

        self.generator = pitn.nn.gan.generative.LPTN(
            num_channels, num_high_freq_levels=self.hparams.gen_num_high_freq
        )

        self.discriminator = pitn.nn.gan.adversarial.MultiDiscriminator(
            num_channels, self.hparams.discriminator_downsample_factors
        )

        if weight_init_fn is not None:
            self.generator = self.generator.apply(weight_init_fn)
            self.discriminator = self.discriminator.apply(weight_init_fn)

        self.val_psnr_metric = monai.metrics.PSNRMetric(max_val=1.0)
        # Visualization slice and value range; both start unset.
        self.val_viz_slice = None
        self.val_viz_range = None

        # Plain-python record of losses and discriminator predictions, keyed by global
        # step in `training_step`.
        self.plain_log = Box(default_box=True, loss_gen=dict(), loss_discrim=dict())
        self.plain_log.discrim_preds.real = dict()
        self.plain_log.discrim_preds.fake = dict()

    def forward(self, x):
        """Pass ``x`` through the generator and return the generator's output."""
        translated = self.generator(x)
        return translated

    def reconstruction_loss(self, y_source, y_pred):
        """Return the mean squared error between ``y_source`` and ``y_pred``."""
        mse = F.mse_loss(input=y_source, target=y_pred, reduction="mean")
        return mse

    def ls_adversarial_loss(self, sample, label: int):
        """Return the least-squares adversarial loss of ``sample`` against ``label``.

        The discriminator's prediction for ``sample`` is compared, by mean squared
        error, with a tensor of the same shape filled with ``label``.
        """
        prediction = self.discriminator(sample)
        target = torch.full_like(prediction, label)
        return F.mse_loss(prediction, target, reduction="mean")

    def grad_penalty(
        self,
        real_samples: torch.Tensor,
        fake_samples: torch.Tensor,
    ):
        """Return a gradient penalty evaluated on real/fake interpolations.

        Every batch element is replaced by a randomly weighted linear mix of its real
        and fake sample. The penalty is the batch mean of the squared difference
        between 1 and the L2 norm of the discriminator's gradient w.r.t. that mix.
        """
        n_batch = real_samples.shape[0]
        # One mixing weight per batch element, broadcast over all remaining dimensions.
        mix_shape = (n_batch,) + (1,) * (real_samples.ndim - 1)
        mix = torch.rand(*mix_shape).to(real_samples)
        mixed_samples = (mix * real_samples) + ((1 - mix) * fake_samples)
        # The gradient is taken w.r.t. the mixed samples, so they must require grad.
        mixed_samples = mixed_samples.requires_grad_(True)
        mixed_preds = self.discriminator(mixed_samples)

        (grad,) = torch.autograd.grad(
            outputs=mixed_preds,
            inputs=mixed_samples,
            grad_outputs=torch.ones_like(mixed_preds),
            create_graph=True,
            only_inputs=True,
            retain_graph=True,
        )

        flat_grad = grad.view(n_batch, -1)
        # L2 norm computed by hand so a small epsilon can keep the sqrt away from NaNs.
        eps = 1e-7
        grad_norm = torch.sqrt(flat_grad.pow(2).sum(dim=1) + eps)
        penalty = ((grad_norm - 1) ** 2).mean()

        return penalty

    def ls_gan_grad_penalty(self, real_samples, noise_scale: float, k=1):
        """Gradient penalty evaluated on noise-perturbed real samples.

        Implements another form of grad penalty from Kodali, et. al., 2017, used in
        Mao, et. al., 2018 (2nd LS-GAN paper). Returns the batch mean of the squared
        difference between ``k`` and the L2 norm of the discriminator's gradient
        w.r.t. the perturbed samples.
        """
        n_batch = real_samples.shape[0]

        # Technically, the original paper specified the noise as a multi-variate Gaussian
        # with a diagonal covariance matrix filled with the same value. So, the
        # 'c' value in that formulation would scale up the *variance*, while the
        # equivalent 1D Normal distribution here specifies the *standard deviation*.
        # It probably doesn't matter.
        noise = torch.distributions.Normal(0.0, noise_scale).sample(real_samples.shape)
        perturbed = real_samples + noise.to(real_samples)
        # The gradient is taken w.r.t. the perturbed samples, so they must require grad.
        perturbed = perturbed.requires_grad_(True)

        perturbed_preds = self.discriminator(perturbed)

        (grad,) = torch.autograd.grad(
            outputs=perturbed_preds,
            inputs=perturbed,
            grad_outputs=torch.ones_like(perturbed_preds),
            create_graph=True,
            only_inputs=True,
            retain_graph=True,
        )

        flat_grad = grad.view(n_batch, -1)
        # L2 norm computed by hand so a small epsilon can keep the sqrt away from NaNs.
        eps = 1e-7
        grad_norm = torch.sqrt(flat_grad.pow(2).sum(dim=1) + eps)
        penalty = ((grad_norm - k) ** 2).mean()

        return penalty

    def training_step(self, batch, batch_idx, optimizer_idx):
        """Compute the loss for one optimizer pass over ``batch``.

        ``batch`` must provide ``"source"`` and ``"target"`` entries. ``optimizer_idx``
        selects the generator or the discriminator update; any other value raises a
        ``RuntimeError``. Returns an ordered dict with the loss under ``"loss"`` and the
        detached loss under both ``"progress_bar"`` and ``"log"``.
        """
        source_samples = batch["source"]
        target_samples = batch["target"]

        if optimizer_idx == self._GENERATOR_OPTIMIZER_IDX:
            # ---- Generator update ----
            translated_samples = self.generator(source_samples)

            # Weighted reconstruction term between the source and its translation.
            l_g_reconstruct = self.reconstruction_loss(
                source_samples, translated_samples
            )
            l_g_reconstruct = (
                l_g_reconstruct * self.hparams.lambda_reconst_loss_weight
            )
            self.log("train_loss_terms/gen_reconstruct", l_g_reconstruct.detach())

            # Weighted adversarial term; the generator's target label is 0.
            l_g_adversarial = self.ls_adversarial_loss(translated_samples, label=0)
            adversarial_weight = self.hparams.lambda_adversary_loss * 1 / 2
            l_g_adversarial = l_g_adversarial * adversarial_weight
            self.log("train_loss_terms/gen_adversarial", l_g_adversarial.detach())

            # Combine terms into the final loss and record it.
            step_loss = l_g_reconstruct + l_g_adversarial
            self.log("train/gen_loss", step_loss.detach())
            self.plain_log.loss_gen[self.global_step] = float(
                step_loss.detach().cpu().item()
            )
            progress_key = "loss_gen"

        elif optimizer_idx == self._DISCRIMINATOR_OPTIMIZER_IDX:
            # ---- Discriminator update ----
            # Real images are scored against the label 1.
            loss_real = self.ls_adversarial_loss(target_samples, label=1) / 2
            self.log("train_loss_terms/discrim_real_loss", loss_real.detach())

            # Translated (i.e., fake) images are scored against the label -1. The
            # generator is not updated in this pass, so its gradients are not tracked.
            with torch.no_grad():
                translated_samples = self.generator(source_samples)
            loss_fake = self.ls_adversarial_loss(translated_samples, label=-1) / 2
            self.log("train_loss_terms/discrim_fake_loss", loss_fake.detach())

            if not self.hparams.use_grad_penalty:
                grad_penalty = torch.zeros_like(loss_fake)
            else:
                # Noise scaling found by taking `~ 0.1176 x abs diff between max and
                # min` (of values of the input tensors, here the samples from the
                # target domain).
                grad_penalty = self.ls_gan_grad_penalty(
                    target_samples, noise_scale=0.2352
                )
                grad_penalty = grad_penalty * self.hparams.lambda_grad_penalty
                self.log(
                    "train_loss_terms/discrim_grad_penalty", grad_penalty.detach()
                )

            # Combine terms into the final loss and record it.
            step_loss = loss_fake + loss_real + grad_penalty
            self.log("train/discrim_loss", step_loss.detach())
            self.plain_log.loss_discrim[self.global_step] = float(
                step_loss.detach().cpu().item()
            )
            progress_key = "loss_discrim"

        else:
            raise RuntimeError(f"ERROR: Invalid optimizer index {optimizer_idx}")

        # Return dictionary; the same detached-loss dict serves both extra entries.
        tqdm_dict = {progress_key: step_loss.detach()}
        output = collections.OrderedDict(
            {"loss": step_loss, "progress_bar": tqdm_dict, "log": tqdm_dict}
        )

        # Every 50th global step, keep CPU copies of the discriminator's predictions on
        # real and translated samples for later plotting.
        if self.global_step % 50 == 0:
            with torch.no_grad():
                real_preds = self.discriminator(target_samples)
                fake_preds = self.discriminator(translated_samples)
                preds_log = self.plain_log.discrim_preds
                preds_log.real[self.global_step] = real_preds.detach().cpu().clone()
                preds_log.fake[self.global_step] = fake_preds.detach().cpu().clone()

        return output

    def validation_step(self, batch, batch_idx):
        """Translate a validation batch, log its losses and PSNR, and plot it.

        The batch is passed through the generator, then the weighted
        reconstruction loss, the weighted (and halved) adversarial loss, and the
        PSNR between the input and its translation are logged under ``val/``.

        When the batch holds exactly one sample, two extra artifacts are sent to
        ``self.logger.experiment``: an animated GIF of the translated sample and
        a three-panel figure (source, translated, absolute error) of one slice.

        Args:
            batch: Source-domain samples. Indexing below assumes a leading batch
                axis followed by a channel axis and three more axes
                (NOTE(review): confirm against the validation dataloader).
            batch_idx: Index of the batch; unused here.

        Returns:
            The value produced by ``self.val_psnr_metric`` for this batch.
        """
        src = batch
        translated = self.generator(src)

        weighted_reconst = (
            self.hparams.lambda_reconst_loss_weight
            * self.reconstruction_loss(src, translated)
        )
        weighted_adv = (
            self.hparams.lambda_adversary_loss
            * self.ls_adversarial_loss(translated, label=0)
            / 2
        )
        self.log("val/reconstruct_loss", weighted_reconst.detach())
        self.log("val/adversarial_loss", weighted_adv.detach())

        psnr = self.val_psnr_metric(y_pred=translated, y=src)
        self.log("val/psnr", psnr.detach())

        # Plotting is only done for single-sample validation batches.
        if src.shape[0] != 1:
            return psnr

        # Animated GIF of the translated sample: clip to the 0.1%-99.9% quantile
        # range first, then rescale the intensities to [0, 255].
        gif_vol = translated[0].cpu()
        q_low, q_high = torch.quantile(gif_vol, q=torch.as_tensor([0.001, 0.999]))
        gif_vol = torch.clip(gif_vol, q_low, q_high)
        gif_vol = monai.transforms.utils.rescale_array(gif_vol, minv=0.0, maxv=255.0)
        monai.visualize.img2tensorboard.add_animated_gif(
            image_tensor=gif_vol,
            writer=self.logger.experiment,
            tag="val_subj",
            max_out=1,
            scale_factor=1.0,
            global_step=self.global_step,
        )

        # Choose the slice once (just past the middle of the last axis) and reuse
        # it for every later validation step.
        if self.val_viz_slice is None:
            last_axis_idx = (translated.shape[-1] // 2) + 2
            self.val_viz_slice = (slice(None), slice(None), last_axis_idx)

        # First channel of the source, the translation, and their abs. error.
        src_chan = src[0, 0]
        translated_chan = translated[0, 0]
        panels = [
            vol.cpu().numpy()[self.val_viz_slice]
            for vol in (
                src_chan,
                translated_chan,
                torch.abs(src_chan - translated_chan),
            )
        ]

        # The color range is fixed by the first plotted step so that figures
        # from different steps share one scale.
        if self.val_viz_range is None:
            stacked_panels = np.stack(panels)
            self.val_viz_range = (np.min(stacked_panels), np.max(stacked_panels))
        vmin, vmax = self.val_viz_range

        fig = plt.figure(dpi=100)
        grid = mpl_toolkits.axes_grid1.ImageGrid(
            fig,
            111,
            nrows_ncols=(1, 3),
            axes_pad=0.1,
            share_all=True,
            cbar_mode="single",
            cbar_location="bottom",
            cbar_pad=0.1,
        )

        panel_titles = ["Source", "Translated", "Abs. Error"]
        for ax, title, panel in zip(grid, panel_titles, panels):
            im = ax.imshow(
                np.rot90(panel),
                interpolation=None,
                cmap="gray",
                vmin=vmin,
                vmax=vmax,
            )
            ax.set_xlabel(title)
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_xticklabels([])
            ax.set_yticklabels([])

        # Single shared colorbar, driven by the last drawn image.
        grid.cbar_axes[0].colorbar(im)

        self.logger.experiment.add_figure("val_slice", fig, self.global_step)

        return psnr

    def configure_optimizers(self):
        """Create one Adam optimizer each for the generator and discriminator.

        Also records, as instance attributes, the position of each optimizer in
        the returned list (generator first, discriminator second).

        Returns:
            A pair ``(optimizers, second_list)`` where ``optimizers`` is
            ``[generator_optimizer, discriminator_optimizer]`` and the second
            list is empty (presumably the LR-scheduler slot of the Lightning
            return convention; none are configured).
        """
        # Keep these in sync with the order of the list returned below.
        self._GENERATOR_OPTIMIZER_IDX = 0
        self._DISCRIMINATOR_OPTIMIZER_IDX = 1

        generator_optimizer = torch.optim.Adam(
            self.generator.parameters(),
            **self.hparams.gen_optim_kwargs,
        )
        discriminator_optimizer = torch.optim.Adam(
            self.discriminator.parameters(),
            **self.hparams.discriminator_optim_kwargs,
        )

        optimizers = [generator_optimizer, discriminator_optimizer]
        return optimizers, []

## Model Training

In [None]:
# Training loop
# Wall-clock start (truncated to whole seconds); covers model construction,
# trainer setup, and fitting.
train_start_timestamp = datetime.datetime.now().replace(microsecond=0)

# Explicitly set whether or not to use grad penalty: the weight is forwarded
# only when the penalty is enabled, otherwise `None` is passed to the model.
if params.use_grad_penalty:
    lambda_grad_penalty = params.lambda_grad_penalty
else:
    lambda_grad_penalty = None

# Instantiate model.
model = ClinicMatchGAN(
    params.num_channels,
    gen_num_high_freq=params.num_laplace_high_freq,
    discriminator_downsample_factors=params.discriminator_downscale_factors,
    lambda_adversary_loss=params.lambda_adversary_loss,
    lambda_grad_penalty=lambda_grad_penalty,
    lambda_reconst_loss_weight=params.lambda_reconst_loss_weight,
    gen_optim_kwargs=params.optim.gen_kwargs,
    discriminator_optim_kwargs=params.optim.discriminator_kwargs,
    weight_init_fn=params.net_init.f,
)

# Append the model's printed representation to the experiment's text log.
with open(log_txt_file, "a+") as log_file:
    print(f"Model overview: {model}", file=log_file)

# Create trainer object.
# Log at most every 50 steps, or more often when either training loader is
# shorter than 50 batches.
trainer = pl.Trainer(
    gpus=1,
    max_epochs=params.max_epochs,
    logger=pl_logger,
    multiple_trainloader_mode="max_size_cycle",
    log_every_n_steps=min(50, len(source_train_loader), len(target_train_loader)),
    check_val_every_n_epoch=3,
    #     progress_bar_refresh_rate=10,
    terminate_on_nan=True,
)

# Source- and target-domain batches are supplied together, keyed by domain.
train_loaders = {
    "source": source_train_loader,
    "target": target_train_loader,
}

# Many warnings are produced here, so it's better for my sanity (and worse in every other
# way) to just filter and ignore them...
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # For debugging NaNs, wrap the call below in `torch.autograd.detect_anomaly()`.
    trainer.fit(
        model,
        train_dataloaders=train_loaders,
        val_dataloaders=source_val_loader,
    )

# Elapsed wall-clock time since `train_start_timestamp`, at whole-second
# resolution.
train_duration = datetime.datetime.now().replace(microsecond=0) - train_start_timestamp
print(f"Train duration: {train_duration}")

# `timedelta.seconds` holds only the sub-day remainder; split it into
# hours / minutes / seconds for the human-readable breakdown.
train_duration_hours, _remainder_seconds = divmod(train_duration.seconds, 3600)
train_duration_minutes, train_duration_seconds = divmod(_remainder_seconds, 60)

# Append both the raw timedelta and the breakdown to the experiment text log.
# Fixed: the breakdown line previously ended with a stray double-quote and was
# missing the spaces after "Hours," and "Minutes,".
with open(log_txt_file, "a+") as f:
    f.write("\n")
    f.write(f"Training time: {train_duration}\n")
    f.write(
        f"\t{train_duration.days} Days, "
        + f"{train_duration_hours} Hours, "
        + f"{train_duration_minutes} Minutes, "
        + f"{train_duration_seconds} Seconds\n"
    )

In [None]:
# Save out trained model as a checkpoint file inside the experiment's results
# directory.
model_checkpoint_path = experiment_results_dir / "model.ckpt"
trainer.save_checkpoint(str(model_checkpoint_path))

## Result Visualization

In [None]:
# Toggle for the plotting cells below: when True, each figure is also written
# to `experiment_results_dir` with `plt.savefig`.
enable_fig_save = True

In [None]:
# Set up visualization objects.
# Container for per-subject visualization data; missing keys create nested
# boxes on access.
viz_data = Box(default_box=True)

# Find a common size for all volumes: the per-axis maximum of the last three
# dims over every subject in both datasets.
spatial_shapes = [
    tuple(subj["dti"].shape[-3:])
    for subj in itertools.chain(hcp_subj_dataset, clinic_subj_dataset)
]
target_spatial_shape = tuple(np.asarray(spatial_shapes).max(axis=0))

# Pad (symmetric, replicated edges) then center-crop to the common size.
common_spatial_size = torch.Size(target_spatial_shape)
padder = monai.transforms.SpatialPad(
    common_spatial_size, method="symmetric", mode="replicate"
)
cropper = monai.transforms.CenterSpatialCrop(common_spatial_size)

# Designate static image from clinical data as the target for registration.
clinic_viz_subj_idx = 0
# The first channel of the padded/cropped clinical DTI is the fixed image; the
# transform found against it is later applied to each DTI channel.
static_subj_dti = clinic_subj_dataset[clinic_viz_subj_idx]["dti"].cpu().numpy()
static_arr = cropper(padder(static_subj_dti))[0]
regist_static_img = ants.from_numpy(static_arr, spacing=(2.0, 2.0, 2.0))
# Put the image origin at its center of mass.
regist_static_img.set_origin(ants.get_center_of_mass(regist_static_img))
# The transform must stay rigid, no scaling or shearing. The translation network is almost
# certainly sensitive to feature scale.
ants_regist_kwargs = dict(type_of_transform="DenseRigid")

In [None]:
# Collect per-subject visualization data for both domains. Gradients are not
# needed since the networks are only evaluated here.
with torch.no_grad():

    # Grab HCP data for viz.
    for subj in hcp_subj_dataset:
        # Fresh container for this subject.
        data = Box(default_box=True)

        dti = subj["dti"].cpu().numpy()

        # Pad and crop DTIs to be the same shape.
        dti = np.asarray(cropper(padder(dti)))
        # Perform registration to the selected clinical volume. The transform
        # is estimated from the first DTI channel only and then reused for all
        # channels and for the mask.
        ants_dti_xx = ants.from_numpy(dti[0], spacing=(2, 2, 2))
        dti_center = ants.get_center_of_mass(ants_dti_xx)
        ants_dti_xx.set_origin(dti_center)
        xx_regist = ants.registration(
            regist_static_img, ants_dti_xx, **ants_regist_kwargs
        )

        # Apply the estimated transform channel by channel.
        dti_registered = list()
        for dti_chan in dti:
            dti_chan = ants.from_numpy(dti_chan, spacing=(2, 2, 2), origin=dti_center)
            dti_chan = ants.apply_transforms(
                regist_static_img, dti_chan, xx_regist["fwdtransforms"]
            )
            dti_registered.append(dti_chan.numpy())

        dti = torch.from_numpy(np.stack(dti_registered))

        # Stored DTIs are de-scaled with the subject's own scaler.
        data.dti = subj["scaler"].descale(dti.cpu()).numpy()

        # The mask gets the same pad/crop and the same transform as the DTI.
        mask = cropper(padder(subj["mask"].float()))
        ants_mask = ants.from_numpy(mask[0], spacing=(2, 2, 2), origin=dti_center)
        ants_mask = ants.apply_transforms(
            regist_static_img, ants_mask, xx_regist["fwdtransforms"]
        )
        mask = ants_mask.numpy().astype(bool)
        data.mask = mask

        # Add a leading batch axis and move to the model's device once; the
        # same input feeds both the generator and the discriminator.
        dti_batch = dti.to(model.device)[None]

        translated = model.generator(dti_batch)[0].cpu()
        translated = subj["scaler"].descale(translated).numpy()
        data.translated = translated

        # Store discriminator prediction.
        pred_class = model.discriminator(dti_batch)[0]
        data.pred_class = pred_class.cpu().numpy()

        viz_data.hcp[str(subj["subj_id"])] = data

    # Grab clinic data for viz.
    for subj in clinic_subj_dataset:
        # Fresh container for this subject. Without this, the loop would keep
        # writing into the last HCP subject's container, leaking its
        # `translated` field into every clinic entry (and possibly sharing one
        # object between entries).
        data = Box(default_box=True)

        dti = subj["dti"]
        # Pad and crop DTIs to be the same shape.
        # NOTE(review): this relies on the MONAI transforms returning numpy
        # arrays, as the rest of this cell does - confirm if MONAI is upgraded.
        dti = torch.from_numpy(cropper(padder(dti)))
        data.dti = subj["scaler"].descale(dti.cpu()).numpy()

        data.mask = cropper(padder(subj["mask"].float())).astype(bool)

        # Store discriminator prediction.
        pred_class = model.discriminator(dti.to(model.device)[None])[0]
        data.pred_class = pred_class.cpu().numpy()

        viz_data.clinic[str(subj["subj_id"])] = data

In [None]:
# Pick the subjects to display by their position in the visualization
# containers. The clinic index was fixed earlier, when the registration target
# was chosen.
hcp_viz_subj_idx = 1
hcp_viz_subj_id = tuple(viz_data.hcp.keys())[hcp_viz_subj_idx]
hcp_viz_subj = viz_data.hcp[hcp_viz_subj_id]

clinic_viz_subj_id = tuple(viz_data.clinic.keys())[clinic_viz_subj_idx]
clinic_viz_subj = viz_data.clinic[clinic_viz_subj_id]

# 6-channel slice for visualization: keep every channel and fix the second
# axis at roughly its center, offset by 4 indices.
viz_slice = np.s_[:, (hcp_viz_subj.dti.shape[1] // 2) + 4, :, :]


def abs_error_map(y, y_pred):
    """Return the element-wise absolute difference ``|y - y_pred|``.

    Both inputs are converted with ``torch.as_tensor`` before subtracting, so
    anything that function accepts (tensors, numpy arrays, nested lists) may be
    passed.

    Returns:
        The absolute error as a numpy array.
    """
    difference = torch.as_tensor(y) - torch.as_tensor(y_pred)
    return difference.abs().cpu().numpy()

### DTI Comparisons - All Channels

In [None]:
# LaTeX labels for the six DTI channels, in plotting order. Each two-letter
# code names the pair of axes in the label's subscript.
# NOTE(review): presumably this matches the channel order of the DTI arrays -
# verify against the data loading code.
channel_names = [
    rf"$D_{{{first_axis},{second_axis}}}$"
    for first_axis, second_axis in ("xx", "xy", "yy", "xz", "yz", "zz")
]

In [None]:
# Figure with one row per image and one column per DTI channel. Rows:
#   1. Source domain (HCP)
#   2. Translated
#   3. Target domain (clinic)
#   4. Absolute error between source and translated

cmap = "gray"

row_names = [
    "Source HCP",
    "Translated",
    "Target Clinic",
    "Abs Error\nSource-Translated",
]

# Take the same visualization slice out of every volume.
dti_rows = [
    vol[viz_slice]
    for vol in (
        hcp_viz_subj.dti,
        hcp_viz_subj.translated,
        clinic_viz_subj.dti,
        abs_error_map(hcp_viz_subj.dti, hcp_viz_subj.translated),
    )
]

nrows = len(dti_rows)
ncols = len(channel_names)

# Shared color limits over all rows. The slices are flattened per channel and
# concatenated so that images of different shapes can be pooled together.
# NOTE(review): the limits were meant to be quantiles that exclude extreme
# outliers (some are > 3 orders of magnitude), but quantiles 1.0 and 0.0 are
# the plain max and min, so nothing is trimmed - confirm the intended values.
pooled_dti_values = np.concatenate([di.reshape(6, -1) for di in dti_rows], axis=1)
max_dti = np.quantile(pooled_dti_values, 1.0)
min_dti = np.quantile(pooled_dti_values, 0.0)

fig = plt.figure(figsize=(8, 5), dpi=160)

grid = mpl.gridspec.GridSpec(
    nrows,
    ncols,
    figure=fig,
    hspace=0.05,
    wspace=0.05,
)
axs = []
max_subplot_height = 0
for i_row, row_dti in enumerate(dti_rows):
    for j_col, channel_label in enumerate(channel_names):
        ax = fig.add_subplot(grid[i_row, j_col])
        ax.imshow(
            np.rot90(row_dti[j_col]),
            cmap=cmap,
            interpolation=None,
            vmin=min_dti,
            vmax=max_dti,
        )
        # Row names go on the first column, channel names under the last row.
        if ax.get_subplotspec().is_first_col():
            ax.set_ylabel(row_names[i_row], size="xx-small")
        if ax.get_subplotspec().is_last_row():
            ax.set_xlabel(channel_label)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        # Track the top edge of the highest subplot to place the `suptitle`
        # just above it later on.
        subplot_top = ax.get_position(original=False).get_points()[1, 1]
        max_subplot_height = max(max_subplot_height, subplot_top)
        axs.append(ax)

# One colorbar for the whole grid, using the shared limits.
color_norm = mpl.colors.Normalize(vmin=min_dti, vmax=max_dti)
fig.colorbar(
    mpl.cm.ScalarMappable(norm=color_norm, cmap=cmap),
    ax=axs,
    location="right",
    fraction=0.1,
    pad=0.03,
)
plt.suptitle(
    "DTI and Abs. Error, Normalized over All Images",
    y=max_subplot_height + 0.015,
    verticalalignment="bottom",
)
if enable_fig_save:
    plt.savefig(experiment_results_dir / "DTI_Channel_w_Abs_Err.png")

## Diffusion Maps

In [None]:
# Side-by-side direction maps of the masked source, translated, and target DTIs.
fig, axs = plt.subplots(ncols=3, sharey=True, dpi=160)  # , figsize=(2, 6))

# Fix the third axis at roughly the center of the common volume shape, offset
# by 8 indices.
diffusion_viz_slice = np.s_[:, :, (target_spatial_shape[-1] // 2) + 8]

map_names = ["Source", "Translated", "Target"]
# The translated volume reuses the HCP subject's mask.
dtis_to_show = [hcp_viz_subj.dti, hcp_viz_subj.translated, clinic_viz_subj.dti]
masks_to_show = [hcp_viz_subj.mask, hcp_viz_subj.mask, clinic_viz_subj.mask]

for ax, label, dti, mask in zip(axs, map_names, dtis_to_show, masks_to_show):
    masked_dti = dti * mask
    # Set channels last for matplotlib
    diff_dir_map = pitn.viz.direction_map(masked_dti).transpose(1, 2, 3, 0)
    map_slice = diff_dir_map[diffusion_viz_slice]

    ax.imshow(np.rot90(map_slice), interpolation=None)
    ax.set_xlabel(label)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xticklabels([])
    ax.set_yticklabels([])

if enable_fig_save:
    plt.savefig(experiment_results_dir / "Diff_Color_Maps.png")

---

## End Experiment

In [None]:
# Push any pending logger output to disk.
pl_logger.experiment.flush()

# Debug experiments (name contains "debug", case-insensitively) are neither
# finalized nor moved.
is_debug_experiment = "debug" in EXPERIMENT_NAME.casefold()
if not is_debug_experiment:
    # Close tensorboard logger.
    pl_logger.finalize("success")
    # Experiment is complete, move the results directory to its final location
    # and re-point the text log path at the moved directory.
    results_need_move = experiment_results_dir != final_experiment_results_dir
    if results_need_move:
        print("Moving out of tmp location")
        experiment_results_dir = experiment_results_dir.rename(
            final_experiment_results_dir
        )
        log_txt_file = experiment_results_dir / log_txt_file.name

---