In [1]:
import os
import os.path as osp
import sys
# Put the project checkout on the import path (presumably where the `datasets`,
# `evaluations` and `utils` packages used by later cells live).
# The location defaults to the original cluster path and can be overridden on
# other machines with the VAE_CODE_ROOT environment variable.
sys.path.insert(1, os.environ.get('VAE_CODE_ROOT', '/labs/gevaertlab/users/yyhhli/code/vae/'))

In [2]:
# Build the LIDC patch datasets for the train and validation splits.
from datasets import PATCH_DATASETS
from datasets.utils import sitk2tensor

# Both splits use the same registered dataset class and the same arguments,
# differing only in `split`.
lidc_dataset_cls = PATCH_DATASETS['LIDCPatchAugDataset']
lidc_train = lidc_dataset_cls(root_dir=None, transform=sitk2tensor, split='train')
lidc_val = lidc_dataset_cls(root_dir=None, transform=sitk2tensor, split='val')

  from .autonotebook import tqdm as notebook_tqdm
[04-15 14:52:17 | patch_ds:LIDCPatchAugDataset] patient split: train:699, val:88, test:88
[04-15 14:52:17 | patch_ds:LIDCPatchAugDataset] patient split: train:699, val:88, test:88


In [3]:
# Wrap both datasets in dataloaders that share one configuration.
from torch.utils.data.dataloader import DataLoader

# shuffle=False keeps the iteration order of the underlying dataset.
loader_options = dict(batch_size=36, shuffle=False, drop_last=False, num_workers=4, pin_memory=False)
lidc_train_dataloader = DataLoader(dataset=lidc_train, **loader_options)
lidc_val_dataloader = DataLoader(dataset=lidc_val, **loader_options)

In [4]:
# Set up the exporter for run "VAE3D32AUG", version 60, on the "volume" task.
from evaluations.export import Exporter

# One dataloader per split, keyed by split name.
split_dataloaders = {"train": lidc_train_dataloader, "val": lidc_val_dataloader}
exporter = Exporter(
    log_name="VAE3D32AUG",
    version=60,
    dataloaders=split_dataloaders,
    task_names=["volume"],
)

[04-15 14:53:23 | instantiator:  <module>] Created a temporary directory at /tmp/tmpofjgu33f
[04-15 14:53:23 | instantiator:    _write] Writing /tmp/tmpofjgu33f/_remote_module_non_sriptable.py
[04-15 14:53:27 | patch_ds:LIDCPatchAugDataset] patient split: train:699, val:88, test:88
[04-15 14:53:27 | patch_ds:LNDbPatch32AugDataset] patient split: train:168, val:22, test:22
[04-15 14:53:28 | patch_stanfordradiogenomics:StanfordRadiogenomicsPatchAugDataset] patient split: train:100, test:43


In [5]:
# Run the exporter and unpack its three results. Later cells index each of them
# by split name ("train"); label_dict is additionally keyed by task ("volume").
# The recorded output of this cell reports roughly 54 seconds of run time.
embeddings, data_names, label_dict = exporter.get_data()

[04-15 14:53:28 | export:  Exporter] initializing embeddings
[04-15 14:53:32 | patch_ds:LIDCPatchAugDataset] patient split: train:699, val:88, test:88
[04-15 14:53:32 | patch_ds:LNDbPatch32AugDataset] patient split: train:168, val:22, test:22
[04-15 14:53:32 | patch_stanfordradiogenomics:StanfordRadiogenomicsPatchAugDataset] patient split: train:100, test:43


 | 54.0 secs.


In [6]:
# Select the indices of the smallest and largest 5% of training nodules by volume.
import numpy as np

volume = np.array(label_dict["volume"]['train']) # numpy array

# Sort once and reuse the ordering for both tails.
volume_order = volume.argsort()
tail_count = int(len(volume) * 0.05)

smallest_5_idx = volume_order[:tail_count]
# Slice from an explicit start rather than `[-tail_count:]`: when tail_count is
# 0 (fewer than 20 samples) `[-0:]` would return every index instead of none,
# and the two selections would no longer have matching sizes.
largest_5_idx = volume_order[len(volume_order) - tail_count:]

In [7]:
# Average element-wise volume gap between the largest-5% and smallest-5% selections.
(volume[largest_5_idx] - volume[smallest_5_idx]).mean()

7444.667388783828

In [8]:
# Training-split embeddings as a single array; the cell displays its shape.
embeddings_train = np.array(embeddings["train"])
embeddings_train.shape

(5304, 4096)

In [9]:
# Gather the embedding rows that belong to the two volume tails.
smallest_5_embeddings = np.take(embeddings_train, smallest_5_idx, axis=0)
largest_5_embeddings = np.take(embeddings_train, largest_5_idx, axis=0)

In [10]:
# Peek at the first five training sample names to check the naming pattern.
data_names["train"][:5]

['LIDC-IDRI-0001.84.Aug00',
 'LIDC-IDRI-0001.85.Aug00',
 'LIDC-IDRI-0001.86.Aug00',
 'LIDC-IDRI-0001.87.Aug00',
 'LIDC-IDRI-0002.88.Aug00']

In [11]:
# Direction in embedding space from the average small nodule to the average
# large nodule: per-dimension mean of each group, then large minus small.
smallest_5_embeddings_mean = np.mean(smallest_5_embeddings, axis=0)
largest_5_embeddings_mean = np.mean(largest_5_embeddings, axis=0)
diff_vector = np.subtract(largest_5_embeddings_mean, smallest_5_embeddings_mean)

In [12]:
# Select another batch of nodules from the train dataset: a window of up to 36
# samples (18 on each side) centred on the median volume.
volume_rank = volume.argsort()
mid_rank = int(len(volume) * 0.5)
# Clamp the window start at 0: a negative start would wrap around to the end
# of the ranking and pick the wrong samples when fewer than 36 are available.
window_start = max(mid_rank - 18, 0)
median_idx = volume_rank[window_start: mid_rank + 18]
median_embeddings = embeddings_train[median_idx]

In [13]:
# Set up the reconstruction evaluator used to generate images; it is pointed
# at the same run as the exporter ('VAE3D32AUG', version 60).
from evaluations.evaluator import ReconEvaluator

# Directory where the visualisations of this experiment are written.
vis_dir = "/labs/gevaertlab/users/yyhhli/code/vae/EDA/image_synthesize_experiment/results/"
evaluator_options = {"vis_dir": vis_dir, "log_name": 'VAE3D32AUG', "version": 60}
evaluator = ReconEvaluator(**evaluator_options)

[04-15 15:15:53 | patch_ds:LIDCPatchAugDataset] patient split: train:699, val:88, test:88
[04-15 15:15:54 | patch_ds:LNDbPatch32AugDataset] patient split: train:168, val:22, test:22
[04-15 15:15:54 | patch_stanfordradiogenomics:StanfordRadiogenomicsPatchAugDataset] patient split: train:100, test:43


In [44]:
# experiment 1: enlarge and shrink the nodules
import torch

# Only the first 2048 of the 4096 embedding columns are used here
# (presumably the latent-mean half of the encoder output — TODO confirm).
# Dividing by 1.732 (~sqrt(3)) turns the small-to-large direction into a
# partial step; the reason for this factor is not visible in this notebook.
half_vector = torch.from_numpy(diff_vector[:2048]).type(torch.FloatTensor).to(evaluator.module.device) / 1.732

# Move the median batch to the same device as the direction vector before
# combining them; adding a CPU tensor to a GPU tensor raises a RuntimeError.
median_latents = torch.from_numpy(median_embeddings[:, :2048]).to(half_vector.device)
# enlarge
enlarged_embeddings = median_latents + half_vector
# shrink
shrinked_embeddings = median_latents - half_vector

In [45]:
# Generate images for the unmodified, enlarged and shrunken latent batches.
# Every batch is cast to float32 before it is passed to the evaluator.
median_latent_batch = torch.from_numpy(median_embeddings[:, :2048]).float()
median_images = evaluator.generate(median_latent_batch)
enlarged_images = evaluator.generate(enlarged_embeddings.float())
shrinked_images = evaluator.generate(shrinked_embeddings.float())

In [46]:
# Save one visualisation per image batch of experiment 1 under vis_dir.
from utils.visualization import vis3d_tensor

for image_batch, file_name in (
    (median_images, "test_nodules_median.jpeg"),
    (enlarged_images, "test_nodules_enlarged.jpeg"),
    (shrinked_images, "test_nodules_shrinked.jpeg"),
):
    vis3d_tensor(image_batch, save_path=osp.join(vis_dir, file_name))

In [47]:
# experiment 2: small to large nodules and large to small nodules
# Take the first 36 rows and the first 2048 columns of each tail selection.
batch_window = np.s_[:36, :2048]
small_embeddings = smallest_5_embeddings[batch_window]
large_embeddings = largest_5_embeddings[batch_window]

In [48]:
# Convert small to large and large to small using the direction vector.
# Multiplying by 1.732 undoes the division applied when half_vector was built,
# giving back (up to rounding) the full small-to-large step.
full_vector = half_vector * 1.732

# Move the CPU batches to the vector's device before combining them; adding a
# CPU tensor to a GPU tensor raises a RuntimeError.
s2l_embeddings = torch.from_numpy(small_embeddings).to(full_vector.device) + full_vector
l2s_embeddings = torch.from_numpy(large_embeddings).to(full_vector.device) - full_vector

In [49]:
# Generate images for the original small/large batches and for their shifted
# counterparts; every batch is cast to float32 first.
small_latent_batch = torch.from_numpy(small_embeddings).float()
large_latent_batch = torch.from_numpy(large_embeddings).float()
small_images = evaluator.generate(small_latent_batch)
large_images = evaluator.generate(large_latent_batch)
s2l_images = evaluator.generate(s2l_embeddings.float())
l2s_images = evaluator.generate(l2s_embeddings.float())

In [50]:
# Save one visualisation per image batch of experiment 2 under vis_dir.
for image_batch, file_name in (
    (small_images, "test_nodules_small.jpeg"),
    (large_images, "test_nodules_large.jpeg"),
    (s2l_images, "test_nodules_s2l.jpeg"),
    (l2s_images, "test_nodules_l2s.jpeg"),
):
    vis3d_tensor(image_batch, save_path=osp.join(vis_dir, file_name))
