# Train baseline model for nodule classification

## Setup environment

### [Optional]: Install dependencies

In [None]:
# Install the notebook's third-party dependencies. `%pip` (rather than `!pip`)
# guarantees the packages land in the environment of the running kernel.
# NOTE(review): versions are unpinned; the recorded logs mention PyTorch Lightning
# v1.2/v1.3 deprecations, so consider pinning to the versions actually used.
%pip install "monai[nibabel,skimage,pillow,tqdm]" pytorch_lightning wandb

### [Optional]: Download data

In [None]:
# Create the local folders for processed images, masks and the cache
# (`-p` makes this a no-op when they already exist).
# NOTE(review): the data module cells below pass "../data/cache/" as cache_dir,
# not "../data/full/cache" created here — confirm which location is intended.
!mkdir -p ../data/full/processed/images ../data/full/processed/masks ../data/full/cache

In [None]:
# Copy the zipped nodule volumes from the GCS bucket into ../data/full/processed.
!gsutil cp gs://lung-cancer-detection/lidc-idri/processed/nodules.zip ../data/full/processed

In [None]:
# Copy the zipped metadata archive from the GCS bucket into ../data/full/processed.
!gsutil cp gs://lung-cancer-detection/lidc-idri/processed/meta.zip ../data/full/processed

In [None]:
# Copy the zipped train/validation split definitions from the GCS bucket into ../data/full.
!gsutil cp gs://lung-cancer-detection/lidc-idri/splits.zip ../data/full/

In [None]:
# Extract the nodule archive next to the zip file.
# -o: overwrite existing files without prompting, so re-running the cell does not
#     stall on unzip's interactive "replace?" question.
# -q: suppress the per-file listing that would otherwise flood the cell output.
!unzip -o -q ../data/full/processed/nodules.zip -d ../data/full/processed

In [None]:
# Extract the metadata archive next to the zip file.
# -o: overwrite existing files without prompting, so re-running the cell does not
#     stall on unzip's interactive "replace?" question.
# -q: suppress the per-file listing in the cell output.
!unzip -o -q ../data/full/processed/meta.zip -d ../data/full/processed

In [None]:
# Extract the split definitions into ../data/full.
# -o: overwrite existing files without prompting, so re-running the cell does not
#     stall on unzip's interactive "replace?" question.
# -q: suppress the per-file listing in the cell output.
!unzip -o -q ../data/full/splits.zip -d ../data/full

### [Optional]: Enable module import

In [None]:
# Symlink the project package into the notebook directory so that
# `import lung_cancer_detection` resolves from here.
# -f replaces an existing link and -n avoids following it, so re-running the cell
# no longer fails with "File exists"; the link name is given explicitly.
!ln -sfn ./../lung_cancer_detection lung_cancer_detection

## Load modules and configuration

### Import modules

In [1]:
from pathlib import Path

import numpy as np
import torch
import wandb
from monai.networks.nets import DenseNet
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

from lung_cancer_detection.data.nodule import ClassificationDataModule
from lung_cancer_detection.models.classification import NoduleClassificationModule
from lung_cancer_detection.utils import load_config, load_json, preview_dataset

### Load configuration file

In [2]:
# Absolute location of the cloud configuration, resolved against the current
# working directory (the notebook folder).
cp = (Path("..") / "configs" / "cloud.yaml").absolute()
# Cell result: True when the configuration file is present on disk.
cp.exists()

True

In [3]:
# Parse the configuration file into a nested dict; it is displayed as the cell
# result so the settings used for this run are recorded alongside the outputs.
config = load_config(cp)
config

{'random_seed': 47,
 'wandb': {'offline': False, 'project': 'lung-cancer-detection'},
 'artifacts': {'data': {'name': 'lidc-idri-raw',
   'version': 'v1',
   'type': 'dataset',
   'description': 'Zipped dataset of all chest CT scans, masks and nodule volumes in npy format, including scan and nodule metadata. Updated with new volume size for nodules.'},
  'train': {'name': 'lidc-train',
   'version': 'v1',
   'type': 'dataset',
   'description': 'List of patient IDs included in the training set'},
  'valid': {'name': 'lidc-valid',
   'version': 'v1',
   'type': 'dataset',
   'description': 'List of patient IDs included in the validation set'},
  'class_model': {'name': 'nodule-classification-model',
   'version': 'v0',
   'type': 'model',
   'description': 'Basic DenseNet for classifying lung nodules regarding their malignancy'},
  'seg_model': {'name': 'nodule-segmentation-model',
   'version': 'v1',
   'type': 'model',
   'description': 'Basic UNet for segmenting lung nodules in chest

## Explore datasets

In [4]:
# Load the split definitions in the order [train, valid] from the configured
# split directory.
splits = [
    load_json(Path(config["data"]["split_dir"]) / f"{split_name}.json")
    for split_name in ("train", "valid")
]
# Pair of (original labels, mapped labels): ratings 1-3 map to class 0 and
# ratings 4-5 map to class 1.
label_mapping = (list(range(1, 6)), [0, 0, 0, 1, 1])

In [5]:
# Data module used to inspect the datasets: all labels are kept
# (exclude_labels is empty) and then remapped via label_mapping.
# NOTE(review): cache_dir is hard-coded to ../data/cache/ while the optional
# download cells create ../data/full/cache — confirm which one is intended.
dm = ClassificationDataModule(
    data_dir=Path(config["data"]["data_dir"]),
    cache_dir=Path("../data/cache/").absolute(),
    splits=splits,
    min_anns=config["data"]["min_anns"],
    exclude_labels=[],
    label_mapping=label_mapping,
    batch_size=config["data"]["batch_size"],
)

In [6]:
# Prepare the data module; the cells below read dm.train_ds and dm.val_ds afterwards.
dm.setup()

In [7]:
# Report how many examples ended up in each split (one line per split).
print(
    f"Number of training examples: {len(dm.train_ds)}",
    f"Number of validation examples: {len(dm.val_ds)}",
    sep="\n",
)

Number of training examples: 2155
Number of validation examples: 470


In [8]:
# Gather the label of every example into one NumPy array per split.
# This iterates over the complete datasets, so every example is loaded once.
train_labels = np.array([sample["label"].numpy() for sample in dm.train_ds])
valid_labels = np.array([sample["label"].numpy() for sample in dm.val_ds])

In [9]:
# Share of examples in the positive (malignant) class per split. Labels are 0/1
# after the label mapping, so the mean is a fraction in [0, 1].
# The second line reports the validation split; it was previously mislabelled
# as "training".
print(f"Percentage of malignant training nodules: {np.mean(train_labels):.4f}")
print(f"Percentage of malignant validation nodules: {np.mean(valid_labels):.4f}")

Percentage of malignant training nodules: 0.2158
Percentage of malignant training nodules: 0.2234


## Validation check: overfit one batch

In [10]:
# Fresh data module for the overfit-one-batch check, configured exactly like the
# one used for exploration.
# NOTE(review): cache_dir is hard-coded to ../data/cache/ while the optional
# download cells create ../data/full/cache — confirm which one is intended.
dm = ClassificationDataModule(
    data_dir=Path(config["data"]["data_dir"]),
    cache_dir=Path("../data/cache/").absolute(),
    splits=splits,
    min_anns=config["data"]["min_anns"],
    exclude_labels=[],
    label_mapping=label_mapping,
    batch_size=config["data"]["batch_size"],
)

In [11]:
# Build the DenseNet for the overfit check; its dimensionality and channel counts
# are taken verbatim from the "class_model" section of the configuration.
net = DenseNet(
    **{
        arg: config["class_model"][arg]
        for arg in ("spatial_dims", "in_channels", "out_channels")
    }
)

In [12]:
# Wrap the network in the training module for the overfit check; no learning rate
# is passed here, so the module's own default applies.
model = NoduleClassificationModule(net, num_classes=config["class_model"]["num_classes"])

In [13]:
# Sanity-check trainer: repeatedly fit a single batch for up to 20 epochs. A
# healthy model/pipeline should be able to drive the training loss towards zero.
# Use one GPU when available and fall back to CPU otherwise, so the check also
# runs on machines without CUDA instead of raising at construction time.
trainer = Trainer(
    gpus=1 if torch.cuda.is_available() else 0,
    overfit_batches=1,
    max_epochs=20,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [14]:
# Run the overfit-one-batch check with the freshly created model and data module.
trainer.fit(model, datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | DenseNet         | 11.2 M
1 | loss      | CrossEntropyLoss | 0     
2 | train_acc | Accuracy         | 0     
3 | val_acc   | Accuracy         | 0     
-----------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.979    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  'You requested to overfit but enabled val/test dataloader shuffling.'
  'You requested to overfit but enabled training dataloader shuffling.'


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  "Relying on `self.log('val_loss', ...)` to set the ModelCheckpoint monitor is deprecated in v1.2"


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

## Run experiment

### Configure experiment

In [15]:
# Fresh data module for the real experiment, so no state from the overfit check
# carries over.
# NOTE(review): cache_dir is hard-coded to ../data/cache/ while the optional
# download cells create ../data/full/cache — confirm which one is intended.
dm = ClassificationDataModule(
    data_dir=Path(config["data"]["data_dir"]),
    cache_dir=Path("../data/cache/").absolute(),
    splits=splits,
    min_anns=config["data"]["min_anns"],
    exclude_labels=[],
    label_mapping=label_mapping,
    batch_size=config["data"]["batch_size"],
)

In [16]:
# Apply the configured random seed before any weights are created, so that weight
# initialisation and the subsequent training run are repeatable. Seeding here
# (rather than once at the top) also makes the experiment independent of how much
# randomness the earlier overfit check consumed.
seed_everything(config["random_seed"])

# Re-create the network from scratch so the experiment does not start from the
# weights fitted during the overfit check.
net = DenseNet(
    spatial_dims=config["class_model"]["spatial_dims"],
    in_channels=config["class_model"]["in_channels"],
    out_channels=config["class_model"]["out_channels"],
)
# Training module for the experiment, with an explicit learning rate of 1e-5.
model = NoduleClassificationModule(net, num_classes=config["class_model"]["num_classes"], lr=1e-5)

In [17]:
# Authenticate with Weights & Biases; the cell result is True on success.
# NOTE(review): the recorded output includes the account name — clear it before sharing.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mfelixpeters[0m (use `wandb login --relogin` to force relogin)


True

In [18]:
# Log the run to the configured W&B project as a "training" job. The `offline`
# switch from the configuration is now honoured instead of being ignored; it is
# False in the loaded config, so behaviour is unchanged for the current setup.
# NOTE(review): the artifact cells below need an online run — verify before
# enabling offline mode.
logger = WandbLogger(
    project=config["wandb"]["project"],
    offline=config["wandb"]["offline"],
    job_type="training",
)

In [19]:
# Stop training once val_loss stops improving; patience and min_delta are left
# at the library defaults.
es = EarlyStopping(
    monitor="val_loss",
    verbose=True,
)
# Keep only the single best checkpoint by val_loss. The file name embeds epoch,
# step, val_loss and val_acc; `mc.best_model_path` is read after training.
mc = ModelCheckpoint(
    monitor="val_loss",
    filename="{epoch}-{step}-{val_loss:.4f}-{val_acc:.4f}",
    verbose=True,
    save_top_k=1,
)
callbacks = [es, mc]

In [20]:
# Experiment trainer: W&B logging plus the early-stopping/checkpoint callbacks;
# every remaining Trainer option is taken from the "experiment" config section.
trainer = Trainer(logger=logger, callbacks=callbacks, **config["experiment"])

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [21]:
# Declare the train and validation split artifacts ("<name>:<version>") as inputs
# of this W&B run so the data lineage is recorded with the experiment.
trainer.logger.experiment.use_artifact(
    ":".join([config["artifacts"]["train"]["name"], config["artifacts"]["train"]["version"]])
)
trainer.logger.experiment.use_artifact(
    ":".join([config["artifacts"]["valid"]["name"], config["artifacts"]["valid"]["version"]])
)

<Artifact QXJ0aWZhY3Q6NDIzMjE4OA==>

### Train model

In [22]:
# Train the model; early stopping and checkpointing are driven by val_loss via the callbacks.
trainer.fit(model, datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | DenseNet         | 11.2 M
1 | loss      | CrossEntropyLoss | 0     
2 | train_acc | Accuracy         | 0     
3 | val_acc   | Accuracy         | 0     
-----------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.979    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f'Your {mode}_dataloader has `shuffle=True`, it is best practice to turn'


Training: 0it [00:00, ?it/s]

  "The signature of `Callback.on_train_epoch_end` has changed in v1.3."


Validating: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 0.684
Epoch 0, global step 16: val_loss reached 0.68355 (best 0.68355), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=0-step=16-val_loss=0.6836-val_acc=0.5596.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.677
Epoch 1, global step 33: val_loss reached 0.67685 (best 0.67685), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=1-step=33-val_loss=0.6769-val_acc=0.5681.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.616
Epoch 2, global step 50: val_loss reached 0.61609 (best 0.61609), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=2-step=50-val_loss=0.6161-val_acc=0.7340.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.028 >= min_delta = 0.0. New best score: 0.588
Epoch 3, global step 67: val_loss reached 0.58774 (best 0.58774), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=3-step=67-val_loss=0.5877-val_acc=0.7489.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.022 >= min_delta = 0.0. New best score: 0.566
Epoch 4, global step 84: val_loss reached 0.56614 (best 0.56614), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=4-step=84-val_loss=0.5661-val_acc=0.7638.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.554
Epoch 5, global step 101: val_loss reached 0.55362 (best 0.55362), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=5-step=101-val_loss=0.5536-val_acc=0.7766.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.542
Epoch 6, global step 118: val_loss reached 0.54207 (best 0.54207), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=6-step=118-val_loss=0.5421-val_acc=0.7872.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.538
Epoch 7, global step 135: val_loss reached 0.53799 (best 0.53799), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=7-step=135-val_loss=0.5380-val_acc=0.7915.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.533
Epoch 8, global step 152: val_loss reached 0.53264 (best 0.53264), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=8-step=152-val_loss=0.5326-val_acc=0.7915.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.526
Epoch 9, global step 169: val_loss reached 0.52583 (best 0.52583), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=9-step=169-val_loss=0.5258-val_acc=0.7894.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.523
Epoch 10, global step 186: val_loss reached 0.52275 (best 0.52275), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=10-step=186-val_loss=0.5228-val_acc=0.7894.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.522
Epoch 11, global step 203: val_loss reached 0.52179 (best 0.52179), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=11-step=203-val_loss=0.5218-val_acc=0.7894.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 12, global step 220: val_loss was not in top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.519
Epoch 13, global step 237: val_loss reached 0.51940 (best 0.51940), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=13-step=237-val_loss=0.5194-val_acc=0.7830.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.518
Epoch 14, global step 254: val_loss reached 0.51755 (best 0.51755), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=14-step=254-val_loss=0.5176-val_acc=0.7872.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.513
Epoch 15, global step 271: val_loss reached 0.51283 (best 0.51283), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=15-step=271-val_loss=0.5128-val_acc=0.7957.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.512
Epoch 16, global step 288: val_loss reached 0.51203 (best 0.51203), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210709_113857-2qwdlv1t/files/lung-cancer-detection/2qwdlv1t/checkpoints/epoch=16-step=288-val_loss=0.5120-val_acc=0.7915.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 17, global step 305: val_loss was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 18, global step 322: val_loss was not in top 1


Validating: 0it [00:00, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.512. Signaling Trainer to stop.
Epoch 19, global step 339: val_loss was not in top 1


### Finish experiment

In [23]:
# Describe the trained classifier as a W&B artifact, using the name, type and
# description from the "class_model" artifact section of the configuration.
model_artifact = wandb.Artifact(
    name=config["artifacts"]["class_model"]["name"],
    type=config["artifacts"]["class_model"]["type"],
    description=config["artifacts"]["class_model"]["description"],
)
# Attach the best checkpoint recorded by the ModelCheckpoint callback.
model_artifact.add_file(mc.best_model_path)
# Upload the artifact through the run that logged the training.
trainer.logger.experiment.log_artifact(model_artifact)

<wandb.sdk.wandb_artifacts.Artifact at 0x7f32a64f1e10>

In [24]:
# Close the W&B run; the recorded output shows the upload progress and the run summary/history.
wandb.finish()

VBox(children=(Label(value=' 129.67MB of 129.67MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=…

0,1
val_loss,0.51524
val_acc,0.78936
epoch,19.0
trainer/global_step,339.0
_runtime,225.0
_timestamp,1625830962.0
_step,25.0
train_loss,0.11314
train_acc,1.0


0,1
val_loss,██▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▆▇▇▇██████████████
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
trainer/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
_runtime,▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
_timestamp,▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,█▆▅▃▂▁
train_acc,▁▆▆▇██
