# Train baseline model for nodule classification

## Setup environment

### [Optional]: Install dependencies

In [None]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whatever `pip` is first on PATH.
# NOTE(review): versions are unpinned; the `Trainer(gpus=...)` call and the
# "deprecated in v1.2" warning further down suggest PyTorch Lightning 1.x —
# pin the versions once confirmed.
%pip install "monai[nibabel,skimage,pillow,tqdm]" pytorch_lightning wandb

### [Optional]: Download data

In [None]:
# Create the local folders for processed images, processed masks and a cache
# directory (-p: parents are created and existing folders are not an error).
# NOTE(review): the data modules below pass ../data/cache/ as cache_dir, not
# ../data/full/cache — confirm which location is intended.
!mkdir -p ../data/full/processed/images ../data/full/processed/masks ../data/full/cache

In [None]:
# Copy the nodules.zip archive from the project's GCS bucket into the processed-data folder.
!gsutil cp gs://lung-cancer-detection/lidc-idri/processed/nodules.zip ../data/full/processed

In [None]:
# Copy the meta.zip archive from the project's GCS bucket into the processed-data folder.
!gsutil cp gs://lung-cancer-detection/lidc-idri/processed/meta.zip ../data/full/processed

In [None]:
# Copy the splits.zip archive (train/validation split files) from the GCS bucket into ../data/full/.
!gsutil cp gs://lung-cancer-detection/lidc-idri/splits.zip ../data/full/

In [None]:
# Extract the nodule archive next to the zip file.
# -o overwrites existing files without prompting (a notebook cell has no
# interactive stdin, so the default overwrite prompt cannot be answered on re-run);
# -q suppresses the per-file listing that would otherwise flood the cell output.
!unzip -q -o ../data/full/processed/nodules.zip -d ../data/full/processed

In [None]:
# Extract the metadata archive next to the zip file.
# -o overwrites existing files without prompting (a notebook cell has no
# interactive stdin, so the default overwrite prompt cannot be answered on re-run);
# -q keeps the cell output short.
!unzip -q -o ../data/full/processed/meta.zip -d ../data/full/processed

In [None]:
# Extract the split definitions into ../data/full.
# -o overwrites existing files without prompting (a notebook cell has no
# interactive stdin, so the default overwrite prompt cannot be answered on re-run);
# -q keeps the cell output short.
!unzip -q -o ../data/full/splits.zip -d ../data/full

### [Optional]: Enable module import

In [None]:
# Symlink the package directory from the parent folder into the current working
# directory so that `lung_cancer_detection` can be imported from this notebook.
# Running this a second time reports "File exists", which is harmless.
!ln -s ./../lung_cancer_detection

## Load modules and configuration

### Import modules

In [1]:
from pathlib import Path

import numpy as np
import torch
import wandb
from monai.networks.nets import DenseNet
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

from lung_cancer_detection.data.nodule import ClassificationDataModule
from lung_cancer_detection.models.classification import NoduleClassificationModule
from lung_cancer_detection.utils import load_config, load_json, preview_dataset

### Load configuration file

In [2]:
# Location of the cloud configuration, given relative to the notebook's working
# directory and converted to an absolute path.
config_file = Path("../configs/cloud.yaml")
cp = config_file.absolute()
# Echo whether the file is actually there before trying to load it.
cp.exists()

True

In [3]:
# Load the configuration file into `config` and display it for inspection.
config = load_config(cp)
config

{'random_seed': 47,
 'wandb': {'offline': False, 'project': 'lung-cancer-detection'},
 'artifacts': {'data': {'name': 'lidc-idri-raw',
   'version': 'v1',
   'type': 'dataset',
   'description': 'Zipped dataset of all chest CT scans, masks and nodule volumes in npy format, including scan and nodule metadata. Updated with new volume size for nodules.'},
  'train': {'name': 'lidc-train',
   'version': 'v1',
   'type': 'dataset',
   'description': 'List of patient IDs included in the training set'},
  'valid': {'name': 'lidc-valid',
   'version': 'v1',
   'type': 'dataset',
   'description': 'List of patient IDs included in the validation set'},
  'class_model': {'name': 'nodule-classification-model',
   'version': 'v0',
   'type': 'model',
   'description': 'Basic DenseNet for classifying lung nodules regarding their malignancy'},
  'seg_model': {'name': 'nodule-segmentation-model',
   'version': 'v1',
   'type': 'model',
   'description': 'Basic UNet for segmenting lung nodules in chest

## Explore datasets

In [4]:
# The train/validation split files are read from the configured split directory.
split_dir = Path(config["data"]["split_dir"])
splits = [load_json(split_dir / fname) for fname in ("train.json", "valid.json")]

# Pair of (original labels, mapped labels): 1-3 become class 0, 4-5 become class 1.
# Presumably these are the annotated malignancy ratings — confirm against the data module.
label_mapping = ([1, 2, 3, 4, 5], [0, 0, 0, 1, 1])

In [5]:
# Data module used for dataset exploration; all tunable values come from the
# `data` section of the configuration.
data_cfg = config["data"]
dm = ClassificationDataModule(
    data_dir=Path(data_cfg["data_dir"]),
    cache_dir=Path("../data/cache/").absolute(),
    splits=splits,
    min_anns=data_cfg["min_anns"],
    exclude_labels=[],
    label_mapping=label_mapping,
    batch_size=data_cfg["batch_size"],
)

In [6]:
# Run the data module's setup step; the following cells read dm.train_ds and dm.val_ds.
dm.setup()

In [7]:
# Report how many examples ended up in the training and validation datasets.
print(f"Number of training examples: {len(dm.train_ds)}")
print(f"Number of validation examples: {len(dm.val_ds)}")

Number of training examples: 2155
Number of validation examples: 470


In [8]:
def _labels_of(dataset):
    """Return the ``label`` entry of every item in ``dataset`` as one NumPy array."""
    return np.array([item["label"].numpy() for item in dataset])


# Iterates over both datasets once to collect the labels for the class-balance check.
train_labels = _labels_of(dm.train_ds)
valid_labels = _labels_of(dm.val_ds)

In [9]:
# The mean of the labels is the share of class-1 (malignant) nodules in each split,
# assuming the binary label_mapping defined above has been applied.
# The value is printed as a fraction in [0, 1], e.g. 0.2158.
print(f"Percentage of malignant training nodules: {np.mean(train_labels):.4f}")
# Fixed copy-paste error: this line reports the validation split, not the training split.
print(f"Percentage of malignant validation nodules: {np.mean(valid_labels):.4f}")

Percentage of malignant training nodules: 0.2158
Percentage of malignant training nodules: 0.2234


## Validation check: overfit one batch

In [10]:
# Fresh data module for the overfitting check: same arguments as the exploration
# data module above, plus aug_prob=0.1 (presumably the augmentation probability —
# confirm in ClassificationDataModule).
dm = ClassificationDataModule(
    data_dir=Path(config["data"]["data_dir"]),
    cache_dir=(Path()/"../data/cache/").absolute(),
    splits=splits,
    min_anns=config["data"]["min_anns"],
    exclude_labels=[],
    label_mapping=label_mapping,
    aug_prob=0.1,
    batch_size=config["data"]["batch_size"]
)

In [11]:
# DenseNet whose dimensions and channel counts come from the `class_model` config
# section. No dropout_prob is passed here, unlike the experiment network further down.
net = DenseNet(
    spatial_dims=config["class_model"]["spatial_dims"],
    in_channels=config["class_model"]["in_channels"],
    out_channels=config["class_model"]["out_channels"],
)

In [12]:
# Wrap the network in the project's Lightning module; no lr is passed, so the module's default is used.
model = NoduleClassificationModule(net, num_classes=config["class_model"]["num_classes"])

In [13]:
# Trainer for the sanity check: one GPU, training restricted to a single batch
# (overfit_batches=1), at most 20 epochs. No logger or callbacks are configured here.
trainer = Trainer(gpus=1, overfit_batches=1, max_epochs=20)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [14]:
# Fit on the single batch; the model should be able to memorise it if the training loop is wired correctly.
trainer.fit(model, datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | DenseNet         | 11.2 M
1 | loss      | CrossEntropyLoss | 0     
2 | train_acc | Accuracy         | 0     
3 | val_acc   | Accuracy         | 0     
-----------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.979    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  'You requested to overfit but enabled val/test dataloader shuffling.'
  'You requested to overfit but enabled training dataloader shuffling.'


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  "Relying on `self.log('val_loss', ...)` to set the ModelCheckpoint monitor is deprecated in v1.2"


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

## Run experiment

### Configure experiment

In [88]:
# Reload the configuration so the experiment starts from the values on disk.
config = load_config(cp)
# Apply the configured random seed before the data module and network are built,
# so that weight initialisation, shuffling and augmentation are repeatable.
# The trailing semicolon suppresses the echoed return value.
seed_everything(config["random_seed"]);

In [89]:
# Data module for the real training run; aug_prob is not passed here, so the
# data module's own default applies.
data_cfg = config["data"]
dm = ClassificationDataModule(
    data_dir=Path(data_cfg["data_dir"]),
    cache_dir=Path("../data/cache/").absolute(),
    splits=splits,
    min_anns=data_cfg["min_anns"],
    exclude_labels=[],
    label_mapping=label_mapping,
    batch_size=data_cfg["batch_size"],
)

In [90]:
# All model hyperparameters are read from the `class_model` section of the config.
model_cfg = config["class_model"]

# Classification backbone, this time with the configured dropout probability.
net = DenseNet(
    spatial_dims=model_cfg["spatial_dims"],
    in_channels=model_cfg["in_channels"],
    out_channels=model_cfg["out_channels"],
    dropout_prob=model_cfg["dropout"],
)

# Lightning wrapper around the backbone, using the configured learning rate.
model = NoduleClassificationModule(
    net,
    num_classes=model_cfg["num_classes"],
    lr=model_cfg["lr"],
)

In [91]:
# Log in to Weights & Biases; the cell echoes the call's return value (True in the recorded run).
wandb.login()

True

In [92]:
# W&B logger for the configured project; the run is tagged with job_type "training".
logger = WandbLogger(project=config["wandb"]["project"], job_type="training")

In [93]:
# Stop training once the monitored validation loss stops improving.
es = EarlyStopping(monitor="val_loss", verbose=True)

# Keep only the single best checkpoint by validation loss; the file name records
# epoch, step, validation loss and validation accuracy.
mc = ModelCheckpoint(
    monitor="val_loss",
    filename="{epoch}-{step}-{val_loss:.4f}-{val_acc:.4f}",
    save_top_k=1,
    verbose=True,
)

callbacks = [es, mc]

In [94]:
# Trainer for the real run: W&B logging plus the early-stopping and checkpoint
# callbacks. All remaining Trainer arguments are taken from the `experiment`
# section of the configuration.
trainer = Trainer(
    logger=logger,
    callbacks=callbacks,
    **config["experiment"]
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [95]:
# Declare the train and validation split artifacts ("<name>:<version>") as inputs
# of this W&B run.
train_art = config["artifacts"]["train"]
valid_art = config["artifacts"]["valid"]
trainer.logger.experiment.use_artifact(f"{train_art['name']}:{train_art['version']}")
trainer.logger.experiment.use_artifact(f"{valid_art['name']}:{valid_art['version']}")

<Artifact QXJ0aWZhY3Q6NDIzMjE4OA==>

### Train model

In [96]:
# Train the model; early stopping and checkpointing are handled by the callbacks configured above.
trainer.fit(model, datamodule=dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | DenseNet         | 11.2 M
1 | loss      | CrossEntropyLoss | 0     
2 | train_acc | Accuracy         | 0     
3 | val_acc   | Accuracy         | 0     
-----------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.979    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 0.646
Epoch 0, global step 67: val_loss reached 0.64597 (best 0.64597), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210712_085128-iru4609w/files/lung-cancer-detection/iru4609w/checkpoints/epoch=0-step=67-val_loss=0.6460-val_acc=0.6170.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.075 >= min_delta = 0.0. New best score: 0.571
Epoch 1, global step 135: val_loss reached 0.57072 (best 0.57072), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210712_085128-iru4609w/files/lung-cancer-detection/iru4609w/checkpoints/epoch=1-step=135-val_loss=0.5707-val_acc=0.7574.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.552
Epoch 2, global step 203: val_loss reached 0.55154 (best 0.55154), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210712_085128-iru4609w/files/lung-cancer-detection/iru4609w/checkpoints/epoch=2-step=203-val_loss=0.5515-val_acc=0.7723.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.540
Epoch 3, global step 271: val_loss reached 0.53967 (best 0.53967), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210712_085128-iru4609w/files/lung-cancer-detection/iru4609w/checkpoints/epoch=3-step=271-val_loss=0.5397-val_acc=0.7787.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.536
Epoch 4, global step 339: val_loss reached 0.53580 (best 0.53580), saving model to "/home/jupyter/lung-cancer-detection/nbs/wandb/run-20210712_085128-iru4609w/files/lung-cancer-detection/iru4609w/checkpoints/epoch=4-step=339-val_loss=0.5358-val_acc=0.7745.ckpt" as top 1


Validating: 0it [00:00, ?it/s]

Epoch 5, global step 407: val_loss was not in top 1


Validating: 0it [00:00, ?it/s]

Epoch 6, global step 475: val_loss was not in top 1


Validating: 0it [00:00, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.536. Signaling Trainer to stop.
Epoch 7, global step 543: val_loss was not in top 1


### Finish experiment

In [97]:
# Name, type and description of the model artifact are defined in the config.
artifact_meta = config["artifacts"]["class_model"]
model_artifact = wandb.Artifact(
    artifact_meta["name"],
    type=artifact_meta["type"],
    description=artifact_meta["description"],
)

# Attach the best checkpoint recorded by the ModelCheckpoint callback and log
# the artifact to the current run.
model_artifact.add_file(mc.best_model_path)
trainer.logger.experiment.log_artifact(model_artifact)

<wandb.sdk.wandb_artifacts.Artifact at 0x7fe74501c250>

In [98]:
# Finish the W&B run; the recorded output shows the final upload and the run summary.
wandb.finish()

VBox(children=(Label(value=' 129.67MB of 129.67MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=…

0,1
train_loss,0.25847
train_acc,0.96875
epoch,7.0
trainer/global_step,543.0
_runtime,144.0
_timestamp,1626080032.0
_step,17.0
val_loss,0.54732
val_acc,0.78298


0,1
train_loss,█▇▆▆▇▅▂▃▃▁
train_acc,▁▄▃▃▁▃▆▆▅█
epoch,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
trainer/global_step,▁▁▂▂▂▃▃▄▄▅▅▅▆▆▇▇▇█
_runtime,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇█
_timestamp,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇█
_step,▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇██
val_loss,█▃▂▁▁▂▁▂
val_acc,▁▇██████
