# CBISDDSM Classification Demo

## Configure environment

In [None]:
# Step up one directory level before the remaining cells run.
# NOTE(review): presumably this makes the parent directory the working
# directory so that `imagiq` can be imported without installation - confirm.
# TODO: installing the package with pip would make this cell unnecessary.
import os
os.chdir(os.pardir)

In [None]:
import numpy as np
import torch
from imagiq.federated.nodes import Node
from imagiq.models import Model
from imagiq.datasets import breast_density
from imagiq.datasets import CBISDDSMDataset, LoadBreastDensity, LoadBreastDensityd
from monai.transforms import (
    Compose,
    LoadImaged,
    ScaleIntensityd,
    SqueezeDimd,
    AddChanneld,
    AsChannelFirstd,
    Lambdad,
    ToTensord,
    Resized,
    RandRotated,
    RandFlipd,
    RandHistogramShiftd,
    RandGaussianNoised,
    RandZoomd, 
    RepeatChanneld, 
    NormalizeIntensityd
)
from monai.networks.nets import densenet121, densenet169
from monai.data import CacheDataset
import sys
import pandas as pd

## Load dataset

The CBISDDSM dataset (~100GB) is split into 10 zip files (about 4GB each after compression). Specific zip files can be downloaded by passing their index numbers when creating `CBISDDSMDataset`, e.g. `CBISDDSMDataset(..., download=[0, 5])` will download the first and sixth zip files.

In [None]:
def _shared_preprocessing():
    """Return fresh instances of the steps common to both pipelines.

    The training and validation pipelines start with the same six
    transforms; building them here keeps the two definitions in sync.
    """
    return [
        LoadImaged(keys='image'),
        # Transpose the loaded array before the channel axis is moved.
        Lambdad(keys='image', func=lambda x: x.T),
        AsChannelFirstd('image'),
        Resized('image', spatial_size=(225, 225), mode='nearest'),
        ScaleIntensityd('image'),
        NormalizeIntensityd(
            'image',
            subtrahend=[0.449],
            divisor=[0.226],
            channel_wise=True,
        ),
    ]


# Training pipeline: shared preprocessing, then random flip / zoom
# augmentation, then conversion of image and label to tensors.
train_transform = Compose(
    _shared_preprocessing()
    + [
        RandFlipd('image', spatial_axis=0, prob=0.5),
        RandZoomd('image', min_zoom=0.9, max_zoom=1.5, prob=0.5, keep_size=True),
        ToTensord(('image', 'label')),
    ]
)

# Validation / test pipeline: shared preprocessing without augmentation.
val_transform = Compose(
    _shared_preprocessing() + [ToTensord(('image', 'label'))]
)

# download=[0] requests only the zip file with index 0 for every split.
train_ds, val_ds, test_ds = (
    CBISDDSMDataset(section=section, transforms=pipeline, download=[0])
    for section, pipeline in (
        ('training', train_transform),
        ('validation', val_transform),
        ('test', val_transform),
    )
)

In [None]:
# Run the library's sanity check over the three dataset splits.
# NOTE(review): what exactly is verified is defined in
# imagiq.datasets.breast_density and is not visible in this notebook.
breast_density.sanity_check( train_ds, val_ds, test_ds )

In [None]:
# Print the summary of each split, in training / validation / test order.
for split in (train_ds, val_ds, test_ds):
    print(split)

In [None]:
import matplotlib.pyplot as plt

# Preview the first nine training samples in a 3x3 grid, titled by label.
_, axes = plt.subplots(3, 3, figsize=(8, 8))
grid = axes.ravel()
for idx, sample in enumerate(train_ds):
    if idx >= len(grid):
        break
    image, target = sample['image'], sample['label']
    pixels = np.array(image)
    ax = grid[idx]
    # Display the first slice along axis 0 in grayscale, windowed to [0, 1].
    ax.imshow(pixels[0, :, :], cmap='gray', vmin=0, vmax=1)
    ax.axis('off')
    ax.set_title(target)
plt.tight_layout()
plt.show()

## Create virtual nodes (institutions)

In [None]:
# Three local nodes (one per simulated institution) on ports 8000-8002.
node1_port, node2_port, node3_port = 8000, 8001, 8002

# Each Node acts as a separate virtual computer on localhost.
node1, node2, node3 = (
    Node("localhost", port)
    for port in (node1_port, node2_port, node3_port)
)

# Start the nodes in the same order they were created.
for node in (node1, node2, node3):
    node.start()

## Establish Connections

In [None]:
# Connect every node to each of the other two nodes (all ordered pairs),
# in the order node1 -> others, node2 -> others, node3 -> others.
peers = (
    (node1, node1_port),
    (node2, node2_port),
    (node3, node3_port),
)
for source, _ in peers:
    for destination, destination_port in peers:
        if destination is not source:
            source.connect_to("localhost", destination_port)

## Prepare a model

In [None]:
def _make_class_layers(in_features, out_features=4):
    """Build the classification head installed on both DenseNets.

    Args:
        in_features: number of feature channels fed to the linear layer
            (1024 for DenseNet121 and 1664 for DenseNet169 below).
        out_features: number of output classes.

    Returns:
        A ``torch.nn.Sequential`` applying ReLU, global average pooling,
        flattening, a linear layer and a softmax over the class axis.
    """
    return torch.nn.Sequential(
        torch.nn.ReLU(inplace=True),
        torch.nn.AdaptiveAvgPool2d(output_size=1),
        torch.nn.Flatten(start_dim=1, end_dim=-1),
        torch.nn.Linear(in_features=in_features, out_features=out_features, bias=True),
        # After Flatten(start_dim=1) and Linear the tensor is 2-D, so the
        # classes lie on dim 1.  Leaving `dim` unset is deprecated and emits
        # a warning on every forward pass; for 2-D input the implicit choice
        # is also dim 1, so the outputs are unchanged.
        torch.nn.Softmax(dim=1),
    )


# NOTE(review): the loss helper used for training in this notebook wraps
# torch.nn.CrossEntropyLoss, which is documented to expect unnormalized
# logits.  If Model.train feeds the network output straight into that loss,
# the Softmax above is applied in addition to the loss's own log-softmax -
# confirm whether the head should end at the Linear layer instead.
denseNet121 = densenet121(
    spatial_dims=2,
    in_channels=1,
    out_channels=4,
    pretrained=True
)
denseNet121.class_layers = _make_class_layers(in_features=1024)

denseNet169 = densenet169(spatial_dims=2, in_channels=1, out_channels=4, pretrained=True)
denseNet169.class_layers = _make_class_layers(in_features=1664)

In [None]:
# Register a DenseNet121 and a DenseNet169 entry on every node, named after
# the hospital the node stands for.
# NOTE(review): the same two network objects are passed to all three nodes;
# whether Model copies the network or keeps a reference is not visible here -
# confirm that the nodes do not end up sharing weights.
for node, hospital in ((node1, 'hospitalA'), (node2, 'hospitalB'), (node3, 'hospitalC')):
    node.add_model([
        Model(denseNet121, f'{hospital}_DenseNet121'),
        Model(denseNet169, f'{hospital}_DenseNet169'),
    ])

In [None]:
# Show the model bench of every node after registration.
for letter, node in (('A', node1), ('B', node2), ('C', node3)):
    print(f'Hospital{letter} model bench:', node.model_bench)

## Train @ Hospital A

In [None]:
# Loss helper for one-hot encoded labels.
def cross_entorpy_with_onehot(input, target):
    """Compute the cross-entropy loss against one-hot encoded targets.

    Args:
        input: predictions, handed unchanged to the cross-entropy loss.
        target: one-hot labels; for each row the position of the largest
            entry along dim 1 is taken as the class index.

    Returns:
        The cross-entropy loss with PyTorch's default settings.
    """
    class_indices = target.max(dim=1).indices
    return torch.nn.functional.cross_entropy(input, class_indices)

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print('device:', device)

# Train every model on hospital A's bench; history[k] holds whatever
# Model.train returns for the k-th model.
# TODO: this should happen in the node class (e.g. node1.train_all())
history = [None] * len(node1.model_bench)
for slot, bench_model in enumerate(node1.model_bench):
    # Each model gets its own optimizer and plateau-based LR scheduler.
    optimizer = torch.optim.Adam(bench_model.net.parameters(), lr=5e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=5
    )
    train_settings = dict(
        dataset=train_ds,
        loss_function=cross_entorpy_with_onehot,
        optimizer=optimizer,
        epochs=100,
        metrics=["AUC"],
        batch_size=16,
        device=device,
        validation_dataset=val_ds,
        scheduler=scheduler,
        earlystop={'patience': 5, 'delta': 0},
    )
    history[slot] = bench_model.train(**train_settings)

In [None]:
# show the training result

%matplotlib inline
cols = 4
plt.figure( figsize=(50, 10) ) 

plt.subplot( 1, cols, 1 )
for i in range(len(history)):
    plt.plot( history[i]['loss'] )
plt.title( 'training loss vs epoch' )
plt.legend( [x.name for x in node1.model_bench] )
plt.xlabel('epochs')
plt.ylabel('loss')

plt.subplot( 1, cols, 2 )
for i in range(len(history)):
    plt.plot( history[i]['val_loss'] )
plt.title( 'validation loss vs epoch' )
plt.legend( [x.name for x in node1.model_bench] )
plt.xlabel('epochs')
plt.ylabel('loss')

plt.subplot( 1, cols, 3 )
for i in range(len(history)):
    plt.plot( history[i]['auc'] )
plt.title( 'training average auc vs epoch' )
plt.legend( [x.name for x in node1.model_bench] )
plt.xlabel('epochs')
plt.ylabel('auc')

plt.subplot( 1, cols, 4 )
for i in range(len(history)):
    plt.plot( history[i]['val_auc'] )
plt.title( 'validation average auc vs epoch' )
plt.legend( [x.name for x in node1.model_bench] )
plt.xlabel('epochs')
plt.ylabel('auc')

In [None]:
# Commit hospital A's models with a message, then list each model's name
# followed by its recorded history.
node1.commit_models('initial commit')

for committed in node1.model_bench:
    print(committed.name)
    print(committed.history, '\n')

In [None]:
# Share hospital A's models with the other hospitals.
# NOTE(review): presumably the models are sent to the peers node1 connected
# to earlier - confirm against Node.broadcast_models.
node1.broadcast_models()

In [None]:
# Show the model benches of the other two hospitals after the broadcast.
# The second line prints node3's bench, so it is labelled hospital C
# (it was previously mislabelled as hospital B).
print( 'Hospital B model bench:', node2.model_bench )
print( 'Hospital C model bench:', node3.model_bench )

## Training @ Hospital B

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print('device:', device)

# Train every model on hospital B's bench; history[k] holds whatever
# Model.train returns for the k-th model.
# TODO: this should happen in the node class (e.g. node2.train_all())
history = [None] * len(node2.model_bench)
for slot, bench_model in enumerate(node2.model_bench):
    # Each model gets its own optimizer and plateau-based LR scheduler.
    optimizer = torch.optim.Adam(bench_model.net.parameters(), lr=5e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=5
    )
    train_settings = dict(
        dataset=train_ds,
        loss_function=cross_entorpy_with_onehot,
        optimizer=optimizer,
        epochs=10,
        metrics=["AUC"],
        batch_size=16,
        device=device,
        validation_dataset=val_ds,
        scheduler=scheduler,
    )
    history[slot] = bench_model.train(**train_settings)

In [None]:
# Plot the curves recorded while training at hospital B, one panel per
# history key and one line per model.
cols = 4
plt.figure(figsize=(50, 10))

# (history key, panel title, y-axis label), in left-to-right panel order.
panels = (
    ('loss', 'training loss vs epoch', 'loss'),
    ('val_loss', 'validation loss vs epoch', 'loss'),
    ('auc', 'training average auc vs epoch', 'auc'),
    ('val_auc', 'validation average auc vs epoch', 'auc'),
)
for position, (key, title, ylabel) in enumerate(panels, start=1):
    plt.subplot(1, cols, position)
    for run in history:
        plt.plot(run[key])
    plt.title(title)
    plt.legend([entry.name for entry in node2.model_bench])
    plt.xlabel('epochs')
    plt.ylabel(ylabel)

In [None]:
# Commit hospital B's models with a message, then list each model's name
# followed by its recorded history.
node2.commit_models('model updates')

for committed in node2.model_bench:
    print(committed.name)
    print(committed.history, '\n')

## Summary

This Jupyter notebook demonstrates the use of the `CBISDDSMDataset` class for a multi-class classification problem, with the networks wrapped in the `Model` class, which is held in a `Node`'s `model_bench`. We also demonstrated the commit functionality, with which a snapshot of a model can be tracked. Furthermore, we demonstrated the communication feature between different `Node`s, which lets us share models with other peers.

In [None]:
# Tear down the three virtual nodes in creation order.
for node in (node1, node2, node3):
    node.destroy()