## Configure Environment

In [None]:
import os
# Switch the working directory to its parent; presumably so the notebook runs
# from the repository root — confirm against the project layout.
os.chdir("..")

In [None]:
import imagiq.federated as iqf
import numpy as np
import torch
from imagiq.models import Model, load_model
from imagiq.datasets import NIHDataset
from monai.transforms import (
    Compose,
    LoadImaged,
    ScaleIntensityd,
    SqueezeDimd,
    AddChanneld,
    AsChannelFirstd,
    Lambdad,
    ToTensord,
    Resized,
    RandRotated,
    RandFlipd,
    RandHistogramShiftd,
    RandZoomd,
    RandGaussianNoised,
    CastToTyped
)

from monai.networks.nets import se_resnet50, se_resnet101, densenet121, densenet169, densenet201
from monai.data import CacheDataset
import sys
import matplotlib.pyplot as plt

# not necessarily needed
import gc
import time

## Create and start local nodes

In [None]:
# Create a node bound to localhost on the chosen port, then start it.
port = 18010
node = iqf.nodes.Node("localhost", port)
node.start()

## Load data

In [None]:
# Build the training, test and validation splits, in that order. Every call
# gets its own ``download=[0]`` list.
# NOTE(review): presumably ``download`` selects which archive part(s) to fetch —
# confirm against NIHDataset.
train_ds, test_ds, val_ds = (
    NIHDataset(split_name, download=[0])
    for split_name in ("training", "test", "validation")
)

def rand_idx(i, j):
    """Return a random integer in ``[0, j)`` that is different from ``i``.

    Candidates are drawn from NumPy's global random state with
    ``np.random.randint(j)`` and redrawn until the value differs from ``i``.
    Passing an ``i`` outside ``[0, j)`` (for example ``-1``) therefore excludes
    nothing and yields a plain uniform draw.

    Args:
        i: Index that must not be returned.
        j: Exclusive upper bound of the candidate range.

    Returns:
        An integer ``idx`` with ``0 <= idx < j`` and ``idx != i``.

    Raises:
        ValueError: If the only candidate is the excluded index
            (``j == 1`` and ``i == 0``), or, raised by NumPy, if ``j <= 0``.
    """
    # With a single candidate that is also the excluded value, the redraw loop
    # below could never terminate, so fail loudly instead of hanging.
    if j == 1 and i == 0:
        raise ValueError("no index in range(1) differs from 0")
    while True:
        idx = np.random.randint(j)
        if idx != i:
            return idx

def generate_data(dataset):
    """Split ``dataset`` into five per-node subsets with finding-specific bias.

    Each sample is a mapping with a ``"label"`` vector. The first matching
    finding in the priority order atelectasis (label index 1), effusion (3),
    infiltration (4), nodule (6) decides the preferred node (0, 1, 2, 3
    respectively). Such a sample goes to its preferred node with probability
    0.8 and otherwise to one of the other four nodes chosen by ``rand_idx``.
    Samples with none of these findings go to any of the five nodes chosen by
    ``rand_idx``.

    The stored label is collapsed to the scalar ``1 - label[0]``.
    NOTE(review): presumably ``label[0]`` is the "no finding" flag, making the
    new label a binary "any finding" target — confirm against the dataset.

    The input samples are left untouched: every stored sample is a shallow copy
    carrying the new label, so the function can be called again on the same
    dataset.

    Args:
        dataset: Iterable of mappings that each contain a ``"label"`` entry
            indexable at positions 0, 1, 3, 4 and 6.

    Returns:
        A list of five lists; element ``k`` holds the samples assigned to
        node ``k``.
    """
    # (label index, preferred node) pairs, checked in priority order.
    biased_findings = (
        (1, 0),  # atelectasis  -> more likely node1
        (3, 1),  # effusion     -> more likely node2
        (4, 2),  # infiltration -> more likely node3
        (6, 3),  # nodule       -> more likely node4
    )
    num_nodes = 5
    tmps = [[] for _ in range(num_nodes)]

    for data in dataset:
        labels = data["label"]
        preferred = next(
            (node_idx for label_idx, node_idx in biased_findings if labels[label_idx]),
            None,
        )

        # Copy before relabelling so the caller's sample keeps its label vector.
        sample = dict(data)
        sample["label"] = 1 - labels[0]

        if preferred is None:
            # Other findings: -1 excludes nothing, so any node can be picked.
            target = rand_idx(-1, num_nodes)
        elif np.random.rand() < 0.8:
            target = preferred
        else:
            target = rand_idx(preferred, num_nodes)
        tmps[target].append(sample)

    return tmps

# Seed NumPy's global RNG so the random node assignment is repeatable.
np.random.seed(123)

# Partition training, then test, then validation, so the seeded random stream
# is consumed in a fixed order.
train_tmps, test_tmps, val_tmps = (
    generate_data(split_ds) for split_ds in (train_ds, test_ds, val_ds)
)

In [None]:
# Training pipeline: load the image, collapse it to one channel, rescale the
# intensities, resize to 224x224, then apply random augmentation.
train_transforms = Compose([
    LoadImaged("image"),
    # Average over axis 2 when the loaded array is 3-D; presumably this collapses
    # colour channels to grayscale — confirm against the image files.
    Lambdad("image", func=lambda x: np.mean(x, axis=2) if len(x.shape) == 3 else x),
    AsChannelFirstd("image"),
    AddChanneld("image"),
    ScaleIntensityd("image"),
    Resized("image", spatial_size=(224, 224), mode="nearest"),
    # MONAI's ``range_x`` is expressed in radians, so convert the +/-15 degree
    # limit explicitly (a bare 15 would allow rotations of up to 15 rad).
    RandRotated("image", range_x=np.deg2rad(15), prob=0.5, keep_size=True),
    RandFlipd("image", spatial_axis=0, prob=0.5),
    RandZoomd("image", min_zoom=0.9, max_zoom=1.1, prob=0.5, keep_size=True),
])

# Preprocessing for validation/test data: the same load, channel, scaling and
# resize steps, with no random transforms.
val_transforms = Compose([
    LoadImaged("image"),
    # Average over axis 2 when the loaded array is 3-D; presumably this collapses
    # colour channels to grayscale — confirm against the image files.
    Lambdad("image", func=lambda x: np.mean(x, axis=2) if len(x.shape) == 3 else x),
    AsChannelFirstd("image"),
    AddChanneld("image"),
    ScaleIntensityd("image"),
    Resized("image", spatial_size=(224,224), mode="nearest")
])

# Index of the per-node subset used by this notebook; subset 0 is the one
# generate_data favours for atelectasis samples.
biasIdx = 0
# Pair each selected subset with its transform pipeline. The training subset
# gets the augmenting pipeline; test and validation get the deterministic one.
train_dataset = CacheDataset(train_tmps[biasIdx], train_transforms)
test_dataset  = CacheDataset(test_tmps[biasIdx], val_transforms)
val_dataset  = CacheDataset(val_tmps[biasIdx], val_transforms)

## Prepare model bench

In [None]:
# Register five single-channel, two-class SE-ResNets on the node, alternating
# between the 50- and 101-layer variants and named model_1 ... model_5. The
# networks are constructed in that order.
node.add_model([
    Model(build_net(spatial_dims=2, in_channels=1, num_classes=2), f"model_{position}")
    for position, build_net in enumerate(
        (se_resnet50, se_resnet101, se_resnet50, se_resnet101, se_resnet50),
        start=1,
    )
])

In [None]:
# Inspect the models currently held in the node's model bench.
node.model_bench

In [None]:
# Use the following if your machine is low on GPU memory: run Python's garbage
# collector, then release the CUDA memory cached by PyTorch.
gc.collect()
torch.cuda.empty_cache()

In [None]:
## Train models
# Train every model on the bench in turn and collect one training history per
# model, in bench order.

# Use the first GPU when CUDA is available and fall back to the CPU otherwise,
# so the loop also runs on machines without a GPU.
# NOTE(review): assumes Model.train accepts the device string "cpu" — confirm.
train_device = "cuda:0" if torch.cuda.is_available() else "cpu"
histories = []

for model in node.model_bench:
    print(model.name)
    optimizer = torch.optim.Adam(model.net.parameters(), 5e-3)
    # Cut the learning rate by 10x once the monitored value has stopped
    # decreasing for 5 epochs.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=5
    )
    histories.append(
        model.train(
            train_dataset,
            torch.nn.CrossEntropyLoss(),
            optimizer,
            epochs=1,
            metrics=["AUC"],
            batch_size=5,
            device=train_device,
            validation_dataset=val_dataset,
            scheduler=scheduler,
        )
    )

    # Release Python garbage and PyTorch's cached CUDA memory before the next
    # model; the pause presumably gives the device time to settle — confirm.
    gc.collect()
    torch.cuda.empty_cache()
    time.sleep(5)

In [None]:
# Two side-by-side panels, validation loss and validation AUC per epoch, with
# one curve per model on the bench.
plt.figure(figsize=(20, 6))

for panel_no, (history_key, y_label, panel_title) in enumerate(
    (
        ('val_loss', 'loss', 'epoch vs validation loss'),
        ('val_auc', 'auc', 'epoch vs validation auc'),
    ),
    start=1,
):
    plt.subplot(1, 2, panel_no)
    for history in histories:
        plt.plot(history[history_key])
    plt.legend([model.name for model in node.model_bench])
    plt.xlabel('epoch')
    plt.ylabel(y_label)
    plt.title(panel_title)

## Create Ensemble with validation dataset
**size**: the number of models you want to combine<br>
**models**: defaults to all models in the model bench, but you can pass in a different set of models<br>
**diversity_measure**: defaults to auc. You can also pass gd for generalized diversity.<br>
**vote_method**: how the votes are aggregated; "majority" and "probability" are available<br>

In [None]:
# Set up an ensemble of size 3 over the node's whole model bench, passing the
# validation dataset as `dataset`, the test dataset as `test_dataset`, and
# "probability" as the vote aggregation method.
ensemble = node.create_ensemble(
    size=3,
    dataset=val_dataset,
    test_dataset=test_dataset,
    models = node.model_bench,
    vote_method="probability"
)

## Ensemble result on validation data set

### Create Ensemble model
**percentage**: defaults to 0. Only takes effect when *gd* is the diversity_measure; it is the weight that diversity receives in the selection criterion. For example, with percentage equal to 0.2, the score is `0.8*auc + 0.2*diversity` <br>
**method**: "normal" creates an ensemble by trying all combinations; "hill_climbing" creates an ensemble with an optimized search that takes less time but may give a comparable or slightly worse result <br>

In [None]:
# Create the ensemble with the "normal" method (tries all combinations), with
# the diversity percentage set to 0.
ensemble.create(percentage=0,method="normal")

### Return Ensemble model by hill climbing method

Same parameters as the all-combinations method above, plus:<br>
**plot_chart**: defaults to False. Set it to True to plot a chart comparing the AUC and the time taken.

In [None]:
# Create the ensemble again with the "hill_climbing" method and request the
# comparison chart.
ensemble.create(percentage=0,plot_chart=True,method="hill_climbing")

## Check the Result Records

**uid**: the unique id of the record<br>
**time**: timestamp of when the result was produced<br>
**size**: the number of models selected<br>
**function_name**: the function that produced the result: "evaluate on test_dataset", "hill_climbing", or "ensemble_size_k"<br>
**diversity_measure**, **voted_measure**: the settings used for that run<br>
**model_bench**: the model pool you supplied, as model uids<br>
**selected_models**: a dictionary holding the result in two sections, "uid" and "name". You can use the uids for future predictions<br>

**diversity_score**, **ensemble_val_auc**, **ensemble_val_acc**: the evaluation results on the validation dataset<br>
**ensemble_test_auc**, **ensemble_test_acc**, **pred**: the evaluation results on the test dataset<br>

In [None]:
# Display the result records collected so far.
ensemble.history

## Ensemble result on test data set

**batch size**: defaults to 1<br>
**ensemble_measure**: see the description above<br>
**test**: defaults to False (predict on the validation data); set it to True to predict on the test data<br>

In [None]:
# Update result_model with the best result recorded so far.
ensemble.set_best_ensemble()

In [None]:
# Run the ensemble's prediction on the test dataset.
ensemble.predict(test_dataset)

## Save all results

In [None]:
# Save the ensemble results under "/Result".
# NOTE(review): the leading slash makes this an absolute path at the filesystem
# root — confirm that is intended rather than a relative "Result" directory.
ensemble.save("/Result")