# Feature Extraction Experiments

In [1]:
import os
import pathlib
from dataclasses import dataclass
import tensorflow as tf
import pandas as pd
import wandb

from bcd.model.network.shainnet import ShainNetConfig, ShainNetFactory
from bcd.model.repo import ModelRepo
from bcd.model.base import DenseNet
from bcd.model.experiment import Experiment

pd.set_option('display.max_rows',999)

## Configuration

In [2]:
# Project Parameters
%env "WANDB_NOTEBOOK_NAME" "experiments.ipynb"
datasets = {"Development":{"name": "CBIS-DDSM_10","directory": "data/image/1_final/training_10/training/"},
            "Stage": {"name": "CBIS-DDSM_30","directory": "data/image/1_final/training_30/training/"},
            "Production": {"name": "CBIS-DDSM","directory": "data/image/1_final/training/training/"},
            }
mode = "Development"
project = f"Breast-Cancer-Detection-{mode}" 

# Experiment Parameters
force = False  # Whether to retrain if the model and config already exists.

# Training Config
metrics = ['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
loss = "binary_crossentropy"
epochs = 100 # Maximum number of epochs to train, subject to any early stopping callback.
learning_rate = [1e-3, 1e-4, 1e-5]
training_config = {"loss": loss, "epochs": epochs, "learning_rate": learning_rate}

# Network Config
activation = "sigmoid" # Network configuration common to all networks.
    
# Fine Tuning Config
ft_fine_tune = False
ft_iceblocks = 10 # Number of blocks of layers to potentially thaw.
ft_sessions= 5  # Number of iceblocks to actually unthaw
ft_learning_rate_init = 1e-5 # Learning rate at the beginning of fine tuning.
ft_learning_rate_end = 1e-8 # The lowest learning rate subject to learning rate decay during fine tuning.
ft_epochs_init: int = 5  # Number of epochs to train first session
ft_epochs_end: int = 50  # Number of epochs to train last session.
fine_tune_config = {"fine_tune": ft_fine_tune, "iceblocks": ft_iceblocks, "sessions": ft_sessions, 
                    "learning_rate_init": ft_learning_rate_init, 
                    "learning_rate_end": ft_learning_rate_end,
                    "epochs_init": ft_epochs_init,
                    "epochs_end": ft_epochs_end,
                    }

# Dataset params
dataset = datasets[mode]["name"]
batch_size = 64 if mode == "Production" else 32
input_shape = (224,224,3)
output_shape = 1
train_dir = pathlib.Path(datasets[mode]["directory"]).with_suffix('') 
dataset_config = {"dataset": dataset, "batch_size": batch_size, "input_shape": input_shape, "output_shape": output_shape}

# Checkpoint Config
ckpt_monitor = "val_accuracy"
ckpt_verbose = 1
ckpt_save_best_only = True
ckpt_save_weights_only = False
ckpt_mode = "auto"
checkpoint_config = {"monitor": ckpt_monitor, "verbose": ckpt_verbose, "save_best_only": ckpt_save_best_only, "mode": ckpt_mode}

# Early stop parameters 
es_min_delta = 0.0001
es_monitor = "val_loss"  # Monitor validation loss for early stopping
es_patience = 10  # The number of epochs for which lack of improvement is tolerated 
es_restore_best_weights = True  # Returns the best weights rather than the weights at the last epoch.
es_verbose = 1
early_stop_config = {"min_delta": es_min_delta, "monitor": es_monitor, "patience": es_patience, 
                     "restore_best_weights": es_restore_best_weights, "verbose": es_verbose}

# Reduce LR on Plateau Parameters
rlr_monitor = "val_loss"
rlr_factor = 0.5
rlr_patience = 3
rlr_verbose = 1
rlr_mode = "auto"
rlr_min_delta = 1e-4
rlr_min_lr=1e-10
learning_rate_config = {"monitor": rlr_monitor, "factor": rlr_factor, "patience": rlr_patience, "verbose": 
                        rlr_verbose, "mode": rlr_mode, "min_delta": rlr_min_delta, "min_lr": rlr_min_lr}



env: "WANDB_NOTEBOOK_NAME"="experiments.ipynb"


## Experiment Config

In [3]:
config = {
    "project": project,       
    "mode": mode,
    "dataset_config": dataset_config,
    "fine_tune_config": fine_tune_config,
    "training_config": training_config,
    "checkpoint_config": checkpoint_config,
    "early_stop_config": early_stop_config,
    "learning_rate_config": learning_rate_config,
    
}

## Load Data

In [4]:
# Training DataSet (10%)
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    labels="inferred",
    color_mode="rgb",
    image_size=(224,224),
    shuffle=True,
    validation_split=0.2,
    subset='training',
    interpolation="bilinear",
    seed=123,
    batch_size=batch_size)

# Validation DataSet (10%)
val_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    labels="inferred",
    color_mode="rgb",
    image_size=(224,224),
    shuffle=True,
    validation_split=0.2,
    subset='validation',
    interpolation="bilinear",
    seed=123,
    batch_size=batch_size)

Found 276 files belonging to 2 classes.
Using 221 files for training.
Found 276 files belonging to 2 classes.
Using 55 files for validation.


In [5]:
val_ds.class_names

['benign', 'malignant']

## Callbacks

In [6]:
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor=es_monitor, 
                                                       min_delta=es_min_delta,
                                                       patience=es_patience, 
                                                       restore_best_weights=es_restore_best_weights,
                                                       verbose=es_verbose)

reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor=rlr_monitor,
                                                          factor=rlr_factor,
                                                          patience=rlr_patience,
                                                          verbose=rlr_verbose,
                                                          mode=rlr_mode,
                                                          min_delta=rlr_min_delta,
                                                          min_lr=rlr_min_lr)
callbacks = [early_stop_callback, reduce_lr_callback]

## Dependencies

In [9]:
repo = ModelRepo(mode = mode, project=project)
optimizer=tf.keras.optimizers.Adam

## ShainNet 

In [10]:
network_config = ShainNetConfig(activation=activation)
factory = ShainNetFactory(config=network_config, input_shape=input_shape, output_shape=output_shape, activation=activation)
densenet = factory.create(base_model=DenseNet())


## ShainNet
Instantiate and run the project.

In [11]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33maistudio[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:
def sweep(config):
    experiment = Experiment(network=densenet, config=config, repo=repo, 
                        callbacks=callbacks, metrics=metrics, 
                        optimizer=optimizer, force=force)
    return experiment.run(train_ds=train_ds, val_ds=val_ds)

In [13]:
sweep_config = {"method": "random"}
metric = {"name": "val_accuracy", "goal": "maximize"}
sweep_config["metric"] = metric

In [14]:
parameters = {"learning_rate": {"values": [1e-3, 1e-4, 1e-5]}}
sweep_config["parameters"] = parameters

In [15]:
def main():
    wandb.init(project="Breast-Cancer-Detection-Development")
    score = sweep(wandb.config)
    wandb.log({"score": score})

In [16]:
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
 'parameters': {'learning_rate': {'values': [0.001, 0.0001, 1e-05]}}}


In [17]:
sweep_id = wandb.sweep(sweep=sweep_config, project="Breast-Cancer-Detection-Development")
wandb.agent(sweep_id, function=main, count=10)

Create sweep with ID: 9pctusqn
Sweep URL: https://wandb.ai/aistudio/Breast-Cancer-Detection-Development/sweeps/9pctusqn


[34m[1mwandb[0m: Agent Starting Run: 42ogv9fq with config:
[34m[1mwandb[0m: 	learning_rate: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.021 MB of 0.021 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 42ogv9fq errored: KeyError('project')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 42ogv9fq errored: KeyError('project')
[34m[1mwandb[0m: Agent Starting Run: spq330cr with config:
[34m[1mwandb[0m: 	learning_rate: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.021 MB of 0.021 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run spq330cr errored: KeyError('project')
[34m[1mwandb[0m: [32m[41mERROR[0m Run spq330cr errored: KeyError('project')
[34m[1mwandb[0m: Agent Starting Run: hy33naky with config:
[34m[1mwandb[0m: 	learning_rate: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.021 MB of 0.021 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run hy33naky errored: KeyError('project')
[34m[1mwandb[0m: [32m[41mERROR[0m Run hy33naky errored: KeyError('project')
Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: To disable this check set WANDB_AGENT_DISABLE_FLAPPING=true
