In [None]:
# run the notebook that defines the helper functions used in this notebook
# (adjust the path below if model_functions.ipynb is located elsewhere)
%run model_functions.ipynb

In [None]:
import random
import cv2
import numpy as np
import sys
import pandas as pd 
import matplotlib.pyplot as plt
import tensorflow as tf
import pydicom
import tensorflow.keras

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm

import wandb
from wandb.keras import WandbCallback


#######################################################
#                  EXPERIMENT SETTINGS                #
#                                                     #
# please modify if you want to use different settings #
#######################################################

# Constants in this cell are read by cross_validation(), train_model() and the
# sweep-configuration cell at the end of the notebook.

TRAIN_IMG_PATH = "/dataset/jpg/" # path to the directory with input images
# CSV loaded by cross_validation(); it reads at least the 'ID' and 'Study' columns from it.
TRAIN_DF_PATH = "dataframes/experiments.csv"
# NOTE: this seeds only Python's built-in `random` module; NumPy and TensorFlow
# random generators are not seeded in this cell.
random.seed(12345)

# jpg images are already preprocessed, which makes training faster, but the
# preprocessing options available for dicom input cannot be applied to them
IMAGE_FORMAT = "jpg" # valid values are "jpg" or "dicom"

# Every list below is one axis of the wandb grid sweep: the sweep tries all
# combinations of the listed values, one wandb run per combination.
ARCHITECTURE = ["EfficientNetB0", "DenseNet121", "InceptionV3", "ResNet50"]
POOLING = ["avg", "max"]
BATCH_SIZE = [8, 16, 32, 64]
# Not swept: fixed number of training/evaluation passes per cross-validation fold.
EPOCHS = 1
# LEARNING_RATE, DECAY_STEPS and DECAY_RATE parametrize the ExponentialDecay
# learning-rate schedule built in cross_validation().
LEARNING_RATE = [0.00005, 0.00002, 0.00001, 0.000005]
DECAY_STEPS = [500, 1000, 1500]
DECAY_RATE = [0.95, 0.97, 0.99]
# Passed to create_model(). train_model() unpacks model.evaluate() into exactly
# (loss, auc), so it presumably relies on this list holding a single metric.
METRICS = [tf.keras.metrics.AUC(multi_label=True)]
# Optimizer class (not an instance); handed to create_model() together with the
# learning-rate schedule.
OPTIMIZER = tf.keras.optimizers.Adam

# Forwarded to DataGenerator(noise=...) and used to choose between the balanced
# and the plain train/validation tuple builders, respectively.
NOISE = [True, False]
BALANCED = [True, False]

# jpg data are already preprocessed, so the settings below have no impact on them
# to experiment with preprocessing, download jpeg data preprocessed with the desired settings or use dicom data
WINDOW_WIDTH = [80, 200]
WINDOW_CENTER = [40, 80]
CONTEXT_3D = [True, False]
CLAHE = [True, False]

In [None]:
def train_model(model, training_generator, validation_generator, config):
    """
    Trains a CNN model and plots the process using wandb.ai.

    The model is trained one epoch at a time; after every epoch it is evaluated
    on the validation data and the results are logged to wandb.ai under the
    keys 'val_loss' and 'val_auc'.

    :param model: the CNN model to train; its evaluate() must return exactly (loss, auc)
    :param training_generator: a generator of the training data
    :param validation_generator: a generator of the validation data
    :param config: a wandb.ai config; only its `epochs` attribute is read here
    """
    for i in range(config.epochs):
        print(f'Epoch {i+1}')
        # initial_epoch=i together with epochs=i+1 still trains exactly one epoch,
        # but Keras (and therefore WandbCallback) sees the real epoch index instead
        # of restarting from epoch 0 on every pass of this loop.
        model.fit(x=training_generator,
                  initial_epoch=i,
                  epochs=i + 1,
                  callbacks=[WandbCallback()])
        loss, auc = model.evaluate(x=validation_generator, callbacks=[WandbCallback()])
        wandb.log({'val_loss': loss, 'val_auc': auc})

In [None]:
def cross_validation():
    """
    Cross-validates a CNN model inside a single wandb.ai run.

    The hyperparameters (architecture, pooling, batch size, learning-rate
    schedule, preprocessing flags, ...) are read from wandb.config. The studies
    listed in the dataframe are split into folds, and for every fold a fresh
    model is created, trained and evaluated with train_model().
    """
    dataframe = pd.read_csv(TRAIN_DF_PATH)
    rd_seed, n_splits = 123, 3

    run_settings = {
        "epochs": EPOCHS,
        "loss_function": "weighted_multi_label_log_loss",
        "desc": "",
    }
    wandb.init(config=run_settings)
    config = wandb.config

    # remove the two records that are excluded from the experiments
    for excluded_id in ("ID_6431af929", "ID_00de64f80"):
        excluded_rows = dataframe.index[dataframe['ID'] == excluded_id]
        dataframe.drop(index=excluded_rows, inplace=True)
    studies = dataframe['Study'].unique()

    # the split is done over studies, so a study never ends up in both subsets
    image_size = (224, 224)
    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=rd_seed)
    for i, (train, valid) in enumerate(splitter.split(studies)):
        print(f'Cross-validation fold {i}')
        tf.keras.backend.clear_session()

        build_tuples = get_balanced_train_valid_tuples if config.balanced else get_train_valid_tuples
        X_train, y_train, X_valid, y_valid = build_tuples(dataframe, studies[train], studies[valid])

        # keyword arguments shared by the training and the validation generator
        generator_options = dict(
            batch_size=config.batch_size,
            dim=image_size,
            n_classes=6,
            shuffle=True,
            window=(config.window_center, config.window_width),
            context3d=config.context3d,
            clahe=config.clahe,
            noise=config.noise,
            image_format=IMAGE_FORMAT,
        )
        training_generator = DataGenerator(X_train, y_train, TRAIN_IMG_PATH, dataframe, **generator_options)
        validation_generator = DataGenerator(X_valid, y_valid, TRAIN_IMG_PATH, dataframe, augment=False, **generator_options)

        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=config.learning_rate,
            decay_steps=config.decay_steps,
            decay_rate=config.decay_rate,
        )
        model = create_model(
            config.architecture,
            image_size + (3,),
            config.pooling,
            OPTIMIZER,
            lr_schedule,
            weighted_multi_label_log_loss,
            METRICS,
        )

        train_model(model, training_generator, validation_generator, config)

In [None]:
# Grid sweep over every combination of the values listed in the experiment
# settings; wandb.ai compares the runs by the logged 'val_loss' (lower is better).
sweep_config = {
    'method': 'grid',
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    # each hyperparameter name is mapped to {'values': <list of candidates>}
    'parameters': {
        name: {'values': candidates}
        for name, candidates in (
            ('batch_size', BATCH_SIZE),
            ('learning_rate', LEARNING_RATE),
            ('decay_steps', DECAY_STEPS),
            ('decay_rate', DECAY_RATE),
            ('context3d', CONTEXT_3D),
            ('clahe', CLAHE),
            ('window_width', WINDOW_WIDTH),
            ('window_center', WINDOW_CENTER),
            ('pooling', POOLING),
            ('architecture', ARCHITECTURE),
            ('noise', NOISE),
            ('balanced', BALANCED),
        )
    },
}

# register the sweep and start an agent that calls cross_validation() once per configuration
sweep_id = wandb.sweep(sweep_config, project="Detection-of-defects-in-CT-images")
wandb.agent(sweep_id, function=cross_validation)