# Setup

In [None]:
# Remove the already-installed kaggle package, then install/upgrade the listed
# packages (-qq: quiet output, -U: upgrade to the newest release).
# NOTE(review): versions are unpinned, so a re-run may pull different releases
# than the ones recorded in the output below.
!pip uninstall -y kaggle
!pip install -qq -U "kaggle" "tensorflow-model-optimization" "gcsfs" "focal-loss" "keras-swa"

Uninstalling kaggle-1.5.6:
  Successfully uninstalled kaggle-1.5.6
[K     |████████████████████████████████| 61kB 2.2MB/s 
[K     |████████████████████████████████| 174kB 9.1MB/s 
[K     |████████████████████████████████| 81kB 11.1MB/s 
[K     |████████████████████████████████| 296kB 37.3MB/s 
[?25h  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Building wheel for keras-swa (setup.py) ... [?25l[?25hdone


In [None]:
import os
# Use .get() so that a missing variable trips the assertion (and shows its
# hint) instead of raising a bare KeyError from os.environ[...].
assert os.environ.get('COLAB_TPU_ADDR'), 'Make sure to select TPU from Edit > Notebook settings > Hardware accelerator'

In [None]:
# gRPC endpoint of the TPU, built from the COLAB_TPU_ADDR environment variable.
TPU_NAME = 'grpc://' + os.environ['COLAB_TPU_ADDR']
# Bare expression: displays the endpoint as the cell output.
TPU_NAME

'grpc://10.30.252.170:8470'

In [None]:
import sys

# Authenticate only when the google.colab module is already loaded (i.e. the
# notebook is running inside Colab); otherwise this cell does nothing.
if 'google.colab' in sys.modules:
    from google.colab import auth
    auth.authenticate_user()

# tensorflow/models Image Classification

In [None]:
# Start from a clean checkout: delete any previous `models` directory, then
# clone tensorflow/models at the v2.2.0 tag. Later cells append `models` to
# sys.path to import the `official` package from it.
!rm -rf models
!git clone https://github.com/tensorflow/models.git -b v2.2.0

Cloning into 'models'...
remote: Enumerating objects: 37725, done.[K
remote: Total 37725 (delta 0), reused 0 (delta 0), pack-reused 37725[K
Receiving objects: 100% (37725/37725), 522.71 MiB | 48.00 MiB/s, done.
Resolving deltas: 100% (25239/25239), done.
Note: checking out '93490036e00f37ecbe6693b9ff4ae488bb8e9270'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>



# Pipeline

In [None]:
#@title Dataset

import sys
sys.path.append("models")
import os
from typing import Any, List, Optional, Tuple, Mapping, Union
from absl import logging
from dataclasses import dataclass
import tensorflow as tf
import tensorflow_datasets as tfds


from official.modeling.hyperparams import base_config
from official.vision.image_classification import augment
from official.vision.image_classification import preprocessing
from official.vision.image_classification import dataset_factory


# def cutmix(image, label, PROBABILITY=0.4):
#     # input image - is a batch of images of size [n,dim,dim,3] not a single image of [dim,dim,3]
#     # output - a batch of images with cutmix applied
    
#     DIM = img_size    
#     imgs = []; labs = []
    
#     for j in range(BATCH_SIZE):
        
#         #random_uniform( shape, minval=0, maxval=None)        
#         # DO CUTMIX WITH PROBABILITY DEFINED ABOVE
#         P = tf.cast(tf.random.uniform([], 0, 1) <= PROBABILITY, tf.int32)
        
#         # CHOOSE RANDOM IMAGE TO CUTMIX WITH
#         k = tf.cast(tf.random.uniform([], 0, BATCH_SIZE), tf.int32)
        
#         # CHOOSE RANDOM LOCATION
#         x = tf.cast(tf.random.uniform([], 0, DIM), tf.int32)
#         y = tf.cast(tf.random.uniform([], 0, DIM), tf.int32)
        
#         # Beta(1, 1)
#         b = tf.random.uniform([], 0, 1) # this is beta dist with alpha=1.0
        

#         WIDTH = tf.cast(DIM * tf.math.sqrt(1-b),tf.int32) * P
#         ya = tf.math.maximum(0,y-WIDTH//2)
#         yb = tf.math.minimum(DIM,y+WIDTH//2)
#         xa = tf.math.maximum(0,x-WIDTH//2)
#         xb = tf.math.minimum(DIM,x+WIDTH//2)
        
#         # MAKE CUTMIX IMAGE
#         one = image[j,ya:yb,0:xa,:]
#         two = image[k,ya:yb,xa:xb,:]
#         three = image[j,ya:yb,xb:DIM,:]        
#         #ya:yb
#         middle = tf.concat([one,two,three],axis=1)

#         img = tf.concat([image[j,0:ya,:,:],middle,image[j,yb:DIM,:,:]],axis=0)
#         imgs.append(img)
        
#         # MAKE CUTMIX LABEL
#         a = tf.cast(WIDTH*WIDTH/DIM/DIM,tf.float32)
#         lab1 = label[j,]
#         lab2 = label[k,]
#         labs.append((1-a)*lab1 + a*lab2)

#     image2 = tf.reshape(tf.stack(imgs),(BATCH_SIZE,DIM,DIM,3))
#     label2 = tf.reshape(tf.stack(labs),(BATCH_SIZE, nb_classes))
#     return image2,label2

class DatasetBuilder(dataset_factory.DatasetBuilder):
  """`dataset_factory.DatasetBuilder` variant for image + metadata TFRecords.

  Each parsed record is emitted as `({'image': ..., 'metadata': ...}, y)`.
  `metadata` is a length-9 float32 vector: `age_approx`, `sex` and a 7-way
  one-hot of `anatom_site_general_challenge`. `y` is the `target` label for
  the `train` and `validation` splits and the record's `image_name` for any
  other split.
  """

  # Depth of the one-hot encoding of `anatom_site_general_challenge`.
  _NUM_ANATOM_SITES = 7

  def load_records(self) -> tf.data.Dataset:
    """Return a dataset of TFRecord file names for the configured split.

    When `config.filenames` is unset, files are globbed as
    `<data_dir>/<split>*`; the `train` and `validation` splits are listed in
    shuffled order, any other split in deterministic order. When explicit
    file names are given they are shuffled only while training.

    Raises:
      ValueError: if neither `config.filenames` nor `config.data_dir` is set.
    """
    logging.info('Using TFRecords to load data.')

    if self.config.filenames is None:
      if self.config.data_dir is None:
        raise ValueError('Dataset must specify a path for the data files.')

      file_pattern = os.path.join(self.config.data_dir,
                                  '{}*'.format(self.config.split))
      shuffle = self.config.split in ['train', 'validation']
      dataset = tf.data.Dataset.list_files(file_pattern, shuffle=shuffle)
    else:
      dataset = tf.data.Dataset.from_tensor_slices(self.config.filenames)
      if self.is_training:
        # tf.data transformations return a NEW dataset; the result has to be
        # re-assigned, otherwise the file shuffle is silently dropped.
        dataset = dataset.shuffle(
            buffer_size=self.config.file_shuffle_buffer_size)

    return dataset

  def pipeline(self,
               dataset: tf.data.Dataset,
               input_context: Optional[tf.distribute.InputContext] = None
              ) -> tf.data.Dataset:
    """Build a pipeline fetching, shuffling, and preprocessing the dataset.

    Args:
      dataset: the dataset to transform; for the `records` builder this is the
        dataset of file names returned by `load_records`.
      input_context: optional distribution context; when it reports more than
        one input pipeline the dataset is sharded across them.

    Returns:
      The batched and prefetched dataset.
    """
    if input_context and input_context.num_input_pipelines > 1:
      dataset = dataset.shard(input_context.num_input_pipelines,
                              input_context.input_pipeline_id)

    if self.is_training and not self.config.cache:
      dataset = dataset.repeat()

    if self.config.builder == 'records':
      # Read the data from disk in parallel
      buffer_size = 8 * 1024 * 1024  # Use 8 MiB per file
      dataset = dataset.interleave(
          lambda name: tf.data.TFRecordDataset(name, buffer_size=buffer_size),
          cycle_length=16,
          num_parallel_calls=tf.data.experimental.AUTOTUNE)

    dataset = dataset.prefetch(self.global_batch_size)

    if self.config.cache:
      dataset = dataset.cache()

    if self.is_training:
      dataset = dataset.shuffle(self.config.shuffle_buffer_size)
      dataset = dataset.repeat()

    # Parse, pre-process, and batch the data in parallel
    if self.config.builder == 'records':
      if self.config.split in ['train', 'validation']:
        preprocess = self.parse_record
      else:
        preprocess = self.parse_test_record
    else:
      preprocess = self.preprocess

    dataset = dataset.map(preprocess,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # Dataset balancing utilities. They are only referenced by the
    # commented-out rejection-resampling experiment below and are currently
    # unused.
    @tf.function
    def class_func(image, label):
      return label

    @tf.function
    def drop_extra_label(extra_label, image_and_label):
      return image_and_label

    # if self.is_training:
    #   # Balance the dataset
    #   TARGET_DIST = [0.5, 0.5]
    #   INITIAL_DIST = [0.95, 0.05]

    #   resampler = tf.data.experimental.rejection_resample(
    #       class_func,
    #       target_dist=TARGET_DIST,
    #       # seed=42,
    #       initial_dist=INITIAL_DIST
    #   )
    #   dataset = dataset.apply(resampler)

    dataset = dataset.batch(self.batch_size, drop_remainder=self.is_training)

    # if self.is_training:
    #   # The resampler creates (class, example) pairs from the output of the
    #   # class_func. In this case, the example was already a (feature, label)
    #   # pair, so use map to drop the extra copy of the labels
    #   dataset = dataset.map(
    #       drop_extra_label,
    #       num_parallel_calls=tf.data.experimental.AUTOTUNE
    #   )

    if self.config.split in ['test']:
      options = tf.data.Options()
      options.experimental_optimization.parallel_batch = True
      options.experimental_optimization.map_fusion = True
      # NOTE(review): an earlier comment here said map vectorization was
      # disabled for balanced sampling, but it is enabled. Revisit if the
      # rejection resampling above is switched back on.
      options.experimental_optimization.map_vectorization.enabled = True
      options.experimental_optimization.map_parallelization = True
      dataset = dataset.with_options(options)

    elif self.is_training and self.config.deterministic_train is not None:
      options = tf.data.Options()
      # options.experimental_deterministic = self.config.deterministic_train
      options.experimental_slack = self.config.use_slack
      options.experimental_optimization.parallel_batch = True
      options.experimental_optimization.map_fusion = True
      # NOTE(review): see the note on map vectorization in the branch above.
      options.experimental_optimization.map_vectorization.enabled = True
      options.experimental_optimization.map_parallelization = True
      dataset = dataset.with_options(options)

    # Prefetch overlaps in-feed with training
    # Note: autotune here is not recommended, as this can lead to memory leaks.
    # Instead, use a constant prefetch size like the number of devices.
    dataset = dataset.prefetch(self.config.num_devices)

    return dataset

  def _parse_example(self, record: tf.Tensor, with_image_name: bool = False):
    """Decode one serialized example shared by both record parsers.

    Args:
      record: scalar string tensor holding a serialized example.
      with_image_name: also require and parse the `image_name` feature.

    Returns:
      A `(parsed, tabular, label)` tuple: the raw parsed feature dict, the
      length-9 float32 metadata vector and the int32 `target`. Missing integer
      features default to -1 (an all-zero one-hot for the anatomical site).
    """
    keys_to_features = {
        'image': tf.io.FixedLenFeature((), tf.string, ''),
        'age_approx': tf.io.FixedLenFeature([], tf.int64, -1),
        'sex': tf.io.FixedLenFeature([], tf.int64, -1),
        'anatom_site_general_challenge':
            tf.io.FixedLenFeature([], tf.int64, -1),
        'target': tf.io.FixedLenFeature([], tf.int64, -1),
    }
    if with_image_name:
      keys_to_features['image_name'] = tf.io.FixedLenFeature((), tf.string)

    parsed = tf.io.parse_single_example(record, keys_to_features)

    scalars = [tf.cast(parsed[name], dtype=tf.float32)
               for name in ('age_approx', 'sex')]
    site = tf.one_hot(parsed['anatom_site_general_challenge'],
                      self._NUM_ANATOM_SITES, dtype=tf.float32)
    # Stack scalars so the result has a static shape of (2 + sites,).
    tabular = tf.stack(
        scalars + [site[i] for i in range(self._NUM_ANATOM_SITES)])

    label = tf.cast(parsed['target'], dtype=tf.int32)
    return parsed, tabular, label

  @tf.function
  def parse_record(self, record: tf.Tensor
                  ) -> Tuple[Mapping[str, tf.Tensor], tf.Tensor]:
    """Parse a train/validation record into `(features, label)`.

    Returns:
      `({'image': image, 'metadata': tabular}, label)` where `image` and
      `label` come from `self.preprocess`.
    """
    parsed, tabular, label = self._parse_example(record)
    image, label = self.preprocess(parsed['image'], label)
    return {'image': image, 'metadata': tabular}, label

  def parse_test_record(self, record: tf.Tensor
                       ) -> Tuple[Mapping[str, tf.Tensor], tf.Tensor]:
    """Parse a test record into `(features, image_name)`.

    The label produced by `self.preprocess` is discarded; the record's
    `image_name` string is returned in its place.
    """
    parsed, tabular, label = self._parse_example(record, with_image_name=True)
    image, _ = self.preprocess(parsed['image'], label)
    return {'image': image, 'metadata': tabular}, parsed['image_name']

  def preprocess(self, image: tf.Tensor, label: tf.Tensor
                ) -> Tuple[tf.Tensor, tf.Tensor]:
    """Apply image preprocessing and augmentation to the image and label.

    Uses `preprocess_for_train` (with the configured augmenter) while
    training and `preprocess_for_eval` otherwise. The label is cast to int32
    and, when `config.one_hot` is set, one-hot encoded to `num_classes`.
    """
    if self.is_training:
      image = preprocessing.preprocess_for_train(
          image,
          image_size=self.image_size,
          mean_subtract=self.config.mean_subtract,
          standardize=self.config.standardize,
          dtype=self.dtype,
          augmenter=self.augmenter)
    else:
      image = preprocessing.preprocess_for_eval(
          image,
          image_size=self.image_size,
          num_channels=self.num_channels,
          mean_subtract=self.config.mean_subtract,
          standardize=self.config.standardize,
          dtype=self.dtype)

    label = tf.cast(label, tf.int32)
    if self.config.one_hot:
      label = tf.one_hot(label, self.num_classes)
      label = tf.reshape(label, [self.num_classes])

    return image, label


# Monkey-patch the library module: anything that looks the builder up as
# `dataset_factory.DatasetBuilder` from now on gets the subclass defined above.
dataset_factory.DatasetBuilder = DatasetBuilder

In [None]:
#@title EfficientNet Model
import math
import os
from typing import Any, Dict, Optional, Text, Tuple

from absl import logging
import tensorflow as tf

from official.modeling import tf_utils
from official.vision.image_classification.efficientnet import efficientnet_model
from official.vision.image_classification.efficientnet.common_modules import TpuBatchNormalization


# `dataclass` is imported in the Dataset cell above. Without the decorator the
# inherited, already-generated __init__ keeps the parent's `num_classes`
# default, so the override below would never reach instances.
@dataclass
class ModelConfig(efficientnet_model.ModelConfig):
  """EfficientNet model config that defaults to a two-class problem."""
  num_classes: int = 2


class EfficientNet(tf.keras.Model):
  """Wrapper class for an EfficientNet Keras model.
  Contains helper methods to build, manage, and save metadata about the model.

  `from_name` does not return an instance of this class: it returns a plain
  `tf.keras.Model` that combines a truncated EfficientNet with a small dense
  branch for tabular metadata and ends in a single sigmoid unit.
  """
  def __init__(self,
               config: ModelConfig = None,
               overrides: Dict[Text, Any] = None):
    """Create an EfficientNet model.
    Args:
      config: (optional) the main model parameters to create the model
      overrides: (optional) a dict containing keys that can override
                 config
    """
    overrides = overrides or {}
    config = config or ModelConfig()

    # Stored on the instance before the Keras constructor runs; `replace` is
    # given the overrides as keyword arguments.
    self.config = config.replace(**overrides)

    input_channels = self.config.input_channels
    model_name = self.config.model_name
    input_shape = (None, None, input_channels)  # Should handle any size image

    # The input is named 'image' to match the 'image' key of the feature dict
    # produced by DatasetBuilder.parse_record.
    image_input = tf.keras.layers.Input(shape=input_shape, name='image')

    output = efficientnet_model.efficientnet(image_input, self.config)

    logging.info('Building model %s with params %s',
                 model_name,
                 self.config)

    super(EfficientNet, self).__init__(
        inputs=image_input, 
        outputs=output, 
        name=model_name)

  @classmethod
  def from_name(cls,
                model_name: Text,
                model_weights_path: Text = None,
                copy_to_local: bool = False,
                overrides: Dict[Text, Any] = None):
    """Construct an EfficientNet model from a predefined model name.

    Args:
      model_name: key into `efficientnet_model.MODEL_CONFIGS` (or into the
        extra configs supplied via `overrides['model_config']`).
      model_weights_path: (optional) path handed to
        `tf.keras.models.load_model`; when `copy_to_local` is set it is
        treated as a directory containing `model.h5`.
      copy_to_local: copy `<model_weights_path>/model.h5` to
        `/tmp/<model_name>.h5` and load from there.
      overrides: (optional) config overrides; the `model_config` key, if
        present, is removed and merged into the known model configs.

    Returns:
      A `tf.keras.Model` with inputs `[image, metadata]` and a single
      sigmoid output named 'probs'.

    Raises:
      ValueError: if `model_name` is not a known config name.
    """
    model_configs = dict(efficientnet_model.MODEL_CONFIGS)
    # Copy so that popping 'model_config' below does not mutate the caller's
    # dict.
    overrides = dict(overrides) if overrides else {}

    # One can define their own custom models if necessary
    model_configs.update(overrides.pop('model_config', {}))

    if model_name not in model_configs:
      raise ValueError('Unknown model name {}'.format(model_name))

    config = model_configs[model_name]

    model = cls(config=config, overrides=overrides)

    # Pop the classification layer
    # NOTE(review): the fixed index -4 presumably lands on the pooled feature
    # vector (pool, dropout, logits, probs at the tail) -- confirm, since the
    # tail length may change with the config (e.g. no dropout layer).
    output = model.layers[-4].output
    model = tf.keras.Model(model.inputs, output)
    # print(model.summary())
    if model_weights_path:
      if copy_to_local:
        tmp_file = os.path.join('/tmp', model_name + '.h5')
        model_weights_file = os.path.join(model_weights_path, 'model.h5')
        tf.io.gfile.copy(model_weights_file, tmp_file, overwrite=True)
        model_weights_path = tmp_file

      loaded_model = tf.keras.models.load_model(model_weights_path, compile=False)
      loaded_model = tf.keras.Model(loaded_model.inputs, loaded_model.output)
      # print(loaded_model.summary())
      # NOTE(review): set_weights needs the loaded model's weight list to match
      # the truncated model's exactly -- presumably the saved file is already a
      # headless feature extractor; confirm.
      model.set_weights(loaded_model.get_weights())

    # Image input 
    image_input = model.input

    # Get the bias initializer
    # NOTE(review): magic constant, presumably the log-odds of the positive
    # class prior -- TODO confirm. It is applied to every Dense layer below,
    # not only to the final logit.
    initial_bias = -2.3498501
    # NOTE(review): `config` here is the base config looked up by name; the
    # `overrides` were applied only to the copy stored on the backbone, so
    # overridden activation / BN / dropout values do not reach the head.
    activation = tf_utils.get_activation(config.activation)
    # activation = 'relu'
    bn_axis = 1 if config.data_format == 'channels_first' else -1

    # Cast to float32 in case we have a different model dtype
    # feature_vector_output = tf.cast(model.output, tf.float32)
    feature_vector_output = model.output
    # feature_vector_output = tf.keras.layers.GlobalAveragePooling2D()(feature_vector_output)

    # Metadata inputs
    # Shape (9,) matches the vector built in DatasetBuilder.parse_record:
    # age, sex and a 7-way one-hot of the anatomical site.
    metadata_input = tf.keras.layers.Input(shape=(9,), name='metadata')
    metadata = tf.keras.layers.Dense(
              64,
              kernel_initializer=efficientnet_model.DENSE_KERNEL_INITIALIZER,
              bias_initializer=tf.keras.initializers.Constant(initial_bias))(metadata_input)
    # This branch applies batch norm BEFORE the activation, whereas the dense
    # blocks below apply the activation first.
    metadata = TpuBatchNormalization(
                  axis=bn_axis,
                  momentum=config.bn_momentum,
                  epsilon=config.bn_epsilon)(metadata)
    metadata = tf.keras.layers.Activation(activation)(metadata)

    # Concatenate features
    x = tf.keras.layers.concatenate([feature_vector_output, metadata])
  
    # Head block 1: Dense(512) -> activation -> batch norm -> dropout.
    x = tf.keras.layers.Dense(
          512,
          kernel_initializer=efficientnet_model.DENSE_KERNEL_INITIALIZER,
          bias_initializer=tf.keras.initializers.Constant(initial_bias)
          )(x)
    x = tf.keras.layers.Activation(activation)(x)
    x = TpuBatchNormalization(
                  axis=bn_axis,
                  momentum=config.bn_momentum,
                  epsilon=config.bn_epsilon)(x)
    x = tf.keras.layers.Dropout(config.dropout_rate)(x)

    # Head block 2: Dense(192) -> activation -> batch norm -> dropout.
    x = tf.keras.layers.Dense(
          192,
          kernel_initializer=efficientnet_model.DENSE_KERNEL_INITIALIZER,
          bias_initializer=tf.keras.initializers.Constant(initial_bias)
          )(x)
    x = tf.keras.layers.Activation(activation)(x)
    x = TpuBatchNormalization(
                  axis=bn_axis,
                  momentum=config.bn_momentum,
                  epsilon=config.bn_epsilon)(x)
    x = tf.keras.layers.Dropout(config.dropout_rate)(x)

    # Single-unit output layer.
    x = tf.keras.layers.Dense(
      1,
      kernel_initializer=efficientnet_model.DENSE_KERNEL_INITIALIZER,
      bias_initializer=tf.keras.initializers.Constant(initial_bias),
      # kernel_regularizer=tf.keras.regularizers.l2(config.weight_decay),
      # bias_regularizer=tf.keras.regularizers.l2(config.weight_decay),
      name='logits',
      # dtype='float32'
      )(x)

    # The sigmoid is pinned to float32 regardless of the model dtype.
    x = tf.keras.layers.Activation('sigmoid', name='probs', dtype='float32')(x)

    all_inputs = [
        image_input,
        metadata_input
    ]

    model = tf.keras.Model(inputs=all_inputs, outputs=x)
    return model

# Monkey-patch the library module so that lookups of
# `efficientnet_model.EfficientNet` / `efficientnet_model.ModelConfig` made
# after this cell resolve to the classes defined above.
efficientnet_model.EfficientNet = EfficientNet
efficientnet_model.ModelConfig = ModelConfig


In [None]:
#@title Callbacks
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

import os
from absl import logging

import tensorflow as tf
import tensorflow.keras.backend as K
from typing import Any, List, MutableMapping
from sklearn.metrics import roc_auc_score

from official.utils.misc import keras_utils
from official.vision.image_classification import callbacks

import tensorflow as tf
from typeguard import typechecked
from tensorflow_addons.optimizers.average_wrapper import AveragedOptimizerWrapper



def get_callbacks(validation_dataset,
                  num_validation_samples,
                  model_checkpoint: bool = True,
                  include_tensorboard: bool = True,
                  time_history: bool = True,
                  reduce_lr: bool = True,
                  track_lr: bool = True,
                  write_model_weights: bool = True,
                  initial_step: int = 0,
                  batch_size: int = 0,
                  log_steps: int = 0,
                  model_dir: str = None) -> List[tf.keras.callbacks.Callback]:
  """Build the list of Keras callbacks used for training.

  Args:
    validation_dataset: dataset handed to the AUC-tracking `ModelCheckpoint`.
    num_validation_samples: number of validation labels that callback caches.
    model_checkpoint: add the checkpoint callback (best `val_auc`, weights
      only, written to `<model_dir>/model.ckpt`).
    include_tensorboard: add the `CustomTensorBoard` callback.
    time_history: accepted for interface compatibility; currently unused.
    reduce_lr: add `ReduceLROnPlateau` monitoring `val_auc`.
    track_lr: forwarded to `CustomTensorBoard`.
    write_model_weights: forwarded to `CustomTensorBoard` as `write_images`.
    initial_step: forwarded to `CustomTensorBoard`.
    batch_size: accepted for interface compatibility; currently unused.
    log_steps: accepted for interface compatibility; currently unused.
    model_dir: output directory; `None` is treated as ''.

  Returns:
    The list of enabled callbacks, in the order checkpoint, TensorBoard,
    learning-rate reduction.
  """
  model_dir = model_dir or ''
  # Deliberately NOT named `callbacks`: that would shadow the imported
  # `callbacks` module, which is needed below for CustomTensorBoard.
  callback_list = []

  if model_checkpoint:
    # ckpt_full_path = os.path.join(model_dir, 'model.ckpt-{epoch:04d}')
    ckpt_full_path = os.path.join(model_dir, 'model.ckpt')
    callback_list.append(
        ModelCheckpoint(
            validation_dataset,
            num_validation_samples,
            ckpt_full_path,
            monitor='val_auc',
            mode='max',
            # save_freq=250,
            save_best_only=True,
            save_weights_only=True, verbose=1))
  if include_tensorboard:
    # CustomTensorBoard is not defined in this notebook; take it from the
    # official image-classification callbacks module.
    callback_list.append(
        callbacks.CustomTensorBoard(
            log_dir=model_dir,
            track_lr=track_lr,
            initial_step=initial_step,
            write_images=write_model_weights))

  if reduce_lr:
    callback_list.append(
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_auc',
            factor=0.6,
            patience=3, verbose=1, mode='max',
            min_delta=0.0001, min_lr=1e-8))

  # callback_list.append(
  #     AverageModelCheckpoint(
  #         filepath=os.path.join(model_dir, 'best_model/model.ckpt'),
  #         save_weights_only=True,
  #         update_weights=True))

  return callback_list


class ModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
    """Checkpoint callback that ranks checkpoints by ROC-AUC on a dataset.

    Instead of reading the monitored value from the training logs, the
    callback runs `model.predict` on `validation_dataset` and scores the
    predictions with `sklearn.metrics.roc_auc_score` against labels cached at
    construction time.

    Args:
        validation_dataset: batched dataset of `(features, label)` pairs.
        num_validation_samples: number of labels to cache from the dataset.
        *args, **kwargs: forwarded to `tf.keras.callbacks.ModelCheckpoint`.
    """

    def __init__(self, validation_dataset, num_validation_samples, *args, **kwargs):
        super(ModelCheckpoint, self).__init__(*args, **kwargs)
        self.validation_dataset = validation_dataset
        # Materialise the first `num_validation_samples` labels once so every
        # evaluation compares against the same ground truth.
        labels_dataset = self.validation_dataset.map(lambda image, label: label).unbatch()
        self.y_true = next(iter(labels_dataset.batch(num_validation_samples))).numpy()

    def _write_checkpoint(self, filepath):
        """Save weights or the full model to `filepath`, per `save_weights_only`."""
        if self.save_weights_only:
            self.model.save_weights(filepath, overwrite=True)
        else:
            self.model.save(filepath, overwrite=True)

    def _save_model(self, epoch, logs):
        """Score the model on the validation data and checkpoint if warranted.

        Raises:
            IOError: if writing the checkpoint fails; a clearer message is
                used when `filepath` is an existing directory.
        """
        # NOTE(review): the predictions must line up one-to-one with the
        # cached labels, i.e. the dataset has to yield exactly
        # `num_validation_samples` examples in a stable order -- confirm.
        y_pred = self.model.predict(self.validation_dataset, verbose=0)
        # y_pred = K.sigmoid(y_pred)
        current = roc_auc_score(self.y_true, y_pred)

        if not (isinstance(self.save_freq, int) or
                self.epochs_since_last_save >= self.period):
            return

        self.epochs_since_last_save = 0
        filepath = self._get_file_path(epoch, logs)

        try:
            if self.save_best_only:
                if self.monitor_op(current, self.best):
                    if self.verbose > 0:
                        print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                              ' saving model to %s' % (epoch + 1, self.monitor,
                                                       self.best, current, filepath))
                    self.best = current
                    self._write_checkpoint(filepath)
                elif self.verbose > 0:
                    print('\nEpoch %05d: %s did not improve from %0.5f (val_auc = %0.5f)' %
                          (epoch + 1, self.monitor, self.best, current))
            else:
                if self.verbose > 0:
                    print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
                self._write_checkpoint(filepath)

            self._maybe_remove_file()
        except IOError as e:
            # `e.errno` appears to be `None` so checking the content of
            # `e.args[0]`. `str()` replaces `six.ensure_str`: `six` is never
            # imported here, so the old handler died with a NameError.
            if e.args and 'is a directory' in str(e.args[0]):
                raise IOError('Please specify a non-directory filepath for '
                              'ModelCheckpoint. Filepath used is an existing '
                              'directory: {}'.format(filepath))
            # Any other I/O failure used to be swallowed silently, leaving the
            # run without a checkpoint; surface it instead.
            raise
              
class AverageModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
    r"""The callback that should be used with optimizers that extend
    AverageWrapper, i.e., MovingAverage and StochasticAverage optimizers.
    It saves and, optionally, assigns the averaged weights.

    This class derives from `tf.keras.callbacks.ModelCheckpoint` directly, not
    from the AUC-tracking `ModelCheckpoint` defined in this notebook: that
    class's constructor expects `(validation_dataset, num_validation_samples,
    ...)` first, so forwarding `filepath`/`monitor` to it positionally would
    mis-bind them and fail.

    Args:
        update_weights: If True, assign the moving average weights
            to the model, and save them. If False, keep the old
            non-averaged weights, but the saved model uses the
            average weights.
        See `tf.keras.callbacks.ModelCheckpoint` for the other args.
    """

    @typechecked
    def __init__(
        self,
        update_weights: bool,
        filepath: str,
        monitor: str = "val_loss",
        verbose: int = 0,
        save_best_only: bool = False,
        save_weights_only: bool = False,
        mode: str = "auto",
        save_freq: str = "epoch",
        **kwargs
    ):
        self.update_weights = update_weights
        super().__init__(
            filepath,
            monitor,
            verbose,
            save_best_only,
            save_weights_only,
            mode,
            save_freq,
            **kwargs,
        )

    def set_model(self, model):
        """Attach `model`, rejecting optimizers that do not average weights.

        Raises:
            TypeError: if `model.optimizer` is not an
                `AveragedOptimizerWrapper`.
        """
        if not isinstance(model.optimizer, AveragedOptimizerWrapper):
            raise TypeError(
                "AverageModelCheckpoint is only used when training "
                "with MovingAverage or StochasticAverage"
            )
        return super().set_model(model)

    def _save_model(self, epoch, logs):
        """Save the averaged weights, optionally leaving them in the model."""
        assert isinstance(self.model.optimizer, AveragedOptimizerWrapper)

        if self.update_weights:
            self.model.optimizer.assign_average_vars(self.model.variables)
            return super()._save_model(epoch, logs)

        # Note: `model.get_weights()` gives us the weights (non-ref)
        # whereas `model.variables` returns references to the variables.
        non_avg_weights = self.model.get_weights()
        self.model.optimizer.assign_average_vars(self.model.variables)
        # result is currently None, since `super._save_model` doesn't
        # return anything, but this may change in the future.
        result = super()._save_model(epoch, logs)
        # Restore the live (non-averaged) weights after saving.
        self.model.set_weights(non_avg_weights)
        return result
              
# Monkey-patch the official callbacks module so that code calling
# `callbacks.get_callbacks` picks up the version defined in this cell.
callbacks.get_callbacks = get_callbacks

In [None]:
#@title Classifier Trainer
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import pprint
from typing import Any, Tuple, Text, Optional, Mapping

from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
import tensorflow_addons as tfa
from focal_loss import BinaryFocalLoss

from official.vision.image_classification.classifier_trainer import *
from official.vision.image_classification.classifier_trainer import (
    _get_params_from_flags, _get_dataset_builders
)

from official.vision.image_classification import classifier_trainer

import tensorflow.keras.backend as K


class AUC(tf.keras.metrics.AUC):
  """`tf.keras.metrics.AUC` that passes predictions through a sigmoid first."""

  def update_state(self, y_true, y_pred, sample_weight=None):
    """Accumulate AUC statistics on `sigmoid(y_pred)`."""
    squashed = K.sigmoid(y_pred)
    return super().update_state(y_true, squashed, sample_weight)


class ROCAUCLoss(tf.keras.losses.Loss):
    """Differentiable surrogate for the ROC AUC score.

    Based on the Wilcoxon-Mann-Whitney U statistic approximation from
    Yan, L., Dodier, R., Mozer, M. C., & Wolniewicz, R. (2003),
    "Optimizing Classifier Performance via an Approximation to the
    Wilcoxon-Mann-Whitney Statistic".

    Every (positive, negative) pair whose score gap `pos - neg` is below the
    margin `gamma` contributes `(gamma - gap) ** p` to the loss.
    """

    def call(self, y_true, y_pred):
        """Return the summed pairwise penalty for one batch.

        Arguments:
            y_true: `Tensor`. Targets; cast to bool to split the batch into
                positives and negatives.
            y_pred: `Tensor`. Predicted values.
        """
        is_positive = tf.cast(y_true, tf.bool)
        # Row vector of positive scores, column vector of negative scores, so
        # that broadcasting enumerates every positive/negative pair.
        pos_scores = tf.expand_dims(tf.boolean_mask(y_pred, is_positive), 0)
        neg_scores = tf.expand_dims(
            tf.boolean_mask(y_pred, tf.logical_not(is_positive)), 1)

        # original paper suggests performance is robust to exact parameter
        # choice (it proposes gamma = 0.2, p = 3)
        margin = 0.5
        exponent = 2

        pair_grid = tf.zeros_like(pos_scores * neg_scores)
        gaps = pair_grid + pos_scores - neg_scores - margin
        violations = tf.boolean_mask(gaps, gaps < 0.0)
        return tf.reduce_sum(tf.pow(-violations, exponent))

class SurrogateLoss(tf.keras.losses.Loss):
    """Pairwise cross-entropy surrogate for ROC AUC.

    For every pair of examples with different labels, a sigmoid cross-entropy
    penalty is applied to the signed difference of their logits, so that the
    example with the larger label is pushed to the larger logit.
    """

    def call(self, y_true, y_pred):
        """Return the scalar surrogate loss for one batch.

        Args:
            y_true: labels, same shape as `y_pred`.
            y_pred: model outputs, treated as logits.

        Returns:
            Scalar tensor: the per-example pairwise losses summed over the
            whole batch.
        """
        weights = 1.0
        surrogate_type = 'xent'
        labels = y_true
        logits = y_pred

        # Convert inputs to tensors and standardize dtypes.
        labels, logits, weights, original_shape = self._prepare_labels_logits_weights(
            labels, logits, weights)

        # Create tensors of pairwise differences for logits and labels, and
        # pairwise products of weights. These have shape
        # [batch_size, batch_size, num_labels].
        logits_difference = tf.expand_dims(logits, 0) - tf.expand_dims(logits, 1)
        labels_difference = tf.expand_dims(labels, 0) - tf.expand_dims(labels, 1)
        weights_product = tf.expand_dims(weights, 0) * tf.expand_dims(weights, 1)

        signed_logits_difference = labels_difference * logits_difference
        raw_loss = self.weighted_surrogate_loss(
            labels=tf.ones_like(signed_logits_difference),
            logits=signed_logits_difference,
            surrogate_type=surrogate_type)
        weighted_loss = weights_product * raw_loss

        # Zero out entries of the loss where labels_difference zero (so loss is only
        # computed on pairs with different labels).
        loss = tf.math.reduce_mean(tf.math.abs(labels_difference) * weighted_loss, 0) * 0.5
        loss = tf.reshape(loss, original_shape)
        # `loss` is a per-example tensor here. Sum over ALL examples: indexing
        # `loss[0]` first would keep only the first example's pairs and discard
        # the rest of the batch.
        return tf.reduce_sum(loss)

    def weighted_sigmoid_cross_entropy_with_logits(self, labels,
                                                   logits,
                                                   positive_weights=1.0,
                                                   negative_weights=1.0,
                                                   name=None):
        """Sigmoid cross-entropy with separate positive/negative weights.

        `name` is accepted but unused.
        """
        labels, logits, positive_weights, negative_weights = self.prepare_loss_args(
            labels, logits, positive_weights, negative_weights)

        # Numerically stable softplus(-logits) = log(1 + exp(-logits)).
        softplus_term = tf.math.add(tf.math.maximum(-logits, 0.0),
                                    tf.math.log(1.0 + tf.math.exp(-tf.math.abs(logits))))
        weight_dependent_factor = (
            negative_weights + (positive_weights - negative_weights) * labels)
        return (negative_weights * (logits - labels * logits) +
                weight_dependent_factor * softplus_term)

    def weighted_surrogate_loss(self, labels,
                                logits,
                                surrogate_type='xent',
                                positive_weights=1.0,
                                negative_weights=1.0,
                                name=None):
        """Dispatch to the surrogate named by `surrogate_type`.

        Raises:
            ValueError: for any `surrogate_type` other than 'xent'.
        """
        if surrogate_type == 'xent':
            return self.weighted_sigmoid_cross_entropy_with_logits(
                logits=logits,
                labels=labels,
                positive_weights=positive_weights,
                negative_weights=negative_weights,
                name=name)
        raise ValueError('surrogate_type %s not supported.' % surrogate_type)

    def _prepare_labels_logits_weights(self, labels, logits, weights):
        """Convert inputs to tensors of one dtype and at least rank 2.

        Returns:
            `(labels, logits, weights, original_shape)`, where
            `original_shape` is the labels' static shape with the batch
            dimension replaced by -1.

        Raises:
            ValueError: if the label and logit shapes are incompatible.
        """
        # Convert `labels` and `logits` to Tensors and standardize dtypes.
        logits = tf.convert_to_tensor(logits, name='logits')
        labels = self.convert_and_cast(labels, 'labels', logits.dtype)
        weights = self.convert_and_cast(weights, 'weights', logits.dtype)

        try:
            labels.get_shape().merge_with(logits.get_shape())
        except ValueError:
            raise ValueError('logits and labels must have the same shape (%s vs %s)' %
                             (logits.get_shape(), labels.get_shape()))

        original_shape = labels.get_shape().as_list()
        if labels.get_shape().ndims > 0:
            original_shape[0] = -1
        if labels.get_shape().ndims <= 1:
            labels = tf.reshape(labels, [-1, 1])
            logits = tf.reshape(logits, [-1, 1])

        if weights.get_shape().ndims == 1:
            # Weights has shape [batch_size]. Reshape to [batch_size, 1].
            weights = tf.reshape(weights, [-1, 1])
        if weights.get_shape().ndims == 0:
            # Weights is a scalar. Change shape of weights to match logits.
            weights *= tf.ones_like(logits)

        return labels, logits, weights, original_shape

    def convert_and_cast(self, value, name, dtype):
        """Convert `value` to a tensor and cast it to `dtype`."""
        return tf.cast(tf.convert_to_tensor(value), name=name, dtype=dtype)

    def prepare_loss_args(self, labels, logits, positive_weights, negative_weights):
        """Standardize dtypes and ranks of the cross-entropy arguments."""
        logits = tf.convert_to_tensor(logits, name='logits')
        labels = self.convert_and_cast(labels, 'labels', logits.dtype)
        if len(labels.get_shape()) == 2 and len(logits.get_shape()) == 3:
            labels = tf.expand_dims(labels, [2])

        positive_weights = self.convert_and_cast(positive_weights, 'positive_weights',
                                                 logits.dtype)
        positive_weights = self.expand_outer(positive_weights, logits.get_shape().ndims)
        negative_weights = self.convert_and_cast(negative_weights, 'negative_weights',
                                                 logits.dtype)
        negative_weights = self.expand_outer(negative_weights, logits.get_shape().ndims)
        return labels, logits, positive_weights, negative_weights

    def expand_outer(self, tensor, rank):
        """Prepend size-1 dimensions to `tensor` until it has rank `rank`.

        Raises:
            ValueError: if the tensor's rank is unknown or already above
                `rank`.
        """
        if tensor.get_shape().ndims is None:
            raise ValueError('tensor dimension must be known.')
        if len(tensor.get_shape()) > rank:
            raise ValueError(
                '`rank` must be at least the current tensor dimension: (%s vs %s).' %
                (rank, len(tensor.get_shape())))
        while len(tensor.get_shape()) < rank:
            tensor = tf.expand_dims(tensor, 0)
        return tensor

def _get_metrics(one_hot: bool) -> Mapping[Text, Any]:
  """Get a dict of available metrics to track.

  Args:
    one_hot: Whether labels are one-hot encoded. Both encodings currently map
      to the same AUC metric, so the flag does not affect the result.

  Returns:
    A mapping with the single entry 'auc' -> `tf.keras.metrics.AUC`.
  """
  del one_hot  # Same metric set for either label encoding.
  return {'auc': tf.keras.metrics.AUC(name='auc')}

def train_and_eval(
    params: base_configs.ExperimentConfig,
    strategy_override: tf.distribute.Strategy) -> Mapping[str, Any]:
  """Runs the train and eval path using compile/fit.

  Args:
    params: Experiment configuration; the runtime, dataset, model, train and
      evaluation sections are all read below.
    strategy_override: Distribution strategy to use. When falsy, a strategy is
      created from `params.runtime`.

  Returns:
    The result of `common.build_stats(history, validation_output, callbacks)`.
  """
  logging.info('Running train and eval.')

  # Note: for TPUs, strategy and scope should be created before the dataset
  strategy = strategy_override or distribution_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
      all_reduce_alg=params.runtime.all_reduce_alg,
      num_gpus=params.runtime.num_gpus,
      tpu_address=params.runtime.tpu)

  strategy_scope = distribution_utils.get_strategy_scope(strategy)

  logging.info('Detected %d devices.',
               strategy.num_replicas_in_sync if strategy else 1)

  label_smoothing = params.model.loss.label_smoothing
  # Truthy only for a positive label_smoothing; forwarded to the dataset
  # builders and to the metric selection.
  one_hot = label_smoothing and label_smoothing > 0

  builders = _get_dataset_builders(params, strategy, one_hot)
  datasets = [builder.build() if builder else None for builder in builders]

  # Unpack datasets and builders based on train/val/test splits
  train_builder, validation_builder = builders  # pylint: disable=unbalanced-tuple-unpacking
  train_dataset, validation_dataset = datasets

  train_epochs = params.train.epochs
  # Explicit step counts in the config take precedence over the builders'.
  train_steps = params.train.steps or train_builder.num_steps
  validation_steps = params.evaluation.steps or validation_builder.num_steps

  initialize(params, train_builder)

  logging.info('Global batch size: %d', train_builder.global_batch_size)

  # Model, optimizer, metrics and loss are all created inside the strategy
  # scope.
  with strategy_scope:
    model_params = params.model.model_params.as_dict()
    model = get_models()[params.model.name](**model_params)
    # model = get_models()[params.model.name](**model_params)
    learning_rate = optimizer_factory.build_learning_rate(
        params=params.model.learning_rate,
        batch_size=train_builder.global_batch_size,
        train_steps=train_steps)
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=params.model.optimizer.name,
        base_learning_rate=learning_rate,
        params=params.model.optimizer.as_dict())

    # Only the metric names listed in params.train.metrics are tracked; a
    # name missing from the map raises KeyError.
    metrics_map = _get_metrics(one_hot)
    metrics = [metrics_map[metric] for metric in params.train.metrics]

    # if one_hot:
    #   loss_obj = losses.CategoricalCrossentropy(
    #       label_smoothing=params.model.loss.label_smoothing)
    # else:
    #   loss_obj = losses.SparseCategoricalCrossentropy()

    # if one_hot:
    #   loss_obj = losses.BinaryCrossentropy(
    #       label_smoothing=params.model.loss.label_smoothing)
    # else:
    #   loss_obj = losses.BinaryCrossentropy()

    # NOTE(review): pos_weight and gamma are hard-coded here rather than read
    # from `params` -- confirm these are the intended values.
    loss_obj = BinaryFocalLoss(
          pos_weight=0.8,
          gamma=2,
          label_smoothing=params.model.loss.label_smoothing
    )

    # loss_obj = tfr.keras.losses.get(
    #     tfr.losses.RankingLossKey.SIGMOID_CROSS_ENTROPY_LOSS)

    # loss_obj = SurrogateLoss()
    # loss_obj = losses.RankBoostLoss()
    # loss_obj = losses.ROCAUCLoss()

    model.compile(optimizer=optimizer,
                  loss=loss_obj,
                  metrics=metrics)

    # Start from epoch 0 unless resuming from a checkpoint is enabled.
    initial_epoch = 0
    if params.train.resume_checkpoint:
      initial_epoch = resume_from_checkpoint(model=model,
                                             model_dir=params.model_dir,
                                             train_steps=train_steps)

  serialize_config(params=params, model_dir=params.model_dir)

  callbacks = custom_callbacks.get_callbacks(
      validation_dataset,
      params.validation_dataset.num_examples,
      model_checkpoint=params.train.callbacks.enable_checkpoint_and_export,
      include_tensorboard=params.train.callbacks.enable_tensorboard,
      time_history=params.train.callbacks.enable_time_history,
      track_lr=params.train.tensorboard.track_lr,
      write_model_weights=params.train.tensorboard.write_model_weights,
      initial_step=initial_epoch * train_steps,
      batch_size=train_builder.global_batch_size,
      log_steps=params.train.time_history.log_steps,
      model_dir=params.model_dir)

  # Validation arguments are passed to fit() only when evaluation is enabled.
  if params.evaluation.skip_eval:
    validation_kwargs = {}
  else:
    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': params.evaluation.epochs_between_evals,
    }

  history = model.fit(
      train_dataset,
      epochs=train_epochs,
      steps_per_epoch=train_steps,
      initial_epoch=initial_epoch,
      callbacks=callbacks,
      **validation_kwargs)

  # A final evaluation pass runs after training unless evaluation is skipped.
  validation_output = None
  if not params.evaluation.skip_eval:
    validation_output = model.evaluate(
        validation_dataset, steps=validation_steps, verbose=2)

  stats = common.build_stats(history,
                             validation_output,
                             callbacks)
  return stats

# Rebind these classifier_trainer attributes to the versions defined in this
# cell, so code that looks them up on classifier_trainer uses the overrides.
classifier_trainer.train_and_eval = train_and_eval
classifier_trainer._get_metrics = _get_metrics

In [None]:
#@title Optimizers

"""Optimizer factory for vision tasks."""
import tensorflow as tf
import tensorflow_addons as tfa

from typing import Any, Dict, Text
from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs
from official.vision.image_classification import learning_rate

def build_optimizer(
    optimizer_name: Text,
    base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
    params: Dict[Text, Any]):
  """Creates the optimizer selected by `optimizer_name`, plus optional wrappers.

  Args:
    optimizer_name: Case-insensitive name; one of 'sgd', 'momentum',
      'rmsprop', 'adam', 'nadam', 'adamw', 'lamb' or 'radam'.
    base_learning_rate: Learning rate (or schedule) handed to the optimizer.
    params: Optimizer hyper-parameters. Missing keys fall back to the defaults
      coded below, except 'momentum', which the 'momentum' optimizer requires.

  Returns:
    The optimizer, wrapped in `MovingAverage` when 'moving_average_decay' is
    positive, and then in `Lookahead` or (only if lookahead is off) `SWA` when
    the matching entry in `params` is truthy.

  Raises:
    ValueError: If `optimizer_name` is not one of the names above.
  """
  optimizer_name = optimizer_name.lower()
  logging.info('Building %s optimizer with params %s', optimizer_name, params)

  def _adam_style_kwargs(default_epsilon=1e-07):
    """Collects the arguments shared by every Adam-like optimizer."""
    return {
        'learning_rate': base_learning_rate,
        'beta_1': params.get('beta_1', 0.9),
        'beta_2': params.get('beta_2', 0.999),
        'epsilon': params.get('epsilon', default_epsilon),
    }

  if optimizer_name == 'sgd':
    logging.info('Using SGD optimizer')
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=base_learning_rate,
        nesterov=params.get('nesterov', False))
  elif optimizer_name == 'momentum':
    logging.info('Using momentum optimizer')
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=base_learning_rate,
        momentum=params['momentum'],
        nesterov=params.get('nesterov', False))
  elif optimizer_name == 'rmsprop':
    logging.info('Using RMSProp')
    # 'decay' takes precedence over 'rho' when it is set to a truthy value.
    discount = params.get('decay', None) or params.get('rho', 0.9)
    optimizer = tf.keras.optimizers.RMSprop(
        learning_rate=base_learning_rate,
        rho=discount,
        momentum=params.get('momentum', 0.9),
        epsilon=params.get('epsilon', 1e-07))
  elif optimizer_name == 'adam':
    logging.info('Using Adam')
    optimizer = tf.keras.optimizers.Adam(**_adam_style_kwargs())
  elif optimizer_name == 'nadam':
    logging.info('Using Nadam')
    optimizer = tf.keras.optimizers.Nadam(**_adam_style_kwargs())
  elif optimizer_name == 'adamw':
    logging.info('Using AdamW')
    optimizer = tfa.optimizers.AdamW(
        weight_decay=params.get('weight_decay', 0.01),
        **_adam_style_kwargs())
  elif optimizer_name == 'lamb':
    logging.info('Using LAMB')
    optimizer = tfa.optimizers.LAMB(
        weight_decay_rate=params.get('weight_decay', 0.0),
        **_adam_style_kwargs(default_epsilon=1e-06))
  elif optimizer_name == 'radam':
    logging.info('Using RAdam')
    optimizer = tfa.optimizers.RectifiedAdam(
        weight_decay=params.get('weight_decay', 0.0),
        **_adam_style_kwargs())
  else:
    raise ValueError('Unknown optimizer %s' % optimizer_name)

  # Optional wrappers, applied innermost first.
  ema_decay = params.get('moving_average_decay', 0.)
  if ema_decay is not None and ema_decay > 0.:
    logging.info('Including moving average decay.')
    optimizer = tfa.optimizers.MovingAverage(
        optimizer,
        average_decay=ema_decay,
        num_updates=None)

  if params.get('lookahead', None):
    logging.info('Using lookahead optimizer.')
    return tfa.optimizers.Lookahead(optimizer)
  if params.get('swa', None):
    logging.info('Using SWA optimizer.')
    return tfa.optimizers.SWA(optimizer)
  return optimizer

def build_learning_rate(params: base_configs.LearningRateConfig,
                        batch_size: int = None,
                        train_steps: int = None,
                        train_epochs: int = None):
  """Build the learning rate given the provided configuration.

  Args:
    params: Learning-rate configuration. `params.name` selects the schedule:
      'none', 'exponential', 'piecewise_constant_with_warmup' or
      'cosine_with_warmup'.
    batch_size: Batch size used to scale `initial_lr` when
      `params.scale_by_batch_size` is positive; also forwarded to the
      piecewise and cosine schedules.
    train_steps: Number of steps per epoch; converts `decay_epochs` and
      `warmup_epochs` into step counts.
    train_epochs: Total number of training epochs. Only required by
      'cosine_with_warmup', whose schedule spans
      `train_epochs * train_steps` steps.

  Returns:
    The scaled base learning rate (a float) for 'none' without warmup,
    otherwise a learning-rate schedule object.

  Raises:
    ValueError: If `params.name` is not a supported schedule, or if
      'cosine_with_warmup' is requested without `train_epochs`.
  """
  decay_type = params.name
  base_lr = params.initial_lr
  decay_rate = params.decay_rate
  if params.decay_epochs is not None:
    decay_steps = params.decay_epochs * train_steps
  else:
    decay_steps = 0
  if params.warmup_epochs is not None:
    warmup_steps = params.warmup_epochs * train_steps
  else:
    warmup_steps = 0

  lr_multiplier = params.scale_by_batch_size

  if lr_multiplier and lr_multiplier > 0:
    # Scale the learning rate based on the batch size and a multiplier
    base_lr *= lr_multiplier * batch_size
    logging.info('Scaling the learning rate based on the batch size '
                 'multiplier. New base_lr: %f', base_lr)

  if decay_type == 'none':
    logging.info('No decay schedule')
    lr = base_lr
  elif decay_type == 'exponential':
    logging.info('Using exponential learning rate with: '
                 'initial_learning_rate: %f, decay_steps: %d, '
                 'decay_rate: %f', base_lr, decay_steps, decay_rate)
    lr = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=base_lr,
        decay_steps=decay_steps,
        decay_rate=decay_rate)
  elif decay_type == 'piecewise_constant_with_warmup':
    logging.info('Using Piecewise constant decay with warmup. '
                 'Parameters: batch_size: %d, epoch_size: %d, '
                 'warmup_epochs: %d, boundaries: %s, multipliers: %s',
                 batch_size, params.examples_per_epoch,
                 params.warmup_epochs, params.boundaries,
                 params.multipliers)
    lr = learning_rate.PiecewiseConstantDecayWithWarmup(
        batch_size=batch_size,
        epoch_size=params.examples_per_epoch,
        warmup_epochs=params.warmup_epochs,
        boundaries=params.boundaries,
        multipliers=params.multipliers)
  elif decay_type == 'cosine_with_warmup':
    # The schedule length cannot be derived from `train_steps` alone; fail
    # with an actionable message instead of a NameError.
    if train_epochs is None:
      raise ValueError(
          "'cosine_with_warmup' requires `train_epochs` to compute the total "
          'number of training steps.')
    logging.info('Using Cosine Decay with Warmup')
    lr = CosineDecayWithWarmup(
        batch_size=batch_size,
        total_steps=train_epochs * train_steps,
        warmup_steps=warmup_steps)
  else:
    raise ValueError('Unknown learning rate decay type %s' % decay_type)

  # Schedules that implement their own warmup must not be wrapped a second
  # time, otherwise the warmup ramp is applied twice.
  schedules_with_builtin_warmup = ('piecewise_constant_with_warmup',
                                   'cosine_with_warmup')
  if warmup_steps > 0 and decay_type not in schedules_with_builtin_warmup:
    # NOTE(review): for decay_type 'none', `lr` is a plain float here;
    # WarmupDecaySchedule presumably expects a schedule object -- confirm
    # before combining 'none' with a non-zero warmup.
    logging.info('Applying %d warmup steps to the learning rate',
                 warmup_steps)
    lr = learning_rate.WarmupDecaySchedule(lr, warmup_steps)
  return lr


class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Cosine learning-rate decay preceded by a linear warmup."""

  def __init__(self, batch_size: int, total_steps: int, warmup_steps: int):
    """Creates the cosine learning rate tensor with linear warmup.

    Args:
      batch_size: The training batch size used in the experiment. The peak
        learning rate is `learning_rate.BASE_LEARNING_RATE * batch_size / 256`.
      total_steps: Total training steps.
      warmup_steps: Steps for the warm up period.
    """
    super(CosineDecayWithWarmup, self).__init__()
    base_lr_batch_size = 256
    # Kept so get_config() can return the exact constructor arguments.
    self._batch_size = batch_size
    self._total_steps = total_steps
    self._init_learning_rate = (
        learning_rate.BASE_LEARNING_RATE * batch_size / base_lr_batch_size)
    self._warmup_steps = warmup_steps

  def __call__(self, global_step: int):
    """Returns the learning rate for `global_step`.

    Steps below `warmup_steps` ramp linearly from 0 to the peak rate; later
    steps follow half a cosine period over the remaining steps.
    """
    # Local import so the schedule does not depend on names imported by other
    # notebook cells.
    import math

    global_step = tf.cast(global_step, dtype=tf.float32)
    warmup_steps = self._warmup_steps
    init_lr = self._init_learning_rate
    total_steps = self._total_steps

    linear_warmup = global_step / warmup_steps * init_lr

    decay_progress = (global_step - warmup_steps) / (total_steps - warmup_steps)
    cosine_learning_rate = init_lr * (tf.cos(math.pi * decay_progress) +
                                      1.0) / 2.0

    # tf.where evaluates both branches; only the selected value is returned.
    return tf.where(global_step < warmup_steps, linear_warmup,
                    cosine_learning_rate)

  def get_config(self):
    """Returns the constructor arguments, so `cls(**config)` rebuilds it."""
    return {
        "batch_size": self._batch_size,
        "total_steps": self._total_steps,
        "warmup_steps": self._warmup_steps,
    }


# Rebind optimizer_factory's builders to the versions defined in this cell;
# train_and_eval looks them up as optimizer_factory.build_* at call time.
optimizer_factory.build_optimizer = build_optimizer
optimizer_factory.build_learning_rate = build_learning_rate

# Train

In [None]:
from absl import app
from absl import flags

In [None]:
# Register the trainer's command-line flags so the flags.FLAGS.* assignments
# in the launcher cell below refer to defined flags.
define_classifier_flags()

In [None]:
%%writefile config.yaml

# Training configuration for EfficientNet 
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'tpu'
  run_eagerly: False
  enable_xla: True
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  one_hot: False

  # Normalization params
  # mean_subtract: True
  # standardize: True

  num_classes: 2
  # num_examples: 26488
  num_examples: 46648
  image_size: 512
  batch_size: 32
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
  augmenter:
    name: 'autoaugment'
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  cache: True
  one_hot: False

  # Normalization params
  # mean_subtract: True
  # standardize: True

  num_classes: 2
  # num_examples: 6638
  num_examples: 11809
  image_size: 512
  batch_size: 64
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
model:
  model_params:
    model_name: 'efficientnet-b3'
    model_weights_path: 'gs://recursion-kaggle/melanoma/efficientnet_b3_feature-vector'
    overrides:
      num_classes: 2
      batch_norm: 'tpu'
      dtype: 'bfloat16'
      dropout_rate: 0.3
  optimizer:
    # # RMSProp
    # name: 'rmsprop'
    # momentum: 0.9
    # decay: 0.9

    # Adam
    name: 'adam'
    beta_1: 0.9
    beta_2: 0.999
    epsilon: 0.0000001
    moving_average_decay: 0.0
    # decay: 0.9
    lookahead: False
    swa: False
    # # SGD
    # name: 'sgd'
    # momentum: 0.9
    # # decay: 0.9
    # nesterov: True

  learning_rate:
    # Alternative: cosine decay with warmup (disabled). Keep exactly one
    # active `name` key -- YAML mapping keys must be unique, and loaders that
    # tolerate duplicates silently keep only the last one.
    # initial_lr: 0.0003
    # name: 'cosine_with_warmup'
    # warmup_epochs: 12

    # Active: constant learning rate, no warmup.
    initial_lr: 0.0001
    name: 'none'
    warmup_epochs: 0

  loss:
    label_smoothing: 0.0
  num_classes: 2
train:
  resume_checkpoint: False
  epochs: 25
  metrics: ['auc']
  callbacks:
    enable_checkpoint_and_export: True
    enable_tensorboard: False
evaluation:
  epochs_between_evals: 1

Overwriting config.yaml


In [None]:
import random 
import numpy as np

def seed_everything(seed=42):
    """Seeds the NumPy, TensorFlow and Python `random` generators.

    Also exports `seed` as PYTHONHASHSEED.
    NOTE(review): hash randomization is fixed at interpreter start-up, so the
    environment variable presumably only affects child processes -- confirm.

    Args:
      seed: Integer seed applied to every generator.
    """
    for seed_library in (np.random.seed, tf.random.set_seed):
        seed_library(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    
seed_everything(42)

In [None]:
#@title Train
logging.set_verbosity(logging.INFO)

if __name__ == '__main__':
  # Drop the '-f' argument if present so absl does not try to parse it.
  # NOTE(review): presumably injected by the notebook kernel -- confirm.
  if '-f' in sys.argv:
    sys.argv.remove('-f')
  # Flags are set programmatically instead of on a command line.
  flags.FLAGS.mode = 'train_and_eval' 
  flags.FLAGS.model_type = 'efficientnet' 
  flags.FLAGS.dataset = 'imagenet' 
  flags.FLAGS.tpu = TPU_NAME 
  # NOTE(review): the directory name says 'b5_456' while config.yaml selects
  # efficientnet-b3 with image_size 512 -- confirm the intended output path.
  flags.FLAGS.model_dir = 'gs://recursion-kaggle/melanoma/models/model_b5_456' #@param {type:"string"}
  flags.FLAGS.data_dir = 'gs://recursion-kaggle/melanoma/stratified_ex/fold0' #@param {type:"string"}
  flags.FLAGS.config_file = 'config.yaml' #@param {type:"string"}

  app.run(main)

I0628 16:26:25.164272 139928829757312 classifier_trainer.py:185] Base params: {'evaluation': {'epochs_between_evals': 1, 'skip_eval': False, 'steps': None},
 'export': {'checkpoint': None, 'destination': None},
 'mode': None,
 'model': {'learning_rate': {'boundaries': None,
                             'decay_epochs': 2.4,
                             'decay_rate': 0.97,
                             'examples_per_epoch': None,
                             'initial_lr': 0.008,
                             'multipliers': None,
                             'name': 'exponential',
                             'scale_by_batch_size': 0.0078125,
                             'warmup_epochs': 5},
           'loss': {'label_smoothing': 0.1,
                    'loss_scale': None,
                    'name': 'categorical_crossentropy'},
           'model_params': {'copy_to_local': False,
                            'model_name': 'efficientnet-b0',
                            'model_weights_path': 

INFO:tensorflow:Initializing the TPU system: grpc://10.30.252.170:8470


I0628 16:26:25.231431 139928829757312 tpu_strategy_util.py:72] Initializing the TPU system: grpc://10.30.252.170:8470


INFO:tensorflow:Clearing out eager caches


I0628 16:26:25.297745 139928829757312 tpu_strategy_util.py:100] Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


I0628 16:26:32.281572 139928829757312 tpu_strategy_util.py:123] Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


I0628 16:26:32.284646 139928829757312 tpu_system_metadata.py:140] Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


I0628 16:26:32.285845 139928829757312 tpu_system_metadata.py:141] *** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


I0628 16:26:32.287651 139928829757312 tpu_system_metadata.py:142] *** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


I0628 16:26:32.288605 139928829757312 tpu_system_metadata.py:144] *** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


I0628 16:26:32.289468 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


I0628 16:26:32.290786 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


I0628 16:26:32.291608 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


I0628 16:26:32.292473 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


I0628 16:26:32.293260 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


I0628 16:26:32.294251 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


I0628 16:26:32.295117 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


I0628 16:26:32.296406 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


I0628 16:26:32.297228 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


I0628 16:26:32.299665 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


I0628 16:26:32.300487 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


I0628 16:26:32.301246 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


I0628 16:26:32.303053 139928829757312 tpu_system_metadata.py:146] *** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)
I0628 16:26:32.304190 139928829757312 <ipython-input-7-8a309cd37e10>:206] Detected 8 devices.
W0628 16:26:32.304748 139928829757312 classifier_trainer.py:109] label_smoothing not applied, so datasets will not be one hot encoded.
I0628 16:26:32.305449 139928829757312 dataset_factory.py:174] Using augmentation: autoaugment
I0628 16:26:32.306210 139928829757312 dataset_factory.py:174] Using augmentation: None
I0628 16:26:32.306848 139928829757312 <ipython-input-4-dd4a5ce285fb>:72] Using TFRecords to load data.


Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.


W0628 16:26:33.344040 139928829757312 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/image_ops_impl.py:2827: sample_distorted_bounding_box (from tensorflow.python.ops.image_ops_impl) is deprecated and will be removed in a future version.
Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.
I0628 16:26:41.267786 139928829757312 <ipython-input-4-dd4a5ce285fb>:72] Using TFRecords to load data.
I0628 16:26:41.348410 139928829757312 dataset_builder.py:202] Load pre-computed datasetinfo (eg: splits) from bucket.
I0628 16:26:41.393894 139928829757312 dataset_info.py:431] Loading info from GCS for imagenet2012/5.0.0
I0628 16:26:41.561135 139928829757312 dataset_info.py:403] Field info.description from disk and from code do not match. Keeping the one from code.
I0628 16:26:41.963446 139928829757312 <ipython-input-7-8a309cd37e10>:224] Global batch size: 256
I0628 16:26:41.984492 139928829757312 efficientnet_mode

Epoch 1/25
Epoch 00001: val_auc improved from -inf to 0.78565, saving model to gs://recursion-kaggle/melanoma/models/model_b5_456/model.ckpt
Epoch 2/25
Epoch 00002: val_auc improved from 0.78565 to 0.84353, saving model to gs://recursion-kaggle/melanoma/models/model_b5_456/model.ckpt
Epoch 3/25
Epoch 00003: val_auc improved from 0.84353 to 0.89105, saving model to gs://recursion-kaggle/melanoma/models/model_b5_456/model.ckpt
Epoch 4/25
Epoch 00004: val_auc improved from 0.89105 to 0.90025, saving model to gs://recursion-kaggle/melanoma/models/model_b5_456/model.ckpt
Epoch 5/25
Epoch 00005: val_auc did not improve from 0.90025 (val_auc = 0.89597)
Epoch 6/25
Epoch 00006: val_auc did not improve from 0.90025 (val_auc = 0.89256)
Epoch 7/25
Epoch 00007: val_auc improved from 0.90025 to 0.90378, saving model to gs://recursion-kaggle/melanoma/models/model_b5_456/model.ckpt
Epoch 8/25
Epoch 00008: val_auc improved from 0.90378 to 0.91216, saving model to gs://recursion-kaggle/melanoma/models/m

# Test

In [None]:
#@title Enter Kaggle Credentials
import os
from getpass import getpass
# The username is a Colab form field; the API key is read with getpass()
# rather than being written into the notebook.
os.environ['KAGGLE_USERNAME'] = 'ranik40' #@param {type:"string"}
print('Enter your kaggle key')
os.environ['KAGGLE_KEY'] = getpass() 

Enter your kaggle key
··········


In [None]:
#@title Submission
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import pprint
from typing import Any, Tuple, Text, Optional, Mapping

from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
import pandas as pd
import numpy as np
from focal_loss import BinaryFocalLoss

from official.vision.image_classification.classifier_trainer import *
from official.vision.image_classification.classifier_trainer import (
    _get_params_from_flags
)
from official.vision.image_classification import classifier_trainer

import tensorflow.keras.backend as K


def _get_dataset_builders(params: base_configs.ExperimentConfig,
                          strategy: tf.distribute.Strategy,
                          one_hot: bool
                         ) -> Optional[Any]:
  """Create and return the dataset builder for `params.validation_dataset`.

  Args:
    params: Experiment configuration; only `validation_dataset` is used to
      build a dataset here.
    strategy: Distribution strategy; its replica count becomes `num_devices`
      (1 when `strategy` is falsy).
    one_hot: Forwarded to the builder and reported in a log message.

  Returns:
    A single `dataset_factory.DatasetBuilder`, or None when the validation
    dataset config is missing or reports no data.
  """
  if one_hot:
    logging.warning('label_smoothing > 0, so datasets will be one hot encoded.')
  else:
    logging.warning('label_smoothing not applied, so datasets will not be one '
                    'hot encoded.')

  num_devices = strategy.num_replicas_in_sync if strategy else 1

  image_size = get_image_size_from_model(params)

  config = params.validation_dataset
  if config is None or not config.has_data:
    return None

  # The model's image size takes precedence over the one in the dataset config.
  return dataset_factory.DatasetBuilder(
      config,
      image_size=image_size or config.image_size,
      num_devices=num_devices,
      one_hot=one_hot)


def resume_from_checkpoint(model: tf.keras.Model,
                           model_dir: str) -> int:
  """Restores `model` weights from the latest checkpoint in `model_dir`.

  Args:
    model: Model whose weights are loaded in place.
    model_dir: Directory searched with `tf.train.latest_checkpoint`.

  Returns:
    Always 0. Only weights are restored, so no epoch counter is recovered;
    returning an int on both paths matches the declared return type.
  """
  logging.info('Load from checkpoint is enabled.')
  latest_checkpoint = tf.train.latest_checkpoint(model_dir)
  logging.info('latest_checkpoint: %s', latest_checkpoint)
  if not latest_checkpoint:
    # The model keeps whatever weights it was constructed with.
    logging.info('No checkpoint detected.')
    return 0

  logging.info('Checkpoint file %s found and restoring from '
               'checkpoint', latest_checkpoint)
  model.load_weights(latest_checkpoint)
  logging.info('Completed loading from checkpoint.')
  return 0


def train_and_eval(
    params: base_configs.ExperimentConfig,
    strategy_override: tf.distribute.Strategy) -> None:
  """Predicts on `params.validation_dataset` and writes a submission CSV.

  Despite its name, this function performs no training or evaluation: it
  builds the model, optionally restores the latest checkpoint, runs
  `model.predict` and writes `/content/submission.csv` with the columns
  `image_name` and `target`.

  Args:
    params: Experiment configuration (runtime, validation dataset, model and
      `train.resume_checkpoint` are read).
    strategy_override: Distribution strategy to use. When falsy, a strategy is
      created from `params.runtime`.

  Raises:
    ValueError: If no dataset builder could be created, or if the number of
      collected ids differs from the number of predictions.
  """
  logging.info('Running train and eval.')

  # Note: for TPUs, strategy and scope should be created before the dataset
  strategy = strategy_override or distribution_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
      all_reduce_alg=params.runtime.all_reduce_alg,
      num_gpus=params.runtime.num_gpus,
      tpu_address=params.runtime.tpu)

  strategy_scope = distribution_utils.get_strategy_scope(strategy)

  logging.info('Detected %d devices.',
               strategy.num_replicas_in_sync if strategy else 1)

  label_smoothing = params.model.loss.label_smoothing
  one_hot = label_smoothing and label_smoothing > 0

  builder = _get_dataset_builders(params, strategy, one_hot)
  if builder is None:
    raise ValueError('No dataset builder was created; check that '
                     '`validation_dataset` is configured and has data.')
  dataset = builder.build()

  initialize(params, builder)

  logging.info('Global batch size: %d', builder.global_batch_size)

  with strategy_scope:
    model_params = params.model.model_params.as_dict()
    model = get_models()[params.model.name](**model_params)
    if params.train.resume_checkpoint:
      resume_from_checkpoint(model=model, model_dir=params.model_dir)

  serialize_config(params=params, model_dir=params.model_dir)

  # Generate submission.
  # Dataset elements are (image, id) pairs. The ids are collected first, then
  # the images are fed to predict().
  # NOTE(review): the dataset is iterated twice, which assumes a deterministic
  # element order -- confirm against the dataset builder.
  NUM_TEST_IMAGES = 10982
  test_ids_ds = dataset.map(lambda image, idnum: idnum).unbatch()
  # A single batch of up to NUM_TEST_IMAGES ids holds every id at once.
  test_ids = next(iter(test_ids_ds.batch(NUM_TEST_IMAGES))).numpy().astype('U')

  images_ds = dataset.map(lambda image, idnum: image)
  probabilities = model.predict(images_ds)
  # Joins the per-example rows into one flat 1-D array.
  probabilities = np.concatenate(probabilities)

  if len(test_ids) != len(probabilities):
    raise ValueError(
        'Collected %d image ids but %d predictions; check NUM_TEST_IMAGES and '
        'the model output shape.' % (len(test_ids), len(probabilities)))

  print('Generating submission.csv file...')
  print(test_ids)
  print(probabilities)

  pred_df = pd.DataFrame({'image_name': test_ids,
                          'target': probabilities})

  SUBMISSION_FILE = '/content/submission.csv'
  pred_df.to_csv(SUBMISSION_FILE, index=False)

# Rebind these classifier_trainer attributes to the inference-only versions
# defined in this cell.
classifier_trainer._get_dataset_builders = _get_dataset_builders
classifier_trainer.train_and_eval = train_and_eval
classifier_trainer.resume_from_checkpoint = resume_from_checkpoint

In [None]:
# NOTE(review): this is the second define_classifier_flags() call in the
# notebook. absl normally rejects re-defining an existing flag, so this cell
# presumably expects a fresh runtime -- confirm.
define_classifier_flags()

In [None]:
%%writefile config.yaml

# Inference configuration for EfficientNet: predicts on the 'test' split on TPUs.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'tpu'
  run_eagerly: False
  enable_xla: True
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'test'
  one_hot: False
  num_classes: 2
  num_examples: 6625
  image_size: 512
  batch_size: 64
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
model:
  model_params:
    model_name: 'efficientnet-b3'
    overrides:
      num_classes: 2
      batch_norm: 'tpu'
      dtype: 'bfloat16'
  loss:
    label_smoothing: 0.0
  num_classes: 2
train:
  resume_checkpoint: True

In [None]:
#@title Test
logging.set_verbosity(logging.INFO)

if __name__ == '__main__':
  # Drop the '-f' argument if present so absl does not try to parse it.
  # NOTE(review): presumably injected by the notebook kernel -- confirm.
  if '-f' in sys.argv:
    sys.argv.remove('-f')
  # The mode stays 'train_and_eval'; classifier_trainer.train_and_eval has
  # been rebound to the submission-writing function in the Submission cell.
  flags.FLAGS.mode = 'train_and_eval' 
  flags.FLAGS.model_type = 'efficientnet' 
  flags.FLAGS.dataset = 'imagenet' 
  flags.FLAGS.tpu = TPU_NAME 
  # model_dir is searched for the latest checkpoint when
  # train.resume_checkpoint is enabled in config.yaml.
  flags.FLAGS.model_dir = 'gs://recursion-kaggle/melanoma/models/model_b5_456' #@param {type:"string"}
  flags.FLAGS.data_dir = 'gs://recursion-kaggle/melanoma/stratified_ex/test' #@param {type:"string"}
  flags.FLAGS.config_file = 'config.yaml' #@param {type:"string"}

  app.run(main)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the submission file (presumably the one written by the Test cell) and
# inspect the distribution of predicted targets.
sub = pd.read_csv('submission.csv')

# 100-bin histogram; the y-axis is clipped at 100 so sparsely populated bins
# remain visible.
plt.hist(sub.target,bins=100)
plt.ylim((0,100))
plt.show()

In [None]:
sub.head(10)

In [None]:
!kaggle competitions submit -c siim-isic-melanoma-classification -f submission.csv -m "b4 0.9375 img+meta"

In [None]:
from google.colab import files
files.download('submission.csv')