This kernel was created to run a few experiments of my own to answer two very important questions:

* **What's the best way to use the cadence snippet?** Should we use it **channel-wise or spatially?** **Should we use all 6 spectrograms or just the ones with aliens' signal?**

* **Mixup** is giving a significant performance boost. But what's the **gain in percentage**? How much are the models trained with Mixup dependent on random initialization?

You can find the detailed summary in this discussion post [here](https://www.kaggle.com/c/seti-breakthrough-listen/discussion/245152). For an interactive summary check out this [W&B report](http://wandb.me/seti-img-mixup-exp). 

# 🧰 Imports and Setups

In [None]:
# Upgrade the Weights & Biases client in the notebook environment (IPython shell escape).
!pip install -q --upgrade wandb
import wandb
print(wandb.__version__)
# Keras callback that streams training metrics to the active W&B run.
from wandb.keras import WandbCallback

# Authenticate this session with W&B before any run is created.
wandb.login()

In [None]:
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras import layers
from tensorflow.keras import models
import tensorflow_addons as tfa
from tensorflow.keras import mixed_precision

import tensorflow_probability as tfp
tfd = tfp.distributions

import os
import gc
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
from functools import partial

from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split

In [None]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be identical on every visible GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialized before this cell ran.
        print(err)

# 📀 Hyperparameters

In [None]:
# Directory that holds the training .npy cadence snippets.
TRAIN_PATH = '../input/seti-breakthrough-listen/train/'
# Sentinel that lets tf.data pick parallelism / prefetch sizes at runtime.
AUTOTUNE = tf.data.AUTOTUNE

# Experiment-wide settings; the same dict is later passed to wandb.init as the run config.
CONFIG = {
    # Spatial size every spectrogram arrangement is resized to.
    'img_width': 224,
    'img_height': 224,
    # Optimisation settings.
    'batch_size': 32,
    'epochs': 100,
    'learning_rate': 1e-3,
    # Bookkeeping metadata recorded alongside each run.
    'competition': 'seti',
    '_wandb_kernel': 'ayut',
    'architecture': "CNN",
    'infra': "Kaggle",
}

# 🔨 Build Input Pipeline

In [None]:
# Note: Please run this cell once and run all your experiments using the train_df and valid_df.
# Both the subsample and the split are seeded so that re-running the cell (or
# starting a new kernel session to try another mode) yields the same rows;
# otherwise results from different sessions are not comparable.
df = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')
df = df.sample(5000, random_state=42).reset_index(drop=True)
print(f'Number of train images: {len(df)}')
# Files are sharded into sub-folders named after the first character of the id.
df['img_path'] = df['id'].apply(lambda x: f'../input/seti-breakthrough-listen/train/{x[0]}/{x}.npy')

# Stratify on the target so both splits keep the same class ratio.
train_df, valid_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['target'].values,
    random_state=42,
)
print(len(train_df), len(valid_df))
df.head()

In [None]:
# Per-class weights inversely proportional to class frequency in the training split.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_df['target'].values),
    y=train_df['target'].values,
)

# Map each class label to its weight; this is the form passed to model.fit(class_weight=...).
class_weights_dict = dict(zip(np.unique(train_df['target'].values), class_weights))
class_weights_dict

In [None]:
def load_npy(path, mode):
    """Load one cadence snippet from a ``.npy`` file and arrange its spectrograms.

    Args:
        path: Object whose ``.numpy()`` returns the file path (this is what
            ``tf.py_function`` hands over for a string tensor).
        mode: Arrangement selector:
            0 - all six spectrograms stacked along the channel axis;
            1 - spectrograms 0, 2 and 4 stacked along the channel axis;
            2 - all six spectrograms stacked vertically, then transposed;
            3 - spectrograms 0, 2 and 4 stacked vertically, then transposed;
            4 - like 3, then standardised with the mean/std taken over axis 0;
            5 - like 3, then clipped to [-1, 3] and rescaled to [0, 1].

    Returns:
        The arranged float32 data. Modes 0 and 1 return a NumPy array; modes
        2-5 return a tensor with a trailing channel axis of size 1.

    Raises:
        ValueError: If ``mode`` is not one of 0-5. The original fell through
            and returned ``None``, which only failed later inside the
            ``tf.data`` pipeline with an unrelated error.
    """
    # load npy data
    data = np.load(path.numpy()).astype(np.float32)

    if mode == 0:
        # channel wise full stack
        data = np.dstack((data[0], data[1], data[2], data[3], data[4], data[5]))
        return data  # (273, 256, 6)

    elif mode == 1:
        # channel wise target stack
        data = np.dstack((data[0], data[2], data[4]))
        return data  # (273, 256, 3)

    elif mode == 2:
        # Spatially stack spectrograms
        data = np.vstack(data).transpose((1, 0))
        data = tf.expand_dims(data, -1)
        return data  # (256, 1638, 1)

    elif mode == 3:
        # Spatially stack target spectrograms
        data = np.vstack((data[0], data[2], data[4])).transpose((1, 0))
        data = tf.expand_dims(data, -1)
        return data  # (256, 819, 1)

    elif mode == 4:
        # Spatially stack target and normalize
        data = np.vstack((data[0], data[2], data[4])).transpose((1, 0))
        # NOTE(review): a column with zero variance yields inf/nan here.
        data = ((data - np.mean(data, axis=0)) / np.std(data, axis=0))
        data = tf.expand_dims(data, -1)
        return data  # (256, 819, 1)

    elif mode == 5:
        # Spatially stack target spectrograms, clip and then normalize
        data = np.vstack((data[0], data[2], data[4])).transpose((1, 0))
        # Clip to [-1, 3], map to 0..255 as uint8, then back to float in [0, 1].
        data = ((np.clip(data, -1, 3) + 1) / 4 * 255).astype(np.uint8)
        data = tf.image.convert_image_dtype(data, tf.float32)
        data = tf.expand_dims(data, -1)
        return data  # (256, 819, 1)

    # Fail loudly instead of implicitly returning None for an unknown mode.
    raise ValueError(f'Unsupported mode: {mode!r}; expected an integer in 0-5.')
    
@tf.function
def load_resize_spec(df_dict, mode):
    """Turn one dataframe row into an (image, one-hot label) training pair.

    Args:
        df_dict: Mapping with at least the keys 'img_path' and 'target'.
        mode: Arrangement selector forwarded to ``load_npy``.

    Returns:
        Tuple ``(image, label)``: the image resized to the CONFIG height/width
        and randomly flipped left-right, and the target one-hot encoded with
        depth 2.
    """
    # Static shape produced by load_npy for each mode; py_function outputs
    # carry no shape information, so it has to be restored by hand.
    static_shapes = {
        0: (273, 256, 6),
        1: (273, 256, 3),
        2: (256, 1638, 1),
        3: (256, 819, 1),
        4: (256, 819, 1),
        5: (256, 819, 1),
    }

    # Run the NumPy-based loader from inside the TensorFlow pipeline.
    (image,) = tf.py_function(load_npy, [df_dict['img_path'], mode], [tf.float32])

    known_shape = static_shapes.get(mode)
    if known_shape is not None:
        image.set_shape(known_shape)

    # Resize to (224, 224, channel), then apply the simple flip augmentation.
    # NOTE(review): the loaders in this file map this function over the
    # validation data too, so the random flip is applied there as well.
    target_size = (CONFIG['img_height'], CONFIG['img_width'])
    image = tf.image.random_flip_left_right(tf.image.resize(image, target_size))

    # One-hot encode the binary target.
    label = tf.one_hot(df_dict['target'], depth=2)

    return image, label

# Mixup augmentation
@tf.function
def mixup(a, b, alpha=1.0):
    """Blend two (image, label) samples with a Beta(alpha, alpha) weight.

    Args:
        a: First ``(image, label)`` pair.
        b: Second ``(image, label)`` pair.
        alpha: Value used for both concentration parameters of the Beta
            distribution.

    Returns:
        Tuple ``(image, label)`` where both are the same convex combination
        of the two inputs.
    """
    first_image, first_label = a
    second_image, second_label = b

    # Draw a single scalar mixing coefficient from Beta(alpha, alpha).
    lam = tfd.Beta([alpha], [alpha]).sample(1)[0][0]

    # The same coefficient weights both the pixels and the labels.
    mixed_image = lam * first_image + (1 - lam) * second_image
    mixed_label = lam * first_label + (1 - lam) * second_label

    return mixed_image, mixed_label

In [None]:
AUTOTUNE = tf.data.AUTOTUNE


def get_dataloaders(train_df, valid_df, mode):
    """Build the plain (no mixup) training and validation pipelines.

    Args:
        train_df: Training dataframe; its columns become the dict fed to
            ``load_resize_spec``.
        valid_df: Validation dataframe, same layout.
        mode: Arrangement selector forwarded to ``load_resize_spec``.

    Returns:
        Tuple ``(trainloader, validloader)`` of batched, prefetched datasets.
        Only the training pipeline is shuffled.
    """
    parse = partial(load_resize_spec, mode=mode)
    batch_size = CONFIG['batch_size']

    train_rows = tf.data.Dataset.from_tensor_slices(dict(train_df))
    valid_rows = tf.data.Dataset.from_tensor_slices(dict(valid_df))

    # Shuffle the lightweight rows before loading any image.
    trainloader = train_rows.shuffle(1024)
    trainloader = trainloader.map(parse, num_parallel_calls=AUTOTUNE)
    trainloader = trainloader.batch(batch_size).prefetch(AUTOTUNE)

    validloader = valid_rows.map(parse, num_parallel_calls=AUTOTUNE)
    validloader = validloader.batch(batch_size).prefetch(AUTOTUNE)

    return trainloader, validloader


def get_mixup_dataloaders(train_df, valid_df, mode, alpha=1.0):
    """Build a mixup training pipeline and a plain validation pipeline.

    Args:
        train_df: Training dataframe; its columns become the dict fed to
            ``load_resize_spec``.
        valid_df: Validation dataframe, same layout.
        mode: Arrangement selector forwarded to ``load_resize_spec``.
        alpha: Beta-distribution parameter forwarded to ``mixup``.

    Returns:
        Tuple ``(trainloader, validloader)`` of batched, prefetched datasets.
        Training samples are mixup blends of two independently shuffled
        passes over ``train_df``; validation samples are not mixed.
    """
    parse = partial(load_resize_spec, mode=mode)
    batch_size = CONFIG['batch_size']

    def shuffled_train_stream():
        # One independently shuffled, decoded pass over the training rows.
        rows = tf.data.Dataset.from_tensor_slices(dict(train_df))
        return rows.shuffle(1024).map(parse, num_parallel_calls=AUTOTUNE)

    # Pair up two differently ordered streams so each element holds two samples.
    paired = tf.data.Dataset.zip((shuffled_train_stream(), shuffled_train_stream()))

    valid_rows = tf.data.Dataset.from_tensor_slices(dict(valid_df))

    trainloader = paired.shuffle(1024)
    trainloader = trainloader.map(partial(mixup, alpha=alpha), num_parallel_calls=AUTOTUNE)
    trainloader = trainloader.batch(batch_size).prefetch(AUTOTUNE)

    validloader = valid_rows.map(parse, num_parallel_calls=AUTOTUNE)
    validloader = validloader.batch(batch_size).prefetch(AUTOTUNE)

    return trainloader, validloader

In [None]:
# Sanity check: build the mixup pipeline for mode 5 and pull one batch from it.
trainloader, validloader = get_mixup_dataloaders(train_df, valid_df, mode=5)
imgs, labels = next(iter(trainloader))

# 🐤 Model

In [None]:
def get_model(mode):
    """Build an ImageNet-pretrained EfficientNetB0 classifier for one input arrangement.

    Args:
        mode: Arrangement selector. 0 expects 6-channel inputs, 1 expects
            3-channel inputs, any other value expects 1-channel inputs.

    Returns:
        A ``tf.keras`` model mapping an image of CONFIG height/width to two
        sigmoid outputs.
    """
    height, width = CONFIG['img_height'], CONFIG['img_width']

    base_model = tf.keras.applications.EfficientNetB0(
        input_shape=(height, width, 3),
        include_top=False,
        weights='imagenet',
    )
    # Fine-tune the whole backbone. (The original wrote `trainabe`, which
    # merely created an unused attribute.)
    base_model.trainable = True

    if mode == 0:
        inputs = layers.Input((height, width, 6))
        # Learnable projection from 6 channels to the 3 the backbone expects.
        x = layers.Conv2D(3, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(inputs)
    elif mode == 1:
        inputs = layers.Input((height, width, 3))
        x = inputs
    else:
        inputs = layers.Input((height, width, 1))
        # Learnable projection from 1 channel to the 3 the backbone expects.
        x = layers.Conv2D(3, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(inputs)

    # Do not hard-code `training=True` here: that pins BatchNorm and dropout
    # inside the backbone to training behaviour even during evaluate/predict.
    # Leaving the flag unset lets Keras use training mode in fit() and
    # inference mode elsewhere.
    x = base_model(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(2)(x)
    # Explicit float32 output dtype (matters if a mixed-precision policy is active).
    outputs = layers.Activation('sigmoid', dtype='float32', name='predictions')(outputs)

    return models.Model(inputs, outputs)

# Reset Keras global state, then build the 3-channel variant once to inspect its layers.
tf.keras.backend.clear_session() 
model = get_model(mode=1)
model.summary()

# Callbacks

In [None]:
# Early stopping: end training once validation loss has not improved for
# 5 consecutive epochs, and roll the model back to its best weights.
earlystopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
    verbose=0,
)

# Experiments to Run: Select the experiment mode

In [None]:
# Experiment names in id order; the position in this tuple is the mode id
# understood by the data-loading and model-building code.
_EXPERIMENT_NAMES = (
    'channel-wise-full',
    'channel-wise-target',
    'spatial-full',
    'spatial-target',
    'spatial-target-normalize',
    'spatial-target-clip',
)
MODES = {name: mode_id for mode_id, name in enumerate(_EXPERIMENT_NAMES)}
# Reverse lookup: mode id -> experiment name.
MODES_ID_TO_EXP = {mode_id: name for name, mode_id in MODES.items()}

# Toggle mixup augmentation for the arrangement experiment.
USE_MIXUP = True
# Please change the key here to pick the experiment.
mode = MODES['spatial-target-clip']
exp_name = MODES_ID_TO_EXP[mode]

print(f'Running the experiment : {exp_name} and with/without mixup: {USE_MIXUP}')

# 🚄 Train to find best image arrangement

You can find the detailed summary in this discussion post [here](https://www.kaggle.com/c/seti-breakthrough-listen/discussion/245152). For an interactive summary check out this [W&B report](http://wandb.me/seti-img-mixup-exp). 

In [None]:
# Prepare dataloaders.
# The original had these two branches swapped, so USE_MIXUP=True trained
# WITHOUT mixup and vice versa.
if USE_MIXUP:
    trainloader, validloader = get_mixup_dataloaders(train_df, valid_df, mode=mode)
else:
    trainloader, validloader = get_dataloaders(train_df, valid_df, mode=mode)

# One run per seed to measure sensitivity to random initialization.
SEEDS = [42, 64, 524]

for seed in SEEDS:
    # Initialize model from a clean Keras state with a fixed TF seed.
    tf.keras.backend.clear_session()
    tf.random.set_seed(seed)
    model = get_model(mode=mode)

    # Compile model
    optimizer = tf.keras.optimizers.Adam(learning_rate=CONFIG['learning_rate'])
    model.compile(optimizer,
                  loss='binary_crossentropy',
                  metrics=[tf.keras.metrics.AUC(curve='ROC')])

    # Record run-specific metadata in the config that is sent to W&B.
    CONFIG['seed'] = seed
    CONFIG['model_name'] = 'EfficientNetB0'
    CONFIG['group'] = exp_name
    print('Training configuration: ', CONFIG)

    # Initialize W&B run; runs of the same experiment share a group.
    run = wandb.init(project='kaggle-seti-exp',
                     config=CONFIG,
                     group=CONFIG['group'],
                     job_type='train')

    # Train
    _ = model.fit(trainloader,
                  epochs=CONFIG['epochs'],
                  validation_data=validloader,
                  class_weight=class_weights_dict,
                  callbacks=[WandbCallback(),
                             earlystopper])

    # Evaluate on the validation split and log the final AUC.
    loss, auc = model.evaluate(validloader)
    wandb.log({'Val AUC-ROC': auc})

    # Close W&B run
    run.finish()

    # Free the model before the next seed to keep memory usage down.
    del model
    _ = gc.collect()

## [Check out the dashboard here $\rightarrow$](https://wandb.ai/ayush-thakur/kaggle-seti-exp?workspace=user-ayush-thakur) 

## [Experiment summary report here $\rightarrow$](http://wandb.me/seti-img-mixup-exp)

![img](https://i.imgur.com/InH4hEi.gif)

# Find the best alpha value for Mixup

The mixup augmentation mixes two images pixel-wise and mixes their labels as well. This is done by weighted element-wise sum where the weight is sampled from the [Beta Distribution](https://en.wikipedia.org/wiki/Beta_distribution). 

The Beta distribution depends on two parameters - `alpha` and `beta`. In the context of Mixup, `alpha` and `beta` take the same value, and that value is less than or equal to 1.0. You can play with this interactive chart [here](https://keisan.casio.com/exec/system/1180573226).

In this experiment we will use different values of alpha (beta) and find out:
* if there is any effect of alpha on this dataset, <br>
* if yes, what's the optimal value to use. 

In [None]:
# Candidate Beta(alpha, alpha) parameters for mixup.
ALPHAS = [0.2, 0.4, 0.6, 0.8, 1.0]
# Final validation AUCs, appended in (alpha, seed) iteration order.
VAL_AUC_ROC = []
SEEDS = [42, 64, 524]

for alpha in ALPHAS:
    # The loaders depend only on alpha, so build them once per alpha value.
    trainloader, validloader = get_mixup_dataloaders(
        train_df, valid_df, mode=mode, alpha=alpha
    )

    # Repeat the experiment once per seed.
    for seed in SEEDS:
        # Fresh Keras state and a fixed TF seed for every run.
        tf.keras.backend.clear_session()
        tf.random.set_seed(seed)
        model = get_model(mode=mode)

        # Compile with Adam, binary cross-entropy and ROC-AUC as the metric.
        model.compile(
            tf.keras.optimizers.Adam(learning_rate=CONFIG['learning_rate']),
            loss='binary_crossentropy',
            metrics=[tf.keras.metrics.AUC(curve='ROC')],
        )

        # Record run-specific metadata in the config that is sent to W&B.
        CONFIG.update(
            seed=seed,
            model_name='EfficientNetB0',
            group=f'Mixup-Alpha-{alpha}',
        )
        print('Training configuration: ', CONFIG)

        # Runs that share an alpha are grouped together in W&B.
        run = wandb.init(
            project='kaggle-seti-exp2',
            config=CONFIG,
            group=CONFIG['group'],
            job_type='train',
        )

        # Train
        _ = model.fit(
            trainloader,
            epochs=CONFIG['epochs'],
            validation_data=validloader,
            class_weight=class_weights_dict,
            callbacks=[WandbCallback(), earlystopper],
        )

        # Evaluate on the validation split; keep the AUC locally and in W&B.
        loss, auc = model.evaluate(validloader)
        VAL_AUC_ROC.append(auc)
        wandb.log({'Val AUC-ROC': auc})

        # Close the run and release the model before the next iteration.
        run.finish()

        del model
        _ = gc.collect()

## [Check out the W&B Dashboard here $\rightarrow$](http://wandb.me/kaggle-seti-alpha-mixup)

![img](https://i.imgur.com/Ca02x9U.gif)